diff --git a/README.md b/README.md
index a8ac6ae..429e9cf 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # mdai-utils
 
-Utility functions for MD.ai
+Utility functions for MD.ai. Download and upload 2D and 3D segmentation images.
 
 ## Download data
 
@@ -22,16 +22,38 @@ python -m mdai_utils.download_annotations \
 -o ./data
 ```
 
-## Upload annotations / segmentations
+## Upload 2D segmentations
 
 ```bash
-python -m mdai_utils.upload_annotations \
+python -m mdai_utils.upload_annotation_slice \
 --parameters ./tests/test_local_parameters.json \
 --sop_instance_uid "1.2.826.0.1.3680043.2.1125.1.75064541463040.2005072610414630768" \
 --label_name mylabel \
 -i ./tests/fixtures/humanct_0002_1000_seg.nii.gz
 ```
 
+## Upload 3D segmentations
+
+MD.ai works with DICOMs and uses the SOPInstanceUID as the key to match slices.
+Your algorithm might work with 3D volumes (.nrrd, .nii.gz, etc.). You can convert an input dicom_folder to a 3D volume, and also store the mapping between the new volume indices and the original DICOM files, with their SOPInstanceUIDs.
+
+```bash
+python -m mdai_utils.dicoms_to_volume -i ./tests/fixtures/humanct_0002_1000_1004 -o /tmp/humanct_0002_1000_1004.nrrd
+```
+
+Next to the output image location, a `{image_filename}_SOPInstanceUIDs.json` file will be saved with the slice
+mappings.
+
+If you have a 3D volume segmentation to upload, use the mappings:
+
+```bash
+python -m mdai_utils.upload_annotation_volume \
+--parameters ./tests/test_local_parameters.json \
+--sop_instance_uids_file ./tests/fixtures/humanct_0002_1000_1004_SOPInstanceUIDs.json \
+--label_name mylabel \
+-i ./tests/fixtures/humanct_0002_1000_1004_seg.nii.gz
+```
+
 ## Development
 
 For information about building, running, and contributing to this code base,
diff --git a/mdai_utils/dicom_utils.py b/mdai_utils/dicoms_to_volume.py
similarity index 63%
rename from mdai_utils/dicom_utils.py
rename to mdai_utils/dicoms_to_volume.py
index 24e66eb..90b7a34 100644
--- a/mdai_utils/dicom_utils.py
+++ b/mdai_utils/dicoms_to_volume.py
@@ -27,7 +27,12 @@ class ImageAndMetadata(NamedTuple):
     metadata: dict
 
 
-def read_dicoms_into_volume(valid_dcm_file_list) -> ImageAndMetadata:
+def metadata_dict_to_sop_instance_uids(metadata_dict):
+    """Convert a metadata dictionary to a dictionary mapping slice index to SOPInstanceUID."""
+    return {int(k): v["SOPInstanceUID"] for k, v in metadata_dict.items()}
+
+
+def dicoms_to_volume(valid_dcm_file_list) -> ImageAndMetadata:
     """Convert a list of DICOM files to a image volume.
 
     Also returns metadata (SOPInstanceUID) for each slice in the volume.
@@ -58,3 +63,43 @@ def read_dicoms_into_volume(valid_dcm_file_list) -> ImageAndMetadata:
         for slice_idx, (fn, uid) in enumerate(zip(dicom_names, uids))
     }
     return ImageAndMetadata(image=image, metadata=metadata)
+
+
+def _get_parser():
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-i",
+        "--input_directory",
+        type=str,
+        required=True,
+        help="Path to the directory containing the DICOM files.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output_image",
+        type=str,
+        required=True,
+        help="Path to the output image.",
+    )
+    return parser
+
+
+def main(input_directory, output_image):
+    """
+    Convert a directory containing DICOM files to a volume image, and save the metadata mapping slice indices to SOPInstanceUIDs.
+    """
+    valid_dcm_file_list = file_list_from_directory(input_directory)
+    image_and_metadata = dicoms_to_volume(valid_dcm_file_list)
+    write_image_and_metadata(
+        image=image_and_metadata.image,
+        metadata=image_and_metadata.metadata,
+        output_image_filename=output_image,
+    )
+
+
+if __name__ == "__main__":
+    parser = _get_parser()
+    args = parser.parse_args()
+    main(args.input_directory, args.output_image)
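The renamed module can also be driven from Python rather than the CLI. A minimal sketch (not part of the patch), assuming `file_list_from_directory` is importable from `mdai_utils.dicoms_to_volume` as its `main()` suggests, and that the metadata layout matches the fixture JSON added further down:

```python
# Sketch only: programmatic use of the renamed dicoms_to_volume module.
# file_list_from_directory is assumed importable here, as used by main() above.
from mdai_utils.dicoms_to_volume import (
    dicoms_to_volume,
    file_list_from_directory,
    metadata_dict_to_sop_instance_uids,
)

dcm_files = file_list_from_directory("./tests/fixtures/humanct_0002_1000_1004")
image_and_metadata = dicoms_to_volume(dcm_files)
# metadata maps slice index -> {"dicom_file": ..., "SOPInstanceUID": ...};
# reduce it to {slice_index: SOPInstanceUID} for the upload helpers.
sop_instance_uids = metadata_dict_to_sop_instance_uids(image_and_metadata.metadata)
print(sop_instance_uids[0])
```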
diff --git a/mdai_utils/upload_annotations.py b/mdai_utils/upload_annotation_slice.py
similarity index 88%
rename from mdai_utils/upload_annotations.py
rename to mdai_utils/upload_annotation_slice.py
index 3d7224d..ec610f8 100644
--- a/mdai_utils/upload_annotations.py
+++ b/mdai_utils/upload_annotation_slice.py
@@ -3,6 +3,15 @@
 import numpy as np
 
 
+def read_data_image(image_path):
+    """
+    Read an image using itk, and returns a numpy data array.
+    """
+    image = itk.imread(image_path)
+    data_np = itk.array_from_image(image)
+    return data_np
+
+
 def upload_data_annotation_slice(
     data_np: np.ndarray,
     sop_instance_uid: str,
@@ -36,15 +45,6 @@ def upload_data_annotation_slice(
     return failed_annotations
 
 
-def read_data_image(image_path):
-    """
-    Read an image using itk, and returns a numpy data array.
-    """
-    image = itk.imread(image_path)
-    data_np = itk.array_from_image(image)
-    return data_np
-
-
 def upload_image_annotation_slice(
     segmentation_image_path: str,
     sop_instance_uid: str,
@@ -56,9 +56,10 @@ def upload_image_annotation_slice(
     """
     Uploads an annotation to the server. It requires that the DICOM image is
     already uploaded to the server. See upload_dicom_image.py for that.
+    The input image can be in any format supported by ITK.
 
     Args:
-        segmentation_image_path (str): Path to the segmentation image. With fixed metadata. See @inverse_transform
+        segmentation_image_path (str): Path to the segmentation image.
         sop_instance_uid (str): SOPInstanceUID of the DICOM key-slice image. Returned from @inverse_transform
         mdai_client (mdai.Client): Client to the MD.ai API. See @get_mdai_client
         mdai_project_id (str): Project ID. Check in the MD.ai web interface.
@@ -74,11 +75,11 @@ def upload_image_annotation_slice(
         data_np = data_np.squeeze(0)
     return upload_data_annotation_slice(
         data_np=data_np,
+        sop_instance_uid=sop_instance_uid,
         mdai_client=mdai_client,
         mdai_project_id=mdai_project_id,
         mdai_dataset_id=mdai_dataset_id,
         mdai_label_id=mdai_label_id,
-        sop_instance_uid=sop_instance_uid,
     )
 
 
@@ -106,12 +107,6 @@ def _get_parser():
         default=None,
         help="sop_instance_uid of the annotation file. Needed to match the annotation with the DICOM image in mdai.",
     )
-    parser.add_argument(
-        "--sop_instance_metadata",
-        type=str,
-        default=None,
-        help="json file generated by storing the metadata resulting from dicom_utils.read_dicoms_into_volume",
-    )
     parser.add_argument(
         "-p",
         "--parameters",
@@ -126,6 +121,27 @@ def _get_parser():
     return parser
 
 
+def main(
+    input_annotation,
+    label_name,
+    sop_instance_uid,
+    mdai_client,
+    mdai_project_id,
+    mdai_dataset_id,
+    mdai_label_ids,
+):
+    mdai_label_id = mdai_label_ids[label_name]
+    failed_annotations = upload_image_annotation_slice(
+        segmentation_image_path=input_annotation,
+        sop_instance_uid=sop_instance_uid,
+        mdai_client=mdai_client,
+        mdai_project_id=mdai_project_id,
+        mdai_dataset_id=mdai_dataset_id,
+        mdai_label_id=mdai_label_id,
+    )
+    return failed_annotations
+
+
 if __name__ == "__main__":
     import json
 
@@ -140,7 +156,6 @@ def _get_parser():
 
     mdai_project_id = parameters["mdai_project_id"]
     mdai_dataset_id = parameters["mdai_dataset_id"]
-    mdai_label_group_id = parameters["mdai_label_group_id"]
     mdai_label_ids = parameters["mdai_label_ids"]
     mdai_domain = parameters["mdai_domain"]
 
@@ -148,22 +163,23 @@ def _get_parser():
     label_name = args.label_name
     mdai_label_id = mdai_label_ids[label_name]
     sop_instance_uid = args.sop_instance_uid
-    sop_instance_metadata = args.sop_instance_metadata
 
     if sop_instance_uid is None:
         raise ValueError(
-            "Either sop_instance_uid or sop_instance_metadata must be set."
+            "sop_instance_uid is required to match the annotation with the DICOM image in mdai."
         )
 
     token = get_mdai_access_token()
    mdai_client = mdai.Client(domain=mdai_domain, access_token=token)
 
-    failed_annotations = upload_image_annotation_slice(
-        segmentation_image_path=input_annotation,
+    failed_annotations = main(
+        input_annotation=input_annotation,
+        label_name=label_name,
         sop_instance_uid=sop_instance_uid,
         mdai_client=mdai_client,
         mdai_project_id=mdai_project_id,
         mdai_dataset_id=mdai_dataset_id,
-        mdai_label_id=mdai_label_id,
+        mdai_label_ids=mdai_label_ids,
     )
+
     if len(failed_annotations) == 0:
         print("All annotations uploaded successfully.")
         exit(0)
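The slice helper can also be called directly from Python. A minimal sketch (not part of the patch); the domain, project, dataset, and label IDs are placeholders for the values normally read from the parameters JSON:

```python
# Sketch only: direct call to the 2D upload helper. IDs and domain are placeholders.
import mdai

from mdai_utils.common import get_mdai_access_token
from mdai_utils.upload_annotation_slice import upload_image_annotation_slice

mdai_client = mdai.Client(domain="md.ai", access_token=get_mdai_access_token())
failed_annotations = upload_image_annotation_slice(
    segmentation_image_path="./tests/fixtures/humanct_0002_1000_seg.nii.gz",
    sop_instance_uid="1.2.826.0.1.3680043.2.1125.1.75064541463040.2005072610414630768",
    mdai_client=mdai_client,
    mdai_project_id="your_project_id",
    mdai_dataset_id="your_dataset_id",
    mdai_label_id="your_label_id",
)
print(failed_annotations)
```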
diff --git a/mdai_utils/upload_annotation_volume.py b/mdai_utils/upload_annotation_volume.py
new file mode 100644
index 0000000..a62c71c
--- /dev/null
+++ b/mdai_utils/upload_annotation_volume.py
@@ -0,0 +1,212 @@
+from pathlib import Path
+
+import mdai
+import numpy as np
+
+from mdai_utils.dicoms_to_volume import metadata_dict_to_sop_instance_uids
+from mdai_utils.upload_annotation_slice import read_data_image
+
+
+def upload_data_annotation_volume(
+    data_np: np.ndarray,
+    sop_instance_uids: dict[int, str],
+    mdai_client: mdai.Client,
+    mdai_project_id: str,
+    mdai_dataset_id: str,
+    mdai_label_id: str,
+) -> list:
+    """
+    Upload a 3D numpy array representing a segmentation mask. It requires that the DICOM image is already in md.ai.
+    Args:
+        data_np: A 3D numpy array representing a segmentation mask.
+        sop_instance_uids: A dictionary mapping the slice index to the SOP Instance UID. See @dicoms_to_volume.dicoms_to_volume to get the mapping from the output metadata.
+        mdai_client: An instance of the mdai.Client class.
+        mdai_project_id: The md.ai project ID.
+        mdai_dataset_id: The md.ai dataset ID.
+        mdai_label_id: The md.ai label ID.
+    Returns:
+        A list of failed annotations.
+    """
+
+    if data_np.ndim != 3:
+        raise ValueError("data_np must be a 3D numpy array.")
+
+    mdai_annotations = []
+    # Check that data_np and sop_instance_uids have the same number of slices
+    len_data_np = len(data_np)
+    len_sop_instance_uids = len(sop_instance_uids)
+    if len_data_np != len_sop_instance_uids:
+        raise ValueError(
+            f"The number of slices in data_np ({len_data_np}) and sop_instance_uids ({len_sop_instance_uids}) must be the same."
+        )
+    # Iterate over the numpy slices (first index) and append the annotations to the list
+    for i, data_slice in enumerate(data_np):
+        # Get the SOP Instance UID
+        sop_instance_uid = sop_instance_uids.get(i)
+        # Upload the annotation
+        annotation_dict = {
+            "labelId": mdai_label_id,
+            "SOPInstanceUID": sop_instance_uid,
+            "data": mdai.common_utils.convert_mask_data(data_slice),
+        }
+        mdai_annotations.append(annotation_dict)
+
+    failed_annotations = mdai_client.import_annotations(
+        mdai_annotations, mdai_project_id, mdai_dataset_id
+    )
+    return failed_annotations
+
+
+def upload_image_annotation_volume(
+    segmentation_image_path: str,
+    sop_instance_uids: dict[int, str],
+    mdai_client: mdai.Client,
+    mdai_project_id: str,
+    mdai_dataset_id: str,
+    mdai_label_id: str,
+) -> list:
+    """
+    Upload a volume image containing a segmentation mask. It requires that the DICOM image is already in md.ai.
+    The input image can be in any format supported by ITK.
+
+    Args:
+        segmentation_image_path: Path to the segmentation image. Any format compatible with ITK.
+        sop_instance_uids: A dictionary mapping the slice index to the SOP Instance UID. See @dicoms_to_volume.dicoms_to_volume to get the mapping from the output metadata.
+        mdai_client: An instance of the mdai.Client class.
+        mdai_project_id: The md.ai project ID.
+        mdai_dataset_id: The md.ai dataset ID.
+        mdai_label_id: The md.ai label ID.
+    Returns:
+        A list of failed annotations.
+    """
+    data_np = read_data_image(segmentation_image_path)
+    if data_np.ndim < 3:
+        raise ValueError(
+            f"Image has dimension {data_np.ndim}. Use upload_image_annotation_slice instead."
+        )
+
+    if data_np.ndim > 3:
+        data_np = data_np.squeeze()
+        if data_np.ndim != 3:
+            raise ValueError("The input image seems to have more than 3 dimensions.")
+
+    return upload_data_annotation_volume(
+        data_np=data_np,
+        sop_instance_uids=sop_instance_uids,
+        mdai_client=mdai_client,
+        mdai_project_id=mdai_project_id,
+        mdai_dataset_id=mdai_dataset_id,
+        mdai_label_id=mdai_label_id,
+    )
+
+
+def _get_parser():
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-i",
+        "--input_annotation",
+        type=str,
+        required=True,
+        help="Path to the segmentation image to upload.",
+    )
+    parser.add_argument(
+        "-l",
+        "--label_name",
+        type=str,
+        required=True,
+        help="Label name corresponding to the annotation.",
+    )
+    parser.add_argument(
+        "--sop_instance_uids_file",
+        type=str,
+        required=True,
+        help="json file generated by storing the metadata resulting from dicoms_to_volume.dicoms_to_volume",
+    )
+    parser.add_argument(
+        "-p",
+        "--parameters",
+        type=str,
+        default=None,
+        help="""
+Path to a json file containing the parameters for md.ai variables: mdai_project_id, mdai_dataset_id, mdai_label_ids, etc.
+See example in tests/test_parameters.json.
+""",
+    )
+
+    return parser
+
+
+def main(
+    input_annotation,
+    label_name,
+    sop_instance_uids_file,
+    mdai_client,
+    mdai_project_id,
+    mdai_dataset_id,
+    mdai_label_ids,
+):
+    sop_instance_uids_file_path = Path(sop_instance_uids_file)
+    if not sop_instance_uids_file_path.exists():
+        raise FileNotFoundError(
+            f"The file {sop_instance_uids_file_path} does not exist."
+        )
+    with open(sop_instance_uids_file_path) as f:
+        metadata_dict = json.load(f)
+
+    sop_instance_uids = metadata_dict_to_sop_instance_uids(metadata_dict)
+
+    mdai_label_id = mdai_label_ids[label_name]
+    failed_annotations = upload_image_annotation_volume(
+        segmentation_image_path=input_annotation,
+        sop_instance_uids=sop_instance_uids,
+        mdai_client=mdai_client,
+        mdai_project_id=mdai_project_id,
+        mdai_dataset_id=mdai_dataset_id,
+        mdai_label_id=mdai_label_id,
+    )
+    return failed_annotations
+
+
+if __name__ == "__main__":
+    import json
+
+    from mdai_utils.common import get_mdai_access_token
+
+    parser = _get_parser()
+    args = parser.parse_args()
+    print(args)
+
+    with open(args.parameters, "r") as f:
+        parameters = json.load(f)
+
+    mdai_project_id = parameters["mdai_project_id"]
+    mdai_dataset_id = parameters["mdai_dataset_id"]
+    mdai_label_ids = parameters["mdai_label_ids"]
+    mdai_domain = parameters["mdai_domain"]
+
+    input_annotation = args.input_annotation
+    label_name = args.label_name
+    mdai_label_id = mdai_label_ids[label_name]
+    sop_instance_uids_file = args.sop_instance_uids_file
+
+    token = get_mdai_access_token()
+    mdai_client = mdai.Client(domain=mdai_domain, access_token=token)
+
+    failed_annotations = main(
+        input_annotation=input_annotation,
+        label_name=label_name,
+        sop_instance_uids_file=sop_instance_uids_file,
+        mdai_client=mdai_client,
+        mdai_project_id=mdai_project_id,
+        mdai_dataset_id=mdai_dataset_id,
+        mdai_label_ids=mdai_label_ids,
+    )
+
+    if len(failed_annotations) == 0:
+        print("All annotations uploaded successfully.")
+        exit(0)
+    else:
+        print(f"Failed annotations: {failed_annotations}")
+        exit(1)
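`upload_data_annotation_volume` is the lower-level entry point: it takes the mask as a 3D numpy array, with slice `i` of the array matching `sop_instance_uids[i]`. A minimal sketch (not part of the patch); IDs, domain, and the toy mask are placeholders, and the mask shape must match the DICOM slice dimensions:

```python
# Sketch only: uploading a 3D numpy mask slice by slice. IDs, domain, and the
# toy mask below are placeholders; the mask shape must match the DICOM slices.
import json

import mdai
import numpy as np

from mdai_utils.common import get_mdai_access_token
from mdai_utils.dicoms_to_volume import metadata_dict_to_sop_instance_uids
from mdai_utils.upload_annotation_volume import upload_data_annotation_volume

with open("./tests/fixtures/humanct_0002_1000_1004_SOPInstanceUIDs.json") as f:
    sop_instance_uids = metadata_dict_to_sop_instance_uids(json.load(f))

# One slice per SOPInstanceUID, with a square ROI drawn on every slice.
mask = np.zeros((len(sop_instance_uids), 512, 512), dtype=np.uint8)
mask[:, 100:200, 100:200] = 1

mdai_client = mdai.Client(domain="md.ai", access_token=get_mdai_access_token())
failed_annotations = upload_data_annotation_volume(
    data_np=mask,
    sop_instance_uids=sop_instance_uids,
    mdai_client=mdai_client,
    mdai_project_id="your_project_id",
    mdai_dataset_id="your_dataset_id",
    mdai_label_id="your_label_id",
)
print(failed_annotations)
```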
diff --git a/tests/fixtures/humanct_0002_1000_1004_SOPInstanceUIDs.json b/tests/fixtures/humanct_0002_1000_1004_SOPInstanceUIDs.json
new file mode 100644
index 0000000..3df5687
--- /dev/null
+++ b/tests/fixtures/humanct_0002_1000_1004_SOPInstanceUIDs.json
@@ -0,0 +1,22 @@
+{
+  "0": {
+    "dicom_file": "tests/fixtures/humanct_0002_1000_1004/vhf.1000.dcm",
+    "SOPInstanceUID": "1.2.826.0.1.3680043.2.1125.1.75064541463040.2005072610414630768"
+  },
+  "1": {
+    "dicom_file": "tests/fixtures/humanct_0002_1000_1004/vhf.1001.dcm",
+    "SOPInstanceUID": "1.2.826.0.1.3680043.2.1125.1.75064541463040.2005072610414645741"
+  },
+  "2": {
+    "dicom_file": "tests/fixtures/humanct_0002_1000_1004/vhf.1002.dcm",
+    "SOPInstanceUID": "1.2.826.0.1.3680043.2.1125.1.75064541463040.2005072610414662833"
+  },
+  "3": {
+    "dicom_file": "tests/fixtures/humanct_0002_1000_1004/vhf.1003.dcm",
+    "SOPInstanceUID": "1.2.826.0.1.3680043.2.1125.1.75064541463040.2005072610414677861"
+  },
+  "4": {
+    "dicom_file": "tests/fixtures/humanct_0002_1000_1004/vhf.1004.dcm",
+    "SOPInstanceUID": "1.2.826.0.1.3680043.2.1125.1.75064541463040.2005072610414694890"
+  }
+}
+""", + ) + + return parser + + +def main( + input_annotation, + label_name, + sop_instance_uids_file, + mdai_client, + mdai_project_id, + mdai_dataset_id, + mdai_label_ids, +): + sop_instance_uids_file_path = Path(sop_instance_uids_file) + if not sop_instance_uids_file_path.exists(): + raise FileNotFoundError( + f"The file {sop_instance_uids_file_path} does not exist." + ) + with open(sop_instance_uids_file_path) as f: + metadata_dict = json.load(f) + + sop_instance_uids = metadata_dict_to_sop_instance_uids(metadata_dict) + + mdai_label_id = mdai_label_ids[label_name] + failed_annotations = upload_image_annotation_volume( + segmentation_image_path=input_annotation, + sop_instance_uids=sop_instance_uids, + mdai_client=mdai_client, + mdai_project_id=mdai_project_id, + mdai_dataset_id=mdai_dataset_id, + mdai_label_id=mdai_label_id, + ) + return failed_annotations + + +if __name__ == "__main__": + import json + + from mdai_utils.common import get_mdai_access_token + + parser = _get_parser() + args = parser.parse_args() + print(args) + + with open(args.parameters, "r") as f: + parameters = json.load(f) + + mdai_project_id = parameters["mdai_project_id"] + mdai_dataset_id = parameters["mdai_dataset_id"] + mdai_label_ids = parameters["mdai_label_ids"] + mdai_domain = parameters["mdai_domain"] + + input_annotation = args.input_annotation + label_name = args.label_name + mdai_label_id = mdai_label_ids[label_name] + sop_instance_uids_file = args.sop_instance_uids_file + + token = get_mdai_access_token() + mdai_client = mdai.Client(domain=mdai_domain, access_token=token) + + failed_annotations = main( + input_annotation=input_annotation, + label_name=label_name, + sop_instance_uids_file=sop_instance_uids_file, + mdai_client=mdai_client, + mdai_project_id=mdai_project_id, + mdai_dataset_id=mdai_dataset_id, + mdai_label_ids=mdai_label_ids, + ) + + if len(failed_annotations) == 0: + print("All annotations uploaded successfully.") + exit(0) + else: + print(f"Failed annotations: {failed_annotations}") + exit(1) diff --git a/tests/fixtures/humanct_0002_1000_1004_SOPInstanceUIDs.json b/tests/fixtures/humanct_0002_1000_1004_SOPInstanceUIDs.json new file mode 100644 index 0000000..3df5687 --- /dev/null +++ b/tests/fixtures/humanct_0002_1000_1004_SOPInstanceUIDs.json @@ -0,0 +1,22 @@ +{ + "0": { + "dicom_file": "tests/fixtures/humanct_0002_1000_1004/vhf.1000.dcm", + "SOPInstanceUID": "1.2.826.0.1.3680043.2.1125.1.75064541463040.2005072610414630768" + }, + "1": { + "dicom_file": "tests/fixtures/humanct_0002_1000_1004/vhf.1001.dcm", + "SOPInstanceUID": "1.2.826.0.1.3680043.2.1125.1.75064541463040.2005072610414645741" + }, + "2": { + "dicom_file": "tests/fixtures/humanct_0002_1000_1004/vhf.1002.dcm", + "SOPInstanceUID": "1.2.826.0.1.3680043.2.1125.1.75064541463040.2005072610414662833" + }, + "3": { + "dicom_file": "tests/fixtures/humanct_0002_1000_1004/vhf.1003.dcm", + "SOPInstanceUID": "1.2.826.0.1.3680043.2.1125.1.75064541463040.2005072610414677861" + }, + "4": { + "dicom_file": "tests/fixtures/humanct_0002_1000_1004/vhf.1004.dcm", + "SOPInstanceUID": "1.2.826.0.1.3680043.2.1125.1.75064541463040.2005072610414694890" + } +} diff --git a/tests/upload_annotation_slice_test.py b/tests/upload_annotation_slice_test.py new file mode 100644 index 0000000..adc0c14 --- /dev/null +++ b/tests/upload_annotation_slice_test.py @@ -0,0 +1,23 @@ +from mdai_utils.upload_annotation_slice import upload_image_annotation_slice + + +def test_upload_image_annotation_slice(mdai_setup): + parameters = mdai_setup["parameters"] + 
diff --git a/tests/upload_annotation_volume_test.py b/tests/upload_annotation_volume_test.py
new file mode 100644
index 0000000..37e6b60
--- /dev/null
+++ b/tests/upload_annotation_volume_test.py
@@ -0,0 +1,40 @@
+import json
+
+from mdai_utils.dicoms_to_volume import metadata_dict_to_sop_instance_uids
+from mdai_utils.upload_annotation_volume import upload_image_annotation_volume
+
+
+def test_upload_image_annotation_volume(mdai_setup):
+    parameters = mdai_setup["parameters"]
+    fixtures_dir = mdai_setup["fixtures_dir"]
+    mdai_client = mdai_setup["mdai_client"]
+
+    mdai_project_id = parameters["mdai_project_id"]
+    mdai_dataset_id = parameters["mdai_dataset_id"]
+    mdai_label_ids = parameters["mdai_label_ids"]
+    label_name = parameters["labels"][0]
+    mdai_label_id = mdai_label_ids[label_name]
+
+    sop_instance_uids_file_path = (
+        fixtures_dir / "humanct_0002_1000_1004_SOPInstanceUIDs.json"
+    )
+    if not sop_instance_uids_file_path.exists():
+        raise FileNotFoundError(
+            f"The file {sop_instance_uids_file_path} does not exist."
+        )
+    with open(sop_instance_uids_file_path) as f:
+        metadata_dict = json.load(f)
+
+    sop_instance_uids = metadata_dict_to_sop_instance_uids(metadata_dict)
+
+    input_annotation = fixtures_dir / "humanct_0002_1000_1004_seg.nii.gz"
+
+    failed_annotations = upload_image_annotation_volume(
+        segmentation_image_path=input_annotation,
+        sop_instance_uids=sop_instance_uids,
+        mdai_client=mdai_client,
+        mdai_project_id=mdai_project_id,
+        mdai_dataset_id=mdai_dataset_id,
+        mdai_label_id=mdai_label_id,
+    )
+    assert len(failed_annotations) == 0
diff --git a/tests/upload_dataset_test.py b/tests/upload_dataset_test.py
new file mode 100644
index 0000000..c80e56b
--- /dev/null
+++ b/tests/upload_dataset_test.py
@@ -0,0 +1,19 @@
+import pytest
+
+from mdai_utils.upload_dataset import upload_dataset
+
+
+@pytest.mark.upload_only(
+    reason="Only need to upload once. run pytest tests with --upload-only to run it."
+)
+def test_upload_dataset(mdai_setup):
+    parameters = mdai_setup["parameters"]
+    mdai_dataset_id = parameters.get("mdai_dataset_id")
+    fixtures_dir = mdai_setup["fixtures_dir"]
+    dicom_dir = fixtures_dir / "humanct_0002_1000_1004"
+    assert dicom_dir.exists()
+    completed_process = upload_dataset(mdai_dataset_id, dicom_dir)
+    process_message = completed_process.stdout.strip()
+    print(process_message)
+    # Check the status of subprocess
+    assert completed_process.returncode == 0
diff --git a/tests/upload_test.py b/tests/upload_test.py
deleted file mode 100644
index 9ebbe09..0000000
--- a/tests/upload_test.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import pytest
-
-from mdai_utils.upload_annotations import upload_image_annotation_slice
-from mdai_utils.upload_dataset import upload_dataset
-
-
-def test_pytest_fixture(mdai_setup):
-    mdai_parameters = mdai_setup["parameters"]
-    mdai_project_id = mdai_parameters.get("mdai_project_id")
-    assert mdai_project_id is not None
-
-
-@pytest.mark.upload_only(
-    reason="Only need to upload once. run pytest tests with --upload-only to run it."
-)
-def test_upload_dataset(mdai_setup):
-    parameters = mdai_setup["parameters"]
-    mdai_dataset_id = parameters.get("mdai_dataset_id")
-    fixtures_dir = mdai_setup["fixtures_dir"]
-    dicom_dir = fixtures_dir / "humanct_0002_1000_1004"
-    assert dicom_dir.exists()
-    completed_process = upload_dataset(mdai_dataset_id, dicom_dir)
-    process_message = completed_process.stdout.strip()
-    print(process_message)
-    # Check the status of subprocess
-    assert completed_process.returncode == 0
-
-
-def test_upload_annotation(mdai_setup):
-    parameters = mdai_setup["parameters"]
-    fixtures_dir = mdai_setup["fixtures_dir"]
-    mdai_client = mdai_setup["mdai_client"]
-    # sop_instance_uid can be acquired from mdai, or from the metadata generated
-    # by the function dicom_utils.read_dicoms_into_volume.
-    sop_instance_uid = "1.2.826.0.1.3680043.2.1125.1.75064541463040.2005072610414630768"
-    mdai_label_ids = parameters.get("mdai_label_ids")
-    labels_to_upload = parameters.get("labels")
-    label_id = mdai_label_ids.get(labels_to_upload[0])
-
-    failed_annotations = upload_image_annotation_slice(
-        segmentation_image_path=fixtures_dir / "humanct_0002_1000_seg.nii.gz",
-        sop_instance_uid=sop_instance_uid,
-        mdai_client=mdai_client,
-        mdai_project_id=parameters.get("mdai_project_id"),
-        mdai_dataset_id=parameters.get("mdai_dataset_id"),
-        mdai_label_id=label_id,
-    )
-    assert len(failed_annotations) == 0