Commit fc5ffdf

Merge branch 'dev' into fix_gdown_fails

2 parents d8a7c24 + 946cfdf commit fc5ffdf

26 files changed: +402 −109 lines changed

SECURITY.md

Lines changed: 18 additions & 0 deletions

```diff
@@ -0,0 +1,18 @@
+# Security Policy
+
+## Reporting a Vulnerability
+MONAI takes security seriously and appreciates your efforts to responsibly disclose vulnerabilities. If you discover a security issue, please report it as soon as possible.
+
+To report a security issue:
+* Please use the GitHub Security Advisories tab to "[Open a draft security advisory](https://github.com/Project-MONAI/MONAI/security/advisories/new)".
+* Include a detailed description of the issue, steps to reproduce, potential impact, and any possible mitigations.
+* If applicable, please also attach proof-of-concept code or screenshots.
+* We aim to acknowledge your report within 72 hours and provide a status update as we investigate.
+* Please do not create public issues for security-related reports.
+
+## Disclosure Policy
+* We follow a coordinated disclosure approach.
+* We will not publicly disclose vulnerabilities until a fix has been developed and released.
+* Credit will be given to researchers who responsibly disclose vulnerabilities, if requested.
+## Acknowledgements
+We greatly appreciate contributions from the security community and strive to recognize all researchers who help keep MONAI safe.
```

monai/apps/nnunet/nnunet_bundle.py

Lines changed: 17 additions & 9 deletions

```diff
@@ -133,7 +133,7 @@ def get_nnunet_trainer(
         cudnn.benchmark = True
 
     if pretrained_model is not None:
-        state_dict = torch.load(pretrained_model)
+        state_dict = torch.load(pretrained_model, weights_only=True)
         if "network_weights" in state_dict:
             nnunet_trainer.network._orig_mod.load_state_dict(state_dict["network_weights"])
     return nnunet_trainer
@@ -182,7 +182,9 @@ def __init__(self, predictor: object, model_folder: Union[str, Path], model_name
         parameters = []
 
         checkpoint = torch.load(
-            join(Path(model_training_output_dir).parent, "nnunet_checkpoint.pth"), map_location=torch.device("cpu")
+            join(Path(model_training_output_dir).parent, "nnunet_checkpoint.pth"),
+            map_location=torch.device("cpu"),
+            weights_only=True,
         )
         trainer_name = checkpoint["trainer_name"]
         configuration_name = checkpoint["init_args"]["configuration"]
@@ -192,7 +194,9 @@ def __init__(self, predictor: object, model_folder: Union[str, Path], model_name
             else None
         )
         if Path(model_training_output_dir).joinpath(model_name).is_file():
-            monai_checkpoint = torch.load(join(model_training_output_dir, model_name), map_location=torch.device("cpu"))
+            monai_checkpoint = torch.load(
+                join(model_training_output_dir, model_name), map_location=torch.device("cpu"), weights_only=True
+            )
             if "network_weights" in monai_checkpoint.keys():
                 parameters.append(monai_checkpoint["network_weights"])
         else:
@@ -383,8 +387,12 @@ def convert_nnunet_to_monai_bundle(nnunet_config: dict, bundle_root_folder: str,
         dataset_name, f"{nnunet_trainer}__{nnunet_plans}__{nnunet_configuration}"
     )
 
-    nnunet_checkpoint_final = torch.load(Path(nnunet_model_folder).joinpath(f"fold_{fold}", "checkpoint_final.pth"))
-    nnunet_checkpoint_best = torch.load(Path(nnunet_model_folder).joinpath(f"fold_{fold}", "checkpoint_best.pth"))
+    nnunet_checkpoint_final = torch.load(
+        Path(nnunet_model_folder).joinpath(f"fold_{fold}", "checkpoint_final.pth"), weights_only=True
+    )
+    nnunet_checkpoint_best = torch.load(
+        Path(nnunet_model_folder).joinpath(f"fold_{fold}", "checkpoint_best.pth"), weights_only=True
+    )
 
     nnunet_checkpoint = {}
     nnunet_checkpoint["inference_allowed_mirroring_axes"] = nnunet_checkpoint_final["inference_allowed_mirroring_axes"]
@@ -470,7 +478,7 @@ def get_network_from_nnunet_plans(
     if model_ckpt is None:
         return network
     else:
-        state_dict = torch.load(model_ckpt)
+        state_dict = torch.load(model_ckpt, weights_only=True)
        network.load_state_dict(state_dict[model_key_in_ckpt])
         return network
 
@@ -534,7 +542,7 @@ def subfiles(
 
     Path(nnunet_model_folder).joinpath(f"fold_{fold}").mkdir(parents=True, exist_ok=True)
 
-    nnunet_checkpoint: dict = torch.load(f"{bundle_root_folder}/models/nnunet_checkpoint.pth")
+    nnunet_checkpoint: dict = torch.load(f"{bundle_root_folder}/models/nnunet_checkpoint.pth", weights_only=True)
     latest_checkpoints: list[str] = subfiles(
         Path(bundle_root_folder).joinpath("models", f"fold_{fold}"), prefix="checkpoint_epoch", sort=True
     )
@@ -545,7 +553,7 @@
     epochs.sort()
     final_epoch: int = epochs[-1]
     monai_last_checkpoint: dict = torch.load(
-        f"{bundle_root_folder}/models/fold_{fold}/checkpoint_epoch={final_epoch}.pt"
+        f"{bundle_root_folder}/models/fold_{fold}/checkpoint_epoch={final_epoch}.pt", weights_only=True
     )
 
     best_checkpoints: list[str] = subfiles(
@@ -558,7 +566,7 @@
     key_metrics.sort()
     best_key_metric: str = key_metrics[-1]
     monai_best_checkpoint: dict = torch.load(
-        f"{bundle_root_folder}/models/fold_{fold}/checkpoint_key_metric={best_key_metric}.pt"
+        f"{bundle_root_folder}/models/fold_{fold}/checkpoint_key_metric={best_key_metric}.pt", weights_only=True
     )
 
     nnunet_checkpoint["optimizer_state"] = monai_last_checkpoint["optimizer_state"]
```
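Every hunk above makes the same change: `torch.load` is called with `weights_only=True`, which restricts unpickling to tensors and plain containers. A minimal standalone sketch of the difference, with illustrative file names (requires a PyTorch version that supports the `weights_only` argument):

```python
import pickle

import torch

# A dict of tensors round-trips fine under weights_only=True.
torch.save({"network_weights": {"w": torch.zeros(3)}}, "ok.pth")
print(torch.load("ok.pth", weights_only=True)["network_weights"]["w"].shape)

class Payload:
    """Stand-in for a malicious object embedded in a checkpoint."""

    def __reduce__(self):  # runs on ordinary (unrestricted) unpickling
        return (print, ("arbitrary code would execute here",))

# The same call now rejects the object instead of executing its payload.
torch.save({"network_weights": Payload()}, "bad.pth")
try:
    torch.load("bad.pth", weights_only=True)
except pickle.UnpicklingError as err:
    print(f"rejected: {err}")
```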

monai/apps/utils.py

Lines changed: 60 additions & 6 deletions

```diff
@@ -122,6 +122,38 @@ def update_to(self, b: int = 1, bsize: int = 1, tsize: int | None = None) -> None:
             raise e
 
 
+def safe_extract_member(member, extract_to):
+    """Securely verify compressed package member paths to prevent path traversal attacks"""
+    # Get member path (handle different compression formats)
+    if hasattr(member, "filename"):
+        member_path = member.filename  # zipfile
+    elif hasattr(member, "name"):
+        member_path = member.name  # tarfile
+    else:
+        member_path = str(member)
+
+    if hasattr(member, "issym") and member.issym():
+        raise ValueError(f"Symbolic link detected in archive: {member_path}")
+    if hasattr(member, "islnk") and member.islnk():
+        raise ValueError(f"Hard link detected in archive: {member_path}")
+
+    member_path = os.path.normpath(member_path)
+
+    if os.path.isabs(member_path) or ".." in member_path.split(os.sep):
+        raise ValueError(f"Unsafe path detected in archive: {member_path}")
+
+    full_path = os.path.join(extract_to, member_path)
+    full_path = os.path.normpath(full_path)
+
+    extract_root = os.path.realpath(extract_to)
+    target_real = os.path.realpath(full_path)
+    # Ensure the resolved path stays within the extraction root
+    if os.path.commonpath([extract_root, target_real]) != extract_root:
+        raise ValueError(f"Unsafe path: path traversal {member_path}")
+
+    return full_path
+
+
 def check_hash(filepath: PathLike, val: str | None = None, hash_type: str = "md5") -> bool:
     """
     Verify hash signature of specified file.
```
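A usage sketch for the helper above, with an illustrative archive name: a member whose path climbs out of the extraction root raises before any bytes are written, while a normal member resolves to a path inside it.

```python
import zipfile

from monai.apps.utils import safe_extract_member

# Build a zip containing one traversal member and one normal member.
with zipfile.ZipFile("evil.zip", "w") as zf:
    zf.writestr("../escape.txt", "payload")
    zf.writestr("data/ok.txt", "fine")

with zipfile.ZipFile("evil.zip") as zf:
    for member in zf.infolist():
        try:
            print("safe:", safe_extract_member(member, "output"))
        except ValueError as err:
            print("blocked:", err)
# blocked: Unsafe path detected in archive: ../escape.txt
# safe: output/data/ok.txt
```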
```diff
@@ -242,6 +274,32 @@ def download_url(
     )
 
 
+def _extract_zip(filepath, output_dir):
+    with zipfile.ZipFile(filepath, "r") as zip_file:
+        for member in zip_file.infolist():
+            safe_path = safe_extract_member(member, output_dir)
+            if member.is_dir():
+                continue
+            os.makedirs(os.path.dirname(safe_path), exist_ok=True)
+            with zip_file.open(member) as source:
+                with open(safe_path, "wb") as target:
+                    shutil.copyfileobj(source, target)
+
+
+def _extract_tar(filepath, output_dir):
+    with tarfile.open(filepath, "r") as tar_file:
+        for member in tar_file.getmembers():
+            safe_path = safe_extract_member(member, output_dir)
+            if not member.isfile():
+                continue
+            os.makedirs(os.path.dirname(safe_path), exist_ok=True)
+            source = tar_file.extractfile(member)
+            if source is not None:
+                with source:
+                    with open(safe_path, "wb") as target:
+                        shutil.copyfileobj(source, target)
+
+
 def extractall(
     filepath: PathLike,
     output_dir: PathLike = ".",
```
```diff
@@ -287,14 +345,10 @@
         logger.info(f"Writing into directory: {output_dir}.")
     _file_type = file_type.lower().strip()
     if filepath.name.endswith("zip") or _file_type == "zip":
-        zip_file = zipfile.ZipFile(filepath)
-        zip_file.extractall(output_dir)
-        zip_file.close()
+        _extract_zip(filepath, output_dir)
         return
     if filepath.name.endswith("tar") or filepath.name.endswith("tar.gz") or "tar" in _file_type:
-        tar_file = tarfile.open(filepath)
-        tar_file.extractall(output_dir)
-        tar_file.close()
+        _extract_tar(filepath, output_dir)
         return
     raise NotImplementedError(
         f'Unsupported file type, available options are: ["zip", "tar.gz", "tar"]. name={filepath} type={file_type}.'
```
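End to end, the rewiring above means a crafted archive now fails fast (a sketch; file and directory names are illustrative, and `extractall`'s other keyword arguments are left at their defaults):

```python
import zipfile

from monai.apps.utils import extractall

with zipfile.ZipFile("evil.zip", "w") as zf:
    zf.writestr("../escape.txt", "payload")

try:
    # extractall now routes zips through _extract_zip, so each member is
    # validated by safe_extract_member before anything is written.
    extractall("evil.zip", output_dir="extracted", file_type="zip")
except ValueError as err:
    print(err)  # Unsafe path detected in archive: ../escape.txt
```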

monai/data/__init__.py

Lines changed: 0 additions & 1 deletion

```diff
@@ -78,7 +78,6 @@
 from .thread_buffer import ThreadBuffer, ThreadDataLoader
 from .torchscript_utils import load_net_with_metadata, save_net_with_metadata
 from .utils import (
-    PICKLE_KEY_SUFFIX,
     affine_to_spacing,
     compute_importance_map,
     compute_shape_offset,
```

monai/data/dataset.py

Lines changed: 34 additions & 16 deletions

```diff
@@ -13,7 +13,6 @@
 
 import collections.abc
 import math
-import pickle
 import shutil
 import sys
 import tempfile
@@ -22,9 +21,11 @@
 import warnings
 from collections.abc import Callable, Sequence
 from copy import copy, deepcopy
+from io import BytesIO
 from multiprocessing.managers import ListProxy
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
+from pickle import UnpicklingError
 from typing import IO, TYPE_CHECKING, Any, cast
 
 import numpy as np
@@ -207,6 +208,11 @@ class PersistentDataset(Dataset):
         not guaranteed, so caution should be used when modifying transforms to avoid unexpected
         errors. If in doubt, it is advisable to clear the cache directory.
 
+        Cached data is expected to be tensors, primitives, or dictionaries keying to these values. Numpy arrays will
+        be converted to tensors, however any other object type returned by transforms will not be loadable since
+        `torch.load` will be used with `weights_only=True` to prevent loading of potentially malicious objects.
+        Legacy cache files may not be loadable and may need to be recomputed.
+
     Lazy Resampling:
         If you make use of the lazy resampling feature of `monai.transforms.Compose`, please refer to
         its documentation to familiarize yourself with the interaction between `PersistentDataset` and
@@ -248,8 +254,8 @@ def __init__(
                 this arg is used by `torch.save`, for more details, please check:
                 https://pytorch.org/docs/stable/generated/torch.save.html#torch.save,
                 and ``monai.data.utils.SUPPORTED_PICKLE_MOD``.
-            pickle_protocol: can be specified to override the default protocol, default to `2`.
-                this arg is used by `torch.save`, for more details, please check:
+            pickle_protocol: specifies pickle protocol when saving, with `torch.save`.
+                Defaults to torch.serialization.DEFAULT_PROTOCOL. For more details, please check:
                 https://pytorch.org/docs/stable/generated/torch.save.html#torch.save.
             hash_transform: a callable to compute hash from the transform information when caching.
                 This may reduce errors due to transforms changing during experiments. Default to None (no hash).
@@ -371,12 +377,12 @@ def _cachecheck(self, item_transformed):
 
         if hashfile is not None and hashfile.is_file():  # cache hit
             try:
-                return torch.load(hashfile, weights_only=False)
+                return torch.load(hashfile, weights_only=True)
             except PermissionError as e:
                 if sys.platform != "win32":
                     raise e
-            except RuntimeError as e:
-                if "Invalid magic number; corrupt file" in str(e):
+            except (UnpicklingError, RuntimeError) as e:  # corrupt or unloadable cached files are recomputed
+                if "Invalid magic number; corrupt file" in str(e) or isinstance(e, UnpicklingError):
                     warnings.warn(f"Corrupt cache file detected: {hashfile}. Deleting and recomputing.")
                     hashfile.unlink()
                 else:
@@ -392,7 +398,7 @@
         with tempfile.TemporaryDirectory() as tmpdirname:
             temp_hash_file = Path(tmpdirname) / hashfile.name
             torch.save(
-                obj=_item_transformed,
+                obj=convert_to_tensor(_item_transformed, convert_numeric=False),
                 f=temp_hash_file,
                 pickle_module=look_up_option(self.pickle_module, SUPPORTED_PICKLE_MOD),
                 pickle_protocol=self.pickle_protocol,
@@ -455,8 +461,8 @@
                 this arg is used by `torch.save`, for more details, please check:
                 https://pytorch.org/docs/stable/generated/torch.save.html#torch.save,
                 and ``monai.data.utils.SUPPORTED_PICKLE_MOD``.
-            pickle_protocol: can be specified to override the default protocol, default to `2`.
-                this arg is used by `torch.save`, for more details, please check:
+            pickle_protocol: specifies pickle protocol when saving, with `torch.save`.
+                Defaults to torch.serialization.DEFAULT_PROTOCOL. For more details, please check:
                 https://pytorch.org/docs/stable/generated/torch.save.html#torch.save.
             hash_transform: a callable to compute hash from the transform information when caching.
                 This may reduce errors due to transforms changing during experiments. Default to None (no hash).
@@ -531,7 +537,7 @@
         hash_func: Callable[..., bytes] = pickle_hashing,
         db_name: str = "monai_cache",
         progress: bool = True,
-        pickle_protocol=pickle.HIGHEST_PROTOCOL,
+        pickle_protocol=DEFAULT_PROTOCOL,
         hash_transform: Callable[..., bytes] | None = None,
         reset_ops_id: bool = True,
         lmdb_kwargs: dict | None = None,
@@ -551,8 +557,9 @@
                 defaults to `monai.data.utils.pickle_hashing`.
             db_name: lmdb database file name. Defaults to "monai_cache".
             progress: whether to display a progress bar.
-            pickle_protocol: pickle protocol version. Defaults to pickle.HIGHEST_PROTOCOL.
-                https://docs.python.org/3/library/pickle.html#pickle-protocols
+            pickle_protocol: specifies pickle protocol when saving, with `torch.save`.
+                Defaults to torch.serialization.DEFAULT_PROTOCOL. For more details, please check:
+                https://pytorch.org/docs/stable/generated/torch.save.html#torch.save.
             hash_transform: a callable to compute hash from the transform information when caching.
                 This may reduce errors due to transforms changing during experiments. Default to None (no hash).
                 Other options are `pickle_hashing` and `json_hashing` functions from `monai.data.utils`.
@@ -594,6 +601,15 @@ def set_data(self, data: Sequence):
         super().set_data(data=data)
         self._read_env = self._fill_cache_start_reader(show_progress=self.progress)
 
+    def _safe_serialize(self, val):
+        out = BytesIO()
+        torch.save(convert_to_tensor(val), out, pickle_protocol=self.pickle_protocol)
+        out.seek(0)
+        return out.read()
+
+    def _safe_deserialize(self, val):
+        return torch.load(BytesIO(val), map_location="cpu", weights_only=True)
+
     def _fill_cache_start_reader(self, show_progress=True):
         """
         Check the LMDB cache and write the cache if needed. py-lmdb doesn't have a good support for concurrent write.
```
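The round trip these two helpers implement can be sketched in isolation (the `convert_to_tensor` import location is an assumption):

```python
from io import BytesIO

import numpy as np
import torch
from monai.utils import convert_to_tensor  # assumed import location

item = {"image": np.ones((2, 2), dtype=np.float32), "label": 1}

# _safe_serialize: convert values to tensors, torch.save into a buffer.
buf = BytesIO()
torch.save(convert_to_tensor(item), buf)
raw = buf.getvalue()  # the bytes that get stored in LMDB

# _safe_deserialize: torch.load from the bytes with weights_only=True.
restored = torch.load(BytesIO(raw), map_location="cpu", weights_only=True)
print(type(restored["image"]))  # numpy input comes back as torch.Tensor
```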
```diff
@@ -619,7 +635,8 @@
                     continue
                 if val is None:
                     val = self._pre_transform(deepcopy(item))  # keep the original hashed
-                    val = pickle.dumps(val, protocol=self.pickle_protocol)
+                    # val = pickle.dumps(val, protocol=self.pickle_protocol)
+                    val = self._safe_serialize(val)
                 with env.begin(write=True) as txn:
                     txn.put(key, val)
                 done = True
@@ -664,7 +681,8 @@
             warnings.warn("LMDBDataset: cache key not found, running fallback caching.")
             return super()._cachecheck(item_transformed)
         try:
-            return pickle.loads(data)
+            # return pickle.loads(data)
+            return self._safe_deserialize(data)
         except Exception as err:
             raise RuntimeError("Invalid cache value, corrupted lmdb file?") from err
 
@@ -1650,7 +1668,7 @@ def _create_new_cache(self, data, data_hashfile, meta_hash_file_name):
             meta_hash_file = self.cache_dir / meta_hash_file_name
             temp_hash_file = Path(tmpdirname) / meta_hash_file_name
             torch.save(
-                obj=self._meta_cache[meta_hash_file_name],
+                obj=convert_to_tensor(self._meta_cache[meta_hash_file_name], convert_numeric=False),
                 f=temp_hash_file,
                 pickle_module=look_up_option(self.pickle_module, SUPPORTED_PICKLE_MOD),
                 pickle_protocol=self.pickle_protocol,
@@ -1670,4 +1688,4 @@
         if meta_hash_file_name in self._meta_cache:
             return self._meta_cache[meta_hash_file_name]
         else:
-            return torch.load(self.cache_dir / meta_hash_file_name, weights_only=False)
+            return torch.load(self.cache_dir / meta_hash_file_name, weights_only=True)
```

monai/data/meta_tensor.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -611,4 +611,4 @@ def print_verbose(self) -> None:
 
 # needed in later versions of Pytorch to indicate the class is safe for serialisation
 if hasattr(torch.serialization, "add_safe_globals"):
-    torch.serialization.add_safe_globals([MetaTensor])
+    torch.serialization.add_safe_globals([MetaObj, MetaTensor, MetaKeys, SpaceKeys])
```
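With the supporting types registered, a `MetaTensor` checkpoint can pass the `weights_only=True` gate that the rest of this commit turns on. A sketch (file name illustrative; requires a PyTorch version providing `add_safe_globals`):

```python
import torch

from monai.data import MetaTensor  # importing also runs the registration above

t = MetaTensor(torch.rand(2, 3), meta={"original_channel_dim": 0})
torch.save(t, "meta.pt")

# Without MetaObj/MetaKeys/SpaceKeys in the allow-list, this load could
# fail with UnpicklingError even though MetaTensor itself was registered.
loaded = torch.load("meta.pt", weights_only=True)
print(type(loaded).__name__, loaded.meta.get("original_channel_dim"))
```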
