From 537945fbce8f9a315ebaa19865ca895ce671ebbb Mon Sep 17 00:00:00 2001
From: Virginia Fernandez
Date: Fri, 19 Sep 2025 15:22:20 +0100
Subject: [PATCH 1/8] Changes on diffusion model encoder to avoid dimension hard-coding and crash.

---
 monai/networks/nets/diffusion_model_unet.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/monai/networks/nets/diffusion_model_unet.py b/monai/networks/nets/diffusion_model_unet.py
index 113aa505e9..e8aeff4868 100644
--- a/monai/networks/nets/diffusion_model_unet.py
+++ b/monai/networks/nets/diffusion_model_unet.py
@@ -37,10 +37,12 @@
 import torch
 from torch import nn
+import numpy as np

 from monai.networks.blocks import Convolution, CrossAttentionBlock, MLPBlock, SABlock, SpatialAttentionBlock, Upsample
 from monai.networks.layers.factories import Pool
 from monai.utils import ensure_tuple_rep, optional_import
+from functools import reduce

 Rearrange, _ = optional_import("einops.layers.torch", name="Rearrange")

@@ -1882,6 +1884,7 @@ class DiffusionModelEncoder(nn.Module):
         spatial_dims: number of spatial dimensions.
         in_channels: number of input channels.
         out_channels: number of output channels.
+        input_shape: spatial shape of the input (without batch and channel dims).
         num_res_blocks: number of residual blocks (see _ResnetBlock) per level.
         channels: tuple of block output channels.
         attention_levels: list of levels to add attention.
@@ -1901,6 +1904,7 @@ def __init__(
         spatial_dims: int,
         in_channels: int,
         out_channels: int,
+        input_shape: Sequence[int],
         num_res_blocks: Sequence[int] | int = (2, 2, 2, 2),
         channels: Sequence[int] = (32, 64, 64, 64),
         attention_levels: Sequence[bool] = (False, False, True, True),
@@ -2007,7 +2011,15 @@
             self.down_blocks.append(down_block)

-        self.out: Optional[nn.Module] = None
+        for _ in channels:
+            input_shape = [int(np.ceil(i_/2)) for i_ in input_shape]
+
+        last_dim_flattened = reduce(lambda x, y: x*y, input_shape) * self.down_blocks[-1].downsampler.op.conv.out_channels
+        self.out: Optional[nn.Module] = nn.Sequential(
+            nn.Linear(last_dim_flattened, 512),
+            nn.ReLU(), nn.Dropout(0.1),
+            nn.Linear(512, self.out_channels)
+        )

     def forward(
         self,

From d5856ecb1bd1126ac5ec5050ae79d885b1dfb2e5 Mon Sep 17 00:00:00 2001
From: Virginia Fernandez
Date: Fri, 19 Sep 2025 15:30:06 +0100
Subject: [PATCH 2/8] fix mypy issues.

---
 monai/networks/nets/diffusion_model_unet.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/monai/networks/nets/diffusion_model_unet.py b/monai/networks/nets/diffusion_model_unet.py
index e8aeff4868..f24386858b 100644
--- a/monai/networks/nets/diffusion_model_unet.py
+++ b/monai/networks/nets/diffusion_model_unet.py
@@ -33,16 +33,16 @@
 import math
 from collections.abc import Sequence
+from functools import reduce
 from typing import Optional

+import numpy as np
 import torch
 from torch import nn
-import numpy as np

 from monai.networks.blocks import Convolution, CrossAttentionBlock, MLPBlock, SABlock, SpatialAttentionBlock, Upsample
 from monai.networks.layers.factories import Pool
 from monai.utils import ensure_tuple_rep, optional_import
-from functools import reduce

 Rearrange, _ = optional_import("einops.layers.torch", name="Rearrange")
@@ -1904,7 +1904,7 @@ def __init__(
         spatial_dims: int,
         in_channels: int,
         out_channels: int,
-        input_shape: Sequence[int],
+        input_shape: Sequence[int],
         num_res_blocks: Sequence[int] | int = (2, 2, 2, 2),
         channels: Sequence[int] = (32, 64, 64, 64),
         attention_levels: Sequence[bool] = (False, False, True, True),
@@ -2012,14 +2012,13 @@
             self.down_blocks.append(down_block)

         for _ in channels:
-            input_shape = [int(np.ceil(i_/2)) for i_ in input_shape]
+            input_shape = [int(np.ceil(i_ / 2)) for i_ in input_shape]
+
+        last_dim_flattened = int(reduce(lambda x, y: x * y, input_shape) * channels[-1])

-        last_dim_flattened = reduce(lambda x, y: x*y, input_shape) * self.down_blocks[-1].downsampler.op.conv.out_channels
         self.out: Optional[nn.Module] = nn.Sequential(
-            nn.Linear(last_dim_flattened, 512),
-            nn.ReLU(), nn.Dropout(0.1),
-            nn.Linear(512, self.out_channels)
-        )
+            nn.Linear(last_dim_flattened, 512), nn.ReLU(), nn.Dropout(0.1), nn.Linear(512, self.out_channels)
+        )

     def forward(
         self,
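
Note: patches 1 and 2 above size the encoder's final linear head at construction time instead of lazily in forward(). The sketch below is not part of the patch series; it only mirrors the arithmetic added to __init__ (each entry in `channels` ceil-halves every spatial dimension, and the product is scaled by the last channel count), so the helper name is illustrative only.

    from functools import reduce

    import numpy as np

    def flattened_feature_size(input_shape, channels=(32, 64, 64, 64)):
        # Mirror the computation the patches add to __init__: ceil-halve each
        # spatial dimension once per level, then multiply by the final channel count.
        shape = list(input_shape)
        for _ in channels:
            shape = [int(np.ceil(s / 2)) for s in shape]
        return int(reduce(lambda x, y: x * y, shape) * channels[-1])

    # With the default channels, a (64, 64) input yields 4 * 4 * 64 = 1024 input
    # features for the first nn.Linear of self.out.
    print(flattened_feature_size((64, 64)))  # 1024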

From c9bb8f7f6bf699dbd78d9c847d92a49f09525f81 Mon Sep 17 00:00:00 2001
From: Virginia Fernandez
Date: Fri, 19 Sep 2025 15:36:49 +0100
Subject: [PATCH 3/8] out should never be none.

Co-authored-by: Virginia Fernandez
---
 monai/networks/nets/diffusion_model_unet.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/monai/networks/nets/diffusion_model_unet.py b/monai/networks/nets/diffusion_model_unet.py
index f24386858b..2e013d4d3b 100644
--- a/monai/networks/nets/diffusion_model_unet.py
+++ b/monai/networks/nets/diffusion_model_unet.py
@@ -2063,10 +2063,9 @@
         h = h.reshape(h.shape[0], -1)

         # 5. out
-        if self.out is None:
-            self.out = nn.Sequential(
-                nn.Linear(h.shape[1], 512), nn.ReLU(), nn.Dropout(0.1), nn.Linear(512, self.out_channels)
-            )
+        self.out = nn.Sequential(
+            nn.Linear(h.shape[1], 512), nn.ReLU(), nn.Dropout(0.1), nn.Linear(512, self.out_channels)
+        )
         output: torch.Tensor = self.out(h)

         return output

From cbf5dc58892988ba47daae8f8a7abc3e0c951ba2 Mon Sep 17 00:00:00 2001
From: Virginia Fernandez
Date: Fri, 19 Sep 2025 15:57:42 +0100
Subject: [PATCH 4/8] Fix default

---
 monai/networks/nets/diffusion_model_unet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/networks/nets/diffusion_model_unet.py b/monai/networks/nets/diffusion_model_unet.py
index 2e013d4d3b..3e496979bf 100644
--- a/monai/networks/nets/diffusion_model_unet.py
+++ b/monai/networks/nets/diffusion_model_unet.py
@@ -1904,7 +1904,7 @@ def __init__(
         spatial_dims: int,
         in_channels: int,
         out_channels: int,
-        input_shape: Sequence[int],
+        input_shape: Sequence[int] = (64, 64),
         num_res_blocks: Sequence[int] | int = (2, 2, 2, 2),
         channels: Sequence[int] = (32, 64, 64, 64),
         attention_levels: Sequence[bool] = (False, False, True, True),

From 59325dc7346aeefa99750d062d11394d8e7ce8f3 Mon Sep 17 00:00:00 2001
From: Virginia Fernandez
Date: Fri, 19 Sep 2025 16:52:06 +0100
Subject: [PATCH 5/8] DCO Remediation Commit for Virginia Fernandez

I, Virginia Fernandez, hereby add my Signed-off-by to this commit: 537945fbce8f9a315ebaa19865ca895ce671ebbb
I, Virginia Fernandez, hereby add my Signed-off-by to this commit: d5856ecb1bd1126ac5ec5050ae79d885b1dfb2e5
I, Virginia Fernandez, hereby add my Signed-off-by to this commit: c9bb8f7f6bf699dbd78d9c847d92a49f09525f81
I, Virginia Fernandez, hereby add my Signed-off-by to this commit: cbf5dc58892988ba47daae8f8a7abc3e0c951ba2

Signed-off-by: Virginia Fernandez
---
 monai/networks/nets/diffusion_model_unet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/networks/nets/diffusion_model_unet.py b/monai/networks/nets/diffusion_model_unet.py
index 3e496979bf..1d86df845e 100644
--- a/monai/networks/nets/diffusion_model_unet.py
+++ b/monai/networks/nets/diffusion_model_unet.py
@@ -1894,7 +1894,7 @@ class DiffusionModelEncoder(nn.Module):
         num_head_channels: number of channels in each attention head.
         with_conditioning: if True add spatial transformers to perform conditioning.
         transformer_num_layers: number of layers of Transformer blocks to use.
-        cross_attention_dim: number of context dimensions to use.
+        cross_attention_dim: number of context dimensions to use.
         num_class_embeds: if specified (as an int), then this model will be class-conditional with `num_class_embeds` classes.
         upcast_attention: if True, upcast attention operations to full precision.
     """

From 2a75fe30fefe03fc5f8c58365b50275b63a5aa20 Mon Sep 17 00:00:00 2001
From: Virginia Fernandez
Date: Fri, 19 Sep 2025 17:00:08 +0100
Subject: [PATCH 6/8] Remove whitespace

Signed-off-by: Virginia Fernandez
---
 monai/networks/nets/diffusion_model_unet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/networks/nets/diffusion_model_unet.py b/monai/networks/nets/diffusion_model_unet.py
index 1d86df845e..3e496979bf 100644
--- a/monai/networks/nets/diffusion_model_unet.py
+++ b/monai/networks/nets/diffusion_model_unet.py
@@ -1894,7 +1894,7 @@ class DiffusionModelEncoder(nn.Module):
         num_head_channels: number of channels in each attention head.
         with_conditioning: if True add spatial transformers to perform conditioning.
         transformer_num_layers: number of layers of Transformer blocks to use.
-        cross_attention_dim: number of context dimensions to use.
+        cross_attention_dim: number of context dimensions to use.
         num_class_embeds: if specified (as an int), then this model will be class-conditional with `num_class_embeds` classes.
         upcast_attention: if True, upcast attention operations to full precision.
     """

From 0cee3730db96c6e417f1e2be77b11f7ebf0f5adb Mon Sep 17 00:00:00 2001
From: Virginia Fernandez
Date: Fri, 19 Sep 2025 17:00:45 +0100
Subject: [PATCH 7/8] DCO Remediation Commit for Virginia Fernandez

I, Virginia Fernandez, hereby add my Signed-off-by to this commit: 537945fbce8f9a315ebaa19865ca895ce671ebbb
I, Virginia Fernandez, hereby add my Signed-off-by to this commit: d5856ecb1bd1126ac5ec5050ae79d885b1dfb2e5
I, Virginia Fernandez, hereby add my Signed-off-by to this commit: c9bb8f7f6bf699dbd78d9c847d92a49f09525f81
I, Virginia Fernandez, hereby add my Signed-off-by to this commit: cbf5dc58892988ba47daae8f8a7abc3e0c951ba2

Signed-off-by: Virginia Fernandez
---
 monai/networks/nets/diffusion_model_unet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/networks/nets/diffusion_model_unet.py b/monai/networks/nets/diffusion_model_unet.py
index 3e496979bf..e5dfc03960 100644
--- a/monai/networks/nets/diffusion_model_unet.py
+++ b/monai/networks/nets/diffusion_model_unet.py
@@ -1891,7 +1891,7 @@ class DiffusionModelEncoder(nn.Module):
         norm_num_groups: number of groups for the normalization.
         norm_eps: epsilon for the normalization.
         resblock_updown: if True use residual blocks for downsampling.
-        num_head_channels: number of channels in each attention head.
+        num_head_channels: number of channels in each attention head.
         with_conditioning: if True add spatial transformers to perform conditioning.
         transformer_num_layers: number of layers of Transformer blocks to use.
         cross_attention_dim: number of context dimensions to use.

From 7ca0676c1a25d779ef3cab24fbd1f3ba285aff9d Mon Sep 17 00:00:00 2001
From: Virginia Fernandez
Date: Fri, 19 Sep 2025 17:02:08 +0100
Subject: [PATCH 8/8] Remove whitespace

Signed-off-by: Virginia Fernandez
---
 monai/networks/nets/diffusion_model_unet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monai/networks/nets/diffusion_model_unet.py b/monai/networks/nets/diffusion_model_unet.py
index e5dfc03960..3e496979bf 100644
--- a/monai/networks/nets/diffusion_model_unet.py
+++ b/monai/networks/nets/diffusion_model_unet.py
@@ -1891,7 +1891,7 @@ class DiffusionModelEncoder(nn.Module):
         norm_num_groups: number of groups for the normalization.
         norm_eps: epsilon for the normalization.
         resblock_updown: if True use residual blocks for downsampling.
-        num_head_channels: number of channels in each attention head.
+        num_head_channels: number of channels in each attention head.
         with_conditioning: if True add spatial transformers to perform conditioning.
         transformer_num_layers: number of layers of Transformer blocks to use.
         cross_attention_dim: number of context dimensions to use.
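
Note: taken together, the series lets DiffusionModelEncoder build its classification head from a new `input_shape` argument (default `(64, 64)` after patch 4) instead of relying on a hard-coded flattened dimension. A minimal usage sketch, assuming the class is exposed as monai.networks.nets.DiffusionModelEncoder and that its forward call takes `(x, timesteps, ...)` as in DiffusionModelUNet:

    import torch

    from monai.networks.nets import DiffusionModelEncoder

    # 2D encoder; the head is sized in __init__ from input_shape, so the
    # flattened dimension no longer needs to be hard-coded.
    encoder = DiffusionModelEncoder(
        spatial_dims=2,
        in_channels=1,
        out_channels=2,          # e.g. a two-class prediction head
        input_shape=(64, 64),    # spatial size only, without batch/channel dims
    )

    x = torch.randn(2, 1, 64, 64)            # (batch, channels, H, W)
    t = torch.randint(0, 1000, (2,)).long()  # diffusion timesteps
    logits = encoder(x, timesteps=t)         # expected shape: (2, 2)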