NVIDIA · CharlelieLrt · Apr 8, 2025 · Jan 28, 2025 · Feb 4, 2025 · Feb 5, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - DrivAerML dataset support in FIGConvNet example.
 - Retraining recipe for DoMINO from a pretrained model checkpoint
 - Added Datacenter CFD use case.
+- General purpose patching API for patch-based diffusion
+- New positional embedding selection strategy for CorrDiff SongUNet models
 
 ### Changed
 
@@ -25,6 +27,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Updated utils in `modulus.launch.logging` to avoid unnecessary `wandb` and `mlflow` imports
 - Moved to experiment-based Hydra config in Lagrangian-MGN example
 - Make data caching optional in `MeshDatapipe`
+- Simplified CorrDiff config files, updated default values
+- Refactored CorrDiff losses and samplers to use the patching API
+- Support for non-square images and patches in patch-based diffusion
 
 ### Deprecated
 

diff --git a/docs/api/modulus.utils.rst b/docs/api/modulus.utils.rst
@@ -40,7 +40,11 @@ Filesystem utils
 Generative utils
 ----------------
 
-.. automodule:: modulus.utils.generative.sampler
+.. automodule:: modulus.utils.generative.deterministic_sampler
+    :members:
+    :show-inheritance:
+
+.. automodule:: modulus.utils.generative.stochastic_sampler
     :members:
     :show-inheritance:
 
@@ -66,4 +70,11 @@ Weather / Climate utils
     :show-inheritance:
 
 .. automodule:: modulus.utils.zenith_angle
-    :show-inheritance:
+    :show-inheritance:
+
+Patching utils
+--------------
+
+.. automodule:: modulus.utils.patching
+    :members:
+    :show-inheritance:
diff --git a/docs/img/corrdiff_training_loss.png b/docs/img/corrdiff_training_loss.png
diff --git a/examples/generative/corrdiff/README.md b/examples/generative/corrdiff/README.md
diff --git a/...ative/corrdiff/conf/dataset/hrrrmini.yaml → ...generative/corrdiff/conf/base/__init__.py b/...ative/corrdiff/conf/dataset/hrrrmini.yaml → ...generative/corrdiff/conf/base/__init__.py
@@ -13,8 +13,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-type: hrrr_mini
-data_path: /data/corrdiff-mini/hrrr_mini_train.nc
-stats_path: /data/corrdiff-mini/stats.json
-output_variables: ['10u', '10v']
diff --git a/...rative/corrdiff/conf/config_generate.yaml → ...ve/corrdiff/conf/base/dataset/custom.yaml b/...rative/corrdiff/conf/config_generate.yaml → ...ve/corrdiff/conf/base/dataset/custom.yaml
@@ -14,23 +14,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-hydra:
-  job:
-    chdir: true
-    name: generation
-  run:
-    dir: ./outputs/${hydra:job.name}
-
-# Get defaults
-defaults:
-
-  # Dataset
-  - dataset/cwb_generate
-
-  # Sampler
-  - sampler/stochastic
-  #- sampler/deterministic
-
-  # Generation
-  - generation/base
-    #- generation/patched_based
+# Dataset type. Must be overridden.
+type: ???
+# Path to .nc data file. Must be overridden.
+data_path: ???
+# Path to json stats file. Must be overridden.
+stats_path: ???
+# Names of input channels. Must be overridden.
+input_variables: ???
+# Names of output channels. Must be overridden.
+output_variables: ???
+# Names of invariant variables. Optional.
+invariant_variables: ???
diff --git a/...tive/corrdiff/conf/dataset/cwb_train.yaml → ...ative/corrdiff/conf/base/dataset/cwb.yaml b/...tive/corrdiff/conf/dataset/cwb_train.yaml → ...ative/corrdiff/conf/base/dataset/cwb.yaml
@@ -1,4 +1,3 @@
-
 # SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
 # SPDX-FileCopyrightText: All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
@@ -15,15 +14,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Dataset type. Do not modify.
 type: cwb
-data_path: /code/2023-01-24-cwb-4years.zarr
+# Path to data file. Must be overridden.
+data_path: ???
+# Indices of input channels
 in_channels: [0, 1, 2, 3, 4, 9, 10, 11, 12, 17, 18, 19]
+# Indices of output channels
 out_channels: [0, 1, 2, 3]
+# Shape of the image
 img_shape_x: 448
 img_shape_y: 448
+# Add grid coordinates to the image
 add_grid: true
+# Factor to downscale the image
 ds_factor: 4
+# Path to min and max values of the data
 min_path: null
 max_path: null
+# Path to global means of the data
 global_means_path: null
+# Path to global stds of the data
 global_stds_path: null
diff --git a/...tive/corrdiff/conf/dataset/gefs_hrrr.yaml → ...corrdiff/conf/base/dataset/gefs_hrrr.yaml b/...tive/corrdiff/conf/dataset/gefs_hrrr.yaml → ...corrdiff/conf/base/dataset/gefs_hrrr.yaml
@@ -14,13 +14,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Dataset type. Do not modify.
 type: gefs_hrrr
-data_path: /data
-stats_path: /data/stats.json
+# Path to .nc data file. Must be overridden.
+data_path: ???
+# Path to json stats file. Must be overridden.
+stats_path: ???
+# Names of output channels.
 output_variables: ["u10m", "v10m", "t2m", "precip", "cat_snow", "cat_ice", "cat_freez", "cat_rain", "cat_none"]
+# Names of probability variables.
 prob_variables: ["cat_snow", "cat_ice", "cat_freez", "cat_rain"]
+# Names of input surface variables.
 input_surface_variables: ["u10m", "v10m", "t2m", "q2m", "sp", "msl", "precipitable_water"]
+# Names of input isobaric variables.
 input_isobaric_variables: ['u1000', 'u925', 'u850', 'u700', 'u500', 'u250', 'v1000', 'v925', 'v850', 'v700', 'v500', 'v250', 'z1000', 'z925', 'z850', 'z700', 'z500', 'z200', 't1000', 't925', 't850', 't700', 't500', 't100',  'r1000', 'r925', 'r850', 'r700', 'r500', 'r100']
+# Factor to downscale the image.
 ds_factor: 4
 train: False
-hrrr_window: [[1,1057], [4,1796]] # need dims to be divisible by 16 [[0,1024], [0,1024]]
+# Years to train the model.
+train_years: [2020, 2021, 2022, 2023]
+# Years to validate the model.
+valid_years: [2024]
+# Whether to normalize the data.
+normalize: True
+# Whether to shard the data.
+shard: False
+overfit: False
+# Whether to use all the data.
+use_all: False
+sample_shape: [-1, -1]
+hrrr_window: [[1,1057], [4,1796]] # need dims to be divisible by 16
diff --git a/.../model/corrdiff_regression_gefs_hrrr.yaml → ...corrdiff/conf/base/dataset/hrrr_mini.yaml b/.../model/corrdiff_regression_gefs_hrrr.yaml → ...corrdiff/conf/base/dataset/hrrr_mini.yaml
@@ -14,8 +14,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-name: lt_aware_ce_regression
-  # Name of the preconditioner
-hr_mean_conditioning: False
-  # High-res mean (regression's output) as additional condition
-
+# Dataset type
+type: hrrr_mini
+# Path to .nc data file. Must be overridden.
+data_path: ???
+# Path to json stats file. Must be overridden.
+stats_path: ???
+# Names of output channels. Must be overridden.
+output_variables: ['10u', '10v']
diff --git a/...rrdiff/conf/generation/patched_based.yaml → ...rrdiff/conf/base/generation/base_all.yaml b/...rrdiff/conf/generation/patched_based.yaml → ...rrdiff/conf/base/generation/base_all.yaml
@@ -14,35 +14,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-num_ensembles: 64
-  # Number of ensembles to generate per input
-seed_batch_size: 4
-  # Size of the batched inference
+defaults:
+    - sampler: stochastic
+    # The stochastic sampler is recommended. Change to deterministic if needed.
+
+num_ensembles: ???
+# Number of ensembles to generate per input. Should be overridden.
+seed_batch_size: ???
+# Size of the batched inference. Should be overridden.
 inference_mode: all
-  # Choose between "all" (regression + diffusion), "regression" or "diffusion"
-patch_size: 448
-patch_shape_x: 448
-patch_shape_y: 448
-  # Patch size. Patch-based sampling will be utilized if these dimensions differ from 
-  # img_shape_x and img_shape_y
-overlap_pixels: 4
-  # Number of overlapping pixels between adjacent patches
-boundary_pixels: 2
-  # Number of boundary pixels to be cropped out. 2 is recommanded to address the boundary
-  # artifact.
+# Choose between "all" (regression + diffusion), "regression" or "diffusion"
 hr_mean_conditioning: true
-gridtype: learnable
-N_grid_channels: 100
-sample_res: full
-  # Sampling resolution
-times_range: null
-times:
-  - 2021-02-02T00:00:00
-  - 2021-03-02T00:00:00
-  - 2021-04-02T00:00:00
-  # hurricane
-  - 2021-09-12T00:00:00
-  - 2021-09-12T12:00:00
+# Whether to use hr_mean_conditioning
+times_range: ???
+# Time range to generate. Should be overridden.
+has_lead_time: False
+# Whether the model has lead time.
 
 perf:
   force_fp16: false
@@ -55,9 +42,3 @@ perf:
   num_writer_workers: 1
     # number of workers to use for writing file
     # To support multiple workers a threadsafe version of the netCDF library must be used
-
-io:
-  res_ckpt_filename: diffusion_checkpoint.mdlus
-    # Checkpoint filename for the diffusion model  
-  reg_ckpt_filename: regression_checkpoint.mdlus
-    # Checkpoint filename for the mean predictor model
diff --git a/.../conf/model/corrdiff_regression_mini.yaml → ...iff/conf/base/generation/non_patched.yaml b/.../conf/model/corrdiff_regression_mini.yaml → ...iff/conf/base/generation/non_patched.yaml
@@ -15,9 +15,6 @@
 # limitations under the License.
 
 defaults:
-  - corrdiff_regression
+    - base_all
 
-model_args:
-  model_channels: 64
-  channel_mult: [1, 2, 2]
-  attn_resolutions: [16]
+patching: False
diff --git a/...conf/config_training_gefs_regression.yaml → ...orrdiff/conf/base/generation/patched.yaml b/...conf/config_training_gefs_regression.yaml → ...orrdiff/conf/base/generation/patched.yaml
@@ -14,21 +14,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-hydra:
-  job:
-    chdir: true
-    name: gefs_hrrr_regression
-  run:
-    dir: ./outputs/${hydra:job.name}
-
-# Get defaults
 defaults:
+    - base_all
 
-  # Dataset
-  - dataset/gefs_hrrr
-
-  # Model
-  - model/corrdiff_regression_gefs_hrrr
-
-  # Training
-  - training/corrdiff_regression_gefs_hrrr
+patching: True
+# Use patch-based sampling
+overlap_pix: 4
+# Number of overlapping pixels between adjacent patches
+boundary_pix: 2
+# Number of boundary pixels to be cropped out. 2 is recommended to address the boundary
+# artifact.
+patch_shape_x: ???
+patch_shape_y: ???
+  # Patch size. Patch-based sampling will be utilized if these dimensions
+  # differ from img_shape_x and img_shape_y. Needs to be overridden.
diff --git a/.../corrdiff/conf/sampler/deterministic.yaml → ...ase/generation/sampler/deterministic.yaml b/.../corrdiff/conf/sampler/deterministic.yaml → ...ase/generation/sampler/deterministic.yaml
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# @package _global_.sampler
 
 type: deterministic
 num_steps: 9

diff --git a/...ive/corrdiff/conf/sampler/stochastic.yaml → ...f/base/generation/sampler/stochastic.yaml b/...ive/corrdiff/conf/sampler/stochastic.yaml → ...f/base/generation/sampler/stochastic.yaml
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# @package _global_.sampler
+
 type: stochastic
-boundary_pix: 2
-overlap_pix: 4
 #overlap_pix has to be no less than 2*boundary_pix
diff --git a/examples/generative/corrdiff/conf/base/model/diffusion.yaml b/examples/generative/corrdiff/conf/base/model/diffusion.yaml
@@ -0,0 +1,37 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: diffusion
+# Model type.
+hr_mean_conditioning: True
+# Recommended to use high-res conditioning for diffusion.
+scale_cond_input: False
+# If true, also scales the input conditioning. Recommended value: False.
+
+# Standard model parameters.
+model_args:
+  gridtype: "sinusoidal"
+  # Type of positional grid to use: 'sinusoidal', 'learnable', 'linear'.
+  # Controls how positional information is encoded.
+  N_grid_channels: 4
+  # Number of channels for positional grid embeddings
+  embedding_type: "zero"
+  # Type of timestep embedding: 'positional' for DDPM++, 'fourier' for NCSN++,
+  # 'zero' for none
+  model_type: "SongUNetPosEmbd"
+  # Type of model architecture: 'SongUNetPosLtEmbd' for lead-time aware UNet
+  # with positional embeddings, 'SongUNetPosEmbd' for UNet with positional
+  # embeddings, 'DhariwalUNet' for UNet with Fourier embeddings
diff --git a/examples/generative/corrdiff/conf/base/model/lt_aware_ce_regression.yaml b/examples/generative/corrdiff/conf/base/model/lt_aware_ce_regression.yaml
@@ -0,0 +1,39 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+name: lt_aware_ce_regression
+  # Model type.
+hr_mean_conditioning: False
+  # No high-res conditioning for regression.
+
+# Default model parameters.
+model_args:
+  img_channels: 4
+  # Number of color channels in the model
+  N_grid_channels: 4
+  # Number of channels for positional grid embeddings
+  embedding_type: "zero"
+  # Type of timestep embedding: 'positional' for DDPM++, 'fourier' for NCSN++,
+  # 'zero' for none
+  lead_time_channels: 4
+  # Number of channels for lead-time embeddings
+  lead_time_steps: 9
+  # Number of lead-time steps
+  model_type: "SongUNetPosLtEmbd"
+  # Type of model architecture: 'SongUNetPosLtEmbd' for lead-time aware UNet with
+  # positional embeddings, 'SongUNetPosEmbd' for UNet with positional embeddings,
+  # 'DhariwalUNet' for UNet with Fourier embeddings