Skip to content

Commit

Permalink
Merge branch 'main' into sweep_updates
Browse files Browse the repository at this point in the history
  • Loading branch information
dkazanc authored Aug 16, 2024
2 parents 3b893ba + 6e6bd1e commit c89060f
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -139,24 +139,14 @@ def _calc_memory_bytes_FBP(
# BUT: this swapaxis happens after the cudaArray inputs and the input swapaxis arrays are dropped,
# so it does not add to the memory overall

# NOTE
# Although the assumption that the memory will be cleared after filtration step (the fft plans)
# does hold for some WSs, on the cluster it results in OOM error in the IFFT step.
# It is not very clear why it is the case so far, therefore the workaround is to account for all memory required
# for the FBP step (filtering part + ASTRA backprojection). This is not ideal as it uses a lot of memory
# and therefore the blocks will be smaller making I/O suboptimal.

# The commented code below is how the memory should be estimated in principle
# if projection_mem_size > filtersync_size:
# tot_memory_bytes = int(filtersync_output_slice_size + projection_mem_size)
# else:
# tot_memory_bytes = int(filtersync_output_slice_size + filtersync_size + recon_output_size)
if projection_mem_size > filtersync_size:
tot_memory_bytes = int(filtersync_output_slice_size + projection_mem_size)
else:
# here we do not add recon_output_size as we assume that at least one fft plan will be released
# before the backprojection step which is SMALLER than the current estimation.
tot_memory_bytes = int(filtersync_output_slice_size + filtersync_size)

# this accounts for the memory used for filtration AND backprojection.
tot_memory_bytes = int(
filtersync_output_slice_size + filtersync_size + projection_mem_size
)

return (tot_memory_bytes, fixed_amount)


Expand Down
6 changes: 6 additions & 0 deletions httomo/runner/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,12 @@ def data(self, new_data: generic_array):
self._global_shape = make_3d_shape_from_shape(global_shape)
self._chunk_shape = make_3d_shape_from_shape(chunk_shape)

@data.deleter
def data(self):
    """Drop the block's references to its data and the cached shape tuples.

    Deleting ``self._data`` releases this object's reference to the
    underlying array so its memory can be reclaimed; the derived
    ``_global_shape`` and ``_chunk_shape`` are deleted alongside it since
    they describe the now-removed data.

    NOTE(review): elsewhere in this change ``del block.data`` is invoked
    right before a GPU memory clean-up call, so the intent appears to be
    freeing a CuPy array ahead of pool cleanup — confirm against callers.
    """
    del self._data
    del self._global_shape
    del self._chunk_shape

@property
def is_padded(self) -> bool:
return self._padding != (0, 0)
Expand Down
5 changes: 5 additions & 0 deletions httomo/runner/task_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,12 @@ def _execute_section(self, section: Section, section_index: int = 0):
block.global_index,
(end_sink - start_sink) * 1e-9,
)

# remove the reference pointing to the CuPy array before
# calling the clean-up routine
del block.data
gpumem_cleanup()

start_source = time.perf_counter_ns()

self._log_pipeline(
Expand Down
16 changes: 5 additions & 11 deletions tests/test_backends/test_httomolibgpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,23 +435,15 @@ def test_recon_FBP_memoryhook(
kwargs["recon_mask_radius"] = 0.8

hook = MaxMemoryHook()
# add another alloc using a mock, to capture the cudaArray that is allocated in astra
p1 = mocker.patch(
"tomobar.astra_wrappers.astra_base.astra.algorithm.run",
side_effect=lambda id, it: hook.malloc_postprocess(
0, data.nbytes, data.nbytes, 0, 0
),
)
p2 = mocker.patch(
"tomobar.astra_wrappers.astra_base.astra.algorithm.delete",
"tomobar.astra_wrappers.astra_base.astra.data3d.delete",
side_effect=lambda id: hook.free_postprocess(0, data.nbytes, 0, 0),
)

with hook:
recon_data = FBP(cp.copy(data), **kwargs)

p1.assert_called_once()
p2.assert_called_once()

# make sure estimator function is within range (80% min, 100% max)
max_mem = (
Expand All @@ -472,7 +464,9 @@ def test_recon_FBP_memoryhook(
# the estimated_memory_mb should be LARGER or EQUAL to max_mem_mb
# the resulting percent value should not deviate from max_mem on more than 20%
assert estimated_memory_mb >= max_mem_mb
assert percents_relative_maxmem <= 150 # big underestimation, to be looked into
assert (
percents_relative_maxmem <= 100
) # overestimation happens here because of the ASTRA's part


@pytest.mark.cupy
Expand Down Expand Up @@ -597,7 +591,7 @@ def test_rescale_to_int_memoryhook(


@pytest.mark.cupy
@pytest.mark.parametrize("slices", [3, 8, 30, 80])
@pytest.mark.parametrize("slices", [3, 8, 30, 50])
@pytest.mark.parametrize("det_x", [600, 2160])
def test_sino_360_to_180_memoryhook(
ensure_clean_memory,
Expand Down

0 comments on commit c89060f

Please sign in to comment.