From ddb46575819605c540f44390a58e8571f8bef466 Mon Sep 17 00:00:00 2001 From: dkazanc Date: Wed, 10 Jul 2024 11:11:37 +0100 Subject: [PATCH 01/15] adding pre-commit hook and black formatting --- httomo/cli.py | 17 ++- httomo/common.py | 12 +- httomo/data/hdf/_utils/reslice.py | 13 +- httomo/data/hdf/loaders.py | 1 + httomo/data/mpiutil.py | 8 +- httomo/logger.py | 4 +- httomo/method_wrappers/generic.py | 16 ++- httomo/method_wrappers/images.py | 6 +- httomo/method_wrappers/reconstruction.py | 6 +- httomo/method_wrappers/save_intermediate.py | 48 ++++--- httomo/method_wrappers/stats_calc.py | 9 +- .../supporting_funcs/misc/rescale.py | 11 +- .../supporting_funcs/prep/normalize.py | 13 +- .../supporting_funcs/recon/algorithm.py | 6 +- httomo/monitors/__init__.py | 16 +-- httomo/monitors/aggregate.py | 21 ++- httomo/monitors/benchmark.py | 4 +- httomo/monitors/summary.py | 26 ++-- httomo/runner/auxiliary_data.py | 55 ++++---- httomo/runner/block_split.py | 6 +- httomo/runner/dataset.py | 70 ++++++---- httomo/runner/dataset_store_interfaces.py | 16 +-- httomo/runner/gpu_utils.py | 5 +- httomo/runner/method_wrapper.py | 86 ++++++------ httomo/runner/methods_repository_interface.py | 2 +- httomo/runner/monitoring_interface.py | 29 ++--- httomo/runner/pipeline.py | 6 +- httomo/ui_layer.py | 1 + httomo/utils.py | 4 + httomo/yaml_checker.py | 4 +- tests/conftest.py | 9 +- tests/data/test_dataset_store.py | 95 ++++++++------ tests/loaders/test_standard_tomo_loader.py | 12 +- tests/method_wrappers/test_generic.py | 40 ++++-- tests/method_wrappers/test_reconstruction.py | 35 +++-- tests/method_wrappers/test_stats_calc.py | 1 - tests/monitors/test_aggregate.py | 17 ++- tests/monitors/test_benchmark.py | 122 ++++++++++++------ tests/monitors/test_summary.py | 27 +++- tests/runner/test_block_split.py | 1 - tests/runner/test_dataset_store_interfaces.py | 3 +- tests/runner/test_gpu_utils.py | 1 - tests/runner/test_pipeline.py | 2 +- tests/runner/test_section.py | 10 +- 
.../samples/python_templates/pipeline_cpu1.py | 116 ++++++++--------- tests/test_cli.py | 29 +++-- tests/test_loader.py | 8 +- tests/test_method_query.py | 10 +- tests/test_methods.py | 58 +++++---- tests/test_pipeline.py | 94 +++++++++++--- tests/test_utils.py | 6 +- tests/test_yaml_checker.py | 81 +++--------- tests/testing_utils.py | 24 ++-- 53 files changed, 752 insertions(+), 570 deletions(-) diff --git a/httomo/cli.py b/httomo/cli.py index 1a0981998..669eb74c1 100644 --- a/httomo/cli.py +++ b/httomo/cli.py @@ -38,7 +38,8 @@ def main(): @click.argument( "in_data_file", type=click.Path(exists=True, dir_okay=False, path_type=Path), - required=False, default=None, + required=False, + default=None, ) def check(yaml_config: Path, in_data_file: Path = None): """Check a YAML pipeline file for errors.""" @@ -84,27 +85,29 @@ def check(yaml_config: Path, in_data_file: Path = None): "--max-cpu-slices", type=click.INT, default=64, - help="Maximum number of slices to use for a block for CPU-only sections (default: 64)" + help="Maximum number of slices to use for a block for CPU-only sections (default: 64)", ) @click.option( "--max-memory", type=click.STRING, default="0", - help="Limit the amount of memory used by the pipeline to the given memory (supports strings like 3.2G or bytes)" + help="Limit the amount of memory used by the pipeline to the given memory (supports strings like 3.2G or bytes)", ) @click.option( "--monitor", type=click.STRING, multiple=True, default=[], - help=("Add monitor to the runner (can be given multiple times). " + - f"Available monitors: {', '.join(MONITORS_MAP.keys())}") + help=( + "Add monitor to the runner (can be given multiple times). " + + f"Available monitors: {', '.join(MONITORS_MAP.keys())}" + ), ) @click.option( "--monitor-output", - type=click.File('w'), + type=click.File("w"), default=sys.stdout, - help="File to store the monitoring output. Defaults to '-', which denotes stdout" + help="File to store the monitoring output. 
Defaults to '-', which denotes stdout", ) @click.option( "--syslog-host", diff --git a/httomo/common.py b/httomo/common.py index 463e3579c..4c1152a04 100644 --- a/httomo/common.py +++ b/httomo/common.py @@ -88,10 +88,10 @@ class ResliceInfo: class PlatformSection: """ Data class to represent a section of the pipeline. Section can combine methods - if they run on the same platform (cpu or gpu) and have the same pattern. + if they run on the same platform (cpu or gpu) and have the same pattern. The sections can be further divided if necessary if the results of the method - needed to be saved. - NOTE: More fine division of sections into subsections will slow down + needed to be saved. + NOTE: More fine division of sections into subsections will slow down the pipeline. Mainly used to iterate through GPU memory in chunks. @@ -138,14 +138,14 @@ class RunMethodInfo: save the result into intermediate dataset task_idx: int Index of the local task in the section being run - task_idx_global: int + task_idx_global: int Index of the global task (method) in the pipeline package_name: str The name of the package the method is imported from method_name: str The name of the method being executed global_statistics: bool - Whether global statistics needs to be calculated on the output of the method. + Whether global statistics needs to be calculated on the output of the method. 
""" dict_params_method: Dict[str, Any] = field(default_factory=dict) @@ -166,6 +166,7 @@ class PreProcessInfo: Class holding execution info for each method in the pre-processing stage of the pipeline """ + params: Dict[str, Any] method_name: str module_path: str @@ -177,6 +178,7 @@ class LoaderInfo: """ Class holding execution info for the loader """ + params: Dict[str, Any] method_name: str method_func: Callable diff --git a/httomo/data/hdf/_utils/reslice.py b/httomo/data/hdf/_utils/reslice.py index aa12a0d42..a18fccd3e 100644 --- a/httomo/data/hdf/_utils/reslice.py +++ b/httomo/data/hdf/_utils/reslice.py @@ -66,7 +66,7 @@ def reslice( mpiutil.alltoall(to_scatter), axis=current_slice_dim - 1 ) - start_idx = 0 if comm.rank == 0 else split_indices[comm.rank-1] + start_idx = 0 if comm.rank == 0 else split_indices[comm.rank - 1] return new_data, next_slice_dim, start_idx @@ -98,7 +98,7 @@ def reslice_filebased( detector_x : int det_x (horizontal) detector of the loaded dataset. detector_y : int - det_y (vertical) detector of the loaded dataset. + det_y (vertical) detector of the loaded dataset. comm : Comm The MPI communicator to be used. 
Returns: @@ -114,7 +114,7 @@ def reslice_filebased( slices_no_in_chunks = 1 chunks_data = list(data_shape) chunks_data[next_slice_dim - 1] = slices_no_in_chunks - + log_once( "<-------Reslicing/rechunking the data-------->", level=logging.DEBUG, @@ -138,6 +138,7 @@ def reslice_filebased( return data, next_slice_dim, start_idx + def single_sino_reslice( data: numpy.ndarray, idx: int, @@ -159,7 +160,7 @@ def single_sino_reslice( if mpiutil.rank == 0: # Define the numpy array that will hold the single sinogram that has # been gathered from data from all MPI processes - recvbuf = numpy.empty(data_shape[0]*data_shape[2], dtype=NUMPY_DTYPE) + recvbuf = numpy.empty(data_shape[0] * data_shape[2], dtype=NUMPY_DTYPE) else: recvbuf = None # From the full projections that an MPI process has, send the data that @@ -171,9 +172,9 @@ def single_sino_reslice( sizes_rec = mpiutil.comm.gather(sendbuf.size) # Gather the data into the rank 0 process mpiutil.comm.Gatherv( - (sendbuf, data.shape[0]*data.shape[2], MPI_DTYPE), + (sendbuf, data.shape[0] * data.shape[2], MPI_DTYPE), (recvbuf, sizes_rec, MPI_DTYPE), - root=0 + root=0, ) if mpiutil.rank == 0: diff --git a/httomo/data/hdf/loaders.py b/httomo/data/hdf/loaders.py index 21abbd16c..be38cc8bf 100644 --- a/httomo/data/hdf/loaders.py +++ b/httomo/data/hdf/loaders.py @@ -16,6 +16,7 @@ "standard_tomo", ] + @dataclass class LoaderData: data: ndarray diff --git a/httomo/data/mpiutil.py b/httomo/data/mpiutil.py index ec97fb435..4fa9ed38e 100644 --- a/httomo/data/mpiutil.py +++ b/httomo/data/mpiutil.py @@ -110,9 +110,7 @@ def alltoall(arrays: List[np.ndarray]) -> List[np.ndarray]: factor = ( arrays[0].shape[0] if dim0equal - else arrays[0].shape[1] - if dim1equal - else arrays[0].shape[2] + else arrays[0].shape[1] if dim1equal else arrays[0].shape[2] ) dtype1 = dtype.Create_contiguous(factor).Commit() # sanity check - this should always pass @@ -122,9 +120,7 @@ def alltoall(arrays: List[np.ndarray]) -> List[np.ndarray]: sizes_rec1 = [s // 
factor for s in sizes_rec] # now send the same data, but with the adjusted size+datatype (output is identical) - comm.Alltoallv( - (fullinput, sizes_send1, dtype1), (fulloutput, sizes_rec1, dtype1) - ) + comm.Alltoallv((fullinput, sizes_send1, dtype1), (fulloutput, sizes_rec1, dtype1)) # build list of output arrays cumsizes = np.cumsum(sizes_rec) diff --git a/httomo/logger.py b/httomo/logger.py index 9a80a3651..fbec6ff73 100644 --- a/httomo/logger.py +++ b/httomo/logger.py @@ -14,7 +14,9 @@ def setup_logger(out_path: Path): # Concise logs displayed in terminal logger.add(sink=sys.stdout, level="INFO", colorize=True, format="{message}") # Concise logs written to file - logger.add(sink=concise_logfile_path, level="INFO", colorize=False, format="{message}") + logger.add( + sink=concise_logfile_path, level="INFO", colorize=False, format="{message}" + ) # Verbose logs written to file logger.add(sink=verbose_logfile_path, level="DEBUG", colorize=False, enqueue=True) # Verbose logs sent to syslog server in GELF format diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py index 5ff54482c..f5950f1e1 100644 --- a/httomo/method_wrappers/generic.py +++ b/httomo/method_wrappers/generic.py @@ -121,7 +121,7 @@ def __init__( raise ValueError("GPU is not available, please use only CPU methods") self._side_output: Dict[str, Any] = dict() - + self._gpu_time_info = GpuTimeInfo() if gpu_enabled: @@ -168,7 +168,7 @@ def is_cpu(self) -> bool: @property def is_gpu(self) -> bool: return not self.is_cpu - + @property def gpu_time(self) -> GpuTimeInfo: return self._gpu_time_info @@ -240,11 +240,15 @@ def _build_kwargs( elif p == "gpu_id": assert gpu_enabled, "methods with gpu_id parameter require GPU support" ret[p] = self._gpu_id - elif p == 'axis' and p in remaining_dict_params and remaining_dict_params[p] == 'auto': + elif ( + p == "axis" + and p in remaining_dict_params + and remaining_dict_params[p] == "auto" + ): ret[p] = self.pattern.value pass elif p in 
remaining_dict_params: - ret[p] = remaining_dict_params[p] + ret[p] = remaining_dict_params[p] elif p in self._params_with_defaults: pass else: @@ -288,7 +292,9 @@ def execute(self, block: DataSetBlock) -> DataSetBlock: block = self._transfer_data(block) with catch_gputime() as t: block = self._preprocess_data(block) - args = self._build_kwargs(self._transform_params(self._config_params), block) + args = self._build_kwargs( + self._transform_params(self._config_params), block + ) block = self._run_method(block, args) block = self._postprocess_data(block) diff --git a/httomo/method_wrappers/images.py b/httomo/method_wrappers/images.py index 6e527563c..8d93494ca 100644 --- a/httomo/method_wrappers/images.py +++ b/httomo/method_wrappers/images.py @@ -44,8 +44,8 @@ def __init__( self["out_dir"] = out_dir if out_dir is not None else httomo.globals.run_out_dir if "comm_rank" in self.parameters: raise ValueError( - "save_to_images with the comm_rank parameter is broken. " + - "Please upgrade to the latest version, taking an offset parameter" + "save_to_images with the comm_rank parameter is broken. 
" + + "Please upgrade to the latest version, taking an offset parameter" ) # Images execute is leaving original data on the device where it is, @@ -61,7 +61,7 @@ def execute( **self._config_params, "offset": block.global_index[_get_slicing_dim(self.pattern) - 1], } - + args = self._build_kwargs(self._transform_params(config_params), block) if block.is_gpu: with catchtime() as t: diff --git a/httomo/method_wrappers/reconstruction.py b/httomo/method_wrappers/reconstruction.py index d02edeb8f..5fe0c3c7e 100644 --- a/httomo/method_wrappers/reconstruction.py +++ b/httomo/method_wrappers/reconstruction.py @@ -18,8 +18,10 @@ def should_select_this_class(cls, module_path: str, method_name: str) -> bool: def _preprocess_data(self, block: DataSetBlock) -> DataSetBlock: # this is essential for the angles cutting below to be valid - assert self.pattern == Pattern.sinogram, "reconstruction methods must be sinogram" - + assert ( + self.pattern == Pattern.sinogram + ), "reconstruction methods must be sinogram" + # for 360 degrees data the angular dimension will be truncated while angles are not. 
# Truncating angles if the angular dimension has got a different size datashape0 = block.data.shape[0] diff --git a/httomo/method_wrappers/save_intermediate.py b/httomo/method_wrappers/save_intermediate.py index 08822beb6..a89a0a469 100644 --- a/httomo/method_wrappers/save_intermediate.py +++ b/httomo/method_wrappers/save_intermediate.py @@ -16,23 +16,33 @@ class SaveIntermediateFilesWrapper(GenericMethodWrapper): - + @classmethod def should_select_this_class(cls, module_path: str, method_name: str) -> bool: return method_name == "save_intermediate_data" - - def __init__(self, - method_repository: MethodRepository, - module_path: str, - method_name: str, - comm: Comm, - save_result: Optional[bool] = None, - output_mapping: Dict[str, str] = {}, - out_dir: Optional[os.PathLike] = None, - prev_method: Optional[MethodWrapper] = None, - loader: Optional[LoaderInterface] = None, - **kwargs): - super().__init__(method_repository, module_path, method_name, comm, save_result, output_mapping, **kwargs) + + def __init__( + self, + method_repository: MethodRepository, + module_path: str, + method_name: str, + comm: Comm, + save_result: Optional[bool] = None, + output_mapping: Dict[str, str] = {}, + out_dir: Optional[os.PathLike] = None, + prev_method: Optional[MethodWrapper] = None, + loader: Optional[LoaderInterface] = None, + **kwargs, + ): + super().__init__( + method_repository, + module_path, + method_name, + comm, + save_result, + output_mapping, + **kwargs, + ) assert loader is not None self._loader = loader assert prev_method is not None @@ -40,14 +50,16 @@ def __init__(self, filename = f"{prev_method.task_id}-{prev_method.package_name}-{prev_method.method_name}" if prev_method.recon_algorithm is not None: filename += f"-{prev_method.recon_algorithm}" - + if out_dir is None: out_dir = httomo.globals.run_out_dir assert out_dir is not None - self._file = h5py.File(f"{out_dir}/{filename}.h5", "w", driver="mpio", comm=comm) + self._file = h5py.File( + 
f"{out_dir}/{filename}.h5", "w", driver="mpio", comm=comm + ) # make sure file gets closed properly weakref.finalize(self, self._file.close) - + def execute(self, block: DataSetBlock) -> DataSetBlock: # we overwrite the whole execute method here, as we do not need any of the helper # methods from the Generic Wrapper @@ -72,7 +84,7 @@ def execute(self, block: DataSetBlock) -> DataSetBlock: detector_y=self._loader.detector_y, angles=block.angles, ) - + if block.is_last_in_chunk: self._file.close() diff --git a/httomo/method_wrappers/stats_calc.py b/httomo/method_wrappers/stats_calc.py index 2a3d28e6e..5794eac18 100644 --- a/httomo/method_wrappers/stats_calc.py +++ b/httomo/method_wrappers/stats_calc.py @@ -49,9 +49,9 @@ def __init__( def _transfer_data(self, dataset: DataSetBlock): # don't transfer anything (either way) at this point return dataset - + def _run_method(self, dataset: DataSetBlock, args: Dict[str, Any]) -> DataSetBlock: - # transfer data to GPU if we can / have it available (always faster), + # transfer data to GPU if we can / have it available (always faster), # but don't want to fail if we don't have a GPU (underlying method works for both) # and don't touch original dataset if gpu_enabled and dataset.is_cpu: @@ -60,11 +60,8 @@ def _run_method(self, dataset: DataSetBlock, args: Dict[str, Any]) -> DataSetBlo self._gpu_time_info.host2device += t.elapsed ret = self._method(**args) return self._process_return_type(ret, dataset) - - def _process_return_type( - self, ret: Any, input_block: DataSetBlock - ) -> DataSetBlock: + def _process_return_type(self, ret: Any, input_block: DataSetBlock) -> DataSetBlock: assert isinstance(ret, tuple), "expected return type is a tuple" assert len(ret) == 4, "A 4-tuple of stats values is expected" diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/misc/rescale.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/misc/rescale.py index b44ebf17e..2e0ef590e 100644 
--- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/misc/rescale.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/misc/rescale.py @@ -7,9 +7,9 @@ def _calc_memory_bytes_rescale_to_int( - non_slice_dims_shape: Tuple[int, int], - dtype: np.dtype, - **kwargs, + non_slice_dims_shape: Tuple[int, int], + dtype: np.dtype, + **kwargs, ) -> Tuple[int, int]: bits: int = kwargs["bits"] if bits == 8: @@ -19,4 +19,7 @@ def _calc_memory_bytes_rescale_to_int( else: itemsize = 4 safety = 128 - return (int(np.prod(non_slice_dims_shape)) * (dtype.itemsize + itemsize) + safety, 0) + return ( + int(np.prod(non_slice_dims_shape)) * (dtype.itemsize + itemsize) + safety, + 0, + ) diff --git a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/normalize.py b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/normalize.py index 261ea6380..6b32d8c9b 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/normalize.py +++ b/httomo/methods_database/packages/external/httomolibgpu/supporting_funcs/prep/normalize.py @@ -6,18 +6,19 @@ "_calc_memory_bytes_normalize", ] + def _calc_memory_bytes_normalize( - non_slice_dims_shape: Tuple[int, int], - dtype: np.dtype, - **kwargs, + non_slice_dims_shape: Tuple[int, int], + dtype: np.dtype, + **kwargs, ) -> Tuple[int, int]: - # this function changes the data type + # this function changes the data type in_slice_mem = np.prod(non_slice_dims_shape) * dtype.itemsize out_slice_mem = np.prod(non_slice_dims_shape) * np.float32().itemsize - + # fixed cost for keeping mean of flats and darks mean_mem = int(np.prod(non_slice_dims_shape) * np.float32().itemsize * 2) tot_memory_bytes = int(in_slice_mem + out_slice_mem) - return (tot_memory_bytes, mean_mem) \ No newline at end of file + return (tot_memory_bytes, mean_mem) diff --git a/httomo/methods_database/packages/external/tomopy/supporting_funcs/recon/algorithm.py 
b/httomo/methods_database/packages/external/tomopy/supporting_funcs/recon/algorithm.py index 84fffa08b..6908d1cbf 100644 --- a/httomo/methods_database/packages/external/tomopy/supporting_funcs/recon/algorithm.py +++ b/httomo/methods_database/packages/external/tomopy/supporting_funcs/recon/algorithm.py @@ -28,9 +28,9 @@ "_calc_output_dim_recon", ] + def _calc_output_dim_recon(non_slice_dims_shape, **kwargs): - """Function to calculate output dimensions for all reconstructors. - """ + """Function to calculate output dimensions for all reconstructors.""" DetectorsLengthH = non_slice_dims_shape[1] output_dims = (DetectorsLengthH, DetectorsLengthH) - return output_dims \ No newline at end of file + return output_dims diff --git a/httomo/monitors/__init__.py b/httomo/monitors/__init__.py index 9a1087533..cc55ecedc 100644 --- a/httomo/monitors/__init__.py +++ b/httomo/monitors/__init__.py @@ -5,19 +5,19 @@ from httomo.runner.monitoring_interface import MonitoringInterface -MONITORS_MAP = { - "bench": BenchmarkMonitoring, - "summary": SummaryMonitor -} +MONITORS_MAP = {"bench": BenchmarkMonitoring, "summary": SummaryMonitor} + def make_monitors(monitor_descriptors: List[str]) -> Optional[MonitoringInterface]: if len(monitor_descriptors) == 0: return None - + monitors: List[MonitoringInterface] = [] for descriptor in monitor_descriptors: if descriptor not in MONITORS_MAP: - raise ValueError(f"Unknown monitor '{descriptor}'. Please choose one of {MONITORS_MAP.keys()}") + raise ValueError( + f"Unknown monitor '{descriptor}'. 
Please choose one of {MONITORS_MAP.keys()}" + ) monitors.append(MONITORS_MAP[descriptor]()) - - return AggregateMonitoring(monitors) \ No newline at end of file + + return AggregateMonitoring(monitors) diff --git a/httomo/monitors/aggregate.py b/httomo/monitors/aggregate.py index 456bc38bc..cc0c9a23d 100644 --- a/httomo/monitors/aggregate.py +++ b/httomo/monitors/aggregate.py @@ -47,7 +47,13 @@ def report_source_block( ): for m in self._monitors: m.report_source_block( - name, first_task_id, slicing_dim, block_dims, block_idx_chunk, block_idx_global, cpu_time + name, + first_task_id, + slicing_dim, + block_dims, + block_idx_chunk, + block_idx_global, + cpu_time, ) def report_sink_block( @@ -62,16 +68,21 @@ def report_sink_block( ): for m in self._monitors: m.report_sink_block( - name, last_task_id, slicing_dim, block_dims, block_idx_chunk, block_idx_global, cpu_time + name, + last_task_id, + slicing_dim, + block_dims, + block_idx_chunk, + block_idx_global, + cpu_time, ) - + def report_total_time(self, cpu_time: float): for m in self._monitors: m.report_total_time(cpu_time) - def write_results(self, dest: TextIO): for m in self._monitors: m.write_results(dest) dest.writelines("\n") - dest.flush() \ No newline at end of file + dest.flush() diff --git a/httomo/monitors/benchmark.py b/httomo/monitors/benchmark.py index 9badb66ed..8624aa9d3 100644 --- a/httomo/monitors/benchmark.py +++ b/httomo/monitors/benchmark.py @@ -23,7 +23,7 @@ def report_method_block( cpu_time: float, gpu_kernel_time: float = 0.0, gpu_h2d_time: float = 0.0, - gpu_d2h_time: float = 0.0 + gpu_d2h_time: float = 0.0, ): self._data.append( OrderedDict( @@ -140,7 +140,7 @@ def write_results(self, dest: TextIO): writer = csv.DictWriter(dest, fieldnames=self._data[0].keys()) writer.writeheader() writer.writerows(self._data) - + def _aggregate_mpi(self): alldata = self._comm.gather(self._data) if self._comm.rank == 0: diff --git a/httomo/monitors/summary.py b/httomo/monitors/summary.py index 
d9789cd33..666ef8aed 100644 --- a/httomo/monitors/summary.py +++ b/httomo/monitors/summary.py @@ -15,7 +15,7 @@ def __init__(self) -> None: self._total = 0.0 self._total_agg = 0.0 self._comm = MPI.COMM_WORLD - + def report_method_block( self, method_name: str, @@ -28,7 +28,7 @@ def report_method_block( cpu_time: float, gpu_kernel_time: float = 0.0, gpu_h2d_time: float = 0.0, - gpu_d2h_time: float = 0.0 + gpu_d2h_time: float = 0.0, ): if method_name not in self._methods: self._methods[method_name] = 0.0 @@ -37,7 +37,7 @@ def report_method_block( self._methods_gpu += gpu_kernel_time self._h2d += gpu_h2d_time self._d2h += gpu_d2h_time - + def report_source_block( self, name: str, @@ -46,10 +46,10 @@ def report_source_block( block_dims: Tuple[int, int, int], block_idx_chunk: Tuple[int, int, int], block_idx_global: Tuple[int, int, int], - cpu_time: float + cpu_time: float, ): self._sources += cpu_time - + def report_sink_block( self, name: str, @@ -58,13 +58,13 @@ def report_sink_block( block_dims: Tuple[int, int, int], block_idx_chunk: Tuple[int, int, int], block_idx_global: Tuple[int, int, int], - cpu_time: float + cpu_time: float, ): self._sinks += cpu_time - + def report_total_time(self, cpu_time: float): self._total = cpu_time - + def write_results(self, dest: TextIO): self._aggregate_mpi() if self._comm.rank == 0: @@ -83,10 +83,12 @@ def write_results(self, dest: TextIO): f" ------------------------" + "-" * 15, f"Method breakdowns:", ] - for k,v in self._methods.items(): - lines.append(f" {k:>30s} : {v:>10.3f}s ({v / self._total * 100:>4.1f}%)") - dest.write('\n'.join(lines)) - + for k, v in self._methods.items(): + lines.append( + f" {k:>30s} : {v:>10.3f}s ({v / self._total * 100:>4.1f}%)" + ) + dest.write("\n".join(lines)) + def _aggregate_mpi(self): self._total_agg = self._total if self._comm.size == 1: diff --git a/httomo/runner/auxiliary_data.py b/httomo/runner/auxiliary_data.py index ae1612ab1..6dd134af1 100644 --- a/httomo/runner/auxiliary_data.py +++ 
b/httomo/runner/auxiliary_data.py @@ -3,19 +3,23 @@ import numpy as np from httomo.utils import xp, gpu_enabled + class AuxiliaryData: """ Keeps the darks and flats and angles auxiliary data together, and separate from the dataset. - + This allows them to be updated on their own without being affected by chunks and blocks, etc., including the GPU/CPU transfers if needed. """ - + generic_array: TypeAlias = Union[xp.ndarray, np.ndarray] - - def __init__(self, angles: np.ndarray, - darks: Optional[np.ndarray] = None, - flats: Optional[np.ndarray] = None): + + def __init__( + self, + angles: np.ndarray, + darks: Optional[np.ndarray] = None, + flats: Optional[np.ndarray] = None, + ): self._darks: Optional[AuxiliaryData.generic_array] = darks self._flats: Optional[AuxiliaryData.generic_array] = flats self._angles: np.ndarray = angles @@ -23,71 +27,70 @@ def __init__(self, angles: np.ndarray, @property def darks_dtype(self) -> Optional[np.dtype]: return self._darks.dtype if self._darks is not None else None - + @property def darks_shape(self) -> Tuple[int, int, int]: if self._darks is None: return (0, 0, 0) assert len(self._darks.shape) == 3 return (self._darks.shape[0], self._darks.shape[1], self._darks.shape[2]) - + @property def flats_dtype(self) -> Optional[np.dtype]: return self._flats.dtype if self._flats is not None else None - + @property def flats_shape(self) -> Tuple[int, int, int]: if self._flats is None: return (0, 0, 0) assert len(self._flats.shape) == 3 return (self._flats.shape[0], self._flats.shape[1], self._flats.shape[2]) - + @property def angles_dtype(self) -> np.dtype: return self._angles.dtype - + @property def angles_length(self) -> int: return len(self._angles) - - + def get_darks(self, gpu=False) -> Optional[generic_array]: return self._get_field("darks", gpu) - + def get_flats(self, gpu=False) -> Optional[generic_array]: return self._get_field("flats", gpu) - + def get_angles(self) -> np.ndarray: return self._angles - + def set_darks(self, darks: 
generic_array) -> None: self._darks = darks - + def set_flats(self, flats: generic_array) -> None: self._flats = flats - + def set_angles(self, angles: np.ndarray) -> None: assert getattr(angles, "device", None) is None, "Angles must be a CPU array" self._angles = angles - + def _get_field(self, field: str, gpu=False) -> generic_array: assert not gpu or gpu_enabled, "GPU can only be used if the GPU is enabled" - + array = getattr(self, f"_{field}") if array is None: return array - + # Note: if already on CPU/GPU, no copy is taken if gpu: array = xp.asarray(array) else: - if xp.__name__ == 'cupy': + if xp.__name__ == "cupy": array = xp.asnumpy(array) - + setattr(self, f"_{field}", array) - + return array - + def drop_darks_flats(self): self._darks = None - self._flats = None \ No newline at end of file + self._flats = None diff --git a/httomo/runner/block_split.py b/httomo/runner/block_split.py index e9bdc7d38..fc2c1ff35 100644 --- a/httomo/runner/block_split.py +++ b/httomo/runner/block_split.py @@ -6,6 +6,7 @@ log = logging.getLogger(__name__) + class BlockSplitter: """Produces blocks from a DataSetSource according to the given max slices per block. It provides an iterator interface, so that it can be used as:: @@ -48,8 +49,8 @@ class BlockIterator: def __init__(self, splitter): self.splitter = splitter self._current = 0 - - def __iter__(self) -> 'BlockIterator': + + def __iter__(self) -> "BlockIterator": return self # pragma: no cover def __next__(self) -> DataSetBlock: @@ -60,4 +61,3 @@ def __next__(self) -> DataSetBlock: return v return BlockIterator(self) - diff --git a/httomo/runner/dataset.py b/httomo/runner/dataset.py index a62ca6c46..ba9588fd0 100644 --- a/httomo/runner/dataset.py +++ b/httomo/runner/dataset.py @@ -16,7 +16,7 @@ class DataSetBlock: It stores the base object internally and routes all calls for the auxilliary arrays to the base object (darks/flats/angles). It does not store these directly. 
""" - + generic_array: TypeAlias = Union[np.ndarray, xp.ndarray] def __init__( @@ -39,7 +39,7 @@ def __init__( self._global_shape = make_3d_shape_from_array(data) else: self._global_shape = global_shape - + if chunk_shape is None: self._chunk_shape = make_3d_shape_from_array(data) else: @@ -53,29 +53,45 @@ def __init__( self._global_index = make_3d_shape_from_shape(global_index) self._check_inconsistencies() - + def _check_inconsistencies(self): if self.chunk_index[self.slicing_dim] < 0: raise ValueError("block start index must be >= 0") - if self.chunk_index[self.slicing_dim] + self.shape[self.slicing_dim] > self.chunk_shape[self.slicing_dim]: + if ( + self.chunk_index[self.slicing_dim] + self.shape[self.slicing_dim] + > self.chunk_shape[self.slicing_dim] + ): raise ValueError("block spans beyond the chunk's boundaries") if self.global_index[self.slicing_dim] < 0: raise ValueError("chunk start index must be >= 0") - if self.global_index[self.slicing_dim] + self.shape[self.slicing_dim] > self.global_shape[self.slicing_dim]: + if ( + self.global_index[self.slicing_dim] + self.shape[self.slicing_dim] + > self.global_shape[self.slicing_dim] + ): raise ValueError("chunk spans beyond the global data boundaries") - if any(self.chunk_shape[i] > self.global_shape[i] for i in range(3)): + if any(self.chunk_shape[i] > self.global_shape[i] for i in range(3)): raise ValueError("chunk shape is larger than the global shape") if any(self.shape[i] > self.chunk_shape[i] for i in range(3)): raise ValueError("block shape is larger than the chunk shape") - if any(self.shape[i] != self.global_shape[i] for i in range(3) if i != self.slicing_dim): - raise ValueError("block shape inconsistent with non-slicing dims of global shape") - - assert not any(self.chunk_shape[i] != self.global_shape[i] for i in range(3) if i != self.slicing_dim) + if any( + self.shape[i] != self.global_shape[i] + for i in range(3) + if i != self.slicing_dim + ): + raise ValueError( + "block shape inconsistent with 
non-slicing dims of global shape" + ) + + assert not any( + self.chunk_shape[i] != self.global_shape[i] + for i in range(3) + if i != self.slicing_dim + ) @property def aux_data(self) -> AuxiliaryData: return self._aux_data - + @property def shape(self) -> Tuple[int, int, int]: """Shape of the data in this block""" @@ -90,7 +106,7 @@ def chunk_index(self) -> Tuple[int, int, int]: def chunk_shape(self) -> Tuple[int, int, int]: """Shape of the full chunk handled by the current process""" return self._chunk_shape - + @property def global_index(self) -> Tuple[int, int, int]: """The index of this block within the global data across all processes""" @@ -100,27 +116,27 @@ def global_index(self) -> Tuple[int, int, int]: def global_shape(self) -> Tuple[int, int, int]: """Shape of the global data across all processes""" return self._global_shape - + @property def is_cpu(self) -> bool: return getattr(self._data, "device", None) is None - + @property def is_gpu(self) -> bool: return not self.is_cpu - + @property def angles(self) -> np.ndarray: return self._aux_data.get_angles() - + @angles.setter def angles(self, new_angles: np.ndarray): self._aux_data.set_angles(new_angles) - + @property def angles_radians(self) -> np.ndarray: return self.angles - + @angles_radians.setter def angles_radians(self, new_angles: np.ndarray): self.angles = new_angles @@ -136,7 +152,7 @@ def is_last_in_chunk(self) -> bool: @property def slicing_dim(self) -> int: return self._slicing_dim - + def _empty_aux_array(self): empty_shape = list(self._data.shape) empty_shape[self.slicing_dim] = 0 @@ -156,7 +172,7 @@ def data(self, new_data: generic_array): chunk_shape[i] = new_data.shape[i] elif self._data.shape[i] != new_data.shape[i]: raise ValueError("shape mismatch in slicing dimension") - + self._data = new_data self._global_shape = make_3d_shape_from_shape(global_shape) self._chunk_shape = make_3d_shape_from_shape(chunk_shape) @@ -171,16 +187,16 @@ def darks(self) -> generic_array: @darks.setter def 
darks(self, darks: generic_array): self._aux_data.set_darks(darks) - + # alias @property def dark(self) -> generic_array: return self.darks - + @dark.setter def dark(self, darks: generic_array): self.darks = darks - + @property def flats(self) -> generic_array: flats = self._aux_data.get_flats(self.is_gpu) @@ -191,12 +207,12 @@ def flats(self) -> generic_array: @flats.setter def flats(self, flats: generic_array): self._aux_data.set_flats(flats) - + # alias @property def flat(self) -> generic_array: return self.flats - + @flat.setter def flat(self, flats: generic_array): self.flats = flats @@ -205,13 +221,13 @@ def to_gpu(self): if not gpu_enabled: raise ValueError("no GPU available") # from doc: if already on GPU, no copy is taken - self._data = xp.asarray(self.data, order="C") + self._data = xp.asarray(self.data, order="C") def to_cpu(self): if not gpu_enabled: return self._data = xp.asnumpy(self.data, order="C") - + def __dir__(self) -> list[str]: """Return only those properties that are relevant for the data""" return ["data", "angles", "angles_radians", "darks", "flats", "dark", "flat"] diff --git a/httomo/runner/dataset_store_interfaces.py b/httomo/runner/dataset_store_interfaces.py index 1a5d583c1..8573b7a19 100644 --- a/httomo/runner/dataset_store_interfaces.py +++ b/httomo/runner/dataset_store_interfaces.py @@ -52,9 +52,8 @@ class DataSetSource(Protocol): the data can be read in *blocks*, sliced in the given slicing dimension""" @property - def dtype(self) -> np.dtype: - ... # pragma: no cover - + def dtype(self) -> np.dtype: ... # pragma: no cover + @property def global_shape(self) -> Tuple[int, int, int]: """Global data shape across all processes that we eventually have to read.""" @@ -75,12 +74,12 @@ def global_index(self) -> Tuple[int, int, int]: def slicing_dim(self) -> Literal[0, 1, 2]: """Slicing dimension - 0, 1, or 2""" ... # pragma: no cover - + @property def aux_data(self) -> AuxiliaryData: """Auxiliary data""" ... 
# pragma: no cover - + def read_block(self, start: int, length: int) -> DataSetBlock: """Reads a block from the dataset, starting at `start` of length `length`, in the current slicing dimension. Note that `start` is chunk-based, @@ -128,11 +127,12 @@ def finalize(self): to give implementations a chance to write everything to disk and close the file, etc.""" ... # pragma: no cover - + + class ReadableDataSetSink(DataSetSink): """Interface for a DataSetSink that is store-based, i.e. where it's possible to construct - a reader from the same data store that this sink has been written first""" - + a reader from the same data store that this sink has been written first""" + @abc.abstractmethod def make_reader( self, new_slicing_dim: Optional[Literal[0, 1, 2]] = None diff --git a/httomo/runner/gpu_utils.py b/httomo/runner/gpu_utils.py index 69e104668..5c3ad24ca 100644 --- a/httomo/runner/gpu_utils.py +++ b/httomo/runner/gpu_utils.py @@ -7,8 +7,8 @@ def gpumem_cleanup(): xp.get_default_memory_pool().free_all_blocks() cache = xp.fft.config.get_plan_cache() cache.clear() - - + + def get_available_gpu_memory(safety_margin_percent: float = 10.0) -> int: try: import cupy as cp @@ -22,4 +22,3 @@ def get_available_gpu_memory(safety_margin_percent: float = 10.0) -> int: return int(available_memory * (1 - safety_margin_percent / 100.0)) except: return int(100e9) # arbitrarily high number - only used if GPU isn't available - diff --git a/httomo/runner/method_wrapper.py b/httomo/runner/method_wrapper.py index 1a0a319db..d14673182 100644 --- a/httomo/runner/method_wrapper.py +++ b/httomo/runner/method_wrapper.py @@ -9,36 +9,42 @@ import os from typing import Any, Callable, Dict, List, Literal, Optional, Protocol, Tuple, Union -MethodParameterValues = Union[str, bool, int, float, os.PathLike, np.ndarray, xp.ndarray] -MethodParameterDictType = Dict[str, Union[MethodParameterValues, List[MethodParameterValues]]] +MethodParameterValues = Union[ + str, bool, int, float, os.PathLike, 
np.ndarray, xp.ndarray +] +MethodParameterDictType = Dict[ + str, Union[MethodParameterValues, List[MethodParameterValues]] +] @dataclass class GpuTimeInfo: """Captures the time spent on GPU""" + kernel: float = 0.0 host2device: float = 0.0 device2host: float = 0.0 + class MethodWrapper(Protocol): """Interface for method wrappers, that is used by the pipeline and task runners to execute methods in a generic way.""" - + # read/write properties task_id: str pattern: Pattern - + # read-only properties @property def comm(self) -> MPI.Comm: """The MPI communicator used""" ... # pragma: no cover - + @property def method(self) -> Callable: """The actual method underlying this wrapper""" ... # pragma: no cover - + @property def parameters(self) -> List[str]: """List of parameter names of the underlying method""" @@ -48,83 +54,81 @@ def parameters(self) -> List[str]: def memory_gpu(self) -> List[GpuMemoryRequirement]: """Memory requirements for GPU execution""" ... # pragma: nocover - + @property def implementation(self) -> Literal["gpu", "cpu", "gpu_cupy"]: """Implementation of this method""" ... # pragma: nocover - + @property def output_dims_change(self) -> bool: """Whether output dimensions change after executing this method""" ... # pragma: nocover - + @property def save_result(self) -> bool: """Whether to save the result of this method to intermediate files""" - ... # pragma: nocover - + ... # pragma: nocover + @property def method_name(self) -> str: """Returns the name of the method function""" - ... # pragma: nocover - + ... # pragma: nocover + @property def module_path(self) -> str: """Returns the full module path where the method function is defined""" - ... # pragma: nocover - + ... # pragma: nocover + @property def package_name(self) -> str: """The name of the top-level package where this method is implementated, e.g. 'httomolib'""" - ... # pragma: nocover - + ... 
# pragma: nocover + @property def cupyrun(self) -> bool: """True if method runs on GPU and expects a CuPy array as inputs""" - ... # pragma: nocover + ... # pragma: nocover @property def is_cpu(self) -> bool: """True if this is a CPU-only method""" - ... # pragma: nocover + ... # pragma: nocover @property def is_gpu(self) -> bool: """True if this is a GPU method""" - ... # pragma: nocover - + ... # pragma: nocover + @property def gpu_time(self) -> GpuTimeInfo: """Get the time spent on GPU in the last call to execute""" - ... # pragma: nocover - + ... # pragma: nocover + @property def config_params(self) -> Dict[str, Any]: """Access a copy of the configuration parameters (cannot be modified directly)""" - ... # pragma: nocover - + ... # pragma: nocover + @property def recon_algorithm(self) -> Optional[str]: """Determine the recon algorithm used, if the method is reconstruction. Otherwise return None.""" - ... # pragma: nocover + ... # pragma: nocover # Methods def __getitem__(self, key: str) -> MethodParameterValues: """Get a parameter for the method using dictionary notation (wrapper["param"])""" - ... # pragma: nocover + ... # pragma: nocover def __setitem__(self, key: str, value: MethodParameterValues): """Set a parameter for the method using dictionary notation (wrapper["param"] = 42)""" - ... # pragma: nocover - + ... # pragma: nocover def append_config_params(self, params: MethodParameterDictType): """Appends to the configuration parameters all values that are in the given dictionary""" - ... # pragma: nocover - + ... # pragma: nocover def execute(self, block: DataSetBlock) -> DataSetBlock: """Execute the method. @@ -141,20 +145,19 @@ def execute(self, block: DataSetBlock) -> DataSetBlock: DataSetBlock A CPU or GPU-based dataset object with the output """ - ... # pragma: nocover - + ... # pragma: nocover + def get_side_output(self) -> Dict[str, Any]: """Override this method for functions that have a side output. 
The returned dictionary will be merged with the dict_params parameter passed to execute for all methods that follow in the pipeline""" - ... # pragma: nocover - + ... # pragma: nocover + def calculate_output_dims( self, non_slice_dims_shape: Tuple[int, int] ) -> Tuple[int, int]: """Calculate the dimensions of the output for this method""" - ... # pragma: nocover - + ... # pragma: nocover def calculate_max_slices( self, @@ -170,11 +173,4 @@ def calculate_max_slices( The available memory may have been adjusted for the methods that follow, in case something persists afterwards. """ - ... # pragma: nocover - - - - - - - + ... # pragma: nocover diff --git a/httomo/runner/methods_repository_interface.py b/httomo/runner/methods_repository_interface.py index 5ad4a056f..162c46d51 100644 --- a/httomo/runner/methods_repository_interface.py +++ b/httomo/runner/methods_repository_interface.py @@ -36,7 +36,7 @@ def get_implementation(self) -> Literal["cpu", "gpu", "gpu_cupy"]: def get_memory_gpu_params(self) -> List[GpuMemoryRequirement]: """Get the parameters for the GPU memory estimation""" ... # pragma: no cover - + def save_result_default(self) -> bool: """Check if this method saves results by default""" ... # pragma: no cover diff --git a/httomo/runner/monitoring_interface.py b/httomo/runner/monitoring_interface.py index 24d6690ad..23e1ddd6a 100644 --- a/httomo/runner/monitoring_interface.py +++ b/httomo/runner/monitoring_interface.py @@ -14,10 +14,9 @@ def report_method_block( cpu_time: float, gpu_kernel_time: float = 0.0, gpu_h2d_time: float = 0.0, - gpu_d2h_time: float = 0.0 - ): - ... # pragma: no cover - + gpu_d2h_time: float = 0.0, + ): ... # pragma: no cover + def report_source_block( self, name: str, @@ -26,10 +25,9 @@ def report_source_block( block_dims: Tuple[int, int, int], block_idx_chunk: Tuple[int, int, int], block_idx_global: Tuple[int, int, int], - cpu_time: float - ): - ... # pragma: no cover - + cpu_time: float, + ): ... 
# pragma: no cover + def report_sink_block( self, name: str, @@ -38,12 +36,9 @@ def report_sink_block( block_dims: Tuple[int, int, int], block_idx_chunk: Tuple[int, int, int], block_idx_global: Tuple[int, int, int], - cpu_time: float - ): - ... # pragma: no cover - - def report_total_time(self, cpu_time: float): - ... # pragma: no cover - - def write_results(self, dest: TextIO): - ... # pragma: no cover + cpu_time: float, + ): ... # pragma: no cover + + def report_total_time(self, cpu_time: float): ... # pragma: no cover + + def write_results(self, dest: TextIO): ... # pragma: no cover diff --git a/httomo/runner/pipeline.py b/httomo/runner/pipeline.py index 640edf2dc..89cc4d22b 100644 --- a/httomo/runner/pipeline.py +++ b/httomo/runner/pipeline.py @@ -9,11 +9,7 @@ class Pipeline: """Represents a pipeline of methods, stored by their wrappers, and the loader. After creation, the pipeline is immutable.""" - def __init__( - self, - loader: LoaderInterface, - methods: List[MethodWrapper] - ): + def __init__(self, loader: LoaderInterface, methods: List[MethodWrapper]): self._methods = methods self._loader = loader diff --git a/httomo/ui_layer.py b/httomo/ui_layer.py index 339e89594..755c2812b 100644 --- a/httomo/ui_layer.py +++ b/httomo/ui_layer.py @@ -206,6 +206,7 @@ def update_side_output_references( check_valid_ref_id(side_str, ref_id, param_value, method) parameters[param_name] = OutputRef(method, ref_arg) + def get_regex_pattern() -> re.Pattern: """Return the reference string regex pattern to search for Returns diff --git a/httomo/utils.py b/httomo/utils.py index d032b2f87..1b451cc36 100644 --- a/httomo/utils.py +++ b/httomo/utils.py @@ -13,6 +13,7 @@ gpu_enabled = False try: import cupy as xp + if mpiutil.rank == 0: logger.debug("CuPy is installed") @@ -21,11 +22,13 @@ gpu_enabled = True # CuPy is installed and GPU is available except xp.cuda.runtime.CUDARuntimeError: import numpy as xp + if mpiutil.rank == 0: logger.debug("CuPy is installed but GPU device 
inaccessible") except ImportError: import numpy as xp + if mpiutil.rank == 0: logger.debug("CuPy is not installed") @@ -65,6 +68,7 @@ def log_once(output: Any, level: int = logging.INFO) -> None: else: logger.info(output) + def log_rank(output: Any, comm: Comm) -> None: """ Log output to log file with the process rank. diff --git a/httomo/yaml_checker.py b/httomo/yaml_checker.py index 42992ed18..e7ad258ab 100644 --- a/httomo/yaml_checker.py +++ b/httomo/yaml_checker.py @@ -426,9 +426,7 @@ def validate_yaml_config(yaml_file: Path, in_file: Optional[Path] = None) -> boo if not all_checks_pass: return False - end_str = ( - "\nValidation of pipeline YAML file is successful." - ) + end_str = "\nValidation of pipeline YAML file is successful." _print_with_colour(end_str, colour=Colour.BVIOLET) return True diff --git a/tests/conftest.py b/tests/conftest.py index 05cbd9551..6529fb35b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -308,7 +308,7 @@ def distortion_correction_path(test_data_path): def merge_yamls(load_yaml: Callable): def _merge_yamls(*yamls) -> None: """Merge multiple yaml files into one""" - data : List = [] + data: List = [] for y in yamls: curr_yaml_list = load_yaml(y) for x in curr_yaml_list: @@ -342,7 +342,7 @@ def dummy_block() -> DataSetBlock: @pytest.fixture() def get_files(): def _get_files(dir_path: str, excl: List[str] = []) -> List[str]: - """ Returns list of files from provided directory + """Returns list of files from provided directory Parameters ---------- @@ -360,13 +360,14 @@ def _get_files(dir_path: str, excl: List[str] = []) -> List[str]: str(f) for f in _dir if f.is_file() and not any(st in str(f) for st in excl) ] return _files + return _get_files @pytest.fixture() def load_yaml(): def _load_yaml(yaml_in: str) -> PipelineConfig: - """ Loads provided yaml and returns dict + """Loads provided yaml and returns dict Parameters ---------- @@ -380,5 +381,5 @@ def _load_yaml(yaml_in: str) -> PipelineConfig: with open(yaml_in, "r") as 
f: conf = list(yaml.load_all(f, Loader=yaml.FullLoader)) return conf[0] - return _load_yaml + return _load_yaml diff --git a/tests/data/test_dataset_store.py b/tests/data/test_dataset_store.py index 161dbeb74..8745ce37f 100644 --- a/tests/data/test_dataset_store.py +++ b/tests/data/test_dataset_store.py @@ -13,26 +13,30 @@ @pytest.mark.parametrize("slicing_dim", [0, 1, 2]) -def test_writer_can_set_sizes_and_shapes_dim(tmp_path: PathLike, slicing_dim: Literal[0, 1, 2]): - global_shape=(30, 15, 20) - chunk_shape_t=list(global_shape) +def test_writer_can_set_sizes_and_shapes_dim( + tmp_path: PathLike, slicing_dim: Literal[0, 1, 2] +): + global_shape = (30, 15, 20) + chunk_shape_t = list(global_shape) chunk_shape_t[slicing_dim] = 5 chunk_shape = make_3d_shape_from_shape(chunk_shape_t) - global_index_t=[0, 0, 0] + global_index_t = [0, 0, 0] global_index_t[slicing_dim] = 5 global_index = make_3d_shape_from_shape(global_index_t) writer = DataSetStoreWriter( - slicing_dim=slicing_dim, + slicing_dim=slicing_dim, comm=MPI.COMM_SELF, temppath=tmp_path, ) - block = DataSetBlock(data=np.ones(chunk_shape, dtype=np.float32), - aux_data=AuxiliaryData(angles=np.ones(global_shape[0], dtype=np.float32)), - chunk_shape=chunk_shape, - slicing_dim=slicing_dim, - global_shape=global_shape, - block_start=0, - chunk_start=global_index[slicing_dim]) + block = DataSetBlock( + data=np.ones(chunk_shape, dtype=np.float32), + aux_data=AuxiliaryData(angles=np.ones(global_shape[0], dtype=np.float32)), + chunk_shape=chunk_shape, + slicing_dim=slicing_dim, + global_shape=global_shape, + block_start=0, + chunk_start=global_index[slicing_dim], + ) writer.write_block(block) assert writer.global_shape == global_shape @@ -43,7 +47,7 @@ def test_writer_can_set_sizes_and_shapes_dim(tmp_path: PathLike, slicing_dim: Li def test_reader_throws_if_no_data(tmp_path: PathLike): writer = DataSetStoreWriter( - slicing_dim=0, + slicing_dim=0, comm=MPI.COMM_SELF, temppath=tmp_path, ) @@ -62,7 +66,7 @@ def 
test_can_write_and_read_blocks( comm=MPI.COMM_WORLD, temppath=tmp_path, ) - + GLOBAL_SHAPE = (10, 10, 10) global_data = np.arange(np.prod(GLOBAL_SHAPE), dtype=np.float32).reshape( GLOBAL_SHAPE @@ -70,22 +74,22 @@ def test_can_write_and_read_blocks( chunk_shape = (4, GLOBAL_SHAPE[1], GLOBAL_SHAPE[2]) chunk_start = 3 block1 = DataSetBlock( - data=global_data[chunk_start:chunk_start+2, :, :], + data=global_data[chunk_start : chunk_start + 2, :, :], aux_data=AuxiliaryData(angles=np.ones(GLOBAL_SHAPE[0], dtype=np.float32)), global_shape=GLOBAL_SHAPE, chunk_shape=chunk_shape, block_start=0, slicing_dim=0, - chunk_start=chunk_start + chunk_start=chunk_start, ) block2 = DataSetBlock( - data=global_data[chunk_start+2:chunk_start+2+2, :, :], + data=global_data[chunk_start + 2 : chunk_start + 2 + 2, :, :], aux_data=AuxiliaryData(angles=np.ones(GLOBAL_SHAPE[0], dtype=np.float32)), global_shape=GLOBAL_SHAPE, chunk_shape=chunk_shape, block_start=2, slicing_dim=0, - chunk_start=chunk_start + chunk_start=chunk_start, ) if file_based: @@ -112,7 +116,10 @@ def test_can_write_and_read_blocks( @pytest.mark.parametrize("file_based", [False, True]) def test_write_after_read_throws( - mocker: MockerFixture, dummy_block: DataSetBlock, tmp_path: PathLike, file_based: bool + mocker: MockerFixture, + dummy_block: DataSetBlock, + tmp_path: PathLike, + file_based: bool, ): writer = DataSetStoreWriter( slicing_dim=0, @@ -186,22 +193,22 @@ def test_can_write_and_read_block_with_different_sizes(tmp_path: PathLike): chunk_shape = (4, GLOBAL_SHAPE[1], GLOBAL_SHAPE[2]) chunk_start = 3 block1 = DataSetBlock( - data=global_data[chunk_start:chunk_start+2, :, :], + data=global_data[chunk_start : chunk_start + 2, :, :], aux_data=AuxiliaryData(angles=np.ones(GLOBAL_SHAPE[0], dtype=np.float32)), global_shape=GLOBAL_SHAPE, chunk_shape=chunk_shape, block_start=0, slicing_dim=0, - chunk_start=chunk_start + chunk_start=chunk_start, ) block2 = DataSetBlock( - data=global_data[chunk_start+2:chunk_start+2+2, :, 
:], + data=global_data[chunk_start + 2 : chunk_start + 2 + 2, :, :], aux_data=AuxiliaryData(angles=np.ones(GLOBAL_SHAPE[0], dtype=np.float32)), global_shape=GLOBAL_SHAPE, chunk_shape=chunk_shape, block_start=2, slicing_dim=0, - chunk_start=chunk_start + chunk_start=chunk_start, ) writer.write_block(block1) @@ -211,7 +218,9 @@ def test_can_write_and_read_block_with_different_sizes(tmp_path: PathLike): rblock = reader.read_block(0, 4) - np.testing.assert_array_equal(rblock.data, global_data[chunk_start:chunk_start+4, :, :]) + np.testing.assert_array_equal( + rblock.data, global_data[chunk_start : chunk_start + 4, :, :] + ) def test_writing_inconsistent_global_shapes_fails(tmp_path: PathLike): @@ -226,7 +235,7 @@ def test_writing_inconsistent_global_shapes_fails(tmp_path: PathLike): ) chunk_shape = (4, GLOBAL_SHAPE[1], GLOBAL_SHAPE[2]) chunk_start = 3 - aux_data=AuxiliaryData(angles=np.ones(GLOBAL_SHAPE[0]+10, dtype=np.float32)) + aux_data = AuxiliaryData(angles=np.ones(GLOBAL_SHAPE[0] + 10, dtype=np.float32)) block1 = DataSetBlock( data=global_data[:2, :, :], aux_data=aux_data, @@ -234,16 +243,16 @@ def test_writing_inconsistent_global_shapes_fails(tmp_path: PathLike): chunk_shape=chunk_shape, block_start=0, slicing_dim=0, - chunk_start=chunk_start + chunk_start=chunk_start, ) block2 = DataSetBlock( data=global_data[:2, :, :], aux_data=aux_data, - global_shape=(GLOBAL_SHAPE[0]+1, GLOBAL_SHAPE[1], GLOBAL_SHAPE[2]), + global_shape=(GLOBAL_SHAPE[0] + 1, GLOBAL_SHAPE[1], GLOBAL_SHAPE[2]), chunk_shape=chunk_shape, block_start=2, slicing_dim=0, - chunk_start=chunk_start + chunk_start=chunk_start, ) writer.write_block(block1) @@ -252,6 +261,7 @@ def test_writing_inconsistent_global_shapes_fails(tmp_path: PathLike): assert "inconsistent shape" in str(e) + def test_writing_inconsistent_chunk_shapes_fails(tmp_path: PathLike): writer = DataSetStoreWriter( slicing_dim=0, @@ -271,16 +281,16 @@ def test_writing_inconsistent_chunk_shapes_fails(tmp_path: PathLike): 
chunk_shape=chunk_shape, block_start=0, slicing_dim=0, - chunk_start=chunk_start + chunk_start=chunk_start, ) block2 = DataSetBlock( data=global_data[2:2, :, :], aux_data=AuxiliaryData(angles=np.ones(GLOBAL_SHAPE[0], dtype=np.float32)), global_shape=GLOBAL_SHAPE, - chunk_shape=(chunk_shape[0]-1, chunk_shape[1], chunk_shape[2]), + chunk_shape=(chunk_shape[0] - 1, chunk_shape[1], chunk_shape[2]), block_start=2, slicing_dim=0, - chunk_start=chunk_start + chunk_start=chunk_start, ) writer.write_block(block1) @@ -289,6 +299,7 @@ def test_writing_inconsistent_chunk_shapes_fails(tmp_path: PathLike): assert "inconsistent shape" in str(e) + def test_writing_inconsistent_global_index_fails(tmp_path: PathLike): writer = DataSetStoreWriter( slicing_dim=0, @@ -308,7 +319,7 @@ def test_writing_inconsistent_global_index_fails(tmp_path: PathLike): chunk_shape=chunk_shape, block_start=0, slicing_dim=0, - chunk_start=chunk_start + chunk_start=chunk_start, ) block2 = DataSetBlock( data=global_data[2:2, :, :], @@ -317,7 +328,7 @@ def test_writing_inconsistent_global_index_fails(tmp_path: PathLike): chunk_shape=chunk_shape, block_start=2, slicing_dim=0, - chunk_start=chunk_start+2 + chunk_start=chunk_start + 2, ) writer.write_block(block1) @@ -327,7 +338,6 @@ def test_writing_inconsistent_global_index_fails(tmp_path: PathLike): assert "inconsistent shape" in str(e) - def test_create_new_data_goes_to_file_on_memory_error( mocker: MockerFixture, dummy_block: DataSetBlock, tmp_path: PathLike ): @@ -350,8 +360,8 @@ def test_create_new_data_goes_to_file_on_memory_error( ANY, writer.comm, ) - - + + def test_create_new_data_goes_to_file_on_memory_limit( mocker: MockerFixture, tmp_path: PathLike ): @@ -359,8 +369,8 @@ def test_create_new_data_goes_to_file_on_memory_limit( data = np.ones(GLOBAL_SHAPE, dtype=np.float32) aux_data = AuxiliaryData( angles=np.ones(data.shape[0], dtype=np.float32), - darks=2.*np.ones((2, GLOBAL_SHAPE[1], GLOBAL_SHAPE[2]), dtype=np.float32), - flats=1.*np.ones((2, 
GLOBAL_SHAPE[1], GLOBAL_SHAPE[2]), dtype=np.float32), + darks=2.0 * np.ones((2, GLOBAL_SHAPE[1], GLOBAL_SHAPE[2]), dtype=np.float32), + flats=1.0 * np.ones((2, GLOBAL_SHAPE[1], GLOBAL_SHAPE[2]), dtype=np.float32), ) block = DataSetBlock( data=data[0:2, :, :], @@ -375,7 +385,7 @@ def test_create_new_data_goes_to_file_on_memory_limit( slicing_dim=0, comm=MPI.COMM_WORLD, temppath=tmp_path, - memory_limit_bytes=block.data.nbytes + 5 # only one block will fit in memory + memory_limit_bytes=block.data.nbytes + 5, # only one block will fit in memory ) createh5_mock = mocker.patch.object( @@ -412,7 +422,10 @@ def test_calls_reslice( @pytest.mark.parametrize("file_based", [False, True]) def test_reslice_single_block_single_process( - mocker: MockerFixture, dummy_block: DataSetBlock, tmp_path: PathLike, file_based: bool + mocker: MockerFixture, + dummy_block: DataSetBlock, + tmp_path: PathLike, + file_based: bool, ): writer = DataSetStoreWriter( slicing_dim=0, @@ -489,7 +502,7 @@ def test_full_integration_with_reslice( global_shape=GLOBAL_DATA_SHAPE, block_start=0, chunk_start=chunk_start, - chunk_shape=(chunk_size, GLOBAL_DATA_SHAPE[1], GLOBAL_DATA_SHAPE[2]) + chunk_shape=(chunk_size, GLOBAL_DATA_SHAPE[1], GLOBAL_DATA_SHAPE[2]), ) writer = DataSetStoreWriter( diff --git a/tests/loaders/test_standard_tomo_loader.py b/tests/loaders/test_standard_tomo_loader.py index 20edc1d4d..ff418d289 100644 --- a/tests/loaders/test_standard_tomo_loader.py +++ b/tests/loaders/test_standard_tomo_loader.py @@ -653,11 +653,13 @@ def test_standard_tomo_loader_user_defined_angles( stop_angle=180, angles_total=720, ) - EXPECTED_ANGLES = np.deg2rad(np.linspace( - USER_DEFINED_ANGLES.start_angle, - USER_DEFINED_ANGLES.stop_angle, - USER_DEFINED_ANGLES.angles_total, - )) + EXPECTED_ANGLES = np.deg2rad( + np.linspace( + USER_DEFINED_ANGLES.start_angle, + USER_DEFINED_ANGLES.stop_angle, + USER_DEFINED_ANGLES.angles_total, + ) + ) with mock.patch( "httomo.darks_flats.get_darks_flats", diff --git 
a/tests/method_wrappers/test_generic.py b/tests/method_wrappers/test_generic.py index 3a4ca173d..eff59f979 100644 --- a/tests/method_wrappers/test_generic.py +++ b/tests/method_wrappers/test_generic.py @@ -45,12 +45,15 @@ def fake_method(data): "testmodule.path", "fake_method", MPI.COMM_WORLD, - task_id="fake_method_id" + task_id="fake_method_id", ) - + assert wrp.task_id == "fake_method_id" -def test_generic_execute_transfers_to_gpu(mocker: MockerFixture, dummy_block: DataSetBlock): + +def test_generic_execute_transfers_to_gpu( + mocker: MockerFixture, dummy_block: DataSetBlock +): class FakeModule: def fake_method(data): return data @@ -72,7 +75,8 @@ def fake_method(data): reason="skipped as cupy is not available", ) @pytest.mark.cupy -def test_generic_excute_measures_gpu_times(dummy_block: DataSetBlock, mocker: MockerFixture +def test_generic_excute_measures_gpu_times( + dummy_block: DataSetBlock, mocker: MockerFixture ): class FakeModule: def fake_method(data): @@ -193,11 +197,10 @@ def fake_method(data, comm: Optional[MPI.Comm] = None): wrp.execute(dummy_block) -def test_generic_transforms_auto_axis( - mocker: MockerFixture, dummy_block: DataSetBlock -): +def test_generic_transforms_auto_axis(mocker: MockerFixture, dummy_block: DataSetBlock): PATTERN = Pattern.projection + class FakeModule: def fake_method(data, axis: int): assert axis == PATTERN.value @@ -205,8 +208,11 @@ def fake_method(data, axis: int): mocker.patch("importlib.import_module", return_value=FakeModule) wrp = make_method_wrapper( - make_mock_repo(mocker, pattern=PATTERN), "mocked_module_path", "fake_method", - MPI.COMM_WORLD, axis="auto", + make_mock_repo(mocker, pattern=PATTERN), + "mocked_module_path", + "fake_method", + MPI.COMM_WORLD, + axis="auto", ) wrp.execute(dummy_block) @@ -245,7 +251,9 @@ def fake_method(data, param): assert "Cannot map method parameter param to a value" in str(e) -def test_generic_access_outputref_params(mocker: MockerFixture, dummy_block: DataSetBlock): +def 
test_generic_access_outputref_params( + mocker: MockerFixture, dummy_block: DataSetBlock +): class FakeModule: def fake_method(data, param): assert param == 42 @@ -297,7 +305,9 @@ def fake_method(array): wrp.execute(dummy_block) -def test_generic_for_method_with_kwargs(mocker: MockerFixture, dummy_block: DataSetBlock): +def test_generic_for_method_with_kwargs( + mocker: MockerFixture, dummy_block: DataSetBlock +): class FakeModule: def fake_method(data, param, **kwargs): assert param == 42.0 @@ -482,7 +492,7 @@ def test_method(data): ) def test_generic_calculate_max_slices_direct( mocker: MockerFixture, - dummy_block: DataSetBlock, + dummy_block: DataSetBlock, implementation: str, memory_gpu: List[GpuMemoryRequirement], ): @@ -509,7 +519,11 @@ def test_method(data): shape = (shape_t[0], shape_t[1]) databytes = shape[0] * shape[1] * dummy_block.data.itemsize max_slices_expected = 5 - multiplier = float(memory_gpu[0].multiplier if memory_gpu != [] and memory_gpu[0].multiplier is not None else 1) + multiplier = float( + memory_gpu[0].multiplier + if memory_gpu != [] and memory_gpu[0].multiplier is not None + else 1 + ) available_memory_in = int(databytes * max_slices_expected * multiplier) if available_memory_in == 0: available_memory_in = 5 diff --git a/tests/method_wrappers/test_reconstruction.py b/tests/method_wrappers/test_reconstruction.py index 5004cc7c3..343c3f5e8 100644 --- a/tests/method_wrappers/test_reconstruction.py +++ b/tests/method_wrappers/test_reconstruction.py @@ -14,6 +14,7 @@ def test_recon_handles_reconstruction_angle_reshape(mocker: MockerFixture): GLOBAL_SHAPE = (10, 20, 30) + class FakeModule: # we give the angles a different name on purpose def recon_tester(data, theta): @@ -30,14 +31,21 @@ def recon_tester(data, theta): MPI.COMM_WORLD, ) assert isinstance(wrp, ReconstructionWrapper) - - - aux_data = AuxiliaryData(angles=2.*np.ones(GLOBAL_SHAPE[0]+10, dtype=np.float32)) + + aux_data = AuxiliaryData( + angles=2.0 * np.ones(GLOBAL_SHAPE[0] + 10, 
dtype=np.float32) + ) data = np.ones(GLOBAL_SHAPE, dtype=np.float32) - input = DataSetBlock(data[:, 0:3, :], slicing_dim=1, aux_data=aux_data, chunk_shape=GLOBAL_SHAPE, global_shape=GLOBAL_SHAPE) - + input = DataSetBlock( + data[:, 0:3, :], + slicing_dim=1, + aux_data=aux_data, + chunk_shape=GLOBAL_SHAPE, + global_shape=GLOBAL_SHAPE, + ) + wrp.execute(input) - + assert aux_data.get_angles().shape[0] == GLOBAL_SHAPE[0] @@ -53,14 +61,16 @@ def recon_tester(data, theta): "recon_tester", MPI.COMM_WORLD, ) - block = DataSetBlock(data=np.ones((13, 14, 15), dtype=np.float32), + block = DataSetBlock( + data=np.ones((13, 14, 15), dtype=np.float32), aux_data=AuxiliaryData(angles=np.ones(13, dtype=np.float32)), - slicing_dim=1 + slicing_dim=1, ) res = wrp.execute(block) assert res.data.shape == (13, 14, 15) + def test_recon_changes_global_shape_if_size_changes(mocker: MockerFixture): class FakeModule: def recon_tester(data, theta): @@ -74,14 +84,15 @@ def recon_tester(data, theta): "recon_tester", MPI.COMM_WORLD, ) - block = DataSetBlock(data=np.ones((13, 3, 15), dtype=np.float32), + block = DataSetBlock( + data=np.ones((13, 3, 15), dtype=np.float32), aux_data=AuxiliaryData(angles=np.ones(13, dtype=np.float32)), slicing_dim=1, global_shape=(13, 14, 15), - chunk_shape=(13, 14, 15) + chunk_shape=(13, 14, 15), ) res = wrp.execute(block) - + assert res.shape == (30, 3, 15) assert res.global_shape == (30, 14, 15) assert res.chunk_shape == (30, 14, 15) @@ -120,4 +131,4 @@ def tester(data, algorithm): assert wrp.recon_algorithm is None wrp["algorithm"] = "testalgo" - assert wrp.recon_algorithm is None \ No newline at end of file + assert wrp.recon_algorithm is None diff --git a/tests/method_wrappers/test_stats_calc.py b/tests/method_wrappers/test_stats_calc.py index 81bd9292d..244b02400 100644 --- a/tests/method_wrappers/test_stats_calc.py +++ b/tests/method_wrappers/test_stats_calc.py @@ -9,7 +9,6 @@ from httomo.utils import gpu_enabled, xp - def test_calculate_stats(mocker: 
MockerFixture, dummy_block: DataSetBlock): class FakeModule: def calculate_stats(data, comm): diff --git a/tests/monitors/test_aggregate.py b/tests/monitors/test_aggregate.py index abea71fc5..edcabb698 100644 --- a/tests/monitors/test_aggregate.py +++ b/tests/monitors/test_aggregate.py @@ -11,8 +11,19 @@ def test_aggregate_passes_on_method(mocker: MockerFixture): mon1 = mocker.create_autospec(MonitoringInterface, instance=True) mon2 = mocker.create_autospec(MonitoringInterface, instance=True) agg = AggregateMonitoring([mon1, mon2]) - args = ("method", "module", "task", 0, (1, 2, 3), (0, 0, 0), (10, 0, 0), - 42.0, 3.2, 1.2, 1.1) + args = ( + "method", + "module", + "task", + 0, + (1, 2, 3), + (0, 0, 0), + (10, 0, 0), + 42.0, + 3.2, + 1.2, + 1.1, + ) agg.report_method_block(*args) mon1.report_method_block.assert_called_once_with(*args) @@ -92,4 +103,4 @@ def test_make_monitors_unknown(mocker: MockerFixture): def test_make_monitors_empty(): - assert make_monitors([]) is None \ No newline at end of file + assert make_monitors([]) is None diff --git a/tests/monitors/test_benchmark.py b/tests/monitors/test_benchmark.py index 3fd8bc94d..1666c1416 100644 --- a/tests/monitors/test_benchmark.py +++ b/tests/monitors/test_benchmark.py @@ -8,10 +8,30 @@ def test_benchmark_monitor_records_and_displays_data(): mon = BenchmarkMonitoring() mon.report_method_block( - "method1", "module", "task", 0, (1, 2, 3), (0, 0, 0), (10, 0, 0), 42.0, 2.0, 0.1, 0.2 + "method1", + "module", + "task", + 0, + (1, 2, 3), + (0, 0, 0), + (10, 0, 0), + 42.0, + 2.0, + 0.1, + 0.2, ) mon.report_method_block( - "method2", "module", "task", 0, (1, 2, 3), (0, 0, 0), (10, 0, 0), 42.0, 2.0, 0.1, 0.2 + "method2", + "module", + "task", + 0, + (1, 2, 3), + (0, 0, 0), + (10, 0, 0), + 42.0, + 2.0, + 0.1, + 0.2, ) mon.report_source_block( "loader", "method1", 0, (1, 2, 3), (0, 0, 0), (10, 0, 0), 4.0 @@ -28,22 +48,25 @@ def test_benchmark_monitor_records_and_displays_data(): dest.flush() data = 
dest.getvalue().splitlines() assert len(data) == 8 - assert data[0] == ",".join([ - "Type", - "Rank","Name", - "Task id", - "Module", - "Slicing dim", - "Block offset (chunk)", - "Block offset (global)", - "Block dim z", - "Block dim y", - "Block dim x", - "CPU time", - "GPU kernel time", - "GPU H2D time", - "GPU D2H time" - ]) + assert data[0] == ",".join( + [ + "Type", + "Rank", + "Name", + "Task id", + "Module", + "Slicing dim", + "Block offset (chunk)", + "Block offset (global)", + "Block dim z", + "Block dim y", + "Block dim x", + "CPU time", + "GPU kernel time", + "GPU H2D time", + "GPU D2H time", + ] + ) assert "42.0,2.0,0.1,0.2" in data[1] assert "42.0,2.0,0.1,0.2" in data[2] @@ -64,7 +87,17 @@ def test_summary_monitor_records_and_displays_data_mpi(): # everything gets reported twice - once in each process - and the write_results should aggregate # in process 0 mon.report_method_block( - "method1", "module", "task", 0, (1, 2, 3), (0, 0, 0), (10, 0, 0), 42.0, 2.0, 0.1, 0.2 + "method1", + "module", + "task", + 0, + (1, 2, 3), + (0, 0, 0), + (10, 0, 0), + 42.0, + 2.0, + 0.1, + 0.2, ) mon.report_source_block( "loader", "method1", 0, (1, 2, 3), (0, 0, 0), (10, 0, 0), 4.0 @@ -80,29 +113,32 @@ def test_summary_monitor_records_and_displays_data_mpi(): assert len(data) == 0 else: assert len(data) == 9 - assert data[0] == ",".join([ - "Type", - "Rank","Name", - "Task id", - "Module", - "Slicing dim", - "Block offset (chunk)", - "Block offset (global)", - "Block dim z", - "Block dim y", - "Block dim x", - "CPU time", - "GPU kernel time", - "GPU H2D time", - "GPU D2H time" - ]) + assert data[0] == ",".join( + [ + "Type", + "Rank", + "Name", + "Task id", + "Module", + "Slicing dim", + "Block offset (chunk)", + "Block offset (global)", + "Block dim z", + "Block dim y", + "Block dim x", + "CPU time", + "GPU kernel time", + "GPU H2D time", + "GPU D2H time", + ] + ) for rank in [0, 1]: - assert "42.0,2.0,0.1,0.2" in data[1+rank*4] - assert f"method,{rank},method1" in 
data[1+rank*4] - assert "4.0,0.0" in data[2+rank*4] - assert f"source,{rank},loader" in data[2+rank*4] - assert "3.0,0.0" in data[3+rank*4] - assert f"sink,{rank},wrt1" in data[3+rank*4] - assert "500.0,0.0" in data[4+rank*4] - assert f"total,{rank},," in data[4+rank*4] + assert "42.0,2.0,0.1,0.2" in data[1 + rank * 4] + assert f"method,{rank},method1" in data[1 + rank * 4] + assert "4.0,0.0" in data[2 + rank * 4] + assert f"source,{rank},loader" in data[2 + rank * 4] + assert "3.0,0.0" in data[3 + rank * 4] + assert f"sink,{rank},wrt1" in data[3 + rank * 4] + assert "500.0,0.0" in data[4 + rank * 4] + assert f"total,{rank},," in data[4 + rank * 4] diff --git a/tests/monitors/test_summary.py b/tests/monitors/test_summary.py index 75c833346..a3e972095 100644 --- a/tests/monitors/test_summary.py +++ b/tests/monitors/test_summary.py @@ -27,7 +27,7 @@ def test_summary_monitor_records_and_displays_data(): mon.write_results(dest) dest.flush() data = dest.getvalue() - + assert "across 1 process" in data assert "Summary Statistics" in data assert "methods CPU time" in data @@ -45,6 +45,7 @@ def test_summary_monitor_records_and_displays_data(): assert "method1" in data assert "method2" in data + @pytest.mark.mpi @pytest.mark.skipif( MPI.COMM_WORLD.size != 2, reason="Only rank-2 MPI is supported with this test" @@ -56,10 +57,30 @@ def test_summary_monitor_records_and_displays_data_mpi(): # everything gets reported twice - once in each process - and the write_results should aggregate # in process 0 mon.report_method_block( - "method1", "module", "task", 0, (1, 2, 3), (0, 0, 0), (10, 0, 0), 42.0, 2.0, 0.1, 0.2 + "method1", + "module", + "task", + 0, + (1, 2, 3), + (0, 0, 0), + (10, 0, 0), + 42.0, + 2.0, + 0.1, + 0.2, ) mon.report_method_block( - "method2", "module", "task", 0, (1, 2, 3), (0, 0, 0), (10, 0, 0), 42.0, 2.0, 0.1, 0.2 + "method2", + "module", + "task", + 0, + (1, 2, 3), + (0, 0, 0), + (10, 0, 0), + 42.0, + 2.0, + 0.1, + 0.2, ) mon.report_source_block( "loader", 
"method1", 0, (1, 2, 3), (0, 0, 0), (10, 0, 0), 4.0 diff --git a/tests/runner/test_block_split.py b/tests/runner/test_block_split.py index 1bc8c23b2..0b78f0711 100644 --- a/tests/runner/test_block_split.py +++ b/tests/runner/test_block_split.py @@ -133,4 +133,3 @@ def test_block_can_iterate(mocker: MockerFixture, slicing_dim: int): source.read_block.assert_has_calls( [call(0, max_slices), call(max_slices, max_slices)] ) - diff --git a/tests/runner/test_dataset_store_interfaces.py b/tests/runner/test_dataset_store_interfaces.py index 63491f4b4..d4fbef939 100644 --- a/tests/runner/test_dataset_store_interfaces.py +++ b/tests/runner/test_dataset_store_interfaces.py @@ -5,8 +5,7 @@ def test_dummy_dataset_sink(dummy_block: DataSetBlock): dummy_sink = DummySink(0) dummy_sink.write_block(dummy_block) - + assert dummy_sink.global_shape == dummy_block.global_shape assert dummy_sink.global_index == dummy_block.chunk_index assert dummy_sink.chunk_shape == dummy_block.chunk_shape - \ No newline at end of file diff --git a/tests/runner/test_gpu_utils.py b/tests/runner/test_gpu_utils.py index 21de49ab3..1337c64f6 100644 --- a/tests/runner/test_gpu_utils.py +++ b/tests/runner/test_gpu_utils.py @@ -16,7 +16,6 @@ def test_get_available_memory(): assert mem <= 0.9 * xp.cuda.Device(local_rank).mem_info[0] - def test_get_available_memory_cpu(mocker: MockerFixture): # this function is called in the implementation try block - # we trigger an import error here to simulate cupy not being there diff --git a/tests/runner/test_pipeline.py b/tests/runner/test_pipeline.py index ae3bd3c8a..75ef9e331 100644 --- a/tests/runner/test_pipeline.py +++ b/tests/runner/test_pipeline.py @@ -45,6 +45,6 @@ def test_pipeline_can_access_by_index(mocker: MockerFixture): make_test_method(mocker, method_name="m3"), ], ) - + for i in range(3): assert p[i].method_name == f"m{i+1}" diff --git a/tests/runner/test_section.py b/tests/runner/test_section.py index 960b4185d..e2d4429e3 100644 --- 
a/tests/runner/test_section.py +++ b/tests/runner/test_section.py @@ -253,10 +253,12 @@ def test_sectionizer_output_ref_after_regular_section_break_does_nothing( assert len(s[0]) == 6 assert len(s[1]) == 6 -@pytest.mark.parametrize("patterns", [ - (Pattern.sinogram, Pattern.projection), - (Pattern.projection, Pattern.sinogram) -], ids=["sino-proj", "proj-sino"]) + +@pytest.mark.parametrize( + "patterns", + [(Pattern.sinogram, Pattern.projection), (Pattern.projection, Pattern.sinogram)], + ids=["sino-proj", "proj-sino"], +) def test_sectionizer_inserts_empty_section_if_loader_pattern_mismatches( mocker: MockerFixture, patterns: Tuple[Pattern, Pattern] ): diff --git a/tests/samples/python_templates/pipeline_cpu1.py b/tests/samples/python_templates/pipeline_cpu1.py index 980619262..22be20ba1 100644 --- a/tests/samples/python_templates/pipeline_cpu1.py +++ b/tests/samples/python_templates/pipeline_cpu1.py @@ -5,9 +5,10 @@ PipelineConfig: TypeAlias = List[PipelineStageConfig] # NOTE: when creating a Pythonic pipeline, please use -# the function's name "methods_to_list" so it will be +# the function's name "methods_to_list" so it will be # found by the loader + def methods_to_list() -> PipelineConfig: """Pythonic way to build a list of tasks from which Pipeline can be generated in Httomo. 
@@ -18,73 +19,72 @@ def methods_to_list() -> PipelineConfig: """ full_pipeline_list = [] loader = { - 'method': "standard_tomo", - 'module_path': "httomo.data.hdf.loaders", - 'parameters' : { - 'name': 'tomo', - 'data_path': 'entry1/tomo_entry/data/data', - 'image_key_path': 'entry1/tomo_entry/instrument/detector/image_key', - 'rotation_angles': {"data_path": "/entry1/tomo_entry/data/rotation_angle"}, - 'dimension': 1, - 'pad': 0, - }, - } + "method": "standard_tomo", + "module_path": "httomo.data.hdf.loaders", + "parameters": { + "name": "tomo", + "data_path": "entry1/tomo_entry/data/data", + "image_key_path": "entry1/tomo_entry/instrument/detector/image_key", + "rotation_angles": {"data_path": "/entry1/tomo_entry/data/rotation_angle"}, + "dimension": 1, + "pad": 0, + }, + } full_pipeline_list.append(loader) method1 = { - 'method': "normalize", - 'module_path': "tomopy.prep.normalize", - 'parameters' : { - 'cutoff': None, - }, - } + "method": "normalize", + "module_path": "tomopy.prep.normalize", + "parameters": { + "cutoff": None, + }, + } full_pipeline_list.append(method1) method2 = { - 'method': "minus_log", - 'module_path': "tomopy.prep.normalize", - 'parameters' : {}, - } + "method": "minus_log", + "module_path": "tomopy.prep.normalize", + "parameters": {}, + } full_pipeline_list.append(method2) method3 = { - 'method': "find_center_vo", - 'module_path': "tomopy.recon.rotation", - 'id': "centering", - 'parameters' : { - 'ind': "mid", - 'smin': -50, - 'smax': 50, - 'srad': 6, - 'step': 0.25, - 'ratio': 0.5, - 'drop': 20, - }, - 'side_outputs': {"cor": "centre_of_rotation"}, - } + "method": "find_center_vo", + "module_path": "tomopy.recon.rotation", + "id": "centering", + "parameters": { + "ind": "mid", + "smin": -50, + "smax": 50, + "srad": 6, + "step": 0.25, + "ratio": 0.5, + "drop": 20, + }, + "side_outputs": {"cor": "centre_of_rotation"}, + } full_pipeline_list.append(method3) method4 = { - 'method': "recon", - 'module_path': "tomopy.recon.algorithm", - 
'parameters' : { - 'center': "${{centering.side_outputs.centre_of_rotation}}", - 'sinogram_order': False, - 'algorithm': "gridrec", - 'init_recon': None, - }, - } + "method": "recon", + "module_path": "tomopy.recon.algorithm", + "parameters": { + "center": "${{centering.side_outputs.centre_of_rotation}}", + "sinogram_order": False, + "algorithm": "gridrec", + "init_recon": None, + }, + } full_pipeline_list.append(method4) method5 = { - 'method': "save_to_images", - 'module_path': "httomolib.misc.images", - 'parameters' : { - 'subfolder_name': "images", - 'axis': 1, - 'file_format': "tif", - 'bits': 8, - 'perc_range_min': 0.0, - 'perc_range_max': 100.0, - 'jpeg_quality': 95, - }, - } + "method": "save_to_images", + "module_path": "httomolib.misc.images", + "parameters": { + "subfolder_name": "images", + "axis": 1, + "file_format": "tif", + "bits": 8, + "perc_range_min": 0.0, + "perc_range_max": 100.0, + "jpeg_quality": 95, + }, + } full_pipeline_list.append(method5) return full_pipeline_list - diff --git a/tests/test_cli.py b/tests/test_cli.py index 6cfc13000..462fa4414 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -55,26 +55,27 @@ def test_cli_pass_gpu_id(cmd, standard_data, standard_loader, output_folder): assert "GPU Device not available for access." 
in result.stderr -@pytest.mark.parametrize("cli_parameter,limit_bytes", [ - ("0", 0), - ("500", 500), - ("500k", 500 * 1024), - ("1M", 1024*1024), - ("1m", 1024*1024), - ("3g", 3 * 1024**3), - ("3.2g", int(3.2 * 1024**3)) -]) +@pytest.mark.parametrize( + "cli_parameter,limit_bytes", + [ + ("0", 0), + ("500", 500), + ("500k", 500 * 1024), + ("1M", 1024 * 1024), + ("1m", 1024 * 1024), + ("3g", 3 * 1024**3), + ("3.2g", int(3.2 * 1024**3)), + ], +) def test_cli_transforms_memory_limits(cli_parameter: str, limit_bytes: int): assert transform_limit_str_to_bytes(cli_parameter) == limit_bytes - -@pytest.mark.parametrize("cli_parameter", [ - "abcd", "nolimit", "124A", "23ki" -]) + +@pytest.mark.parametrize("cli_parameter", ["abcd", "nolimit", "124A", "23ki"]) def test_cli_fails_transforming_memory_limits(cli_parameter: str): with pytest.raises(ValueError) as e: transform_limit_str_to_bytes(cli_parameter) - + assert f"invalid memory limit string {cli_parameter}" in str(e) diff --git a/tests/test_loader.py b/tests/test_loader.py index e48f39a9e..cb7ced54c 100644 --- a/tests/test_loader.py +++ b/tests/test_loader.py @@ -14,7 +14,7 @@ def test_tomo_standard_testing_pipeline_loaded( cmd, standard_data, standard_loader, output_folder, testing_pipeline, merge_yamls ): cmd.pop(4) #: don't save all - cmd.insert(6, standard_data) + cmd.insert(6, standard_data) merge_yamls(standard_loader, testing_pipeline) cmd.insert(7, "temp.yaml") cmd.insert(8, output_folder) @@ -29,7 +29,8 @@ def test_tomo_standard_testing_pipeline_loaded( assert "Running task 4 (pattern=projection): find_center_vo.." in result.stderr assert "Running task 5 (pattern=sinogram): remove_stripe_fw..." in result.stderr assert "Running task 6 (pattern=sinogram): recon..." in result.stderr - assert "Running task 7 (pattern=sinogram): save_to_images.." in result.stderr + assert "Running task 7 (pattern=sinogram): save_to_images.." 
in result.stderr + @pytest.mark.skip("outdated - to be removed after refactoring") def test_diad_testing_pipeline_loaded( @@ -49,7 +50,8 @@ def test_diad_testing_pipeline_loaded( assert "Running task 4 (pattern=projection): find_center_vo.." in result.stderr assert "Running task 5 (pattern=sinogram): remove_stripe_fw..." in result.stderr assert "Running task 6 (pattern=sinogram): recon..." in result.stderr - assert "Running task 7 (pattern=sinogram): save_to_images.." in result.stderr + assert "Running task 7 (pattern=sinogram): save_to_images.." in result.stderr + def test_standard_tomo(standard_data, standard_data_path, standard_image_key_path): preview = [None, {"start": 5, "stop": 10}, None] diff --git a/tests/test_method_query.py b/tests/test_method_query.py index e5ec51ab3..d42debd8d 100644 --- a/tests/test_method_query.py +++ b/tests/test_method_query.py @@ -53,16 +53,18 @@ def test_httomolibgpu_default_save_result(): save_result = get_method_info( "httomolibgpu.prep.normalize", "normalize", "save_result_default" ) - + assert save_result is False - + + def test_httomolibgpu_default_save_result_recon(): save_result = get_method_info( "httomolibgpu.recon.algorithm", "FBP", "save_result_default" ) - + assert save_result is True + def test_httomolibgpu_memory_gpu(): memory_gpu = get_method_info( "httomolibgpu.prep.normalize", "normalize", "memory_gpu" @@ -80,7 +82,7 @@ def test_database_query_object(): mempars = query.get_memory_gpu_params() assert set(p.dataset for p in mempars) == set(["tomo"]) assert all(p.method == "module" for p in mempars) - assert all(p.multiplier == 'None' for p in mempars) + assert all(p.multiplier == "None" for p in mempars) def test_database_query_object_recon_swap_output(): diff --git a/tests/test_methods.py b/tests/test_methods.py index 16f97347b..3c059fbc3 100644 --- a/tests/test_methods.py +++ b/tests/test_methods.py @@ -40,18 +40,18 @@ def test_calculate_stats_gpu(): ret = calculate_stats(data) assert ret == (-10.0, 19.0, 
np.sum(data.get()), 30) - + @pytest.mark.perf @pytest.mark.parametrize("gpu", [False, True], ids=["CPU", "GPU"]) def test_calculcate_stats_performance(gpu: bool): if gpu and not gpu_enabled: pytest.skip("No GPU available") - + data = np.random.randint( low=7515, high=37624, size=(1801, 5, 2560), dtype=np.uint32 ).astype(np.float32) - + if gpu: data = xp.asarray(data) xp.cuda.Device().synchronize() @@ -61,32 +61,38 @@ def test_calculcate_stats_performance(gpu: bool): if gpu: xp.cuda.Device().synchronize() stop = time.perf_counter_ns() - duration_ms = float(stop-start) * 1e-6 / 10 - - # Note: on Quadro RTX 6000 vs Xeon(R) Gold 6148, GPU is about 10x faster + duration_ms = float(stop - start) * 1e-6 / 10 + + # Note: on Quadro RTX 6000 vs Xeon(R) Gold 6148, GPU is about 10x faster assert "performance in ms" == duration_ms - - + + def test_save_intermediate_data(tmp_path: Path): # use increasing numbers in the data, to make sure blocks have different content - GLOBAL_SHAPE=(10,10,10) + GLOBAL_SHAPE = (10, 10, 10) global_data = np.arange(np.prod(GLOBAL_SHAPE), dtype=np.float32).reshape( GLOBAL_SHAPE ) aux_data = AuxiliaryData(angles=np.ones(GLOBAL_SHAPE[0], dtype=np.float32)) bsize = 3 - b1 = DataSetBlock(data=global_data[:bsize], aux_data=aux_data, - slicing_dim=0, - block_start=0, - chunk_start=0, - chunk_shape=GLOBAL_SHAPE, - global_shape=GLOBAL_SHAPE) - b2 = DataSetBlock(data=global_data[bsize:], aux_data=aux_data, - slicing_dim=0, - block_start=bsize, - chunk_start=0, - chunk_shape=GLOBAL_SHAPE, - global_shape=GLOBAL_SHAPE) + b1 = DataSetBlock( + data=global_data[:bsize], + aux_data=aux_data, + slicing_dim=0, + block_start=0, + chunk_start=0, + chunk_shape=GLOBAL_SHAPE, + global_shape=GLOBAL_SHAPE, + ) + b2 = DataSetBlock( + data=global_data[bsize:], + aux_data=aux_data, + slicing_dim=0, + block_start=bsize, + chunk_start=0, + chunk_shape=GLOBAL_SHAPE, + global_shape=GLOBAL_SHAPE, + ) with h5py.File( tmp_path / "test_file.h5", "w", driver="mpio", comm=MPI.COMM_WORLD 
@@ -135,14 +141,16 @@ def test_save_intermediate_data_mpi(tmp_path: Path): # make sure we use the same tmp_path on both processes tmp_path = comm.bcast(tmp_path) GLOBAL_SHAPE = (10, 10, 10) - csize=5 + csize = 5 # use increasing numbers in the data, to make sure blocks have different content global_data = np.arange(np.prod(GLOBAL_SHAPE), dtype=np.float32).reshape( GLOBAL_SHAPE ) aux_data = AuxiliaryData(angles=np.ones(GLOBAL_SHAPE[0], dtype=np.float32)) # give each process only a portion of the data - rank_data = global_data[:csize, :, :] if comm.rank == 0 else global_data[csize:, :, :] + rank_data = ( + global_data[:csize, :, :] if comm.rank == 0 else global_data[csize:, :, :] + ) # create 2 blocks per rank b1 = DataSetBlock( data=rank_data[:3, :, :], @@ -151,7 +159,7 @@ def test_save_intermediate_data_mpi(tmp_path: Path): block_start=0, chunk_start=0 if comm.rank == 0 else csize, global_shape=GLOBAL_SHAPE, - chunk_shape=(csize, GLOBAL_SHAPE[1], GLOBAL_SHAPE[2]) + chunk_shape=(csize, GLOBAL_SHAPE[1], GLOBAL_SHAPE[2]), ) b2 = DataSetBlock( data=rank_data[3:, :, :], @@ -160,7 +168,7 @@ def test_save_intermediate_data_mpi(tmp_path: Path): block_start=3, chunk_start=0 if comm.rank == 0 else csize, global_shape=GLOBAL_SHAPE, - chunk_shape=(csize, GLOBAL_SHAPE[1], GLOBAL_SHAPE[2]) + chunk_shape=(csize, GLOBAL_SHAPE[1], GLOBAL_SHAPE[2]), ) with h5py.File(tmp_path / "test_file.h5", "w", driver="mpio", comm=comm) as file: diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 773415971..b003611c0 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -48,7 +48,13 @@ def _check_tif(files: List, number: int, shape: Tuple): def test_tomo_standard_testing_pipeline_output( - get_files: Callable, cmd, standard_data, standard_loader, testing_pipeline, output_folder, merge_yamls + get_files: Callable, + cmd, + standard_data, + standard_loader, + testing_pipeline, + output_folder, + merge_yamls, ): cmd.pop(4) #: don't save all cmd.insert(6, standard_data) 
@@ -94,7 +100,9 @@ def test_tomo_standard_testing_pipeline_output( assert "Data shape is (180, 3, 160) of type uint16" in verbose_log_contents -def test_run_pipeline_cpu1_yaml(get_files: Callable, cmd, standard_data, yaml_cpu_pipeline1, output_folder): +def test_run_pipeline_cpu1_yaml( + get_files: Callable, cmd, standard_data, yaml_cpu_pipeline1, output_folder +): cmd.pop(4) #: don't save all cmd.insert(6, standard_data) cmd.insert(7, yaml_cpu_pipeline1) @@ -124,7 +132,9 @@ def test_run_pipeline_cpu1_yaml(get_files: Callable, cmd, standard_data, yaml_cp assert "Data shape is (180, 128, 160) of type uint16" in verbose_log_contents -def test_run_pipeline_cpu1_py(get_files: Callable, cmd, standard_data, python_cpu_pipeline1, output_folder): +def test_run_pipeline_cpu1_py( + get_files: Callable, cmd, standard_data, python_cpu_pipeline1, output_folder +): cmd.pop(4) #: don't save all cmd.insert(6, standard_data) cmd.insert(7, python_cpu_pipeline1) @@ -151,7 +161,9 @@ def test_run_pipeline_cpu1_py(get_files: Callable, cmd, standard_data, python_cp assert "Data shape is (180, 128, 160) of type uint16" in verbose_log_contents -def test_run_pipeline_cpu2_yaml(get_files: Callable, cmd, standard_data, yaml_cpu_pipeline2, output_folder): +def test_run_pipeline_cpu2_yaml( + get_files: Callable, cmd, standard_data, yaml_cpu_pipeline2, output_folder +): cmd.pop(4) #: don't save all cmd.insert(6, standard_data) cmd.insert(7, yaml_cpu_pipeline2) @@ -189,7 +201,9 @@ def test_run_pipeline_cpu2_yaml(get_files: Callable, cmd, standard_data, yaml_cp assert "Data shape is (180, 30, 160) of type uint16" in verbose_log_contents -def test_run_pipeline_cpu2_py(get_files: Callable, cmd, standard_data, python_cpu_pipeline2, output_folder): +def test_run_pipeline_cpu2_py( + get_files: Callable, cmd, standard_data, python_cpu_pipeline2, output_folder +): cmd.pop(4) #: don't save all cmd.insert(6, standard_data) cmd.insert(7, python_cpu_pipeline2) @@ -227,7 +241,9 @@ def 
test_run_pipeline_cpu2_py(get_files: Callable, cmd, standard_data, python_cp assert "Data shape is (180, 30, 160) of type uint16" in verbose_log_contents -def test_run_pipeline_cpu3_yaml(get_files: Callable, cmd, standard_data, yaml_cpu_pipeline3, output_folder): +def test_run_pipeline_cpu3_yaml( + get_files: Callable, cmd, standard_data, yaml_cpu_pipeline3, output_folder +): cmd.pop(4) #: don't save all cmd.insert(6, standard_data) cmd.insert(7, yaml_cpu_pipeline3) @@ -260,7 +276,9 @@ def test_run_pipeline_cpu3_yaml(get_files: Callable, cmd, standard_data, yaml_cp assert " Global mean 0.0016174" in verbose_log_contents -def test_run_pipeline_cpu3_py(get_files: Callable, cmd, standard_data, python_cpu_pipeline3, output_folder): +def test_run_pipeline_cpu3_py( + get_files: Callable, cmd, standard_data, python_cpu_pipeline3, output_folder +): cmd.pop(4) #: don't save all cmd.insert(6, standard_data) cmd.insert(7, python_cpu_pipeline3) @@ -293,7 +311,9 @@ def test_run_pipeline_cpu3_py(get_files: Callable, cmd, standard_data, python_cp assert " Global mean 0.0016174" in verbose_log_contents -def test_run_pipeline_cpu4_yaml(get_files: Callable, cmd, standard_data, yaml_cpu_pipeline4, output_folder): +def test_run_pipeline_cpu4_yaml( + get_files: Callable, cmd, standard_data, yaml_cpu_pipeline4, output_folder +): cmd.pop(4) #: don't save all cmd.insert(6, standard_data) cmd.insert(7, yaml_cpu_pipeline4) @@ -326,7 +346,9 @@ def test_run_pipeline_cpu4_yaml(get_files: Callable, cmd, standard_data, yaml_cp assert "Data shape is (180, 128, 160) of type uint16" in verbose_log_contents -def test_run_pipeline_gpu1_yaml(get_files: Callable, cmd, standard_data, yaml_gpu_pipeline1, output_folder): +def test_run_pipeline_gpu1_yaml( + get_files: Callable, cmd, standard_data, yaml_gpu_pipeline1, output_folder +): cmd.pop(4) #: don't save all cmd.insert(6, standard_data) cmd.insert(7, yaml_gpu_pipeline1) @@ -362,10 +384,14 @@ def test_run_pipeline_gpu1_yaml(get_files: Callable, cmd, 
standard_data, yaml_gp assert "Preview: (0:180, 0:128, 0:160)" in verbose_log_contents assert "Data shape is (180, 128, 160) of type uint16" in verbose_log_contents assert "The amount of the available GPU memory is" in verbose_log_contents - assert "Using GPU 0 to transfer data of shape (180, 128, 160)" in verbose_log_contents + assert ( + "Using GPU 0 to transfer data of shape (180, 128, 160)" in verbose_log_contents + ) -def test_run_pipeline_gpu1_py(get_files: Callable, cmd, standard_data, python_gpu_pipeline1, output_folder): +def test_run_pipeline_gpu1_py( + get_files: Callable, cmd, standard_data, python_gpu_pipeline1, output_folder +): cmd.pop(4) #: don't save all cmd.insert(6, standard_data) cmd.insert(7, python_gpu_pipeline1) @@ -401,11 +427,19 @@ def test_run_pipeline_gpu1_py(get_files: Callable, cmd, standard_data, python_gp assert "Preview: (0:180, 0:128, 0:160)" in verbose_log_contents assert "Data shape is (180, 128, 160) of type uint16" in verbose_log_contents assert "The amount of the available GPU memory is" in verbose_log_contents - assert "Using GPU 0 to transfer data of shape (180, 128, 160)" in verbose_log_contents + assert ( + "Using GPU 0 to transfer data of shape (180, 128, 160)" in verbose_log_contents + ) def test_tomo_standard_testing_pipeline_output_with_save_all( - get_files: Callable, cmd, standard_data, standard_loader, testing_pipeline, output_folder, merge_yamls + get_files: Callable, + cmd, + standard_data, + standard_loader, + testing_pipeline, + output_folder, + merge_yamls, ): cmd.insert(7, standard_data) merge_yamls(standard_loader, testing_pipeline) @@ -417,7 +451,7 @@ def test_tomo_standard_testing_pipeline_output_with_save_all( assert len(files) == 10 _check_yaml(files, "temp.yaml") - _check_tif(files, 3, (160,160)) + _check_tif(files, 3, (160, 160)) #: check the generated h5 files h5_files = list(filter(lambda x: ".h5" in x, files)) @@ -433,7 +467,13 @@ def test_tomo_standard_testing_pipeline_output_with_save_all( def 
test_i12_testing_pipeline_output( - get_files: Callable, cmd, i12_data, i12_loader, testing_pipeline, output_folder, merge_yamls + get_files: Callable, + cmd, + i12_data, + i12_loader, + testing_pipeline, + output_folder, + merge_yamls, ): cmd.insert(7, i12_data) merge_yamls(i12_loader, testing_pipeline) @@ -539,7 +579,13 @@ def test_i12_testing_pipeline_output( def test_diad_testing_pipeline_output( - get_files: Callable, cmd, diad_data, diad_loader, testing_pipeline, output_folder, merge_yamls + get_files: Callable, + cmd, + diad_data, + diad_loader, + testing_pipeline, + output_folder, + merge_yamls, ): cmd.insert(7, diad_data) merge_yamls(diad_loader, testing_pipeline) @@ -576,13 +622,17 @@ def test_diad_testing_pipeline_output( verbose_log_contents = _get_log_contents(verbose_log_file[0]) assert "The full dataset shape is (3201, 22, 26)" in verbose_log_contents - assert "Loading data: tests/test_data/k11_diad/k11-18014.nxs" in verbose_log_contents + assert ( + "Loading data: tests/test_data/k11_diad/k11-18014.nxs" in verbose_log_contents + ) assert "Path to data: /entry/imaging/data" in verbose_log_contents assert "Preview: (100:3101, 5:7, 0:26)" in verbose_log_contents assert "Data shape is (3001, 2, 26) of type uint16" in verbose_log_contents -def test_run_diad_pipeline_gpu(get_files: Callable, cmd, diad_data, diad_pipeline_gpu, output_folder): +def test_run_diad_pipeline_gpu( + get_files: Callable, cmd, diad_data, diad_pipeline_gpu, output_folder +): cmd.pop(4) #: don't save all cmd.insert(6, diad_data) cmd.insert(7, diad_pipeline_gpu) @@ -603,7 +653,9 @@ def test_run_diad_pipeline_gpu(get_files: Callable, cmd, diad_data, diad_pipelin verbose_log_contents = _get_log_contents(verbose_log_file[0]) assert "The full dataset shape is (3201, 22, 26)" in verbose_log_contents - assert "Loading data: tests/test_data/k11_diad/k11-18014.nxs" in verbose_log_contents + assert ( + "Loading data: tests/test_data/k11_diad/k11-18014.nxs" in verbose_log_contents + ) assert 
"Path to data: /entry/imaging/data" in verbose_log_contents assert "Preview: (100:3101, 8:15, 0:26)" in verbose_log_contents assert "Data shape is (3001, 7, 26) of type uint16" in verbose_log_contents @@ -612,7 +664,9 @@ def test_run_diad_pipeline_gpu(get_files: Callable, cmd, diad_data, diad_pipelin assert "Global mean 0.000291" in verbose_log_contents -def test_run_pipeline_360deg_gpu2(get_files: Callable, cmd, data360, yaml_gpu_pipeline360_2, output_folder): +def test_run_pipeline_360deg_gpu2( + get_files: Callable, cmd, data360, yaml_gpu_pipeline360_2, output_folder +): cmd.pop(4) #: don't save all cmd.insert(6, data360) cmd.insert(7, yaml_gpu_pipeline360_2) diff --git a/tests/test_utils.py b/tests/test_utils.py index 6769b5be1..6527f2ebb 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,7 +5,7 @@ def test_cachetime(): with catchtime() as t: sleep(0.1) - + assert t.elapsed >= 0.1 assert t.elapsed < 0.2 @@ -14,8 +14,8 @@ def test_catch_gputime(): input = xp.ones((500, 200, 100), dtype=xp.float32) with catch_gputime() as t: xp.sum(input) - + if gpu_enabled: assert t.elapsed > 0.0 else: - assert t.elapsed == 0.0 \ No newline at end of file + assert t.elapsed == 0.0 diff --git a/tests/test_yaml_checker.py b/tests/test_yaml_checker.py index 917c5d271..ec7834ea2 100644 --- a/tests/test_yaml_checker.py +++ b/tests/test_yaml_checker.py @@ -1,6 +1,7 @@ """ Some unit tests for the yaml checker """ + from typing import Callable import pytest @@ -35,91 +36,52 @@ def test_sanity_check(sample_pipelines): assert not sanity_check(conf_generator) -def test_check_first_method_is_loader( - sample_pipelines: str, - load_yaml: Callable -): - no_loader_method_pipeline = ( - sample_pipelines + "testing/no_loader_method.yaml" - ) +def test_check_first_method_is_loader(sample_pipelines: str, load_yaml: Callable): + no_loader_method_pipeline = sample_pipelines + "testing/no_loader_method.yaml" conf = load_yaml(no_loader_method_pipeline) assert not 
check_first_method_is_loader(conf) def test_hdf5_paths_against_loader( - standard_data, - sample_pipelines, - load_yaml: Callable + standard_data, sample_pipelines, load_yaml: Callable ): - incorrect_path_pipeline = ( - sample_pipelines + "testing/incorrect_path.yaml" - ) + incorrect_path_pipeline = sample_pipelines + "testing/incorrect_path.yaml" conf = load_yaml(incorrect_path_pipeline) assert not check_hdf5_paths_against_loader(conf, standard_data) -def test_check_methods_exist_in_templates( - sample_pipelines: str, - load_yaml: Callable -): - incorrect_method_pipeline = ( - sample_pipelines + "testing/incorrect_method.yaml" - ) +def test_check_methods_exist_in_templates(sample_pipelines: str, load_yaml: Callable): + incorrect_method_pipeline = sample_pipelines + "testing/incorrect_method.yaml" conf = load_yaml(incorrect_method_pipeline) assert not check_methods_exist_in_templates(conf) @pytest.mark.skip(reason="Some parameters are additional and not listed in templates") -def test_check_parameter_names_are_known( - sample_pipelines: str, - load_yaml: Callable -): - required_param_pipeline = ( - sample_pipelines + "testing/unknown_param.yaml" - ) +def test_check_parameter_names_are_known(sample_pipelines: str, load_yaml: Callable): + required_param_pipeline = sample_pipelines + "testing/unknown_param.yaml" conf = load_yaml(required_param_pipeline) assert not check_parameter_names_are_known(conf) -def test_check_parameter_names_are_str( - sample_pipelines: str, - load_yaml: Callable -): - required_param_pipeline = ( - sample_pipelines + "testing/non_str_param_name.yaml" - ) +def test_check_parameter_names_are_str(sample_pipelines: str, load_yaml: Callable): + required_param_pipeline = sample_pipelines + "testing/non_str_param_name.yaml" conf = load_yaml(required_param_pipeline) assert not check_parameter_names_are_str(conf) -def test_check_no_required_parameter_values( - sample_pipelines: str, - load_yaml: Callable -): - required_param_pipeline = ( - 
sample_pipelines + "testing/required_param.yaml" - ) +def test_check_no_required_parameter_values(sample_pipelines: str, load_yaml: Callable): + required_param_pipeline = sample_pipelines + "testing/required_param.yaml" conf = load_yaml(required_param_pipeline) assert not check_no_required_parameter_values(conf) -def test_check_no_duplicated_keys( - sample_pipelines: str, - load_yaml: Callable -): - required_param_pipeline = ( - sample_pipelines + "testing/duplicated_key.yaml" - ) +def test_check_no_duplicated_keys(sample_pipelines: str, load_yaml: Callable): + required_param_pipeline = sample_pipelines + "testing/duplicated_key.yaml" assert not check_no_duplicated_keys(required_param_pipeline) -def test_check_keys( - sample_pipelines: str, - load_yaml: Callable -): - required_param_pipeline = ( - sample_pipelines + "testing/required_keys.yaml" - ) +def test_check_keys(sample_pipelines: str, load_yaml: Callable): + required_param_pipeline = sample_pipelines + "testing/required_keys.yaml" conf = load_yaml(required_param_pipeline) assert not check_keys(conf) @@ -184,7 +146,7 @@ def test_check_side_out_matches_ref_arg( ("pipeline_cpu1.yaml", True), ("pipeline_cpu2.yaml", True), ("pipeline_gpu1.yaml", True), - #("parameter_sweeps/02_median_filter_kernel_sweep.yaml", True), + # ("parameter_sweeps/02_median_filter_kernel_sweep.yaml", True), ], ids=[ "cpu1_pipeline", @@ -193,12 +155,7 @@ def test_check_side_out_matches_ref_arg( ], ) def test_validate_yaml_config( - sample_pipelines: str, - yaml_file: str, - standard_data: str, - expected: bool + sample_pipelines: str, yaml_file: str, standard_data: str, expected: bool ): yaml_file = sample_pipelines + yaml_file assert validate_yaml_config(yaml_file, standard_data) == expected - - diff --git a/tests/testing_utils.py b/tests/testing_utils.py index 46a8370bf..59e1d6a0f 100644 --- a/tests/testing_utils.py +++ b/tests/testing_utils.py @@ -19,7 +19,7 @@ def make_test_method( method_name="testmethod", module_path="testpath", 
save_result=False, - task_id:Optional[str]=None, + task_id: Optional[str] = None, **kwargs, ) -> MethodWrapper: if task_id is None: @@ -64,23 +64,23 @@ def mock_make_data_source() -> DataSetSource: chunk_shape=block.chunk_shape, chunk_index=block.chunk_index, slicing_dim=1 if interface.pattern == Pattern.sinogram else 0, - aux_data=block.aux_data + aux_data=block.aux_data, ) - slicing_dim=1 if interface.pattern == Pattern.sinogram else 0 + slicing_dim = 1 if interface.pattern == Pattern.sinogram else 0 mocker.patch.object( ret, "read_block", side_effect=lambda start, length: DataSetBlock( - data=block.data[start: start+length, :, :], + data=block.data[start : start + length, :, :], aux_data=block.aux_data, global_shape=block.global_shape, chunk_shape=block.chunk_shape, slicing_dim=slicing_dim, block_start=start, - chunk_start=block.chunk_index[slicing_dim] - ) + chunk_start=block.chunk_index[slicing_dim], + ), ) - + return ret mocker.patch.object( @@ -100,7 +100,7 @@ def make_mock_repo( GpuMemoryRequirement(dataset="tomo", multiplier=1.2, method="direct") ], swap_dims_on_output=False, - save_result_default=False + save_result_default=False, ) -> MethodRepository: """Makes a mock MethodRepository that returns the given properties on any query""" mock_repo = mocker.MagicMock() @@ -110,8 +110,12 @@ def make_mock_repo( mocker.patch.object( mock_query, "get_output_dims_change", return_value=output_dims_change ) - mocker.patch.object(mock_query, "swap_dims_on_output", return_value=swap_dims_on_output) + mocker.patch.object( + mock_query, "swap_dims_on_output", return_value=swap_dims_on_output + ) mocker.patch.object(mock_query, "get_implementation", return_value=implementation) mocker.patch.object(mock_query, "get_memory_gpu_params", return_value=memory_gpu) - mocker.patch.object(mock_query, "save_result_default", return_value=save_result_default) + mocker.patch.object( + mock_query, "save_result_default", return_value=save_result_default + ) return mock_repo From 
300847a066ec2ead1267fd8b175f7c03291e72b1 Mon Sep 17 00:00:00 2001 From: dkazanc Date: Wed, 10 Jul 2024 11:13:58 +0100 Subject: [PATCH 02/15] adding pre-commit deps to pyproject --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 04e5b22e5..5616a4553 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,7 @@ dev-cpu = [ "typing_extensions", "mypy", "types-PyYAML", + "pre-commit" ] dev-gpu = [ "pytest", @@ -92,6 +93,7 @@ dev-gpu = [ "typing_extensions", "mypy", "types-PyYAML", + "pre-commit" ] [tool.mypy] From df0d548d6d27015c7d09b71aa71c7ab0edf7ed59 Mon Sep 17 00:00:00 2001 From: dkazanc Date: Wed, 10 Jul 2024 12:00:03 +0100 Subject: [PATCH 03/15] working pre-commit run, files modified --- .github/workflows/conda_upload.yml | 2 +- .github/workflows/nightly_build.yml | 4 +- .pre-commit-config.yaml | 33 +++++++ README.rst | 4 +- conda/environment-cpu.yml | 4 +- conda/recipe/meta.yaml | 2 +- docs/source/backends/list.rst | 30 +++--- docs/source/backends/templates.rst | 6 +- docs/source/conf.py | 28 ++++-- docs/source/developers/how_to_contribute.rst | 24 ++--- docs/source/developers/memory_calculation.rst | 16 ++-- docs/source/explanation/faq.rst | 22 ++--- docs/source/explanation/process_list.rst | 4 +- docs/source/explanation/templates.rst | 4 +- .../howto/httomo_features/centering.rst | 38 ++++---- .../httomo_features/parameter_tuning.rst | 4 +- .../howto/httomo_features/previewing.rst | 14 +-- docs/source/howto/installation.rst | 12 +-- docs/source/howto/interpret_logger.rst | 32 +++---- .../howto/process_lists/httomo_parameters.rst | 2 +- .../process_lists/process_list_configure.rst | 10 +- .../save_results/save_results.rst | 4 +- docs/source/howto/process_lists_guide.rst | 8 +- docs/source/howto/run_httomo.rst | 2 +- docs/source/index.rst | 6 +- docs/source/introduction/about.rst | 2 +- .../introduction/indepth/detailed_about.rst | 4 +- .../indepth/memory_estimators.rst | 6 +- 
docs/source/introduction/indepth/reslice.rst | 10 +- docs/source/introduction/indepth/sections.rst | 12 +-- docs/source/introduction/indepth/wrappers.rst | 8 +- docs/source/pipelines/yaml.rst | 18 ++-- docs/source/reference/yaml.rst | 2 +- docs/source/utilities/yaml_checker.rst | 2 +- docs/source/utilities/yaml_generator.rst | 14 +-- httomo/data/dataset_store.py | 92 +++++-------------- httomo/data/mpiutil.py | 4 +- httomo/globals.py | 4 +- httomo/method_wrappers/save_intermediate.py | 1 - .../external/httomolibgpu/httomolibgpu.yaml | 10 +- httomo/methods_database/packages/httomo.yaml | 4 +- httomo/runner/dataset_store_interfaces.py | 3 +- httomo/runner/monitoring_interface.py | 15 ++- pyproject.toml | 4 +- tests/method_wrappers/test_generic.py | 1 - tests/method_wrappers/test_rotation.py | 1 - tests/method_wrappers/test_stats_calc.py | 1 - tests/monitors/test_summary.py | 1 - .../DLS/01_diad_pipeline_gpu.yaml | 2 +- .../pipeline_360deg_gpu2.yaml | 6 +- .../pipeline_360deg_iterative_gpu3.yaml | 4 +- .../pipeline_cpu1.yaml | 2 +- .../pipeline_cpu3.yaml | 2 +- .../pipeline_cpu4.yaml | 2 +- .../testing/invalid_reference.yaml | 2 +- .../testing/testing_pipeline.yaml | 2 +- tests/test_backends/test_httomolibgpu.py | 1 - 57 files changed, 275 insertions(+), 282 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.github/workflows/conda_upload.yml b/.github/workflows/conda_upload.yml index 05be7d43a..5d8cca63d 100644 --- a/.github/workflows/conda_upload.yml +++ b/.github/workflows/conda_upload.yml @@ -35,7 +35,7 @@ jobs: $CONDA/bin/conda install -c conda-forge conda-build $CONDA/bin/conda install -c conda-forge anaconda-client $CONDA/bin/conda update conda - $CONDA/bin/conda update conda-build + $CONDA/bin/conda update conda-build $CONDA/bin/conda list - name: Decrypt a secret diff --git a/.github/workflows/nightly_build.yml b/.github/workflows/nightly_build.yml index 2c710a44d..66b9609e4 100644 --- a/.github/workflows/nightly_build.yml +++ 
b/.github/workflows/nightly_build.yml @@ -25,14 +25,14 @@ jobs: uses: actions/setup-python@v2 with: python-version: 3.9 - + - name: Install dependencies with Conda run: | $CONDA/bin/conda install -c conda-forge conda-build $CONDA/bin/conda install -c conda-forge anaconda-client $CONDA/bin/conda update conda $CONDA/bin/conda update conda-build - $CONDA/bin/conda list + $CONDA/bin/conda list - name: Decrypt a secret run: ./.scripts/decrypt_secret.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..5b7ada818 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,33 @@ +repos: +- repo: https://github.com/psf/black + rev: 23.12.1 + hooks: + - id: black + language_version: python3 + exclude: ^docs/source/examples/ +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace # This hook trims trailing whitespace. + - id: check-docstring-first # Checks a common error of defining a docstring after code. + - id: check-merge-conflict # Check for files that contain merge conflict strings. + - id: detect-private-key # Detects the presence of private keys. + - id: check-symlinks + - id: check-toml +- repo: https://github.com/pre-commit/pygrep-hooks + rev: v1.10.0 + hooks: + - id: python-use-type-annotations +- repo: https://github.com/kynan/nbstripout + rev: 0.6.1 + hooks: + - id: nbstripout + exclude: ^docs/source/examples/ +- repo: https://github.com/asottile/blacken-docs + rev: 1.16.0 + hooks: + - id: blacken-docs +- repo: https://github.com/nbQA-dev/nbQA + rev: 1.7.1 + hooks: + - id: nbqa-black \ No newline at end of file diff --git a/README.rst b/README.rst index 2e5aa9eae..b04b9ce03 100644 --- a/README.rst +++ b/README.rst @@ -1,13 +1,13 @@ HTTomo (High Throughput Tomography pipeline) ******************************************************* -HTTomo is a user interface (UI) written in Python for fast big data processing using MPI protocols. 
+HTTomo is a user interface (UI) written in Python for fast big data processing using MPI protocols. It orchestrates I/O data operations and enables processing on a CPU and/or a GPU. HTTomo utilises other libraries, such as `TomoPy `_ and `HTTomolibgpu `_ as backends for data processing. The methods from the libraries are exposed through YAML templates to enable fast task programming. Installation ============ -See detailed instructions for `installation `_ . +See detailed instructions for `installation `_ . Documentation ============== diff --git a/conda/environment-cpu.yml b/conda/environment-cpu.yml index e1f2883b9..f7aac6987 100644 --- a/conda/environment-cpu.yml +++ b/conda/environment-cpu.yml @@ -11,10 +11,10 @@ dependencies: - conda-forge::h5py=*=*mpi_openmpi* - conda-forge::pyyaml - conda-forge::numpy<=1.25 - - conda-forge::python + - conda-forge::python - conda-forge::numexpr>=2.8.4 - anaconda::ipython - - anaconda::loguru + - anaconda::loguru - conda-forge::graypy - conda-forge::plumbum - conda-forge::tqdm diff --git a/conda/recipe/meta.yaml b/conda/recipe/meta.yaml index f807ae3ce..068ce3713 100644 --- a/conda/recipe/meta.yaml +++ b/conda/recipe/meta.yaml @@ -27,7 +27,7 @@ test: - tests/* commands: - pip check - - pytest + - pytest requirements: build: - h5py=*=*mpi_openmpi* diff --git a/docs/source/backends/list.rst b/docs/source/backends/list.rst index 37f661e22..21e81a640 100644 --- a/docs/source/backends/list.rst +++ b/docs/source/backends/list.rst @@ -3,36 +3,36 @@ Supported backends ============================== -HTTomo currently supports several software packages that are used as -backends to perform data processing and reconstruction. The list of +HTTomo currently supports several software packages that are used as +backends to perform data processing and reconstruction. The list of the external packages will be growing in future to meet users' needs. 
-If the package has a modular structure with an easy access to every -method, for example as `TomoPy `_ -software or `scikit `_ library, then the -integration process is straightforward. +If the package has a modular structure with an easy access to every +method, for example as `TomoPy `_ +software or `scikit `_ library, then the +integration process is straightforward. The required YAML template can be generated by using :ref:`utilities_yamlgenerator`, -or manually, and used with HTTomo. More complicated in structure packages would need -an additional wrapping, see for instance the reconstruction methods in -the `HTTomolibgpu `_ library. +or manually, and used with HTTomo. More complicated in structure packages would need +an additional wrapping, see for instance the reconstruction methods in +the `HTTomolibgpu `_ library. Please see the provided list of :ref:`reference_templates`. TomoPy software (CPU) --------------------- -`TomoPy `_ is an open-source Python package for -tomographic data processing and image reconstruction developed at -`The Advanced Photon Source `_ in Illinois, USA. -The project is active since 2013 and it gained a `large audience `_ +`TomoPy `_ is an open-source Python package for +tomographic data processing and image reconstruction developed at +`The Advanced Photon Source `_ in Illinois, USA. +The project is active since 2013 and it gained a `large audience `_ of users and contributors across tomographic imaging community. -* TomoPy is an open-source package in Python and C for data processing and reconstruction. TomoPy is mostly a CPU processing library and in HTTomo we expose the CPU modules only. +* TomoPy is an open-source package in Python and C for data processing and reconstruction. TomoPy is mostly a CPU processing library and in HTTomo we expose the CPU modules only. * It is a CPU-multithreaded package. 
HTTomo controls parallelisation through MPI on a higher level and also supports local CPU multithreading from TomoPy, for every MPI process. * TomoPy is a library of stand-alone methods which can be easily integrated into HTTomo. Notably not all TomoPy methods are integrated in HTTomo because of the I/O nature of some modules. Please see the list of available TomoPy :ref:`reference_templates`. HTTomolibgpu library (GPU) -------------------------- -`HTTomolibgpu `_ library is developed at `Diamond Light source `_ +`HTTomolibgpu `_ library is developed at `Diamond Light source `_ by Data Analysis Group to work together with the HTTomo software. * HTTomolibgpu is a Python library of GPU accelerated methods written using `CuPy `_ API and CUDA language. diff --git a/docs/source/backends/templates.rst b/docs/source/backends/templates.rst index b93a0e62f..987d653a4 100644 --- a/docs/source/backends/templates.rst +++ b/docs/source/backends/templates.rst @@ -5,9 +5,9 @@ Methods YAML Templates ====================== This section contains YAML templates from :ref:`backends_list`. These are ready-to-use templates can be either copy-pasted or -downloaded in order to build your pipeline. +downloaded in order to build your pipeline. -.. note:: When you click on a module you can find a link to the API of that module. This is where the description of the method and its arguments can be found. +.. note:: When you click on a module you can find a link to the API of that module. This is where the description of the method and its arguments can be found. TomoPy Modules @@ -23,7 +23,7 @@ v.1.15 .. 
_reference_templates_httomolibgpu: HTTomolibgpu Modules --------------------- +-------------------- v.2.0 ''''' diff --git a/docs/source/conf.py b/docs/source/conf.py index 65cc56d8e..10c8d4afe 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,7 +1,25 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# --------------------------------------------------------------------------- +# Copyright 2023 Diamond Light Source Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# --------------------------------------------------------------------------- +# Created By : Tomography Team at DLS + # -- General configuration ------------------------------------------------ # https://www.sphinx-doc.org/en/master/usage/configuration.html -#!/usr/bin/env python import os import sys from datetime import date @@ -88,14 +106,6 @@ html_static_path = ["_static"] html_use_smartypants = True -""" -html_theme_options = { - "logo_only": True, - "display_version": False, - "githuburl": "https://github.com/DiamondLightSource/httomo", -} -""" - html_theme_options = { "logo": { "image_dark": "_static/logo_dark.png", diff --git a/docs/source/developers/how_to_contribute.rst b/docs/source/developers/how_to_contribute.rst index b35dbe147..0db79dfad 100644 --- a/docs/source/developers/how_to_contribute.rst +++ b/docs/source/developers/how_to_contribute.rst @@ -3,34 +3,34 @@ How to contribute ***************** -For those who are interested in contributing to HTTomo, we provide here
steps to follow. All additional enquires can be left in -the issues section on `HTTomo's Github page `_. +For those who are interested in contributing to HTTomo, we provide here steps to follow. All additional enquires can be left in +the issues section on `HTTomo's Github page `_. 1. Write a new data processing method in Python. - + One needs to write a method and make it accessible in either a separate library or integrating - it into the list of already :ref:`backends_list`. The latter option is preferred as some of the packages are + it into the list of already :ref:`backends_list`. The latter option is preferred as some of the packages are maintained by HTTomo developers which will provide support during the integration. 2. Expose the method in the library file in HTTomo Then one needs to expose that method to HTTomo by editing the :ref:`pl_library`. You would need to specify the main static descriptors of that method, such as, :code:`pattern`, :code:`implementation`, etc. If the implementation is :code:`cpu` only, - then :code:`memory_gpu` must be set to :code:`None`. However, if the method requires GPU, then you would need to provide more information so - that HTTomo's framework would account for the memory use on the device. See :code:`HTTomolibgpu` library file for that. + then :code:`memory_gpu` must be set to :code:`None`. However, if the method requires GPU, then you would need to provide more information so + that HTTomo's framework would account for the memory use on the device. See :code:`HTTomolibgpu` library file for that. In a simple case, one can calculate the memory directly by providing multipliers in the library file. When memory calculation is more complicated, one needs to add a Python script that does this calculation. See more in :ref:`developers_memorycalc`. -3. Check the wrapper type +3. Check the wrapper type - Every method is executed by using :ref:`info_wrappers`. 
Check that the method fits the existing wrapper type, and if not, then possibly more work required - to accommodate it. In most of the cases the method should fit the existing types. + Every method is executed by using :ref:`info_wrappers`. Check that the method fits the existing wrapper type, and if not, then possibly more work required + to accommodate it. In most of the cases the method should fit the existing types. 4. Generate the Yaml template - HTTomo's UI requires :ref:`reference_templates` to execute the created method. One can either construct that YAML template manually or employ + HTTomo's UI requires :ref:`reference_templates` to execute the created method. One can either construct that YAML template manually or employ :ref:`utilities_yamlgenerator`. - - + + diff --git a/docs/source/developers/memory_calculation.rst b/docs/source/developers/memory_calculation.rst index 4a6d434be..85650c0b2 100644 --- a/docs/source/developers/memory_calculation.rst +++ b/docs/source/developers/memory_calculation.rst @@ -14,7 +14,7 @@ The ``calc_max_slices`` function must have the following signature:: The ``httomo`` package will call this function, passing in the dimension along which it will slice (``0`` for projection, ``1`` for sinogram), the other dimensions of the data array shape, the data type for the input, and the available memory on the GPU for method execution. -Additionally it passes all other parameters of the method in the ``kwargs`` argument, +Additionally it passes all other parameters of the method in the ``kwargs`` argument, which can be used by the function in case parameters determine the memory consumption. The function should calculate how many slices along the slicing dimension it can fit into the given memory. Further, it returns the output datatype of the method (given the input ``dtype`` argument), @@ -33,7 +33,7 @@ it can fit, given the other two dimension sizes. 
For example: max_slices, outdtype = my_method.meta.calc_max_slices(0, (10, 20), np.float32(), 14450, **method_args) * The developer of the given method needs to provide this function implementation, - and it needs to calculate the maximum number of slices it can fit. + and it needs to calculate the maximum number of slices it can fit. * Assuming that the method is very simple and does not need any local temporary memory, requiring only space for the input and output array, it could be implemented as follows:: @@ -57,12 +57,12 @@ it can fit, given the other two dimension sizes. For example: * ``input_mem_per_slice = 800`` * ``output_mem_per_slice = 800`` * => ``max_slices = 14450 // 1600 = 9`` - + Max Slices Tests ---------------- -In order to test that the slice calculation function is reflecting reality, each method has a +In order to test that the slice calculation function is reflecting reality, each method has a unit test implemented that verifies that the calculation is right (within bounds). That is, it tests that the estimated slices are between 80% and 100% of the actually used slices. These tests also help to keep the memory estimation functions in sync with the implementation. @@ -72,12 +72,12 @@ The strategy for testing is the other way around: * We first run the actual method, given a specific data set, and record the maximum memory actually used by the method. * Then, retrospectively, we call the ``calc_max_slices`` estimator function and pass in this memory - as the ``available_memory`` argument. So we're asking the estimation function to assume that - the memory available is the actually used memory in the method call. + as the ``available_memory`` argument. So we're asking the estimation function to assume that + the memory available is the actually used memory in the method call. * The estimated number of slices should then be less or equal to the actual slices used earlier. 
* To make sure the function is not too conservative, we're checking that it returns at least 80% of the slices that actually fit - - + + diff --git a/docs/source/explanation/faq.rst b/docs/source/explanation/faq.rst index 160c13f91..9ae89ba9a 100644 --- a/docs/source/explanation/faq.rst +++ b/docs/source/explanation/faq.rst @@ -13,10 +13,10 @@ Frequently Asked Questions Template can be used to build a process list or a pipeline of methods. Normally the content of a template (a YAML file) is copied to build a process list file. Please see :ref:`howto_process_list`. .. dropdown:: Can I create a template? - + You can create a template manually if you want to run a method from the external software. See more on :ref:`backends_list`. -.. dropdown:: How can I configure a multi-task pipeline? +.. dropdown:: How can I configure a multi-task pipeline? The multi-task pipeline is build from the available :ref:`reference_templates` by stacking them together. Please see :ref:`howto_process_list`. @@ -25,9 +25,9 @@ Frequently Asked Questions Please see :ref:`howto_run`. .. dropdown:: I have a Python method, can it be used with HTTomo? - + There is a high chance that it can be used. The method needs to be accessible in your Python environment and you will need a YAML template for it. See more on what kind of :ref:`backends_list` can be used with HTTomo. It is also recommended if you integrate your method in a library first. See :ref:`developers_content`. - + .. dropdown:: How can I contribute to HTTomo? You can contribute by adding new methods to :ref:`backends_list` or by contributing to the source base of the `HTTomo project `_. @@ -55,17 +55,17 @@ Working from a workstation at Diamond Light Source .. dropdown:: How can I use HTTomo inside the terminal? 1. Using the module system, ``module load`` allows you to obtain an access to the installed HTTomo at Diamond computing systems. 
- You can check which versions of HTTomo are installed with the command: :code:`module avail httomo`. You can either load a specific version - with ``module load httomo/*httomo_version*`` or the default (recommended) version by executing: + You can check which versions of HTTomo are installed with the command: :code:`module avail httomo`. You can either load a specific version + with ``module load httomo/*httomo_version*`` or the default (recommended) version by executing: .. code-block:: console - + $ module load httomo This will add all of the related packages and files into your path, meaning that you will have an access to these packages from your loaded Python environment. - - 2. Configure your pipeline using the templates as shown previously + + 2. Configure your pipeline using the templates as shown previously and run HTTomo. @@ -73,7 +73,7 @@ Working from a workstation at Diamond Light Source It is modifying the users environment, by including the path to certain environment modules. In case of HTTomo it enables a specific conda environment with Python. - + You can read more about how module works at `modules.readthedocs.io `_ .. dropdown:: What do I do if I have module loaded the wrong version of HTTomo? @@ -81,6 +81,6 @@ Working from a workstation at Diamond Light Source You can use repeat the module command, replacing ``load`` with ``unload`` .. code-block:: console - + $ module unload httomo/*httomo_old_version* # unload old version first $ module load httomo/*httomo_version* # load the correct one diff --git a/docs/source/explanation/process_list.rst b/docs/source/explanation/process_list.rst index dab97ef74..e2ef78604 100644 --- a/docs/source/explanation/process_list.rst +++ b/docs/source/explanation/process_list.rst @@ -3,7 +3,7 @@ What is a process list? ------------------------ -A process list is a YAML file (see :ref:`explanation_yaml`), which is required to execute the processing of data in HTTomo. 
The process list file consists of methods exposed as YAML templates stacked together -to form a **serially processed sequence of methods**. Each YAML template represents a standalone method or a loader which can be chained together with other templates to form a process list. +A process list is a YAML file (see :ref:`explanation_yaml`), which is required to execute the processing of data in HTTomo. The process list file consists of methods exposed as YAML templates stacked together +to form a **serially processed sequence of methods**. Each YAML template represents a standalone method or a loader which can be chained together with other templates to form a process list. Please check how :ref:`howto_process_list`. diff --git a/docs/source/explanation/templates.rst b/docs/source/explanation/templates.rst index 2d2a20d0e..1fbd5aeae 100644 --- a/docs/source/explanation/templates.rst +++ b/docs/source/explanation/templates.rst @@ -3,7 +3,7 @@ What is a template? ------------------------ -A YAML template (see :ref:`explanation_yaml`) is a textual interface to a method, which can be executed in HTTomo. +A YAML template (see :ref:`explanation_yaml`) is a textual interface to a method, which can be executed in HTTomo. The template provides a communication with a chosen method by setting its input/output entries and also additional parameters, if required. The combination of YAML templates results in a processing list, also called a pipeline. See more on :ref:`explanation_process_list` @@ -11,7 +11,7 @@ The combination of YAML templates results in a processing list, also called a pi As a simple template example, let's consider the template for the median filter from the `TomoPy `_ package. .. 
code-block:: yaml - + - method: median_filter3d module_path: tomopy.misc.corr parameters: diff --git a/docs/source/howto/httomo_features/centering.rst b/docs/source/howto/httomo_features/centering.rst index dabae8be8..d07a51816 100644 --- a/docs/source/howto/httomo_features/centering.rst +++ b/docs/source/howto/httomo_features/centering.rst @@ -6,20 +6,20 @@ Centre of Rotation (CoR) What is it? =========== -Identifying the optimal Centre of Rotation (CoR) parameter is an important -procedure to ensure the correctness of the reconstruction. It is crucial to find it +Identifying the optimal Centre of Rotation (CoR) parameter is an important +procedure to ensure the correctness of the reconstruction. It is crucial to find it as precise as possible, as the incorrect value can lead to distortions in reconstructions, therefore making post processing and -quantification invalid. +quantification invalid. The required CoR parameter places the object (a scanned sample) into a coordinate system of the scanning device to ensure that the object is centered and rotates around its axis in this system, see :numref:`fig_centerscheme`. This is essential for valid reconstruction as the back projection model assumes a central placing of a sample with respect to the detector's axis (usually horizontal for synchrotrons). -The CoR estimation problem is also sometimes referred to the centered sinogram. -If the sinogram is not centered and there is an offset (`d` in :numref:`fig_centerscheme`), the reconstruction -will result in strong arching artefacts at the boundaries of reconstructed objects, see :numref:`fig_center_find`. -Further from the optimal CoR value (here `d=0`), one should expect more pronounced arching. +The CoR estimation problem is also sometimes referred to the centered sinogram. 
+If the sinogram is not centered and there is an offset (`d` in :numref:`fig_centerscheme`), the reconstruction +will result in strong arching artefacts at the boundaries of reconstructed objects, see :numref:`fig_center_find`. +Further from the optimal CoR value (here `d=0`), one should expect more pronounced arching. Therefore the optimisation problem usually involves minimising the artefacts in the reconstructed images by varying the CoR value. @@ -35,12 +35,12 @@ by varying the CoR value. :scale: 85 % :alt: Finding CoR - The reconstructions using different CoR values. Incorrectly centered sinogram results in strong arching artifacts on the boundaries of the reconstructed object. Note how the arching is being reduced when `d` is closer to the correct value. + The reconstructions using different CoR values. Incorrectly centered sinogram results in strong arching artifacts on the boundaries of the reconstructed object. Note how the arching is being reduced when `d` is closer to the correct value. CoR in HTTomo ============= -The CoR parameter is present in any reconstruction template given as the :code:`center` parameter. It can be configured either +The CoR parameter is present in any reconstruction template given as the :code:`center` parameter. It can be configured either automatically (see :ref:`centering_auto`) or manually (see :ref:`centering_manual`). .. _centering_auto: @@ -48,18 +48,18 @@ automatically (see :ref:`centering_auto`) or manually (see :ref:`centering_manua Auto-centering =============== -There is a `variety `_ of -methods to estimate CoR automatically. At DLS, we frequently use the centering method which -was developed by Nghia Vo and it relies on the Fourier analysis of a sinogram, see the `paper `_. -This method is implemented in both TomoPy and HTTomolibgpu libraries and -available for HTTomo as *find_center_vo* template, see :ref:`reference_templates`. +There is a `variety `_ of +methods to estimate CoR automatically. 
At DLS, we frequently use the centering method which +was developed by Nghia Vo and it relies on the Fourier analysis of a sinogram, see the `paper `_. +This method is implemented in both TomoPy and HTTomolibgpu libraries and +available for HTTomo as *find_center_vo* template, see :ref:`reference_templates`. -Here are the steps to enable the auto-centering and then use the estimated value in the reconstruction: +Here are the steps to enable the auto-centering and then use the estimated value in the reconstruction: 1. The auto-centering method should be added to the process list before the reconstruction method. 2. It is recommended to position any auto-centering method right after the loader, see :ref:`pl_conf_order`. 3. The calculated CoR value with be stored in :ref:`side_output`. -4. In the reconstruction module we refer to that value by placing the reference into the :code:`center` parameter. +4. In the reconstruction module we refer to that value by placing the reference into the :code:`center` parameter. .. code-block:: yaml :emphasize-lines: 13,17 @@ -85,7 +85,7 @@ Here are the steps to enable the auto-centering and then use the estimated value recon_size: null recon_mask_radius: null -.. note:: When one auto-centering method fails it is recommended to try other available methods as they can still provide the correct or close to the correct CoR value. +.. note:: When one auto-centering method fails it is recommended to try other available methods as they can still provide the correct or close to the correct CoR value. .. _centering_manual: @@ -93,11 +93,11 @@ Manual Centering ================= Unfortunately, there could be various cases when :ref:`centering_auto` fails, e.g., the projection data is corrupted, incomplete, the object is outside the field of view of the detector, and possibly other issues. -In that case, it is recommended to find the center of rotation manually. :ref:`parameter_tuning` can simplify such search significantly. 
+In that case, it is recommended to find the center of rotation manually. :ref:`parameter_tuning` can simplify such search significantly. To enable manual centering, one would need to do the following steps: -1. Ensure that the auto centering estimation method is not in the process list (remove or comment it). +1. Ensure that the auto centering estimation method is not in the process list (remove or comment it). 2. Modify the centre of rotation value :code:`center` in the reconstruction plugin by substituting a number instead of the reference to side outputs. diff --git a/docs/source/howto/httomo_features/parameter_tuning.rst b/docs/source/howto/httomo_features/parameter_tuning.rst index 75a4b67c4..2f7a1f734 100644 --- a/docs/source/howto/httomo_features/parameter_tuning.rst +++ b/docs/source/howto/httomo_features/parameter_tuning.rst @@ -19,7 +19,7 @@ many reasons for this situation, such as being unfamiliar with the method, or working with unfamiliar data, etc. .. warning:: - Because of the significant modifications in HTTomo ver.2.0, the parameters tuning feature is not - currently supported. HTTomo developers recognise the importance of it for users and will be + Because of the significant modifications in HTTomo ver.2.0, the parameters tuning feature is not + currently supported. HTTomo developers recognise the importance of it for users and will be delivered as soon possible with the next release. diff --git a/docs/source/howto/httomo_features/previewing.rst b/docs/source/howto/httomo_features/previewing.rst index 24b90525c..12ccde6eb 100644 --- a/docs/source/howto/httomo_features/previewing.rst +++ b/docs/source/howto/httomo_features/previewing.rst @@ -4,7 +4,7 @@ Previewing ^^^^^^^^^^ -Previewing is the way to change the dimensions of the input data by reducing them. +Previewing is the way to change the dimensions of the input data by reducing them. It also can be interpreted as a data cropping or data slicing operation. 
Reduction of the input data is often done to remove unnecessary/useless @@ -29,7 +29,7 @@ a brief explanation is given on how to use the :code:`preview` parameter in the :scale: 55 % :alt: 3D data - 3D projection data and their axes + 3D projection data and their axes Structure of the :code:`preview` parameter value @@ -68,7 +68,7 @@ previewing is disabled in this case. Enabling data preview ===================== -In order to change the input data dimensions and accelerate the processing +In order to change the input data dimensions and accelerate the processing pipeline, one can do two of the following operations. .. note:: Although this is optional, by doing this the size of the reconstructed @@ -87,7 +87,7 @@ Before cropping |pic1| and after |pic2| 1. Reduce the size of the vertical dimension (detector- `Y`) by removing blank regions in your data (top and bottom cropping), - see :numref:`fig_dimsdataY`. The blank areas, if any, can be established by looking through the sequence of raw projections. + see :numref:`fig_dimsdataY`. The blank areas, if any, can be established by looking through the sequence of raw projections. .. code-block:: yaml @@ -109,10 +109,10 @@ Before cropping |pic1| and after |pic2| 2. Reduce the size of the horizontal dimension (detector- `X`) by removing blank regions in your data (cropping the left and right sides), see :numref:`fig_dimsdataX`. - + .. warning:: Please be aware that cropping this dimension can create issues with the automatic centering - and potentially lead to reconstruction artefacts, especially if iterative methods are used. + and potentially lead to reconstruction artefacts, especially if iterative methods are used. It is general practice to be more conservative with the cropping of the `X` detector dimension. @@ -124,7 +124,7 @@ Before cropping |pic1| and after |pic2| stop: 2000 In Python this will be interpreted as :code:`[:,:,100:2000]`. - + .. _fig_dimsdataX: .. 
figure:: ../../_static/preview/dims_prevX.svg :scale: 55 % diff --git a/docs/source/howto/installation.rst b/docs/source/howto/installation.rst index af723f2d8..6aaf83c1a 100644 --- a/docs/source/howto/installation.rst +++ b/docs/source/howto/installation.rst @@ -4,7 +4,7 @@ Installation Guide Install HTTomo as a pre-built conda Python package ================================================== -This installation is preferable as it should take care all of dependencies including :ref:`backends_list` by getting them from the dedicated anaconda channel. +This installation is preferable as it should take care all of dependencies including :ref:`backends_list` by getting them from the dedicated anaconda channel. .. code-block:: console @@ -14,14 +14,14 @@ This installation is preferable as it should take care all of dependencies inclu Install as a Python module =========================== -If installation above for some reason is not working for you, then the best way to install HTTomo is to create conda environment first and then -`pip install` HTTomo into it. You will need to `git clone` HTTomo repository to your disk first. Use `environment.yml` file to install +If installation above for some reason is not working for you, then the best way to install HTTomo is to create conda environment first and then +`pip install` HTTomo into it. You will need to `git clone` HTTomo repository to your disk first. Use `environment.yml` file to install the GPU-supported HTTomo. For CPU-only version, please use `environment-cpu.yml` instead. .. code-block:: console - + $ git clone git@github.com:DiamondLightSource/HTTomo.git # clone the repo - $ conda env create --name httomo --file conda/environment.yml # install dependencies for GPU version + $ conda env create --name httomo --file conda/environment.yml # install dependencies for GPU version $ conda activate httomo # activate environment $ pip install . 
# Install the module @@ -29,7 +29,7 @@ Setup HTTomo development environment: ====================================================== .. code-block:: console - $ pip install -e .[dev] # development mode + $ pip install -e .[dev] # development mode Build HTTomo as a conda Python package ====================================================== diff --git a/docs/source/howto/interpret_logger.rst b/docs/source/howto/interpret_logger.rst index 784fa0b0f..4175662f7 100644 --- a/docs/source/howto/interpret_logger.rst +++ b/docs/source/howto/interpret_logger.rst @@ -3,41 +3,41 @@ Interpret Log File ====================== -This section contains information on how to interpret the log file created by HTTomo. +This section contains information on how to interpret the log file created by HTTomo. -HTTomo uses :code:`loguru` software to unify and simplify the logging system. During the job execution, the concise information -goes to the terminal (see :numref:`fig_log`) and also to the :code:`user.log` file. More verbose information, that is usually -needed to debug the run, is saved into the :code:`debug.log` file. Let us explain few main elements of the :code:`user.log` -file and also stdout. +HTTomo uses :code:`loguru` software to unify and simplify the logging system. During the job execution, the concise information +goes to the terminal (see :numref:`fig_log`) and also to the :code:`user.log` file. More verbose information, that is usually +needed to debug the run, is saved into the :code:`debug.log` file. Let us explain few main elements of the :code:`user.log` +file and also stdout. .. _fig_log: .. figure:: ../_static/log/log_screenshot.png :scale: 40 % :alt: HTTomo log screenshot - The screenshot of the terminal output (AKA stdout) which also goes into the :code:`user.log` file. + The screenshot of the terminal output (AKA stdout) which also goes into the :code:`user.log` file. 
* :code:`Pipeline has been separated into N sections` - This means that `N` :ref:`info_sections` created for this pipeline and each section contains a certain amount of methods grouped together to work on :ref:`blocks_data`. The progress can be seen in every + This means that `N` :ref:`info_sections` created for this pipeline and each section contains a certain amount of methods grouped together to work on :ref:`blocks_data`. The progress can be seen in every section processing all of the input data divided into :ref:`chunks_data` and :ref:`blocks_data`, before continue to the next section. * :code:`Running loader` The loader does not belong to sections and always at the start of the pipeline. Note that the loader - loads the data using the specific :code:`pattern=projection` (See more :ref:`info_reslice`). The same pattern is used by the - following section. + loads the data using the specific :code:`pattern=projection` (See more :ref:`info_reslice`). The same pattern is used by the + following section. * :code:`Section N with the following methods` - Each section contains a number of methods that run sequentially for each :ref:`blocks_data` - of data. When all blocks are processed, the user will see the message :code:`Finished processing the last block`. This means that all of the + Each section contains a number of methods that run sequentially for each :ref:`blocks_data` + of data. When all blocks are processed, the user will see the message :code:`Finished processing the last block`. This means that all of the input data have been processed in this section and the pipeline moves to the next section, if it exists. * :code:`50%|##### | 1/2 [00:02<00:02, 2.52s/block]` - These are the progress bars showing how much data is being processed in every section. - The percentage progress bar demonstrates how many blocks have been processed by the `M` number of methods of the current section. 
Specifically in this case - we have :code:`1/2`, which means that one of two blocks completed (hence `50%`). Then :code:`00:02<00:02` shows the time in seconds to - reach the current block (time elapsed) and the remaining time to complete all iterations over blocks. The :code:`2.52s/block` part is an + These are the progress bars showing how much data is being processed in every section. + The percentage progress bar demonstrates how many blocks have been processed by the `M` number of methods of the current section. Specifically in this case + we have :code:`1/2`, which means that one of two blocks completed (hence `50%`). Then :code:`00:02<00:02` shows the time in seconds to + reach the current block (time elapsed) and the remaining time to complete all iterations over blocks. The :code:`2.52s/block` part is an estimation of how much time it's taking per block. When the time per block is less than one second then this can be presented as :code:`block/s` instead. - See :code:`save_to_images` progress report, for instance. + See :code:`save_to_images` progress report, for instance. .. note:: When interpreting progress bars, one possible misunderstanding can be an association of the progress with the methods completed. Because each piece of data (a block) can be processed by multiple methods, we report on how many blocks have been processed instead. \ No newline at end of file diff --git a/docs/source/howto/process_lists/httomo_parameters.rst b/docs/source/howto/process_lists/httomo_parameters.rst index 334e4ecd8..ff5ed5fc1 100644 --- a/docs/source/howto/process_lists/httomo_parameters.rst +++ b/docs/source/howto/process_lists/httomo_parameters.rst @@ -19,6 +19,6 @@ used. .. 
toctree:: :maxdepth: 1 - + side_outputs/side_out save_results/save_results diff --git a/docs/source/howto/process_lists/process_list_configure.rst b/docs/source/howto/process_lists/process_list_configure.rst index 4ec8a7ed9..3d93974f0 100644 --- a/docs/source/howto/process_lists/process_list_configure.rst +++ b/docs/source/howto/process_lists/process_list_configure.rst @@ -47,19 +47,19 @@ The pattern of any supported method can be found in :ref:`pl_library`. Library files ------------- -Here is the list of :ref:`pl_library` for backends where patterns and other fixed arguments for methods are specified. When HTTomo operates -with a certain method it always refers to its library file in order get the specific requirements for that method. +Here is the list of :ref:`pl_library` for backends where patterns and other fixed arguments for methods are specified. When HTTomo operates +with a certain method it always refers to its library file in order get the specific requirements for that method. .. dropdown:: TomoPy's library file - .. literalinclude:: ../../../../httomo/methods_database/packages/external/tomopy/tomopy.yaml + .. literalinclude:: ../../../../httomo/methods_database/packages/external/tomopy/tomopy.yaml .. dropdown:: Httomolibgpu's library file - + .. literalinclude:: ../../../../httomo/methods_database/packages/external/httomolibgpu/httomolibgpu.yaml .. dropdown:: Httomolib's library file - + .. literalinclude:: ../../../../httomo/methods_database/packages/external/httomolib/httomolib.yaml .. _pl_grouping: diff --git a/docs/source/howto/process_lists/save_results/save_results.rst b/docs/source/howto/process_lists/save_results/save_results.rst index 173de28a9..7291f782e 100644 --- a/docs/source/howto/process_lists/save_results/save_results.rst +++ b/docs/source/howto/process_lists/save_results/save_results.rst @@ -15,7 +15,7 @@ with the :code:`save_result` parameter. 
Its value is a boolean, so either Example 1: save output of a specific method ########################################### -Suppose we wanted to save the output of the normalisation function :code:`normalize`. Then we +Suppose we wanted to save the output of the normalisation function :code:`normalize`. Then we should add :code:`save_result: True` to the list of the function parameters, but NOT the method's parameters: .. code-block:: yaml @@ -39,7 +39,7 @@ output. In contrast to the previous example, suppose we had a process list where we would like to save the output of all methods using :code:`--save_all`, *apart* from the -:code:`normalize` method. +:code:`normalize` method. .. code-block:: yaml :emphasize-lines: 8 diff --git a/docs/source/howto/process_lists_guide.rst b/docs/source/howto/process_lists_guide.rst index 8f6800858..4888ac236 100644 --- a/docs/source/howto/process_lists_guide.rst +++ b/docs/source/howto/process_lists_guide.rst @@ -10,7 +10,7 @@ they are introduced here too. Editing process lists --------------------- -This section explains how to build a process list (see more on :ref:`explanation_process_list`) from YAML templates +This section explains how to build a process list (see more on :ref:`explanation_process_list`) from YAML templates (see more on :ref:`explanation_templates`). Given time working with HTTomo, a user will likely settle on a workflow for @@ -22,10 +22,10 @@ As a starting point, the general process of building the pipeline can be the fol - copy+paste templates for the desired methods from the :ref:`reference_templates` section -- manually edit the parameter values within the copied template as needed. The user might want +- manually edit the parameter values within the copied template as needed. The user might want to check the documentation for the relevant method in the library itself. - intermittently run the :ref:`YAML checker ` during - editing of the YAML file to detect any errors early on. 
It is strongly recommended to run + editing of the YAML file to detect any errors early on. It is strongly recommended to run the checker at least once when the YAML pipeline is configured and ready to be run. Methods order @@ -43,6 +43,6 @@ following: .. toctree:: :maxdepth: 2 - + process_lists/httomo_parameters process_lists/process_list_configure diff --git a/docs/source/howto/run_httomo.rst b/docs/source/howto/run_httomo.rst index fd4f9e030..4a1ff5555 100644 --- a/docs/source/howto/run_httomo.rst +++ b/docs/source/howto/run_httomo.rst @@ -17,7 +17,7 @@ Required inputs In order to run HTTomo you require a data file (an HDF5 file) and a YAML process list file that describes the desired processing pipeline. For information on -getting started creating this YAML file, please see :ref:`howto_process_list` +getting started creating this YAML file, please see :ref:`howto_process_list` and also ready-to-be-used :ref:`tutorials_pl_templates`. Running HTTomo Inside or Outside of Diamond diff --git a/docs/source/index.rst b/docs/source/index.rst index 2ff2a2b06..316299d51 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -7,7 +7,7 @@ :maxdepth: 2 :glob: - introduction/about + introduction/about explanation/templates explanation/process_list explanation/faq @@ -50,7 +50,7 @@ :glob: pipelines/yaml - + .. _utilities_content: .. toctree:: @@ -66,5 +66,5 @@ :caption: Developers :maxdepth: 2 - developers/how_to_contribute + developers/how_to_contribute developers/memory_calculation diff --git a/docs/source/introduction/about.rst b/docs/source/introduction/about.rst index 407e4a709..f72913776 100644 --- a/docs/source/introduction/about.rst +++ b/docs/source/introduction/about.rst @@ -19,7 +19,7 @@ HTTomo orchestrates the optimal data splitting driven by the available GPU memor HTTomo is a User Interface (UI) package and does not contain any data processing methods, but rather utilises other libraries as `backends `_. 
Please see the list of currently supported packages by HTTomo in :ref:`backends_list`. It should be relatively simple to integrate any other modular -CPU/GPU Python library for data processing methods in HTTomo, please see more on that in :ref:`developers_content` section. +CPU/GPU Python library for data processing methods in HTTomo, please see more on that in :ref:`developers_content` section. A complex data analysis pipelines can be built by stacking together provided :ref:`reference_templates` and :ref:`tutorials_pl_templates` are also provided. diff --git a/docs/source/introduction/indepth/detailed_about.rst b/docs/source/introduction/indepth/detailed_about.rst index a113a0f5f..cf5949c71 100644 --- a/docs/source/introduction/indepth/detailed_about.rst +++ b/docs/source/introduction/indepth/detailed_about.rst @@ -9,7 +9,7 @@ Here we present more detailed concepts of HTTomo's framework, such as, .. toctree:: :maxdepth: 2 - + sections reslice wrappers @@ -37,6 +37,6 @@ of data splitting produces a piece of data called a *block*. .. toctree:: :maxdepth: 2 - + chunks blocks \ No newline at end of file diff --git a/docs/source/introduction/indepth/memory_estimators.rst b/docs/source/introduction/indepth/memory_estimators.rst index 0720e96e0..2560a6992 100644 --- a/docs/source/introduction/indepth/memory_estimators.rst +++ b/docs/source/introduction/indepth/memory_estimators.rst @@ -5,9 +5,9 @@ Memory Estimators Memory estimators is the integral part of HTTomo when it comes to the GPU processing and efficient device-to-device transfers. -.. note:: One needs to know how much of the data safely fits a GPU card. This is the main purpose of the GPU memory estimators in HTTomo. +.. note:: One needs to know how much of the data safely fits a GPU card. This is the main purpose of the GPU memory estimators in HTTomo. 
-Based on the free memory available in the GPU device and memory requirements of a particular method, GPU memory estimators will initiate +Based on the free memory available in the GPU device and memory requirements of a particular method, GPU memory estimators will initiate :ref:`developers_memorycalc` to decide how much of the data can be safely transferred to the GPU for processing. The size of the data that can be fitted in the GPU card for computations will define the size of the :ref:`blocks_data`. @@ -19,7 +19,7 @@ that can be fitted in the GPU card for computations will define the size of the Free memory of the GPU device and the method's memory requirement is the information that is required for memory estimators to define the size of the :ref:`blocks_data` in the :ref:`info_sections`. As one would like to minimise device-to-device transfers while doing GPU computations, HTTomo tries to chain the methods together in one Section. -Therefore if there are multiple methods chained together, the size of the block will be defined by the most memory-demanding method. +Therefore if there are multiple methods chained together, the size of the block will be defined by the most memory-demanding method. Memory estimators will inspect every method in that methods' chain in order to decide which one dominates. .. _fig_memest2: diff --git a/docs/source/introduction/indepth/reslice.rst b/docs/source/introduction/indepth/reslice.rst index 4fd649cdf..6147093aa 100644 --- a/docs/source/introduction/indepth/reslice.rst +++ b/docs/source/introduction/indepth/reslice.rst @@ -2,9 +2,9 @@ Re-slicing ---------- -The re-slicing of data happens when we need to access a slice which is orthogonal to the current one. -In tomography, we normally work in the space of projections or in the space of sinograms. Different methods require different slicing -orientations, or, as we call it, a *pattern*. 
The change of the pattern is a **re-slice** operation or a transformation of an array by +The re-slicing of data happens when we need to access a slice which is orthogonal to the current one. +In tomography, we normally work in the space of projections or in the space of sinograms. Different methods require different slicing +orientations, or, as we call it, a *pattern*. The change of the pattern is a **re-slice** operation or a transformation of an array by re-slicing in a particular direction. For instance, from the projection space/pattern to the sinogram space/patterns, as in the figure below. .. _fig_reslice: @@ -14,8 +14,8 @@ re-slicing in a particular direction. For instance, from the projection space/pa The re-slicing operation for tomographic data. Here the data is resliced from the stack of projections to the stack of sinograms. -In HTTomo, the re-slicing operation is performed on the CPU as we need to access all the data. Even if the pipeline consists of only GPU methods stacked together, -the re-slicing step will transfer the data from the GPU device to the CPU memory first. This operation can be costly for big datasets and we recommend to minimise the number of +In HTTomo, the re-slicing operation is performed on the CPU as we need to access all the data. Even if the pipeline consists of only GPU methods stacked together, +the re-slicing step will transfer the data from the GPU device to the CPU memory first. This operation can be costly for big datasets and we recommend to minimise the number of re-slicing operations in your pipeline. Normally for tomographic pre-processing and reconstruction there is just one re-slice needed, please see how :ref:`howto_process_list`. .. 
_fig_reslice2: diff --git a/docs/source/introduction/indepth/sections.rst b/docs/source/introduction/indepth/sections.rst index 9fb2f88ba..3da7e5835 100644 --- a/docs/source/introduction/indepth/sections.rst +++ b/docs/source/introduction/indepth/sections.rst @@ -5,7 +5,7 @@ Sections Sections is the fundamental concept of the HTTomo's framework which is related to how the I/O operations and processing of data is organised. -.. note:: The main purpose of a section is to organise the data input/output workflow, as well as, chain together the methods so that the constructed pipeline is computationally efficient. +.. note:: The main purpose of a section is to organise the data input/output workflow, as well as, chain together the methods so that the constructed pipeline is computationally efficient. To better understand the purpose of the section it is also useful to read information about :ref:`chunks_data`, :ref:`blocks_data` and :ref:`info_memory_estimators`. @@ -33,7 +33,7 @@ Example 1: Sections with re-slice :alt: Sections in pipelines Let us say that the pattern in methods `M`\ :sub:`1-3` is *projection* and methods in `M`\ :sub:`4-5` belong to *sinogram* pattern. - This will result in two sections created and also :ref:`info_reslice` operation in the data transfer `T`\ :sub:`3` layer. + This will result in two sections created and also :ref:`info_reslice` operation in the data transfer `T`\ :sub:`3` layer. Example 2 : Sections with re-slice and data saving ================================================== @@ -43,10 +43,10 @@ Example 2 : Sections with re-slice and data saving :scale: 50 % :alt: Sections in pipelines - In addition Example 1 situation, let us assume that we want to save the result of `M`\ :sub:`2` method to the disk. + In addition Example 1 situation, let us assume that we want to save the result of `M`\ :sub:`2` method to the disk. 
This means that even though `M`\ :sub:`1-3` methods can be performed on the GPU, the data will be transferred to CPU. - The pipeline will be further fragmented to introduce another section, so that the data transfer `T`\ :sub:`2` layer also saves the data on the - disk, as well as, taking care to return the data back on the GPU for the method `M`\ :sub:`3`. + The pipeline will be further fragmented to introduce another section, so that the data transfer `T`\ :sub:`2` layer also saves the data on the + disk, as well as, taking care to return the data back on the GPU for the method `M`\ :sub:`3`. Example 3 : Sections with side outputs ====================================== @@ -59,6 +59,6 @@ Example 3 : Sections with side outputs Consider, again, Example 1. Here, however, `M`\ :sub:`5` requests the :ref:`side_output` of the method `M`\ :sub:`4`. This, for example, can be because the reconstruction method `M`\ :sub:`5` requires the :ref:`centering` value of `M`\ :sub:`4`, where this value is calculated. This divides `M`\ :sub:`4` and `M`\ :sub:`5` into separate sections. Also notice that `M`\ :sub:`1` needs the data - to be saved on disk, so in total, it is a pipeline with 4 sections in it. + to be saved on disk, so in total, it is a pipeline with 4 sections in it. .. note:: It can be seen that creating more sections in pipelines is to be avoided when building an efficient pipeline. Creating a section usually leads to synchronisation of all processes on the CPU and potentially, if not enough memory, through-disk operations. diff --git a/docs/source/introduction/indepth/wrappers.rst b/docs/source/introduction/indepth/wrappers.rst index 8c267d9e9..6c07cc529 100644 --- a/docs/source/introduction/indepth/wrappers.rst +++ b/docs/source/introduction/indepth/wrappers.rst @@ -3,16 +3,16 @@ Method Wrappers =============== -HTTomo does not contain image processing methods and uses external libraries for processing, see :ref:`backends_list`. 
-However, because methods process data in a different manner, there's a need to tell the framework what kind of processing that is. +HTTomo does not contain image processing methods and uses external libraries for processing, see :ref:`backends_list`. +However, because methods process data in a different manner, there's a need to tell the framework what kind of processing that is. -HTTomo currently has the following wrappers: +HTTomo currently has the following wrappers: 1. The generic wrapper. Suitable when the method does not change the dimensions of the data for its output and requires one input and one output. This also can be called a filter. 2. Normalisation wrapper is to deal with normalisation methods as they have supplementary data as an input (e.g. flats/darks) and one dataset as an output. -3. Rotation or Centering wrapper. These are special methods to estimate :ref:`centering` automatically. The input can be quite specific, e.g. normalised sinogram or selected projections, and the output is usually a scalar. +3. Rotation or Centering wrapper. These are special methods to estimate :ref:`centering` automatically. The input can be quite specific, e.g. normalised sinogram or selected projections, and the output is usually a scalar. 4. Reconstruction wrapper. Reconstruction needs an additional information to be passed like angles and the centre of rotation. This is all handled by the framework in the wrapper. diff --git a/docs/source/pipelines/yaml.rst b/docs/source/pipelines/yaml.rst index d52029a68..9c0cb439d 100644 --- a/docs/source/pipelines/yaml.rst +++ b/docs/source/pipelines/yaml.rst @@ -3,7 +3,7 @@ Full YAML pipelines ============================== -This is a collection of ready to be used pipeline templates aka process lists. +This is a collection of ready to be used pipeline templates aka process lists. See more on :ref:`explanation_process_list` and how to :ref:`howto_process_list`. .. 
_tutorials_pl_templates_cpu: @@ -16,11 +16,11 @@ CPU Pipeline templates .. literalinclude:: ../../../tests/samples/pipeline_template_examples/pipeline_cpu1.yaml .. dropdown:: TomoPy's pipeline where :ref:`previewing` is demonstrated - + .. literalinclude:: ../../../tests/samples/pipeline_template_examples/pipeline_cpu2.yaml .. dropdown:: This pipeline shows how "calculate_stats" module extracts global statistics in order to be passed to "save_to_images" function which uses it to rescale data for saving images - + .. literalinclude:: ../../../tests/samples/pipeline_template_examples/pipeline_cpu2.yaml .. _tutorials_pl_templates_gpu: @@ -29,7 +29,7 @@ GPU Pipeline templates ---------------------------- .. dropdown:: Basic GPU pipeline which uses functions from the httomolibgpu library. - + .. literalinclude:: ../../../tests/samples/pipeline_template_examples/pipeline_gpu1.yaml @@ -42,12 +42,12 @@ DLS Specific templates .. literalinclude:: ../../../tests/samples/pipeline_template_examples/DLS/01_diad_pipeline_gpu.yaml -.. dropdown:: GPU-driven pipeline for the 360-degrees data which estimates the CoR value and the overlap. The 180-degrees sinogram is obtained by stitching using the overlap value. The pipeline shows the extensive use of side_outputs and refrencing. - +.. dropdown:: GPU-driven pipeline for the 360-degrees data which estimates the CoR value and the overlap. The 180-degrees sinogram is obtained by stitching using the overlap value. The pipeline shows the extensive use of side_outputs and referencing. + .. literalinclude:: ../../../tests/samples/pipeline_template_examples/pipeline_360deg_gpu2.yaml -.. dropdown:: More advanced GPU pipeline for the 360-degrees data. Here we preview the section and then reconstruct it iteratively, the result then downsampled before saving smaller images. - +.. dropdown:: More advanced GPU pipeline for the 360-degrees data. 
Here we preview the section and then reconstruct it iteratively, the result then downsampled before saving smaller images. + .. literalinclude:: ../../../tests/samples/pipeline_template_examples/pipeline_360deg_iterative_gpu3.yaml .. _tutorials_pl_templates_sweeps: @@ -56,4 +56,4 @@ Parameter Sweeps templates ---------------------------- Those templates demonstrate how to perform sweeps across multiple values a single parameter. See more on :ref:`parameter_tuning`. -To be added in the forthcoming releases. +To be added in the forthcoming releases. diff --git a/docs/source/reference/yaml.rst b/docs/source/reference/yaml.rst index a22792953..0d60fcbb4 100644 --- a/docs/source/reference/yaml.rst +++ b/docs/source/reference/yaml.rst @@ -68,7 +68,7 @@ to use. This can be especially helpful for those who are less familiar with prog or are new to the specific tools and libraries you are using. We have a :ref:`utilities_yamlchecker` that can help you to validate your YAML file. -Before running your pipeline, we highly recommend that you validate your YAML file using this utility. +Before running your pipeline, we highly recommend that you validate your YAML file using this utility. The checker will help you to identify errors in your YAML file. We also recommend to use editors that `support `_ YAML format naturally, e.g.: Atom, Visual Studio Code, Notepad++, and others. diff --git a/docs/source/utilities/yaml_checker.rst b/docs/source/utilities/yaml_checker.rst index e1afdc073..eee5eaf31 100644 --- a/docs/source/utilities/yaml_checker.rst +++ b/docs/source/utilities/yaml_checker.rst @@ -2,7 +2,7 @@ YAML Checker - Why use it? ************************** -YAML checker will help you to validate your process list (see :ref:`explanation_process_list`) +YAML checker will help you to validate your process list (see :ref:`explanation_process_list`) saved as a YAML file. 
Before running your pipeline with HTTomo, we highly recommend that you validate your process list using this utility. **The checker will help you to identify errors in your process list and avoid problems during the run**. Usage diff --git a/docs/source/utilities/yaml_generator.rst b/docs/source/utilities/yaml_generator.rst index 7e5e98e48..63345b2af 100644 --- a/docs/source/utilities/yaml_generator.rst +++ b/docs/source/utilities/yaml_generator.rst @@ -2,13 +2,13 @@ Templates generator ******************* -:ref:`backends_content` can be generated automatically using the YAML generator tool `provided `_ in the Github repo. +:ref:`backends_content` can be generated automatically using the YAML generator tool `provided `_ in the Github repo. -The script does the following: +The script does the following: * Generates a list of YAML files for all accessible on import methods in a chosen software package, e.g., TomoPy. * Modifies and/or removes some extracted parameters in YAML templates to make the templates compatible with HTTomo. - + How does it work: * The user would need to provide a YAML file with the listed *modules* you would like to inspect and extract the methods from. For instance, for the TomoPy package this would be: @@ -19,15 +19,15 @@ How does it work: - tomopy.misc.morph * The generator can be applied using the following command: - + .. code-block:: console - + $ python -m yaml_templates_generator -i /path/to/modules.yaml -o /path/to/outputfolder/ Please note that the package from which the modules are extracted, must be installed into your conda environment. **For TomoPy templates only.** After templates have been generated for TomoPy, we need to remove the ones that are not currently supported by HTTomo. We do that by looking into the library file that exists in HTTomo for TomoPy. - + .. 
code-block:: console - + $ python -m remove_unsupported_templates -t /path/to/templates/ -l /path/to/library/file diff --git a/httomo/data/dataset_store.py b/httomo/data/dataset_store.py index b9819f2a6..bba4fc8ca 100644 --- a/httomo/data/dataset_store.py +++ b/httomo/data/dataset_store.py @@ -1,3 +1,24 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# --------------------------------------------------------------------------- +# Copyright 2023 Diamond Light Source Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# --------------------------------------------------------------------------- +# Created By : Tomography Team at DLS + +# --------------------------------------------------------------------------- + import logging from os import PathLike from pathlib import Path @@ -18,77 +39,6 @@ from httomo.utils import log_once, make_3d_shape_from_shape -""" -This is from the final handover call: - -# Cases - -- *We always process the data in blocks* - -## 1 Process, all fits memory - -- chunk_shape = global_shape -- chunk_index = (0, 0, 0) -- the whole chunk is a numpy array, in a `DataSet` in-memory -- `write_block` -> writes a `DataSetBlock` into the chunk `DataSet` -- `read_block` -> get a block, which might be the full size of the `DataSet` for the chunk, - it's an in-memory slice of the chunk `DataSet` - -### Reslice - -- does nothing mostly -- updates `slicing_dim` somewhere, so that `read_block` knows how to slice the block - -## 2 Processes, all fits in memory (or more) - -(assume slicing dim is 0) - -- chunk_shape < global_shape, e.g. `(5, 20, 30) < (10, 20, 30)` -- `chunk_index`: in rank 0: `(0, 0, 0)`, in rank 1: `(5, 0, 0)` -- the whole chunk is in numpy array, in a `DataSet`, in-memory -- BUT: each process as a part of the global data in-memory -- `write_block` -> writes a `DataSetBlock` into the chunk `DataSet` -- `read_block` -> get a block, which might be the full size of the `DataSet` for the chunk - -### Reslice - -- call the MPI memory-based reslice function we always had (using `MPI.allgather`) -- we have a new chunk in each process, wich chunk_shape=`(10, 10, 30)` -- we have `chunk_index`: in rank 0: `(0, 0, 0)`, in rank 1: `(0, 10, 0)` -- updates `slicing_dim` somewhere, so that `read_block` knows how to slice the block - - -## 2 Processes, doesn't memory (or more) - -(assume slicing dim is 0) - -- chunk_shape < global_shape, e.g. 
`(5, 20, 30) < (10, 20, 30)` -- `chunk_index`: in rank 0: `(0, 0, 0)`, in rank 1: `(5, 0, 0)` -- *the global data is fully is a single h5py file* - - it is the whole global cube! - - each process only needs to access a chunk, block-wise, out of that file - - has the same interface as numpy array (`shape`, indexing, ...) - - BUT: as soon as we index, we read the file into an in-memory numpy array - - --> we cannot create a "view" of a subset of the file, referencing the disk - - so, each process needs to keep track of the start of its chunk within the file, - and when read_block is called with start index 0, we add the chunk_index to that -- `write_block` -> writes a `DataSetBlock` into the file, with the correct offset (`chunk_index + block_index`) -- `read_block` -> get a block from the file, with offset `chunk_index + block_index` -- --> `FullFileDataSet` class takes care of that - -### Reslice - -- we have full globally shared file, all the data is there on disk already -- does nothing mostly -- updates `slicing_dim` somewhere, so that `read_block` knows how to slice the block -- update `chunk_index`: in rank 0: `(0, 0, 0)`, in rank 1: `(0, 10, 0)` - - -""" - -# Notes: -# - refactoring the nested if into separate function is badly needed - class DataSetStoreWriter(ReadableDataSetSink): """A DataSetSink that can be used to store block-wise data in the current chunk (for the current process). 
diff --git a/httomo/data/mpiutil.py b/httomo/data/mpiutil.py index 4fa9ed38e..7adecb6b5 100644 --- a/httomo/data/mpiutil.py +++ b/httomo/data/mpiutil.py @@ -110,7 +110,9 @@ def alltoall(arrays: List[np.ndarray]) -> List[np.ndarray]: factor = ( arrays[0].shape[0] if dim0equal - else arrays[0].shape[1] if dim1equal else arrays[0].shape[2] + else arrays[0].shape[1] + if dim1equal + else arrays[0].shape[2] ) dtype1 = dtype.Create_contiguous(factor).Commit() # sanity check - this should always pass diff --git a/httomo/globals.py b/httomo/globals.py index 2682cdd50..21cb9caeb 100644 --- a/httomo/globals.py +++ b/httomo/globals.py @@ -4,8 +4,6 @@ run_out_dir: os.PathLike = Path(".") gpu_id: int = -1 # maximum slices to use in CPU-only section -MAX_CPU_SLICES: int = ( - 64 # A some random number which will be overwritten by --max-cpu_slices flag during runtime -) +MAX_CPU_SLICES: int = 64 # A some random number which will be overwritten by --max-cpu_slices flag during runtime SYSLOG_SERVER = "localhost" SYSLOG_PORT = 514 diff --git a/httomo/method_wrappers/save_intermediate.py b/httomo/method_wrappers/save_intermediate.py index a89a0a469..52c5d66bd 100644 --- a/httomo/method_wrappers/save_intermediate.py +++ b/httomo/method_wrappers/save_intermediate.py @@ -16,7 +16,6 @@ class SaveIntermediateFilesWrapper(GenericMethodWrapper): - @classmethod def should_select_this_class(cls, module_path: str, method_name: str) -> bool: return method_name == "save_intermediate_data" diff --git a/httomo/methods_database/packages/external/httomolibgpu/httomolibgpu.yaml b/httomo/methods_database/packages/external/httomolibgpu/httomolibgpu.yaml index 5da0fc4ee..280f6c8e3 100644 --- a/httomo/methods_database/packages/external/httomolibgpu/httomolibgpu.yaml +++ b/httomo/methods_database/packages/external/httomolibgpu/httomolibgpu.yaml @@ -46,7 +46,7 @@ prep: normalize: normalize: pattern: projection - output_dims_change: False + output_dims_change: False implementation: gpu_cupy 
save_result_default: False memory_gpu: @@ -56,7 +56,7 @@ prep: phase: paganin_filter_tomopy: pattern: projection - output_dims_change: False + output_dims_change: False implementation: gpu_cupy save_result_default: False memory_gpu: @@ -65,7 +65,7 @@ prep: - methods: [module] paganin_filter_savu: pattern: projection - output_dims_change: False + output_dims_change: False implementation: gpu_cupy save_result_default: False memory_gpu: @@ -75,7 +75,7 @@ prep: alignment: distortion_correction_proj_discorpy: pattern: projection - output_dims_change: False + output_dims_change: False implementation: gpu_cupy save_result_default: False memory_gpu: @@ -85,7 +85,7 @@ prep: stripe: remove_stripe_based_sorting: pattern: sinogram - output_dims_change: False + output_dims_change: False implementation: gpu_cupy save_result_default: False memory_gpu: diff --git a/httomo/methods_database/packages/httomo.yaml b/httomo/methods_database/packages/httomo.yaml index 590622db1..02c90fd80 100644 --- a/httomo/methods_database/packages/httomo.yaml +++ b/httomo/methods_database/packages/httomo.yaml @@ -5,9 +5,9 @@ data: pattern: projection output_dims_change: False implementation: cpu - memory_gpu: None + memory_gpu: None save_result_default: False -methods: +methods: calculate_stats: pattern: all output_dims_change: False diff --git a/httomo/runner/dataset_store_interfaces.py b/httomo/runner/dataset_store_interfaces.py index 8573b7a19..083e56f3a 100644 --- a/httomo/runner/dataset_store_interfaces.py +++ b/httomo/runner/dataset_store_interfaces.py @@ -52,7 +52,8 @@ class DataSetSource(Protocol): the data can be read in *blocks*, sliced in the given slicing dimension""" @property - def dtype(self) -> np.dtype: ... # pragma: no cover + def dtype(self) -> np.dtype: + ... 
# pragma: no cover @property def global_shape(self) -> Tuple[int, int, int]: diff --git a/httomo/runner/monitoring_interface.py b/httomo/runner/monitoring_interface.py index 23e1ddd6a..8ea5ada28 100644 --- a/httomo/runner/monitoring_interface.py +++ b/httomo/runner/monitoring_interface.py @@ -15,7 +15,8 @@ def report_method_block( gpu_kernel_time: float = 0.0, gpu_h2d_time: float = 0.0, gpu_d2h_time: float = 0.0, - ): ... # pragma: no cover + ): + ... # pragma: no cover def report_source_block( self, @@ -26,7 +27,8 @@ def report_source_block( block_idx_chunk: Tuple[int, int, int], block_idx_global: Tuple[int, int, int], cpu_time: float, - ): ... # pragma: no cover + ): + ... # pragma: no cover def report_sink_block( self, @@ -37,8 +39,11 @@ def report_sink_block( block_idx_chunk: Tuple[int, int, int], block_idx_global: Tuple[int, int, int], cpu_time: float, - ): ... # pragma: no cover + ): + ... # pragma: no cover - def report_total_time(self, cpu_time: float): ... # pragma: no cover + def report_total_time(self, cpu_time: float): + ... # pragma: no cover - def write_results(self, dest: TextIO): ... # pragma: no cover + def write_results(self, dest: TextIO): + ... 
# pragma: no cover diff --git a/pyproject.toml b/pyproject.toml index 5616a4553..909e258fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,8 +110,8 @@ float_to_top = true # Make flake8 respect black's line length (default 88), max-line-length = 88 extend-ignore = [ - "E203", - "F811" + "E203", + "F811" ] per-file-ignores = [ "*pyi:E302" diff --git a/tests/method_wrappers/test_generic.py b/tests/method_wrappers/test_generic.py index eff59f979..dd0f505a5 100644 --- a/tests/method_wrappers/test_generic.py +++ b/tests/method_wrappers/test_generic.py @@ -198,7 +198,6 @@ def fake_method(data, comm: Optional[MPI.Comm] = None): def test_generic_transforms_auto_axis(mocker: MockerFixture, dummy_block: DataSetBlock): - PATTERN = Pattern.projection class FakeModule: diff --git a/tests/method_wrappers/test_rotation.py b/tests/method_wrappers/test_rotation.py index 0910ed64f..ed3c677d4 100644 --- a/tests/method_wrappers/test_rotation.py +++ b/tests/method_wrappers/test_rotation.py @@ -216,7 +216,6 @@ def test_rotation_normalize_sino_no_darks_flats(): def test_rotation_normalize_sino_same_darks_flats(): - self_mock = MagicMock() # this mocks self - as function only sets gpu time ret = RotationWrapper.normalize_sino( self_mock, diff --git a/tests/method_wrappers/test_stats_calc.py b/tests/method_wrappers/test_stats_calc.py index 244b02400..b58b56bb0 100644 --- a/tests/method_wrappers/test_stats_calc.py +++ b/tests/method_wrappers/test_stats_calc.py @@ -112,7 +112,6 @@ def calculate_stats(data, comm): def test_calculate_stats_uses_gpu_if_available( mocker: MockerFixture, dummy_block: DataSetBlock, gpu: bool ): - if gpu and not gpu_enabled: pytest.skip("No GPU available") diff --git a/tests/monitors/test_summary.py b/tests/monitors/test_summary.py index a3e972095..7e1d51361 100644 --- a/tests/monitors/test_summary.py +++ b/tests/monitors/test_summary.py @@ -51,7 +51,6 @@ def test_summary_monitor_records_and_displays_data(): MPI.COMM_WORLD.size != 2, reason="Only rank-2 MPI 
is supported with this test" ) def test_summary_monitor_records_and_displays_data_mpi(): - comm = MPI.COMM_WORLD mon = SummaryMonitor() # everything gets reported twice - once in each process - and the write_results should aggregate diff --git a/tests/samples/pipeline_template_examples/DLS/01_diad_pipeline_gpu.yaml b/tests/samples/pipeline_template_examples/DLS/01_diad_pipeline_gpu.yaml index 2b50021b9..2be755b7e 100644 --- a/tests/samples/pipeline_template_examples/DLS/01_diad_pipeline_gpu.yaml +++ b/tests/samples/pipeline_template_examples/DLS/01_diad_pipeline_gpu.yaml @@ -56,7 +56,7 @@ parameters: {} id: statistics side_outputs: - glob_stats: glob_stats + glob_stats: glob_stats - method: save_to_images module_path: httomolib.misc.images parameters: diff --git a/tests/samples/pipeline_template_examples/pipeline_360deg_gpu2.yaml b/tests/samples/pipeline_template_examples/pipeline_360deg_gpu2.yaml index daf20d34c..303689d22 100644 --- a/tests/samples/pipeline_template_examples/pipeline_360deg_gpu2.yaml +++ b/tests/samples/pipeline_template_examples/pipeline_360deg_gpu2.yaml @@ -43,7 +43,7 @@ module_path: httomolibgpu.misc.morph parameters: overlap: ${{centering.side_outputs.overlap}} - rotation: right + rotation: right - method: FBP module_path: httomolibgpu.recon.algorithm parameters: @@ -57,7 +57,7 @@ parameters: {} id: statistics side_outputs: - glob_stats: glob_stats + glob_stats: glob_stats - method: save_to_images module_path: httomolib.misc.images parameters: @@ -68,4 +68,4 @@ perc_range_min: 0.0 perc_range_max: 95.0 jpeg_quality: 95 - glob_stats: ${{statistics.side_outputs.glob_stats}} + glob_stats: ${{statistics.side_outputs.glob_stats}} diff --git a/tests/samples/pipeline_template_examples/pipeline_360deg_iterative_gpu3.yaml b/tests/samples/pipeline_template_examples/pipeline_360deg_iterative_gpu3.yaml index 4a548d5e7..6978b5730 100644 --- a/tests/samples/pipeline_template_examples/pipeline_360deg_iterative_gpu3.yaml +++ 
b/tests/samples/pipeline_template_examples/pipeline_360deg_iterative_gpu3.yaml @@ -60,13 +60,13 @@ parameters: newshape: [256, 256] axis: auto - interpolation: nearest + interpolation: nearest - method: calculate_stats module_path: httomo.methods parameters: {} id: statistics side_outputs: - glob_stats: glob_stats + glob_stats: glob_stats - method: save_to_images module_path: httomolib.misc.images parameters: diff --git a/tests/samples/pipeline_template_examples/pipeline_cpu1.yaml b/tests/samples/pipeline_template_examples/pipeline_cpu1.yaml index 8527d0f26..3f4d2bbbb 100644 --- a/tests/samples/pipeline_template_examples/pipeline_cpu1.yaml +++ b/tests/samples/pipeline_template_examples/pipeline_cpu1.yaml @@ -1,6 +1,6 @@ - method: standard_tomo module_path: httomo.data.hdf.loaders - parameters: + parameters: name: tomo data_path: entry1/tomo_entry/data/data image_key_path: entry1/tomo_entry/instrument/detector/image_key diff --git a/tests/samples/pipeline_template_examples/pipeline_cpu3.yaml b/tests/samples/pipeline_template_examples/pipeline_cpu3.yaml index af55dc3d1..3ef574596 100644 --- a/tests/samples/pipeline_template_examples/pipeline_cpu3.yaml +++ b/tests/samples/pipeline_template_examples/pipeline_cpu3.yaml @@ -58,4 +58,4 @@ perc_range_max: 100.0 jpeg_quality: 95 glob_stats: ${{statistics.side_outputs.glob_stats}} - + diff --git a/tests/samples/pipeline_template_examples/pipeline_cpu4.yaml b/tests/samples/pipeline_template_examples/pipeline_cpu4.yaml index e8e2eb8be..4347eadec 100644 --- a/tests/samples/pipeline_template_examples/pipeline_cpu4.yaml +++ b/tests/samples/pipeline_template_examples/pipeline_cpu4.yaml @@ -1,6 +1,6 @@ - method: standard_tomo module_path: httomo.data.hdf.loaders - parameters: + parameters: name: tomo data_path: entry1/tomo_entry/data/data image_key_path: entry1/tomo_entry/instrument/detector/image_key diff --git a/tests/samples/pipeline_template_examples/testing/invalid_reference.yaml 
b/tests/samples/pipeline_template_examples/testing/invalid_reference.yaml index 959edca36..1060f2c9c 100644 --- a/tests/samples/pipeline_template_examples/testing/invalid_reference.yaml +++ b/tests/samples/pipeline_template_examples/testing/invalid_reference.yaml @@ -28,7 +28,7 @@ drop: 20 id: centering side_outputs: - cor: centre_of_rotation + cor: centre_of_rotation - method: remove_stripe_fw module_path: tomopy.prep.stripe parameters: diff --git a/tests/samples/pipeline_template_examples/testing/testing_pipeline.yaml b/tests/samples/pipeline_template_examples/testing/testing_pipeline.yaml index 6577495f9..55025f702 100644 --- a/tests/samples/pipeline_template_examples/testing/testing_pipeline.yaml +++ b/tests/samples/pipeline_template_examples/testing/testing_pipeline.yaml @@ -18,7 +18,7 @@ drop: 20 id: centering side_outputs: - cor: centre_of_rotation + cor: centre_of_rotation - method: remove_stripe_fw module_path: tomopy.prep.stripe parameters: diff --git a/tests/test_backends/test_httomolibgpu.py b/tests/test_backends/test_httomolibgpu.py index ff78ffc2e..0da91c9f4 100644 --- a/tests/test_backends/test_httomolibgpu.py +++ b/tests/test_backends/test_httomolibgpu.py @@ -41,7 +41,6 @@ class MaxMemoryHook(cp.cuda.MemoryHook): - def __init__(self, initial=0): self.max_mem = initial self.current = initial From 0ed26f75e06bd66228f78653a18f297d063e294e Mon Sep 17 00:00:00 2001 From: dkazanc Date: Wed, 10 Jul 2024 12:18:59 +0100 Subject: [PATCH 04/15] satisfy black --- httomo/data/mpiutil.py | 4 +--- httomo/globals.py | 4 +++- httomo/runner/dataset_store_interfaces.py | 3 +-- httomo/runner/monitoring_interface.py | 15 +++++---------- 4 files changed, 10 insertions(+), 16 deletions(-) diff --git a/httomo/data/mpiutil.py b/httomo/data/mpiutil.py index 7adecb6b5..4fa9ed38e 100644 --- a/httomo/data/mpiutil.py +++ b/httomo/data/mpiutil.py @@ -110,9 +110,7 @@ def alltoall(arrays: List[np.ndarray]) -> List[np.ndarray]: factor = ( arrays[0].shape[0] if dim0equal - else 
arrays[0].shape[1] - if dim1equal - else arrays[0].shape[2] + else arrays[0].shape[1] if dim1equal else arrays[0].shape[2] ) dtype1 = dtype.Create_contiguous(factor).Commit() # sanity check - this should always pass diff --git a/httomo/globals.py b/httomo/globals.py index 21cb9caeb..2682cdd50 100644 --- a/httomo/globals.py +++ b/httomo/globals.py @@ -4,6 +4,8 @@ run_out_dir: os.PathLike = Path(".") gpu_id: int = -1 # maximum slices to use in CPU-only section -MAX_CPU_SLICES: int = 64 # A some random number which will be overwritten by --max-cpu_slices flag during runtime +MAX_CPU_SLICES: int = ( + 64 # A some random number which will be overwritten by --max-cpu_slices flag during runtime +) SYSLOG_SERVER = "localhost" SYSLOG_PORT = 514 diff --git a/httomo/runner/dataset_store_interfaces.py b/httomo/runner/dataset_store_interfaces.py index 083e56f3a..8573b7a19 100644 --- a/httomo/runner/dataset_store_interfaces.py +++ b/httomo/runner/dataset_store_interfaces.py @@ -52,8 +52,7 @@ class DataSetSource(Protocol): the data can be read in *blocks*, sliced in the given slicing dimension""" @property - def dtype(self) -> np.dtype: - ... # pragma: no cover + def dtype(self) -> np.dtype: ... # pragma: no cover @property def global_shape(self) -> Tuple[int, int, int]: diff --git a/httomo/runner/monitoring_interface.py b/httomo/runner/monitoring_interface.py index 8ea5ada28..23e1ddd6a 100644 --- a/httomo/runner/monitoring_interface.py +++ b/httomo/runner/monitoring_interface.py @@ -15,8 +15,7 @@ def report_method_block( gpu_kernel_time: float = 0.0, gpu_h2d_time: float = 0.0, gpu_d2h_time: float = 0.0, - ): - ... # pragma: no cover + ): ... # pragma: no cover def report_source_block( self, @@ -27,8 +26,7 @@ def report_source_block( block_idx_chunk: Tuple[int, int, int], block_idx_global: Tuple[int, int, int], cpu_time: float, - ): - ... # pragma: no cover + ): ... 
# pragma: no cover def report_sink_block( self, @@ -39,11 +37,8 @@ def report_sink_block( block_idx_chunk: Tuple[int, int, int], block_idx_global: Tuple[int, int, int], cpu_time: float, - ): - ... # pragma: no cover + ): ... # pragma: no cover - def report_total_time(self, cpu_time: float): - ... # pragma: no cover + def report_total_time(self, cpu_time: float): ... # pragma: no cover - def write_results(self, dest: TextIO): - ... # pragma: no cover + def write_results(self, dest: TextIO): ... # pragma: no cover From d11fc69b022e7f87a6c167149e822da393e356e7 Mon Sep 17 00:00:00 2001 From: dkazanc Date: Wed, 10 Jul 2024 15:23:23 +0100 Subject: [PATCH 05/15] correcting name for the ruleset --- .github/workflows/lint.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index c8fc22429..1b059675c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,4 +1,4 @@ -name: Python linters +name: Python_linters on: push: @@ -17,11 +17,11 @@ jobs: - name: Checkout repository code uses: actions/checkout@v2 - # setup Python 3.9 - - name: Setup Python 3.9 + # setup Python 3.10 + - name: Setup Python 3.10 uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: '3.10' - name: Install Python dependencies run: pip install black From 23cc34ebafb6314527c73da3f9c379c3c30a92f6 Mon Sep 17 00:00:00 2001 From: dkazanc Date: Wed, 10 Jul 2024 15:24:58 +0100 Subject: [PATCH 06/15] python3.10 for linter --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 1b059675c..88cd60db6 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,4 +1,4 @@ -name: Python_linters +name: Python linters on: push: From b2e465e6f8b562c3f8048d44876655ca236c7a80 Mon Sep 17 00:00:00 2001 From: dkazanc Date: Wed, 10 Jul 2024 15:54:49 +0100 Subject: [PATCH 07/15] adding 
iris test --- .github/workflows/conda_upload.yml | 2 +- .github/workflows/cpu_tests.yml | 43 --------------------------- .github/workflows/doc_conda.yml | 2 +- .github/workflows/nightly_build.yml | 8 ++--- .github/workflows/tests_iris.yml | 45 +++++++++++++++++++++++++++++ 5 files changed, 51 insertions(+), 49 deletions(-) delete mode 100644 .github/workflows/cpu_tests.yml create mode 100644 .github/workflows/tests_iris.yml diff --git a/.github/workflows/conda_upload.yml b/.github/workflows/conda_upload.yml index 5d8cca63d..9638700c2 100644 --- a/.github/workflows/conda_upload.yml +++ b/.github/workflows/conda_upload.yml @@ -1,4 +1,4 @@ -name: httomo conda upload +name: HTTomo version conda upload # Run the workflow whenever a tag beginning with `v` is pushed to any branch on: diff --git a/.github/workflows/cpu_tests.yml b/.github/workflows/cpu_tests.yml deleted file mode 100644 index d5dcbd901..000000000 --- a/.github/workflows/cpu_tests.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: httomo CPU tests - -on: - push: - branches: - - main - pull_request: - branches: - - main - -jobs: - build-linux: - runs-on: ubuntu-20.04 - - defaults: - run: - shell: bash -l {0} - - steps: - - name: Checkout repository code - uses: actions/checkout@v2 - - # setup Python 3.9 - - name: Setup Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - # install dependencies with conda - - name: Install dependencies with conda - run: | - conda env create --name httomo --file conda/environment-cpu.yml - conda activate httomo - conda run -n httomo pip install typing_extensions - conda run -n httomo pip install . 
- conda env list - conda list - - # run tests - - name: Run CPU tests (No CUDA drivers needed) - run: | - conda run -n httomo pip install pytest plumbum - conda run -n httomo pytest -m "not cupy" diff --git a/.github/workflows/doc_conda.yml b/.github/workflows/doc_conda.yml index 40c05ebea..fa563f93c 100644 --- a/.github/workflows/doc_conda.yml +++ b/.github/workflows/doc_conda.yml @@ -1,4 +1,4 @@ -name: HTTomo doc test +name: HTTomo docs publish on: workflow_dispatch: diff --git a/.github/workflows/nightly_build.yml b/.github/workflows/nightly_build.yml index 66b9609e4..ae1eae4d2 100644 --- a/.github/workflows/nightly_build.yml +++ b/.github/workflows/nightly_build.yml @@ -1,4 +1,4 @@ -name: httomo nightly dev conda package build + upload +name: HTTomo nightly build + upload on: schedule: @@ -20,11 +20,11 @@ jobs: ref: "main" fetch-depth: 0 - # setup Python 3.9 - - name: Setup Python 3.9 + # setup Python 3.10 + - name: Setup Python 3.10 uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: '3.9' - name: Install dependencies with Conda run: | diff --git a/.github/workflows/tests_iris.yml b/.github/workflows/tests_iris.yml new file mode 100644 index 000000000..4ec8c0776 --- /dev/null +++ b/.github/workflows/tests_iris.yml @@ -0,0 +1,45 @@ +name: HTTomo tests Iris + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + build-linux: + runs-on: iris-gpu + container: + image: nvidia/cuda:11.6.2-devel-ubi8 + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} + + defaults: + run: + shell: bash -l {0} + + steps: + - name: Checkout repository code + uses: actions/checkout@v3 + with: + ref: "main" + fetch-depth: 0 + + - name: Create conda environment + uses: mamba-org/setup-micromamba@v1 + with: + environment-file: conda/environment.yml + environment-name: httomo + post-cleanup: 'all' + init-shell: bash + + - name: Install httomolibgpu + run: | + pip install .[dev] + micromamba list + + - name: Run tests + run: 
| + pytest tests/ From c5a246b99b669715afd7a6abab587cb502c53538 Mon Sep 17 00:00:00 2001 From: dkazanc Date: Wed, 10 Jul 2024 16:16:12 +0100 Subject: [PATCH 08/15] renaming the jobs --- .github/workflows/conda_upload.yml | 4 ++-- .github/workflows/doc_conda.yml | 4 ++-- .github/workflows/nightly_build.yml | 4 ++-- .github/workflows/tests_iris.yml | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/conda_upload.yml b/.github/workflows/conda_upload.yml index 9638700c2..ad4a823e5 100644 --- a/.github/workflows/conda_upload.yml +++ b/.github/workflows/conda_upload.yml @@ -1,4 +1,4 @@ -name: HTTomo version conda upload +name: HTTomo version build # Run the workflow whenever a tag beginning with `v` is pushed to any branch on: @@ -7,7 +7,7 @@ on: - v* jobs: - build-linux: + conda-build-upload: runs-on: ubuntu-20.04 defaults: diff --git a/.github/workflows/doc_conda.yml b/.github/workflows/doc_conda.yml index fa563f93c..b413a2f2e 100644 --- a/.github/workflows/doc_conda.yml +++ b/.github/workflows/doc_conda.yml @@ -1,4 +1,4 @@ -name: HTTomo docs publish +name: HTTomo docs on: workflow_dispatch: @@ -10,7 +10,7 @@ on: - main jobs: - build-linux: + build-docs-publish: runs-on: ubuntu-latest defaults: run: diff --git a/.github/workflows/nightly_build.yml b/.github/workflows/nightly_build.yml index ae1eae4d2..bb0316525 100644 --- a/.github/workflows/nightly_build.yml +++ b/.github/workflows/nightly_build.yml @@ -1,4 +1,4 @@ -name: HTTomo nightly build + upload +name: HTTomo nightly build (no test) on: schedule: @@ -6,7 +6,7 @@ on: - cron: '0 0 * * *' jobs: - build-linux: + build-conda-upload: runs-on: ubuntu-20.04 defaults: diff --git a/.github/workflows/tests_iris.yml b/.github/workflows/tests_iris.yml index 4ec8c0776..db4e02875 100644 --- a/.github/workflows/tests_iris.yml +++ b/.github/workflows/tests_iris.yml @@ -9,7 +9,7 @@ on: - main jobs: - build-linux: + iris-gpu: runs-on: iris-gpu container: image: nvidia/cuda:11.6.2-devel-ubi8 From 
3f0bd724d905dbd1ce2d3ef092a0f0768eff47c4 Mon Sep 17 00:00:00 2001 From: dkazanc Date: Wed, 10 Jul 2024 16:54:06 +0100 Subject: [PATCH 09/15] adding weekly conda builds for main label --- ...ightly_build.yml => nightly_dev_build.yml} | 6 +-- .github/workflows/tests_iris.yml | 2 +- .github/workflows/weekly_main_build.yml | 47 +++++++++++++++++++ 3 files changed, 51 insertions(+), 4 deletions(-) rename .github/workflows/{nightly_build.yml => nightly_dev_build.yml} (91%) create mode 100644 .github/workflows/weekly_main_build.yml diff --git a/.github/workflows/nightly_build.yml b/.github/workflows/nightly_dev_build.yml similarity index 91% rename from .github/workflows/nightly_build.yml rename to .github/workflows/nightly_dev_build.yml index bb0316525..515d2ff10 100644 --- a/.github/workflows/nightly_build.yml +++ b/.github/workflows/nightly_dev_build.yml @@ -1,4 +1,4 @@ -name: HTTomo nightly build (no test) +name: HTTomo nightly conda-build (dev) on: schedule: @@ -6,7 +6,7 @@ on: - cron: '0 0 * * *' jobs: - build-conda-upload: + notest-conda-upload: runs-on: ubuntu-20.04 defaults: @@ -24,7 +24,7 @@ jobs: - name: Setup Python 3.10 uses: actions/setup-python@v2 with: - python-version: '3.9' + python-version: '3.10' - name: Install dependencies with Conda run: | diff --git a/.github/workflows/tests_iris.yml b/.github/workflows/tests_iris.yml index db4e02875..af2ee174e 100644 --- a/.github/workflows/tests_iris.yml +++ b/.github/workflows/tests_iris.yml @@ -1,4 +1,4 @@ -name: HTTomo tests Iris +name: HTTomo tests on: push: diff --git a/.github/workflows/weekly_main_build.yml b/.github/workflows/weekly_main_build.yml new file mode 100644 index 000000000..3bac0c2d6 --- /dev/null +++ b/.github/workflows/weekly_main_build.yml @@ -0,0 +1,47 @@ +name: HTTomo weekly conda-build (main) + +on: + schedule: + - cron: '0 0 * * 4' # At 00:00 on Thursday + +jobs: + notest-conda-upload: + runs-on: ubuntu-20.04 + + defaults: + run: + shell: bash -l {0} + + steps: + - name: Checkout 
repository code + uses: actions/checkout@v3 + with: + ref: "main" + fetch-depth: 0 + + # setup Python 3.10 + - name: Setup Python 3.10 + uses: actions/setup-python@v2 + with: + python-version: '3.10' + + - name: Install dependencies with Conda + run: | + $CONDA/bin/conda install -c conda-forge conda-build + $CONDA/bin/conda install -c conda-forge anaconda-client + $CONDA/bin/conda update conda + $CONDA/bin/conda update conda-build + $CONDA/bin/conda list + + - name: Decrypt a secret + run: ./.scripts/decrypt_secret.sh + env: + LARGE_SECRET_PASSPHRASE: ${{ secrets.LARGE_SECRET_PASSPHRASE }} + + - name: Build and upload the package to httomo conda cloud + env: + LABEL: main + run: | + chmod +x ./.scripts/conda_upload.sh + ./.scripts/conda_upload.sh + From 4793426e4b502e77d4fbcf248330ee33924661fb Mon Sep 17 00:00:00 2001 From: dkazanc Date: Wed, 10 Jul 2024 17:03:46 +0100 Subject: [PATCH 10/15] adding weekly iris build of the main --- .github/workflows/tests_iris.yml | 2 +- .github/workflows/weekly_main_tests_iris.yml | 41 ++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/weekly_main_tests_iris.yml diff --git a/.github/workflows/tests_iris.yml b/.github/workflows/tests_iris.yml index af2ee174e..e2be95924 100644 --- a/.github/workflows/tests_iris.yml +++ b/.github/workflows/tests_iris.yml @@ -35,7 +35,7 @@ jobs: post-cleanup: 'all' init-shell: bash - - name: Install httomolibgpu + - name: Install httomo run: | pip install .[dev] micromamba list diff --git a/.github/workflows/weekly_main_tests_iris.yml b/.github/workflows/weekly_main_tests_iris.yml new file mode 100644 index 000000000..ef8f0985d --- /dev/null +++ b/.github/workflows/weekly_main_tests_iris.yml @@ -0,0 +1,41 @@ +name: Weekly main tests + +on: + schedule: + - cron: '55 0 * * 4' # At 00:55 every Thursday + +jobs: + iris-gpu: + runs-on: iris-gpu + container: + image: nvidia/cuda:11.6.2-devel-ubi8 + env: + NVIDIA_VISIBLE_DEVICES: ${{ 
env.NVIDIA_VISIBLE_DEVICES }} + + defaults: + run: + shell: bash -l {0} + + steps: + - name: Checkout repository code + uses: actions/checkout@v3 + with: + ref: "main" + fetch-depth: 0 + + - name: Create conda environment + uses: mamba-org/setup-micromamba@v1 + with: + environment-file: conda/environment.yml + environment-name: httomo + post-cleanup: 'all' + init-shell: bash + + - name: Install httomo + run: | + conda install "httomo/linux-64::httomo * py310_openmpi_regular*" + micromamba list + + - name: Run tests + run: | + pytest tests/ From 685bbc7aec9be34833e4e7e02de7b6c09387070b Mon Sep 17 00:00:00 2001 From: dkazanc Date: Thu, 11 Jul 2024 11:42:52 +0100 Subject: [PATCH 11/15] enforcer rule and updated dev CI yaml --- .github/workflows/dev_build_test.yml | 83 +++++++++++++++++++++++++ .github/workflows/enforcer.yaml | 14 +++++ .github/workflows/nightly_dev_build.yml | 48 -------------- .github/workflows/tests_iris.yml | 45 -------------- 4 files changed, 97 insertions(+), 93 deletions(-) create mode 100644 .github/workflows/dev_build_test.yml create mode 100644 .github/workflows/enforcer.yaml delete mode 100644 .github/workflows/nightly_dev_build.yml delete mode 100644 .github/workflows/tests_iris.yml diff --git a/.github/workflows/dev_build_test.yml b/.github/workflows/dev_build_test.yml new file mode 100644 index 000000000..72b2abc07 --- /dev/null +++ b/.github/workflows/dev_build_test.yml @@ -0,0 +1,83 @@ +name: HTTomo dev build + +on: + pull_request: + branches: + - dev + +jobs: + build-conda-upload: + runs-on: ubuntu-latest + + defaults: + run: + shell: bash -l {0} + + steps: + - name: Checkout repository code + uses: actions/checkout@v3 + + # setup Python 3.10 + - name: Setup Python 3.10 + uses: actions/setup-python@v2 + with: + python-version: '3.10' + + - name: Install dependencies with Conda + run: | + $CONDA/bin/conda install -c conda-forge conda-build + $CONDA/bin/conda install -c conda-forge anaconda-client + $CONDA/bin/conda update conda + 
$CONDA/bin/conda update conda-build + $CONDA/bin/conda list + + - name: Decrypt a secret + run: ./.scripts/decrypt_secret.sh + env: + LARGE_SECRET_PASSPHRASE: ${{ secrets.LARGE_SECRET_PASSPHRASE }} + + - name: Build and upload the package (dev) to httomo conda cloud + env: + LABEL: dev + run: | + chmod +x ./.scripts/conda_upload.sh + ./.scripts/conda_upload.sh + run: exit 0 + + install-run-tests: + runs-on: ubuntu-latest + needs: build-conda-upload + + defaults: + run: + shell: bash -l {0} + + steps: + - name: Checkout repository code + uses: actions/checkout@v3 + + # setup Python 3.10 + - name: Setup Python 3.10 + uses: actions/setup-python@v2 + with: + python-version: '3.10' + + - name: Create conda environment + uses: mamba-org/setup-micromamba@v1 + with: + environment-file: conda/environment.yml + environment-name: httomo + post-cleanup: 'all' + init-shell: bash + + - name: Install httomo + run: | + conda install "httomo/linux-64::httomo * py310_openmpi_regular*" + micromamba list + + - name: Run tests + run: | + pytest tests/ + run: exit 0 + + \ No newline at end of file diff --git a/.github/workflows/enforcer.yaml b/.github/workflows/enforcer.yaml new file mode 100644 index 000000000..09bb13ec7 --- /dev/null +++ b/.github/workflows/enforcer.yaml @@ -0,0 +1,14 @@ +name: 'Check Branch' + +on: + pull_request: + +jobs: + check_branch: + runs-on: ubuntu-latest + steps: + - name: Check branch + if: github.base_ref == 'main' && github.head_ref != 'dev' + run: | + echo "ERROR: You can only merge to main from dev branch." 
+ exit 1 \ No newline at end of file diff --git a/.github/workflows/nightly_dev_build.yml b/.github/workflows/nightly_dev_build.yml deleted file mode 100644 index 515d2ff10..000000000 --- a/.github/workflows/nightly_dev_build.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: HTTomo nightly conda-build (dev) - -on: - schedule: - # Run at midnight every day - - cron: '0 0 * * *' - -jobs: - notest-conda-upload: - runs-on: ubuntu-20.04 - - defaults: - run: - shell: bash -l {0} - - steps: - - name: Checkout repository code - uses: actions/checkout@v3 - with: - ref: "main" - fetch-depth: 0 - - # setup Python 3.10 - - name: Setup Python 3.10 - uses: actions/setup-python@v2 - with: - python-version: '3.10' - - - name: Install dependencies with Conda - run: | - $CONDA/bin/conda install -c conda-forge conda-build - $CONDA/bin/conda install -c conda-forge anaconda-client - $CONDA/bin/conda update conda - $CONDA/bin/conda update conda-build - $CONDA/bin/conda list - - - name: Decrypt a secret - run: ./.scripts/decrypt_secret.sh - env: - LARGE_SECRET_PASSPHRASE: ${{ secrets.LARGE_SECRET_PASSPHRASE }} - - - name: Build and upload the package to httomo conda cloud - env: - LABEL: dev - run: | - chmod +x ./.scripts/conda_upload.sh - ./.scripts/conda_upload.sh - diff --git a/.github/workflows/tests_iris.yml b/.github/workflows/tests_iris.yml deleted file mode 100644 index e2be95924..000000000 --- a/.github/workflows/tests_iris.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: HTTomo tests - -on: - push: - branches: - - main - pull_request: - branches: - - main - -jobs: - iris-gpu: - runs-on: iris-gpu - container: - image: nvidia/cuda:11.6.2-devel-ubi8 - env: - NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} - - defaults: - run: - shell: bash -l {0} - - steps: - - name: Checkout repository code - uses: actions/checkout@v3 - with: - ref: "main" - fetch-depth: 0 - - - name: Create conda environment - uses: mamba-org/setup-micromamba@v1 - with: - environment-file: conda/environment.yml - 
environment-name: httomo - post-cleanup: 'all' - init-shell: bash - - - name: Install httomo - run: | - pip install .[dev] - micromamba list - - - name: Run tests - run: | - pytest tests/ From 7e20175da8d5927842eb849e5ac039be401e467f Mon Sep 17 00:00:00 2001 From: dkazanc Date: Thu, 11 Jul 2024 12:06:54 +0100 Subject: [PATCH 12/15] ci os update --- .github/workflows/weekly_main_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/weekly_main_build.yml b/.github/workflows/weekly_main_build.yml index 3bac0c2d6..6484198c1 100644 --- a/.github/workflows/weekly_main_build.yml +++ b/.github/workflows/weekly_main_build.yml @@ -6,7 +6,7 @@ on: jobs: notest-conda-upload: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest defaults: run: From 243627295619480fb2ce09e451bc0ccaadbcff1b Mon Sep 17 00:00:00 2001 From: dkazanc Date: Thu, 11 Jul 2024 12:17:08 +0100 Subject: [PATCH 13/15] yaml update --- .github/workflows/dev_build_test.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/dev_build_test.yml b/.github/workflows/dev_build_test.yml index 72b2abc07..b480da3d2 100644 --- a/.github/workflows/dev_build_test.yml +++ b/.github/workflows/dev_build_test.yml @@ -3,7 +3,7 @@ name: HTTomo dev build on: pull_request: branches: - - dev + - main jobs: build-conda-upload: @@ -16,6 +16,10 @@ jobs: steps: - name: Checkout repository code uses: actions/checkout@v3 + with: + ref: "dev" + fetch-depth: 0 + # setup Python 3.10 - name: Setup Python 3.10 From e832d2a52150e6b821ba35adbdd34914566337ff Mon Sep 17 00:00:00 2001 From: dkazanc Date: Thu, 11 Jul 2024 12:28:25 +0100 Subject: [PATCH 14/15] checker actions added, fixes in GA --- .github/pull_request_template.md | 6 ++++++ .github/workflows/checker_action.yaml | 12 ++++++++++++ .github/workflows/doc_conda.yml | 4 ++-- .github/workflows/lint.yml | 4 ++-- 4 files changed, 22 insertions(+), 4 deletions(-) create mode 100755 .github/pull_request_template.md create 
mode 100644 .github/workflows/checker_action.yaml diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100755 index 000000000..1676cee71 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,6 @@ +## Checklist + +- [ ] I have opened PR to `dev` branch +- [ ] I have added tests that prove my fix is effective or that my feature works +- [ ] New and existing unit tests pass locally with my changes +- [ ] I have made corresponding changes to the documentation \ No newline at end of file diff --git a/.github/workflows/checker_action.yaml b/.github/workflows/checker_action.yaml new file mode 100644 index 000000000..1ee0dc3c8 --- /dev/null +++ b/.github/workflows/checker_action.yaml @@ -0,0 +1,12 @@ +name: 'PR Tasks Completed Check' +on: + pull_request: + types: [opened, edited] + +jobs: + task-check: + runs-on: ubuntu-latest + steps: + - uses: kentaro-m/task-completed-checker-action@v0.1.2 + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" \ No newline at end of file diff --git a/.github/workflows/doc_conda.yml b/.github/workflows/doc_conda.yml index b413a2f2e..fe82cbd45 100644 --- a/.github/workflows/doc_conda.yml +++ b/.github/workflows/doc_conda.yml @@ -4,10 +4,10 @@ on: workflow_dispatch: pull_request: branches: - - main + - dev push: branches: - - main + - dev jobs: build-docs-publish: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 88cd60db6..096434c04 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -3,10 +3,10 @@ name: Python linters on: push: branches: - - main + - dev pull_request: branches: - - main + - dev jobs: run-linters: From f189d15ae831e01eb2c18fd310bc0f92bb9f20d7 Mon Sep 17 00:00:00 2001 From: dkazanc Date: Thu, 11 Jul 2024 12:34:27 +0100 Subject: [PATCH 15/15] correcting file type --- .github/workflows/{checker_action.yaml => checker_action.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{checker_action.yaml => 
checker_action.yml} (100%) diff --git a/.github/workflows/checker_action.yaml b/.github/workflows/checker_action.yml similarity index 100% rename from .github/workflows/checker_action.yaml rename to .github/workflows/checker_action.yml