From 268ff93ecdb5811e0c906b350687ee14dd7495f5 Mon Sep 17 00:00:00 2001 From: Sander Roet Date: Tue, 13 Aug 2024 16:52:01 +0200 Subject: [PATCH 1/2] add E501 check --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 5eb505e0..ee6801a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,3 +68,6 @@ parallel = true [tool.ruff] line-length = 88 + +[tool.ruff.lint] +extend-select = ["E501"] From 4417c8dfcb793f435dba4feff085eb7a5bd2c56b Mon Sep 17 00:00:00 2001 From: Sander Roet Date: Tue, 13 Aug 2024 18:01:36 +0200 Subject: [PATCH 2/2] fix all too-long-lines --- src/pytom_tm/angles.py | 31 +++++--- src/pytom_tm/correlation.py | 12 +-- src/pytom_tm/entry_points.py | 20 ++--- src/pytom_tm/extract.py | 47 +++++++----- src/pytom_tm/io.py | 105 ++++++++++++++----------- src/pytom_tm/mask.py | 12 ++- src/pytom_tm/matching.py | 67 +++++++++------- src/pytom_tm/parallel.py | 27 ++++--- src/pytom_tm/plotting.py | 6 +- src/pytom_tm/template.py | 18 +++-- src/pytom_tm/tmjob.py | 143 +++++++++++++++++++++-------------- src/pytom_tm/utils.py | 3 +- src/pytom_tm/weights.py | 102 +++++++++++++++---------- tests/test00_parallel.py | 13 ++-- tests/test_tmjob.py | 64 ++++++++++------ tests/test_weights.py | 9 ++- 16 files changed, 410 insertions(+), 269 deletions(-) diff --git a/src/pytom_tm/angles.py b/src/pytom_tm/angles.py index 9dabacf7..a5e76978 100644 --- a/src/pytom_tm/angles.py +++ b/src/pytom_tm/angles.py @@ -19,7 +19,8 @@ def angle_to_angle_list( angle_diff: float maximum difference (in degrees) for the angle list sort_angles: bool, default True - sort the list, using python default angle_list.sort(), sorts first on Z1, then X, then Z2 + sort the list, using python default angle_list.sort(), sorts first on Z1, + then X, then Z2 log_level: int, default logging.DEBUG the log level to use when generating logs @@ -67,14 +68,17 @@ def load_angle_list( Parameters ---------- file_name: pathlib.Path - path to text file containing angular search, each line should contain 3 floats of anti-clockwise ZXZ + path to text file containing angular search, each line should contain 3 floats + of anti-clockwise ZXZ sort_angles: bool, default True - sort the list, using python default angle_list.sort(), sorts first on Z1, then X, then Z2 + sort the list, using python default angle_list.sort(), sorts first on Z1, + then X, then Z2 Returns ------- angle_list: list[tuple[float, float, float]] - a list where each element is a tuple of 3 floats containing an anti-clockwise ZXZ Euler rotation in radians + a list where each element is a tuple of 3 floats containing an anti-clockwise + ZXZ Euler rotation in radians """ with open(str(file_name)) as fstream: lines = fstream.readlines() @@ -84,7 +88,8 @@ def load_angle_list( "Invalid angle file provided, each line should have 3 ZXZ Euler angles!" ) if sort_angles: - angle_list.sort() # angle list needs to be sorted otherwise symmetry reduction cannot be used! + # angle_list needs to be sorted otherwise symmetry reduction cannot be used! 
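
As an aside on the two conventions these docstrings rely on: Python's tuple sort orders the list on Z1 first, then X, then Z2, and the symmetry option of get_angle_list (further below) keeps only rotations whose Z2 falls in [0, 2*pi/symmetry). A tiny illustration with arbitrary placeholder values:

    import numpy as np

    # toy (Z1, X, Z2) rotations in radians; the values are arbitrary placeholders
    angle_list = [(0.0, 0.5, 3.5), (1.0, 0.2, 2.0), (0.0, 0.5, 0.5)]
    angle_list.sort()  # lexicographic: first on Z1, then X, then Z2

    # keep only Z2 in [0, 2*pi/symmetry), cf. the get_angle_list docstring below
    symmetry = 2
    reduced = [a for a in angle_list if 0.0 <= a[2] < 2 * np.pi / symmetry]
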
+ angle_list.sort() return angle_list @@ -104,7 +109,8 @@ def get_angle_list( or if a float: maximum difference (in degrees) for the angle list sort_angles: bool, default True - sort the list, using python default angle_list.sort(), sorts first on Z1, then X, then Z2 + sort the list, using python default angle_list.sort(), sorts first on Z1, + then X, then Z2 symmetry: int, default 1 the returned list will only have Z2 angles [0, (2*pi/symmetry)) log_level: str, default 'DEBUG' @@ -113,7 +119,8 @@ def get_angle_list( Returns ------- angle_list: list[tuple[float, float, float]] - a list where each element is a tuple of 3 floats containing an anti-clockwise ZXZ Euler rotation in radians + a list where each element is a tuple of 3 floats containing an anti-clockwise + ZXZ Euler rotation in radians """ log_level = logging.getLevelNamesMapping()[log_level] out = None @@ -134,7 +141,8 @@ def get_angle_list( if possible_file_path.exists() and possible_file_path.suffix == ".txt": logging.log( log_level, - "Custom file provided for the angular search. Checking if it can be read...", + "Custom file provided for the angular search. " + "Checking if it can be read...", ) out = load_angle_list(angle, sort_angles) @@ -150,9 +158,10 @@ def convert_euler( degrees_in: bool = True, degrees_out: bool = True, ) -> tuple[float, float, float]: - """Convert a single set of Euler angles from one Euler notation to another. This function makes use of - scipy.spatial.transform.Rotation meaning that capital letters (i.e. ZXZ) specify intrinsic rotations (commonly - used in cryo-EM) and small letters (i.e. zxz) specific extrinsic rotations. + """Convert a single set of Euler angles from one Euler notation to another. This + function makes use of scipy.spatial.transform.Rotation meaning that capital letters + (i.e. ZXZ) specify intrinsic rotations (commonly used in cryo-EM) and small letters + (i.e. zxz) specific extrinsic rotations. Parameters ---------- diff --git a/src/pytom_tm/correlation.py b/src/pytom_tm/correlation.py index dc791aa3..a50f7c95 100644 --- a/src/pytom_tm/correlation.py +++ b/src/pytom_tm/correlation.py @@ -42,8 +42,8 @@ def std_under_mask( mean: float, mask_weight: Optional[float] = None, ) -> Union[float, cpt.NDArray[float]]: - """Calculate standard deviation of array in the mask region. Uses mean_under_mask() to calculate the mean of - data**2 within the mask. + """Calculate standard deviation of array in the mask region. Uses mean_under_mask() + to calculate the mean of data**2 within the mask. data and mask can be cupy or numpy arrays. @@ -72,8 +72,9 @@ def normalise( mask: Optional[Union[npt.NDArray[float], cpt.NDArray[float]]] = None, mask_weight: Optional[float] = None, ) -> Union[npt.NDArray[float], cpt.NDArray[float]]: - """Normalise array by subtracting mean and dividing by standard deviation. If a mask is provided the array is - normalised with the mean and std calculated within the mask. + """Normalise array by subtracting mean and dividing by standard deviation. If a mask + is provided the array is normalised with the mean and std calculated within the + mask. data and mask can be cupy or numpy arrays. @@ -105,7 +106,8 @@ def normalised_cross_correlation( data2: Union[npt.NDArray[float], cpt.NDArray[float]], mask: Optional[Union[npt.NDArray[float], cpt.NDArray[float]]] = None, ) -> Union[float, cpt.NDArray[float]]: - """Calculate normalised cross correlation between two arrays. Optionally only in a masked region. + """Calculate normalised cross correlation between two arrays. 
Optionally only in a + masked region. data1, data2, and mask can be cupy or numpy arrays. diff --git a/src/pytom_tm/entry_points.py b/src/pytom_tm/entry_points.py index 8a642179..d565ecab 100644 --- a/src/pytom_tm/entry_points.py +++ b/src/pytom_tm/entry_points.py @@ -436,19 +436,20 @@ def extract_candidates(argv=None): type=pathlib.Path, required=False, action=CheckFileExists, - help="Here you can provide a mask for the extraction with dimensions (in pixels) equal to " - "the tomogram. All values in the mask that are smaller or equal to 0 will be " - "removed, all values larger than 0 are considered regions of interest. It can " - "be used to extract annotations only within a specific cellular region." - "If the job was run with a tomogram mask, this file will be used instead of the job mask", + help="Here you can provide a mask for the extraction with dimensions " + "(in pixels) equal to the tomogram. All values in the mask that are smaller or " + "equal to 0 will be removed, all values larger than 0 are considered regions " + "of interest. It can be used to extract annotations only within a specific " + "cellular region. If the job was run with a tomogram mask, this file will be " + "used instead of the job mask", ) parser.add_argument( "--ignore_tomogram_mask", action="store_true", default=False, required=False, - help="Flag to ignore the input and TM job tomogram mask. " - "Useful if the scores mrc looks reasonable, but this finds 0 particles to extract", + help="Flag to ignore the input and TM job tomogram mask. Useful if the scores " + "mrc looks reasonable, but this finds 0 particles to extract", ) parser.add_argument( "-n", @@ -687,8 +688,9 @@ def match_template(argv=None): type=pathlib.Path, required=False, action=CheckFileExists, - help="Here you can provide a mask for matching with dimensions (in pixels) equal to " - "the tomogram. If a subvolume only has values <= 0 for this mask it will be skipped.", + help="Here you can provide a mask for matching with dimensions (in pixels) " + "equal to the tomogram. If a subvolume only has values <= 0 for this mask it " + "will be skipped.", ) filter_group = parser.add_argument_group("Filter control") diff --git a/src/pytom_tm/extract.py b/src/pytom_tm/extract.py index a72f83cd..61f40939 100644 --- a/src/pytom_tm/extract.py +++ b/src/pytom_tm/extract.py @@ -33,24 +33,28 @@ def predict_tophat_mask( create_plot: bool = True, tophat_connectivity: int = 1, ) -> npt.NDArray[bool]: - """This function gets as input a score map and returns a peak mask as determined with a tophat transform. + """This function gets as input a score map and returns a peak mask as determined + with a tophat transform. It does the following things: - calculate a tophat transform using scipy.ndimage.white_tophat() and a kernel ndimage.generate_binary_structure(rank=3, connectivity=1). - - calculate a histogram of the transformed score map and take its log to focus more on small values - - take second derivative of log(histogram) to find the region for fitting a Gaussian, where the second derivative - switches from negative to positive the background noise likely breaks - - use formula from excellent work of Rickgauer et al. 
(2017, eLife) which uses the error function to find the - likelihood of false positives on the background Gaussian distribution: - N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2 + - calculate a histogram of the transformed score map and take its log to focus more + on small values + - take second derivative of log(histogram) to find the region for fitting a + Gaussian, where the second derivative switches from negative to positive the + background noise likely breaks + - use formula from excellent work of Rickgauer et al. (2017, eLife) which uses the + error function to find the likelihood of false positives on the background + Gaussian distribution: N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2 Parameters ---------- score_volume: npt.NDArray[float] template matching score map output_path: Optional[pathlib.Path], default None - if provided (and plotting is available), write a figure of the fit to the output folder + if provided (and plotting is available), write a figure of the fit to the output + folder n_false_positives: float, default 1.0 number of false positive for error function cutoff calculation create_plot: bool, default True @@ -111,7 +115,8 @@ def log_gauss(x, amp, mu, sigma): # log of gaussian for fitting 0 ] # now go for accurate fit to log of gauss search_space = coeff_log[0] / (coeff_log[2] * np.sqrt(2 * np.pi)) - # formula Rickgauer et al. (2017, eLife): N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2 + # formula Rickgauer et al. (2017, eLife): + # N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2 # we need to find theta (i.e. the cut-off) cut_off = ( erfcinv((2 * n_false_positives) / search_space) * np.sqrt(2) * coeff_log[2] @@ -162,10 +167,11 @@ def extract_particles( cut_off: Optional[float] manually override the automated score cut-off estimation, value between 0 and 1 n_false_positives: float, default 1.0 - tune the number of false positives to be included for automated error function cut-off estimation: - should be a float > 0 + tune the number of false positives to be included for automated error function + cut-off estimation: should be a float > 0 tomogram_mask_path: Optional[pathlib.Path] - path to a tomographic binary mask for extraction, will override job.tomogram_mask + path to a tomographic binary mask for extraction, will override + job.tomogram_mask tophat_filter: bool attempt to only select sharp peaks with the tophat filter create_plot: bool, default True @@ -183,7 +189,8 @@ def extract_particles( Returns ------- dataframe, scores: tuple[pd.DataFrame, list[float, ...]] - dataframe with annotations that can be written out as a STAR file and a list of the selected scores + dataframe with annotations that can be written out as a STAR file and a list of + the selected scores """ score_volume = read_mrc(job.output_dir.joinpath(f"{job.tomo_id}_scores.mrc")) @@ -218,8 +225,9 @@ def extract_particles( if tomogram_mask is not None: if tomogram_mask.shape != job.tomo_shape: raise ValueError( - "Tomogram mask does not have the same number of pixels as the tomogram.\n" - f"Tomogram mask shape: {tomogram_mask.shape}, tomogram shape: {job.tomo_shape}" + "Tomogram mask does not have the same number of pixels as the " + f"tomogram.\n Tomogram mask shape: {tomogram_mask.shape}, " + f"tomogram shape: {job.tomo_shape}" ) slices = [ slice(origin, origin + size) @@ -240,14 +248,15 @@ def extract_particles( sigma = job.job_stats["std"] search_space = job.job_stats["search_space"] if cut_off is None: - # formula Rickgauer et al. 
(2017, eLife): N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2 + # formula Rickgauer et al. (2017, eLife): + # N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2 # we need to find theta (i.e. the cut off) cut_off = erfcinv((2 * n_false_positives) / search_space) * np.sqrt(2) * sigma logging.info(f"cut off for particle extraction: {cut_off}") elif cut_off < 0: logging.warning( - "Provided extraction score cut-off is smaller than 0. Changing to 0 as that is smallest " - "allowed value." + "Provided extraction score cut-off is smaller than 0. Changing to 0 as " + "that is smallest allowed value." ) cut_off = 0 @@ -293,7 +302,7 @@ def extract_particles( rotation[2], # AnglePsi lcc_max, # LCCmax cut_off, # Extraction cut off - sigma, # Add sigma of template matching search, LCCmax can be divided by sigma to obtain SNR + sigma, # Add sigma of template matching search, LCCmax/sigma = SNR pixel_size, # DetectorPixelSize tomogram_id, # MicrographName ) diff --git a/src/pytom_tm/io.py b/src/pytom_tm/io.py index d54f58f2..89a77aac 100644 --- a/src/pytom_tm/io.py +++ b/src/pytom_tm/io.py @@ -10,16 +10,16 @@ class ParseLogging(argparse.Action): - """argparse.Action subclass to parse logging parameter from input scripts. Users can set these to info/debug.""" + """argparse.Action subclass to parse logging parameter from input scripts. Users can + set these to info/debug.""" def __call__( self, parser, namespace, values: str, option_string: Optional[str] = None ): if values.upper() not in ["INFO", "DEBUG"]: parser.error( - "{0} log got an invalid option, set either to `info` or `debug` ".format( - option_string - ) + f"{option_string} log got an invalid option, " + "set either to `info` or `debug` " ) else: numeric_level = getattr(logging, values.upper(), None) @@ -79,7 +79,8 @@ def __call__( class BetweenZeroAndOne(argparse.Action): - """argparse.Action subclass to constrain an input value to a fraction, i.e. between 0 and 1.""" + """argparse.Action subclass to constrain an input value to a fraction, i.e. between + 0 and 1.""" def __call__( self, parser, namespace, values: float, option_string: Optional[str] = None @@ -95,8 +96,9 @@ def __call__( class ParseSearch(argparse.Action): - """argparse.Action subclass to restrict the search area of tomogram to these indices along an axis. Checks that - these value are larger than zero and that the second value is larger than the first.""" + """argparse.Action subclass to restrict the search area of tomogram to these indices + along an axis. Checks that these value are larger than zero and that the second + value is larger than the first.""" def __call__( self, @@ -107,18 +109,18 @@ def __call__( ): if not (0 <= values[0] < values[1]): parser.error( - "{0} start and end indices must be larger than 0 and end must be larger than start".format( - option_string - ) + f"{option_string} start and end indices must be larger than 0 and end " + "must be larger than start" ) setattr(namespace, self.dest, values) class ParseTiltAngles(argparse.Action): - """argparse.Action subclass to parse tilt_angle info. The input can either be two floats that specify the tilt - range for a continous wedge model. Alternatively can be a .tlt/.rawtlt file that specifies all the the tilt - angles of the tilt-series to use for more refined wedge models.""" + """argparse.Action subclass to parse tilt_angle info. The input can either be two + floats that specify the tilt range for a continous wedge model. 
Alternatively can be + a .tlt/.rawtlt file that specifies all the the tilt angles of the tilt-series to use + for more refined wedge models.""" def __call__( self, @@ -133,17 +135,15 @@ def __call__( setattr(namespace, self.dest, values) except ValueError: parser.error( - "{0} the two arguments provided could not be parsed to floats".format( - option_string - ) + f"{option_string} the two arguments provided could not be parsed " + "to floats" ) elif len(values) == 1: values = pathlib.Path(values[0]) if not values.exists() or values.suffix not in [".tlt", ".rawtlt"]: parser.error( - "{0} provided tilt angle file does not exist or does not have the right format".format( - option_string - ) + f"{option_string} provided tilt angle file does not exist or does " + "not have the right format" ) setattr(namespace, self.dest, read_tlt_file(values)) else: @@ -151,7 +151,8 @@ def __call__( class ParseDoseFile(argparse.Action): - """argparse.Action subclass to parse a txt file contain information on accumulated dose per tilt.""" + """argparse.Action subclass to parse a txt file contain information on accumulated + dose per tilt.""" def __call__( self, parser, namespace, values: str, option_string: Optional[str] = None @@ -195,8 +196,8 @@ def __call__( class UnequalSpacingError(Exception): - """Exception for an mrc file that has unequal spacing along the xyz dimensions annotated in its voxel size - metadata.""" + """Exception for an mrc file that has unequal spacing along the xyz dimensions + annotated in its voxel size metadata.""" pass @@ -206,8 +207,9 @@ def write_angle_list( file_name: pathlib.Path, order: tuple[int, int, int] = (0, 2, 1), ): - """Helper function to write angular search list from old PyTom to current module. Order had to be changed as old - PyTom always stored it as Z1, Z2, X, and here its Z1, X, Z2. + """Helper function to write angular search list from old PyTom to current module. + Order had to be changed as old PyTom always stored it as Z1, Z2, X, and here it's + Z1, X, Z2. @todo remove function """ @@ -220,7 +222,8 @@ def write_angle_list( @contextmanager def _wrap_mrcfile_readers(func, *args, **kwargs): - """Try to autorecover broken mrcfiles, assumes 'permissive' is a kwarg and not an arg""" + """Try to autorecover broken mrcfiles, assumes 'permissive' is a kwarg and not an + arg""" try: mrc = func(*args, **kwargs) except ValueError as err: @@ -231,12 +234,14 @@ def _wrap_mrcfile_readers(func, *args, **kwargs): if mrc.data is not None: logging.warning( f"Loading {args[0]} in strict mode gave an error. " - "However, loading with 'permissive=True' did generate data, make sure this is correct!" + "However, loading with 'permissive=True' did generate data, make sure " + "this is correct!" ) else: logging.debug("Could not reasonably recover") raise ValueError( - f"{args[0]} header or data is too corrupt to recover, please fix the header or data" + f"{args[0]} header or data is too corrupt to recover, please fix the " + "header or data" ) from err yield mrc # this should only be called after the context exists @@ -246,8 +251,9 @@ def _wrap_mrcfile_readers(func, *args, **kwargs): def read_mrc_meta_data(file_name: pathlib.Path) -> dict: """Read the metadata of provided MRC file path (using mrcfile) and return as dict. - If the voxel size along the x,y,and z dimensions differs a lot (not within 3 decimals) the function will raise an - UnequalSpacingError as it could mean template matching on these volumes might not be consistent. 
+ If the voxel size along the x, y, and z dimensions differs a lot (not within 3 + decimals) the function will raise an UnequalSpacingError as it could mean template + matching on these volumes might not be consistent. Parameters ---------- @@ -257,13 +263,15 @@ def read_mrc_meta_data(file_name: pathlib.Path) -> dict: Returns ------- metadata: dict - a dictionary of the mrc metadata with key 'shape' containing the x,y,z dimensions of the file and key - 'voxel_size' containing the voxel size along x,y,z and dimensions in A units + a dictionary of the mrc metadata with key 'shape' containing the x,y,z + dimensions of the file and key 'voxel_size' containing the voxel size along + x, y, and z and dimensions in Å units """ meta_data = {} with _wrap_mrcfile_readers(mrcfile.mmap, file_name) as mrc: meta_data["shape"] = tuple(map(int, attrgetter("nx", "ny", "nz")(mrc.header))) - # allow small numerical inconsistencies in voxel size of MRC headers, sometimes seen in Warp + # allow small numerical inconsistencies in voxel size of MRC headers, sometimes + # seen in Warp if not all( [ np.round(mrc.voxel_size.x, 3) == np.round(s, 3) @@ -278,9 +286,9 @@ def read_mrc_meta_data(file_name: pathlib.Path) -> dict: [mrc.voxel_size.x == s for s in attrgetter("y", "z")(mrc.voxel_size)] ): logging.warning( - f"Voxel size annotation in MRC is slightly different between dimensions, " - f"namely {mrc.voxel_size}. It might be a tiny numerical inaccuracy, but " - f"please ensure this is not problematic." + "Voxel size annotation in MRC is slightly different between " + f"dimensions, namely {mrc.voxel_size}. It might be a tiny " + "numerical inaccuracy, but please ensure this is not problematic." ) meta_data["voxel_size"] = float(mrc.voxel_size.x) return meta_data @@ -293,8 +301,8 @@ def write_mrc( overwrite: bool = True, transpose: bool = True, ) -> None: - """Write data to an MRC file. Data is transposed before writing as pytom internally uses xyz ordering and MRCs - use zyx. + """Write data to an MRC file. Data is transposed before writing as pytom internally + uses xyz ordering and MRCs use zyx. Parameters ---------- @@ -305,7 +313,8 @@ def write_mrc( voxel_size: float voxel size of array to annotate in MRC header overwrite: bool, default True - True (default) will overwrite current MRC on path, setting to False will error when writing to existing file + True (default) will overwrite current MRC on path, setting to False will error + when writing to existing file transpose: bool, default True True (default) transpose array before writing, setting to False prevents this @@ -326,16 +335,16 @@ def write_mrc( def read_mrc(file_name: pathlib.Path, transpose: bool = True) -> npt.NDArray[float]: - """Read an MRC file from disk. Data is transposed after reading as pytom internally uses xyz ordering and MRCs - use zyx. + """Read an MRC file from disk. Data is transposed after reading as pytom internally + uses xyz ordering and MRCs use zyx. 
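
As a standalone illustration of the transpose and spacing conventions described here, a rough sketch built directly on mrcfile; the file names are placeholders and error handling is reduced to the equal-spacing check that read_mrc_meta_data() performs:

    import mrcfile
    import numpy as np

    def read_xyz(file_name):
        with mrcfile.open(file_name, permissive=False) as mrc:
            # mrcfile yields zyx-ordered data; transpose to xyz order used internally
            data = np.ascontiguousarray(mrc.data.transpose(2, 1, 0))
            vs = mrc.voxel_size
            # allow only tiny differences between x, y and z spacing (3 decimals)
            vx, vy, vz = (round(float(v), 3) for v in (vs.x, vs.y, vs.z))
            if not vx == vy == vz:
                raise ValueError(f"unequal voxel spacing in {file_name}: {vs}")
        return data, vx

    def write_xyz(file_name, volume, voxel_size):
        with mrcfile.new(file_name, overwrite=True) as mrc:
            # transpose back to zyx before writing, mirroring write_mrc() above
            data = np.ascontiguousarray(volume.transpose(2, 1, 0), dtype=np.float32)
            mrc.set_data(data)
            mrc.voxel_size = voxel_size
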
Parameters ---------- file_name: pathlib.Path path to file on disk transpose: bool, default True - True (default) transposes the volume after reading, setting to False prevents transpose but probably not a - good idea when using the functions from this module + True (default) transposes the volume after reading, setting to False prevents + transpose but probably not a good idea when using the functions from this module Returns ------- @@ -366,7 +375,8 @@ def read_txt_file(file_name: pathlib.Path) -> list[float, ...]: def read_tlt_file(file_name: pathlib.Path) -> list[float, ...]: - """Read a txt file from disk using read_txt_file(). File is expected to have tilt angles in degrees. + """Read a txt file from disk using read_txt_file(). File is expected to have tilt + angles in degrees. Parameters ---------- @@ -382,7 +392,8 @@ def read_tlt_file(file_name: pathlib.Path) -> list[float, ...]: def read_dose_file(file_name: pathlib.Path) -> list[float, ...]: - """Read a txt file from disk using read_txt_file(). File is expected to have dose accumulation in e-/(A^2). + """Read a txt file from disk using read_txt_file(). File is expected to have dose + accumulation in e-/(Å^2). Parameters ---------- @@ -398,8 +409,10 @@ def read_dose_file(file_name: pathlib.Path) -> list[float, ...]: def read_imod_defocus_file(file_name: pathlib.Path) -> list[float, ...]: - """Read an IMOD style defocus file. This function can read version 2 and 3 defocus files. For format - specification see: https://bio3d.colorado.edu/imod/doc/man/ctfphaseflip.html (section: Defocus File Format). + """Read an IMOD style defocus file. This function can read version 2 and 3 defocus + files. For format specification see: + https://bio3d.colorado.edu/imod/doc/man/ctfphaseflip.html + (section: Defocus File Format). Parameters ---------- diff --git a/src/pytom_tm/mask.py b/src/pytom_tm/mask.py index 108b45f8..cd82279c 100644 --- a/src/pytom_tm/mask.py +++ b/src/pytom_tm/mask.py @@ -10,7 +10,8 @@ def spherical_mask( cutoff_sd: int = 3, center: Optional[float] = None, ) -> npt.NDArray[float]: - """Wrapper around ellipsoidal_mask() to create a spherical mask with just a single radius. + """Wrapper around ellipsoidal_mask() to create a spherical mask with just a single + radius. Parameters ---------- @@ -21,7 +22,8 @@ def spherical_mask( smooth: Optional[float], default None sigma (float relative to number of pixels) of gaussian falloff around mask cutoff_sd: int, default 3 - how many standard deviations of the Gaussian falloff to include, default of 3 is a good choice + how many standard deviations of the Gaussian falloff to include, default of 3 is + a good choice center: Optional[float], default None alternative center for the mask, default is (size - 1) / 2 @@ -44,7 +46,8 @@ def ellipsoidal_mask( cutoff_sd: int = 3, center: Optional[float] = None, ) -> npt.NDArray[float]: - """Create an ellipsoidal mask in the specified square box. Ellipsoid is defined by 3 radius on x,y, and z axis. + """Create an ellipsoidal mask in the specified square box. Ellipsoid is defined by 3 + radius on x,y, and z axis. 
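
To make the radius-plus-falloff behaviour concrete, a simplified sphere-only sketch; this is not the ellipsoidal_mask implementation, just the same idea with the parameter names used in the docstrings above:

    import numpy as np

    def soft_sphere(box_size, radius, smooth=None, cutoff_sd=3):
        center = (box_size - 1) / 2  # default centre used by these masks
        grid = np.indices((box_size,) * 3, dtype=np.float32)
        r = np.sqrt(((grid - center) ** 2).sum(axis=0))
        mask = (r <= radius).astype(np.float32)
        if smooth is not None:
            # Gaussian falloff outside the radius, cut after cutoff_sd sigmas
            falloff = np.exp(-0.5 * ((r - radius) / smooth) ** 2)
            shell = (r > radius) & (r <= radius + cutoff_sd * smooth)
            mask[shell] = falloff[shell]
        return mask
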
Parameters ---------- @@ -59,7 +62,8 @@ def ellipsoidal_mask( smooth: Optional[float], default None sigma (float relative to number of pixels) of gaussian falloff around mask cutoff_sd: int, default 3 - how many standard deviations of the Gaussian falloff to include, default of 3 is a good choice + how many standard deviations of the Gaussian falloff to include, default of 3 is + a good choice center: Optional[float], default None alternative center for the mask, default is (size - 1) / 2 diff --git a/src/pytom_tm/matching.py b/src/pytom_tm/matching.py index 38f8d0bc..f6789735 100644 --- a/src/pytom_tm/matching.py +++ b/src/pytom_tm/matching.py @@ -20,31 +20,35 @@ def __init__( wedge: Optional[npt.NDArray[float]] = None, phase_randomized_template: Optional[npt.NDArray[float]] = None, ): - """Initialize a template matching plan. All the necessary cupy arrays will be allocated on the GPU. + """Initialize a template matching plan. All the necessary cupy arrays will be + allocated on the GPU. Parameters ---------- volume: npt.NDArray[float] 3D numpy array representing the search tomogram template: npt.NDArray[float] - 3D numpy array representing the template for the search, a square box of size sx + 3D numpy array representing the template for the search, a square box of + size sx mask: npt.NDArray[float] - 3D numpy array representing the mask for the search, same dimensions as template + 3D numpy array representing the mask for the search, same dimensions as + template device_id: int GPU device id to load arrays on wedge: Optional[npt.NDArray[float]], default None - 3D numpy array that contains the Fourier space weighting for the template, it should be in Fourier - reduced form, with dimensions (sx, sx, sx // 2 + 1) + 3D numpy array that contains the Fourier space weighting for the template, + it should be in Fourier reduced form, with dimensions (sx, sx, sx // 2 + 1) phase_randomized_template: Optional[npt.NDArray[float]], default None - initialize the plan with a phase randomized version of the template for noise correction + initialize the plan with a phase randomized version of the template for + noise correction """ # Search volume + and fft transform plan for the volume volume_shape = volume.shape cp_vol = cp.asarray(volume, dtype=cp.float32, order="C") self.volume_rft_conj = rfftn(cp_vol).conj() self.volume_sq_rft_conj = rfftn(cp_vol**2).conj() - # Explicit fft plan is no longer necessary as cupy generates a plan behind the scene which leads to - # comparable timings + # Explicit fft plan is no longer necessary as cupy generates a plan behind the + # scene which leads to comparable timings # Array for storing local standard deviations self.std_volume = cp.zeros(volume_shape, dtype=cp.float32) @@ -152,18 +156,21 @@ def __init__( angle_list: list[tuple[float, float, float]] list of tuples with 3 floats representing Euler angle rotations angle_ids: list[int] - list of indices for angle_list to actually search, this can be a subset of the full list + list of indices for angle_list to actually search, this can be a subset of + the full list mask_is_spherical: bool, default True - True (default) if mask is spherical, set to False for non-spherical mask which increases computation time + True (default) if mask is spherical, set to False for non-spherical mask + which increases computation time wedge: Optional[npt.NDArray[float]], default None - 3D numpy array that contains the Fourier space weighting for the template, it should be in Fourier - reduced form, with dimensions (sx, sx, sx // 2 + 
1) + 3D numpy array that contains the Fourier space weighting for the template, + it should be in Fourier reduced form, with dimensions (sx, sx, sx // 2 + 1) stats_roi: Optional[tuple[slice, slice, slice]], default None - region of interest to calculate statistics on the search volume, default will just take the full search - volume + region of interest to calculate statistics on the search volume, default + will just take the full search volume noise_correction: bool, default False - initialize template matching with a phase randomized version of the template that is used to subtract - background noise from the score map; expense is more gpu memory and computation time + initialize template matching with a phase randomized version of the template + that is used to subtract background noise from the score map; expense is + more gpu memory and computation time rng_seed: int, default 321 seed for rng in phase randomization """ @@ -204,11 +211,14 @@ def run(self) -> tuple[npt.NDArray[float], npt.NDArray[float], dict]: ------- results: tuple[npt.NDArray[float], npt.NDArray[float], dict] results is a tuple with tree elements: - - score_map with the locally normalised maximum score over all the angles searched; a 3D numpy array - with same size as search volume - - angle_map with an index into the angle list corresponding to the maximum of the correlation scores; - a 3D numpy array with same size as search volume - - a dictionary with three floats of search statistics - 'search_space', 'variance', and 'std' + - score_map with the locally normalised maximum score over all the + angles searched; + a 3D numpy array with same size as search volume + - angle_map with an index into the angle list corresponding to the + maximum of the correlation scores; + a 3D numpy array with same size as search volume + - a dictionary with three floats of search statistics; + 'search_space', 'variance', and 'std' """ print("Progress job_{} on device {:d}:".format(self.job_id, self.device_id)) @@ -260,7 +270,8 @@ def run(self) -> tuple[npt.NDArray[float], npt.NDArray[float], dict]: rotation_units="rad", ) self.plan.mask_padded[pad_index] = self.plan.mask - # Std volume needs to be recalculated for every rotation of the mask, expensive step + # Std volume needs to be recalculated for every rotation of the mask, + # expensive step self.plan.std_volume = ( std_under_mask_convolution( self.plan.volume_rft_conj, @@ -366,8 +377,8 @@ def correlate(self, padding_index: tuple[slice, slice, slice]): # Paste in center self.plan.template_padded[padding_index] = self.plan.template - # Fast local correlation function between volume and template, norm is the standard deviation at each - # point in the volume in the masked area + # Fast local correlation function between volume and template, norm is the + # standard deviation at each point in the volume in the masked area self.plan.ccc_map = ( irfftn( self.plan.volume_rft_conj * rfftn(self.plan.template_padded), @@ -383,8 +394,9 @@ def std_under_mask_convolution( padded_mask: cpt.NDArray[float], mask_weight: float, ) -> cpt.NDArray[float]: - """Calculate the local standard deviation under the mask for each position in the volume. Calculation is done in - Fourier space as this is a convolution between volume and mask. + """Calculate the local standard deviation under the mask for each position in the + volume. Calculation is done in Fourier space as this is a convolution between volume + and mask. 
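
The convolution trick is compact enough to show in plain numpy; a simplified CPU sketch of the same computation (the GPU plan above keeps the conjugated volume transforms precomputed and operates on cupy arrays instead):

    import numpy as np
    from numpy.fft import rfftn, irfftn

    def local_std_under_mask(volume, padded_mask):
        # padded_mask: same shape as volume, with the mask centred in the box
        mask_weight = padded_mask.sum()
        volume_rft_conj = rfftn(volume).conj()
        volume_sq_rft_conj = rfftn(volume**2).conj()
        mask_rft = rfftn(padded_mask)
        # the Fourier-space products give the sum of the volume (and volume**2)
        # under the mask placed at every position in the box
        mean = irfftn(volume_rft_conj * mask_rft, s=volume.shape) / mask_weight
        mean_sq = irfftn(volume_sq_rft_conj * mask_rft, s=volume.shape) / mask_weight
        # std = sqrt(E[x^2] - E[x]^2); clip small negative round-off before the sqrt
        return np.sqrt(np.maximum(mean_sq - mean**2, 0))
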
Parameters ---------- @@ -433,7 +445,8 @@ def std_under_mask_convolution( ) -"""Calculate the sum of squares in a volume. Mean is assumed to be 0 which makes this operation a lot faster.""" +"""Calculate the sum of squares in a volume. Mean is assumed to be 0 which makes this +operation a lot faster.""" square_sum_kernel = cp.ReductionKernel( "T x", # input params "T y", # output params diff --git a/src/pytom_tm/parallel.py b/src/pytom_tm/parallel.py index 11681d4f..4c99bd44 100644 --- a/src/pytom_tm/parallel.py +++ b/src/pytom_tm/parallel.py @@ -22,9 +22,10 @@ def gpu_runner( log_level: int, unittest_mute: bool, ) -> None: - """Start a GPU runner, each runner should be initialized to a multiprocessing.Process() and manage running jobs - on a single GPU. Each runner will grab jobs from the task_queue and assign jobs to the result_queue once they - finish. When the task_queue is empty the gpu_runner will stop. + """Start a GPU runner, each runner should be initialized to a + multiprocessing.Process() and manage running jobs on a single GPU. Each runner will + grab jobs from the task_queue and assign jobs to the result_queue once they finish. + When the task_queue is empty the gpu_runner will stop. Parameters ---------- @@ -37,7 +38,8 @@ def gpu_runner( log_level: int log level for logging unittest_mute: Bool - optional muting of runner to prevent unittests flooding the terminal, only use for development + optional muting of runner to prevent unittests flooding the terminal, only use + for development """ if unittest_mute: mute_context = mute_stdout_stderr @@ -59,9 +61,10 @@ def run_job_parallel( gpu_ids: list[int, ...], unittest_mute: bool = False, ) -> tuple[npt.NDArray[float], npt.NDArray[float]]: - """Run a job in parallel over a single or multiple GPUs. If no volume_splits are given the search is parallelized - by splitting the angular search. If volume_splits are provided the job will first be split by volume, - if there are still more GPUs available, the subvolume jobs are still further split by angular search. + """Run a job in parallel over a single or multiple GPUs. If no volume_splits are + given the search is parallelized by splitting the angular search. If volume_splits + are provided the job will first be split by volume, if there are still more GPUs + available, the subvolume jobs are still further split by angular search. Parameters ---------- @@ -72,7 +75,8 @@ def run_job_parallel( gpu_ids: list[int, ...] list of gpu indices to spread jobs over unittest_mute: bool, default False - boolean to mute spawned process terminal output, only set to True for unittesting + boolean to mute spawned process terminal output, only set to True for + unittesting Returns ------- @@ -149,7 +153,9 @@ def run_job_parallel( logging.debug("Got all results from the child processes") break - for p in procs: # if one of the processes is no longer alive and has a failed exit we should error + for p in procs: + # if one of the processes is no longer alive and has a failed exit + # we should error if not p.is_alive() and p.exitcode == 1: # to prevent a deadlock [ x.terminate() for x in procs @@ -168,5 +174,6 @@ def run_job_parallel( else: ValueError( - "For some reason there are more gpu_ids than split job, this should never happen." + "For some reason there are more gpu_ids than split job, this should never " + "happen." 
) diff --git a/src/pytom_tm/plotting.py b/src/pytom_tm/plotting.py index d962f1ee..ca765309 100644 --- a/src/pytom_tm/plotting.py +++ b/src/pytom_tm/plotting.py @@ -331,8 +331,10 @@ def plist_quality_gaussian_fit( n_false_positives = 0.0 # list for storing probability of true positives and false positives # for each cutoff - recall = [] # recall = TP / (TP + FN); TP + FN is the full area under the Gaussian curve - fdr = [] # false discovery rate = FP / (TP + FP); == 1 - TP / (TP + FP) + # recall = TP / (TP + FN); TP + FN is the full area under the Gaussian curve + recall = [] + # false discovery rate = FP / (TP + FP); == 1 - TP / (TP + FP) + fdr = [] # find integral of gaussian particle population; # NEED TO DIVIDE BY HISTOGRAM BIN STEP diff --git a/src/pytom_tm/template.py b/src/pytom_tm/template.py index 1cea5756..dcbef506 100644 --- a/src/pytom_tm/template.py +++ b/src/pytom_tm/template.py @@ -24,15 +24,18 @@ def generate_template_from_map( Parameters ---------- input_map: npt.NDArray[float] - 3D density to use for generating the template, if box is not square it will be padded to square + 3D density to use for generating the template, if box is not square it will be + padded to square input_spacing: float voxel size of input map (in A) output_spacing: float - voxel size of output map (in A) the ratio of input to output will be used for downsampling + voxel size of output map (in A) the ratio of input to output will be used for + downsampling center: bool, default False set to True to center the template in the box by calculating the center of mass filter_to_resolution: Optional[float], default None - low-pass filter resolution to apply to template, if not provided will be set to 2 * output_spacing + low-pass filter resolution to apply to template, if not provided will be set to + 2 * output_spacing output_box_size: Optional[int], default None final box size of template display_filter: bool, default False @@ -80,7 +83,8 @@ def generate_template_from_map( # extend volume to the desired output size before applying convolutions! if output_box_size is not None: logging.debug( - f"size check {output_box_size} > {(input_map.shape[0] * input_spacing) // output_spacing}" + f"size check {output_box_size} > " + f"{(input_map.shape[0] * input_spacing) // output_spacing}" ) if output_box_size > (input_map.shape[0] * input_spacing) // output_spacing: pad = ( @@ -96,9 +100,9 @@ def generate_template_from_map( ) elif output_box_size < (input_map.shape[0] * input_spacing) // output_spacing: logging.warning( - "Could not set specified box size as the map would need to be cut and this might " - "result in loss of information of the structure. Please decrease the box size of the map " - "by hand (e.g. chimera)" + "Could not set specified box size as the map would need to be cut and " + "this might result in loss of information of the structure. Please " + "decrease the box size of the map by hand (e.g. 
chimera)" ) # create low pass filter diff --git a/src/pytom_tm/tmjob.py b/src/pytom_tm/tmjob.py index f0aa17e6..f5689137 100644 --- a/src/pytom_tm/tmjob.py +++ b/src/pytom_tm/tmjob.py @@ -32,8 +32,9 @@ def load_json_to_tmjob( file_name: pathlib.Path path to TMJob json file load_for_extraction: bool, default True - whether a finished job is loaded form disk for extraction, default is True as this function is currently only - called for pytom_extract_candidates and pytom_estimate_roc which run on previously finished jobs + whether a finished job is loaded form disk for extraction, default is True as + this function is currently only called for pytom_extract_candidates and + pytom_estimate_roc which run on previously finished jobs Returns ------- @@ -115,7 +116,8 @@ def _determine_1D_fft_splits( either the split with the most data or the left one if both splits have the same size """ - # Everything in this code assumes default slices of [x,y) so including x but excluding y + # Everything in this code assumes default slices of [x,y) so including x but + # excluding y data_slices = [] valid_data_slices = [] sub_len = [] @@ -249,10 +251,11 @@ def __init__( mask_is_spherical: bool, default True whether template mask is spherical, reduces computation complexity tilt_angles: Optional[list[float, ...]], default None - tilt angles of tilt-series used to reconstruct tomogram, if only two floats will be used to generate a - continuous wedge model + tilt angles of tilt-series used to reconstruct tomogram, if only two floats + will be used to generate a continuous wedge model tilt_weighting: bool, default False - use advanced tilt weighting options, can be supplemented with CTF parameters and accumulated dose + use advanced tilt weighting options, can be supplemented with CTF parameters + and accumulated dose search_x: Optional[list[int, int]], default None restrict tomogram search region along the x-axis search_y: Optional[list[int, int]], default None @@ -260,31 +263,36 @@ def __init__( search_z: Optional[list[int, int]], default None restrict tomogram search region along the z-axis tomogram_mask: Optional[pathlib.Path], default None - when volume splitting tomograms, only subjobs where any(mask > 0) will be generated + when volume splitting tomograms, only subjobs where any(mask > 0) will be + generated voxel_size: Optional[float], default None - voxel size of tomogram and template (in A) if not provided will be read from template/tomogram MRCs + voxel size of tomogram and template (in A) if not provided will be read from + template/tomogram MRCs low_pass: Optional[float], default None optional low-pass filter (resolution in A) to apply to tomogram and template high_pass: Optional[float], default None - optional high-pass filter (resolution in A) to apply to tomogram and template + optional high-pass filter (resolution in A) to apply to tomogram and + template dose_accumulation: Optional[list[float, ...]], default None list with dose accumulation per tilt image ctf_data: Optional[list[dict, ...]], default None - list of dictionaries with CTF parameters per tilt image, see pytom_tm.weight.create_ctf() for parameter - definition + list of dictionaries with CTF parameters per tilt image, see + pytom_tm.weight.create_ctf() for parameter definition whiten_spectrum: bool, default False whether to apply spectrum whitening rotational_symmetry: int, default 1 - specify a rotational symmetry around the z-axis, is only valid if the symmetry axis of the template is - aligned with the z-axis + specify a rotational 
symmetry around the z-axis, is only valid if the + symmetry axis of the template is aligned with the z-axis pytom_tm_version_number: str, default current version a string with the version number of pytom_tm for backward compatibility job_loaded_for_extraction: bool, default False - flag to set for finished template matching jobs that are loaded back for extraction, it prevents + flag to set for finished template matching jobs that are loaded back for + extraction, it prevents recomputation of the whitening filter particle_diameter: Optional[float], default None particle diameter (in Angstrom) to calculate angular search random_phase_correction: bool, default False, - run matching with a phase randomized version of the template to correct scores for noise + run matching with a phase randomized version of the template to correct + scores for noise rng_seed: int, default 321 set a seed for the rng for phase randomization """ @@ -319,17 +327,20 @@ def __init__( "Invalid voxel size provided, smaller or equal to zero." ) self.voxel_size = voxel_size - if ( # allow tiny numerical differences that are not relevant for template matching + if ( # allow tiny numerical differences that are not relevant for + # template matching round(self.voxel_size, 3) != round(meta_data_tomo["voxel_size"], 3) or round(self.voxel_size, 3) != round(meta_data_template["voxel_size"], 3) ): logging.debug( - f"provided {self.voxel_size} tomogram {meta_data_tomo['voxel_size']} " + f"provided {self.voxel_size} tomogram " + f"{meta_data_tomo['voxel_size']} " f"template {meta_data_template['voxel_size']}" ) print( - "WARNING: Provided voxel size does not match voxel size annotated in tomogram/template mrc." + "WARNING: Provided voxel size does not match voxel size annotated " + "in tomogram/template mrc." ) elif ( round(meta_data_tomo["voxel_size"], 3) @@ -339,8 +350,8 @@ def __init__( self.voxel_size = round(meta_data_tomo["voxel_size"], 3) else: raise ValueError( - "Voxel size could not be assigned, either a mismatch between tomogram and template or" - " annotated as 0." + "Voxel size could not be assigned, either a mismatch between tomogram " + "and template or annotated as 0." ) search_origin = [ @@ -358,7 +369,8 @@ def __init__( if x is not None: if not x[1] <= s: raise ValueError( - "One of search end indices is larger than the tomogram dimension." + "One of search end indices is larger than the tomogram " + "dimension." ) search_end.append(x[1]) else: @@ -373,18 +385,23 @@ def __init__( temp = read_mrc(tomogram_mask) if temp.shape != self.tomo_shape: raise ValueError( - "Tomogram mask does not have the same number of pixels as the tomogram.\n" - f"Tomogram mask shape: {temp.shape}, tomogram shape: {self.tomo_shape}" + "Tomogram mask does not have the same number of pixels as the " + "tomogram.\n" + f"Tomogram mask shape: {temp.shape}, " + f"tomogram shape: {self.tomo_shape}" ) if np.all(temp <= 0): raise ValueError( - f"No values larger than 0 found in the tomogram mask: {tomogram_mask}" + "No values larger than 0 found in the tomogram mask: " + f"{tomogram_mask}" ) self.whole_start = None - # For the main job these are always [0,0,0] and self.search_size, for sub_jobs these will differ from - # self.search_origin and self.search_size. The main job only uses them to calculate the search_volume_roi for - # statistics. Sub jobs also use these to extract and place back the relevant region in the master job. 
+ # For the main job these are always [0,0,0] and self.search_size, for sub_jobs + # these will differ from self.search_origin and self.search_size. The main job + # only uses them to calculate the search_volume_roi for statistics. Sub jobs + # also use these to extract and place back the relevant region in the master + # job. self.sub_start, self.sub_step = [0, 0, 0], self.search_size.copy() # Rotation parameters @@ -507,8 +524,8 @@ def write_to_json(self, file_name: pathlib.Path) -> None: json.dump(d, fstream, indent=4) def split_rotation_search(self, n: int) -> list[TMJob, ...]: - """Split the search into sub_jobs by dividing the rotations. Sub jobs will obtain the key - self.job_key + str(i) when looping over range(n). + """Split the search into sub_jobs by dividing the rotations. Sub jobs will + obtain the key self.job_key + str(i) when looping over range(n). Parameters ---------- @@ -518,7 +535,8 @@ def split_rotation_search(self, n: int) -> list[TMJob, ...]: Returns ------- sub_jobs: list[TMJob, ...] - a list of TMJobs that were split from self, the jobs are also assigned as the TMJob.sub_jobs attribute + a list of TMJobs that were split from self, the jobs are also assigned as + the TMJob.sub_jobs attribute """ if len(self.sub_jobs) > 0: raise TMJobError( @@ -539,30 +557,35 @@ def split_rotation_search(self, n: int) -> list[TMJob, ...]: return self.sub_jobs def split_volume_search(self, split: tuple[int, int, int]) -> list[TMJob, ...]: - """Split the search into sub_jobs by dividing into subvolumes. Final number of subvolumes is obtained by - multiplying all the split together, e.g. (2, 2, 1) results in 4 subvolumes. Sub jobs will obtain the key - self.job_key + str(i) when looping over range(n). + """Split the search into sub_jobs by dividing into subvolumes. Final number of + subvolumes is obtained by multiplying all the split together, e.g. (2, 2, 1) + results in 4 subvolumes. Sub jobs will obtain the key self.job_key + str(i) when + looping over range(n). - The sub jobs search area of the full tomogram is defined by: new_job.search_origin and new_job.search_size. + The sub jobs search area of the full tomogram is defined by: + new_job.search_origin and new_job.search_size. They are used when loading the search volume from the full tomogram. - The attribute new_job.whole_start defines how the volume maps back to the score volume of the parent job - (which can be a different size than the tomogram when the search is restricted along x, y or z). + The attribute new_job.whole_start defines how the volume maps back to the score + volume of the parent job (which can be a different size than the tomogram when + the search is restricted along x, y or z). - Finally, new_job.sub_start and new_job.sub_step, extract the score and angle map without the template - overhang from the subvolume. + Finally, new_job.sub_start and new_job.sub_step, extract the score and angle map + without the template overhang from the subvolume. If self.tomogram_mask is set, we will skip subjobs where all(mask <= 0). Parameters ---------- split: tuple[int, int, int] - tuple that defines how many times the search volume should be split into subvolumes along each axis + tuple that defines how many times the search volume should be split into + subvolumes along each axis Returns ------- sub_jobs: list[TMJob, ...] 
- a list of TMJobs that were split from self, the jobs are also assigned as the TMJob.sub_jobs attribute + a list of TMJobs that were split from self, the jobs are also assigned as + the TMJob.sub_jobs attribute """ if len(self.sub_jobs) > 0: raise TMJobError( @@ -588,7 +611,8 @@ def split_volume_search(self, split: tuple[int, int, int]) -> list[TMJob, ...]: for i, data_3D in enumerate(itt.product(x_splits, y_splits, z_splits)): # each data point for each dim is slice(left, right) of the search space # and slice(left,right) of the unique data point in the search space - # Look at the comments in the new_job.attribute for the meaning of each attribute + # Look at the comments in the new_job.attribute for the meaning of each + # attribute search_origin = tuple( data_3D[d][0][0] + self.search_origin[d] for d in range(3) @@ -598,7 +622,8 @@ def split_volume_search(self, split: tuple[int, int, int]) -> list[TMJob, ...]: sub_start = tuple(dim_data[1][0] - dim_data[0][0] for dim_data in data_3D) sub_step = tuple(dim_data[1][1] - dim_data[1][0] for dim_data in data_3D) - # check if this contains any of the unique data points are where tomo_mask>=0 + # check if this contains any of the unique data points are where + # tomo_mask > 0 if tomogram_mask is not None: slices = [ slice(origin, origin + step) @@ -616,11 +641,13 @@ def split_volume_search(self, split: tuple[int, int, int]) -> list[TMJob, ...]: # search size TODO: should be combined with the origin into slices new_job.search_size = search_size - # whole start is the start of the unique data within the complete searched array + # whole start is the start of the unique data within the complete searched + # array new_job.whole_start = whole_start # sub_start is where the unique data starts inside the split array new_job.sub_start = sub_start - # sub_step is the step of unique data inside the split array. TODO: should be slices instead + # sub_step is the step of unique data inside the split array. + # TODO: should be slices instead new_job.sub_step = sub_step sub_jobs.append(new_job) @@ -631,7 +658,8 @@ def split_volume_search(self, split: tuple[int, int, int]) -> list[TMJob, ...]: def merge_sub_jobs( self, stats: Optional[list[dict, ...]] = None ) -> tuple[npt.NDArray[float], npt.NDArray[float]]: - """Merge the sub jobs present in self.sub_jobs together to create the final output score and angle maps. + """Merge the sub jobs present in self.sub_jobs together to create the final + output score and angle maps. Parameters ---------- @@ -698,7 +726,8 @@ def merge_sub_jobs( job.sub_start[1] : job.sub_start[1] + job.sub_step[1], job.sub_start[2] : job.sub_start[2] + job.sub_step[2], ] - # Then the corrected sub part needs to be placed back into the full volume + # Then the corrected sub part needs to be placed back into the full + # volume scores[ job.whole_start[0] : job.whole_start[0] + sub_scores.shape[0], job.whole_start[1] : job.whole_start[1] + sub_scores.shape[1], @@ -714,26 +743,28 @@ def merge_sub_jobs( def start_job( self, gpu_id: int, return_volumes: bool = False ) -> Union[tuple[npt.NDArray[float], npt.NDArray[float]], dict]: - """Run this template matching job on the specified GPU. Search statistics of the job will always be assigned - to the self.job_stats. + """Run this template matching job on the specified GPU. Search statistics of the + job will always be assigned to the self.job_stats. 
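
A rough usage sketch of the flow these methods describe, assuming a job file written by a previous run; the json path and GPU id are placeholders, and run_job_parallel (shown earlier in this patch) automates the same splitting and merging:

    import pathlib
    from pytom_tm.tmjob import load_json_to_tmjob

    # load a previously configured job back from its json file
    job = load_json_to_tmjob(pathlib.Path("my_tm_job.json"), load_for_extraction=False)

    # run on a single GPU; with return_volumes=False the score and angle maps are
    # written to job.output_dir and only the search statistics are returned
    stats = job.start_job(0, return_volumes=False)

    # or split into subvolumes, run each piece, and stitch the maps back together
    sub_jobs = job.split_volume_search((2, 2, 1))
    sub_stats = [sub.start_job(0, return_volumes=False) for sub in sub_jobs]
    scores, angles = job.merge_sub_jobs(stats=sub_stats)
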
Parameters ---------- gpu_id: int index of the GPU to run the job on return_volumes: bool, default False - False (default) does not return volumes but instead writes them to disk, set to True to instead directly - return the score and angle volumes + False (default) does not return volumes but instead writes them to disk, set + to True to instead directly return the score and angle volumes Returns ------- output: Union[tuple[npt.NDArray[float], npt.NDArray[float]], dict] - when volumes are returned the output consists of two numpy arrays (score and angle map), when no volumes - are returned the output consists of a dictionary with search statistics + when volumes are returned the output consists of two numpy arrays (score and + angle map), when no volumes are returned the output consists of a dictionary + with search statistics """ # next fast fft len logging.debug( - f"Next fast fft shape: {tuple([next_fast_len(s, real=True) for s in self.search_size])}" + "Next fast fft shape: " + f"{tuple([next_fast_len(s, real=True) for s in self.search_size])}" ) search_volume = np.zeros( tuple([next_fast_len(s, real=True) for s in self.search_size]), @@ -778,7 +809,8 @@ def start_job( # if tilt angles are provided we can create wedge filters if self.tilt_angles is not None: - # for the tomogram a binary wedge is generated to explicitly set the missing wedge region to 0 + # for the tomogram a binary wedge is generated to explicitly set the + # missing wedge region to 0 tomo_filter *= create_wedge( search_volume.shape, self.tilt_angles, @@ -787,7 +819,8 @@ def start_job( angles_in_degrees=True, tilt_weighting=False, ).astype(np.float32) - # for the template a binary or per-tilt-weighted wedge is generated depending on the options + # for the template a binary or per-tilt-weighted wedge is generated + # depending on the options template_wedge *= create_wedge( self.template_shape, self.tilt_angles, diff --git a/src/pytom_tm/utils.py b/src/pytom_tm/utils.py index 9b5218b9..5e842f84 100644 --- a/src/pytom_tm/utils.py +++ b/src/pytom_tm/utils.py @@ -3,7 +3,8 @@ class mute_stdout_stderr(object): - """Context manager to redirect stdout and stderr to devnull. Only used to prevent terminal flooding in unittests.""" + """Context manager to redirect stdout and stderr to devnull. Only used to prevent + terminal flooding in unittests.""" def __enter__(self): self.outnull = open(os.devnull, "w") diff --git a/src/pytom_tm/weights.py b/src/pytom_tm/weights.py index 2fd5a944..9aeb96c6 100644 --- a/src/pytom_tm/weights.py +++ b/src/pytom_tm/weights.py @@ -26,7 +26,8 @@ def hwhm_to_sigma(hwhm: float) -> float: - """Convert half width of half maximum of a Gaussian to sigma by dividing by sqrt(2 * ln(2)). + """Convert half width of half maximum of a Gaussian to sigma by dividing by + sqrt(2 * ln(2)). Parameters ---------- @@ -42,7 +43,8 @@ def hwhm_to_sigma(hwhm: float) -> float: def sigma_to_hwhm(sigma: float) -> float: - """Convert sigma to half width of half maximum of a Gaussian by multiplying with sqrt(2 * ln(2)). + """Convert sigma to half width of half maximum of a Gaussian by multiplying with + sqrt(2 * ln(2)). Parameters ---------- @@ -83,13 +85,15 @@ def wavelength_ev2m(voltage: float) -> float: def radial_reduced_grid( shape: Union[tuple[int, int, int], tuple[int, int]], shape_is_reduced: bool = False ) -> npt.NDArray[float]: - """Calculates a Fourier space radial reduced grid for the given input shape, with the 0 frequency in the center - of the output image. 
Values range from 0 in the center to 1 at Nyquist frequency.
+    """Calculates a Fourier space radial reduced grid for the given input shape, with
+    the 0 frequency in the center of the output image. Values range from 0 in the
+    center to 1 at Nyquist frequency.

-    By default, it is assumed shape belongs to a real space array, which causes the function to return a
-    grid with the last dimension reduced, i.e. shape[-1] // 2 + 1 (ideal for creating frequency
-    dependent filters). However, setting radial_reduced_grid(..., shape_is_reduced=True) the shape is assumed to
-    already be in a reduced form.
+    By default, it is assumed shape belongs to a real space array, which causes the
+    function to return a grid with the last dimension reduced, i.e. shape[-1] // 2 + 1
+    (ideal for creating frequency dependent filters). However, when setting
+    radial_reduced_grid(..., shape_is_reduced=True), the shape is assumed to already be
+    in a reduced form.

     Parameters
     ----------
@@ -143,7 +147,8 @@ def create_gaussian_low_pass(
     spacing: float,
     resolution: float,
 ) -> npt.NDArray[float]:
-    """Create a 3D Gaussian low-pass filter with cutoff (or HWHM) that is reduced in fourier space.
+    """Create a 3D Gaussian low-pass filter with cutoff (or HWHM) that is reduced in
+    fourier space.

     Parameters
     ----------
@@ -173,7 +178,8 @@ def create_gaussian_high_pass(
     spacing: float,
     resolution: float,
 ) -> npt.NDArray[float]:
-    """Create a 3D Gaussian high-pass filter with cutoff (or HWHM) that is reduced in fourier space.
+    """Create a 3D Gaussian high-pass filter with cutoff (or HWHM) that is reduced in
+    fourier space.

     Parameters
     ----------
@@ -204,9 +210,10 @@ def create_gaussian_band_pass(
     low_pass: Optional[float] = None,
     high_pass: Optional[float] = None,
 ) -> npt.NDArray[float]:
-    """Resolution bands presents the resolution shells where information needs to be maintained. For example the bands
-    might be (150A, 40A). For a spacing of 15A (nyquist resolution is 30A) this is a mild low pass filter. However,
-    quite some low spatial frequencies will be cut by it.
+    """Resolution bands present the resolution shells where information needs to be
+    maintained. For example the bands might be (150A, 40A). For a spacing of 15A
+    (nyquist resolution is 30A) this is a mild low pass filter. However, quite some low
+    spatial frequencies will be cut by it.

     Parameters
     ----------
@@ -262,7 +269,8 @@ def create_wedge(
     accumulated_dose_per_tilt: Optional[list[float, ...]] = None,
     ctf_params_per_tilt: Optional[list[dict]] = None,
 ) -> npt.NDArray[float]:
-    """This function returns a wedge volume that is either symmetric or asymmetric depending on wedge angle input.
+    """This function returns a wedge volume that is either symmetric or asymmetric
+    depending on wedge angle input.

     Parameters
     ----------
@@ -285,7 +293,8 @@ def create_wedge(
     accumulated_dose_per_tilt: Optional[list[float, ...]], default None
         accumulated dose for each tilt for dose weighting
     ctf_params_per_tilt: Optional[list[dict]], default None
-        ctf parameters for each tilt (see _create_tilt_weighted_wedge() for dict specification)
+        ctf parameters for each tilt (see _create_tilt_weighted_wedge() for dict
+        specification)

     Returns
     -------
@@ -297,12 +306,14 @@ def create_wedge(

     if voxel_size <= 0.0:
         raise ValueError(
-            "Voxel size in create wedge is smaller or equal to 0, which is an invalid voxel spacing."
+            "Voxel size in create wedge is smaller than or equal to 0, which is an "
+            "invalid voxel spacing."
) if cut_off_radius > 1: print( - "Warning: wedge cutoff needs to be defined as a fraction of nyquist 0 < c <= 1. Setting value to 1.0." + "Warning: wedge cutoff needs to be defined as a fraction of nyquist " + "0 < c <= 1. Setting value to 1.0." ) cut_off_radius = 1.0 elif cut_off_radius <= 0: @@ -359,8 +370,8 @@ def create_wedge( def _create_symmetric_wedge( shape: tuple[int, int, int], wedge_angle: float, cut_off_radius: float ) -> npt.NDArray[float]: - """This function returns a symmetric wedge object. Function should not be imported, user should call - create_wedge(). + """This function returns a symmetric wedge object. + Function should not be imported, user should call create_wedge(). Parameters ---------- @@ -406,7 +417,8 @@ def _create_asymmetric_wedge( wedge_angles: tuple[float, float], cut_off_radius: float, ) -> npt.NDArray[float]: - """This function returns an asymmetric wedge object. Function should not be imported, user should call create_wedge(). + """This function returns an asymmetric wedge object. + Function should not be imported, user should call create_wedge(). Parameters ---------- @@ -477,8 +489,10 @@ def _create_tilt_weighted_wedge( ctf_params_per_tilt: Optional[list[dict]] = None, ) -> npt.NDArray[float]: """ - The following B-factor heuristic is used (as mentioned in the M paper, and introduced in RELION 1.4): - "The B factor is increased by 4Å2 per 1e− Å−2 of exposure, and each tilt is weighted as cos θ." + The following B-factor heuristic is used (as mentioned in the M paper, and + introduced in RELION 1.4): + "The B factor is increased by 4Å2 per 1e− Å−2 of exposure, and each tilt + is weighted as cos θ." Relation between B-factor and the sigma of a gaussian: @@ -501,7 +515,8 @@ def _create_tilt_weighted_wedge( accumulated_dose_per_tilt: list[float, ...], default None the accumulated dose in e− Å−2 ctf_params_per_tilt: list[dict, ...], default None - the ctf parameters per tilt angle, list of dicts where each dict has the following keys: + the ctf parameters per tilt angle, list of dicts + where each dict has the following keys: - 'defocus'; in um - 'amplitude'; fraction of amplitude contrast between 0 and 1 - 'voltage'; in keV @@ -511,24 +526,26 @@ def _create_tilt_weighted_wedge( Returns ------- wedge: npt.NDArray[float] - structured wedge mask in fourier reduced form, i.e. output shape is (shape[0], shape[1], shape[2] // 2 + 1) + structured wedge mask in fourier reduced form, i.e. output shape is + (shape[0], shape[1], shape[2] // 2 + 1) """ if accumulated_dose_per_tilt is not None and len(accumulated_dose_per_tilt) != len( tilt_angles ): raise ValueError( - "in _create_tilt_weighted_wedge the list of accumulated dose per tilt does not have the same " - "length as the tilt angle list!" + "in _create_tilt_weighted_wedge the list of accumulated dose per tilt does " + "not have the same length as the tilt angle list!" ) if ctf_params_per_tilt is not None and len(ctf_params_per_tilt) != len(tilt_angles): raise ValueError( - "in _create_tilt_weighted_wedge the list of CTF parameters per tilt does not have the same " - "length as the tilt angle list!" + "in _create_tilt_weighted_wedge the list of CTF parameters per tilt does " + "not have the same length as the tilt angle list!" ) if not all([shape[0] == s for s in shape[1:]]): raise UnequalSpacingError( "Input shape for structured wedge needs to be a square box. " - "Otherwise the frequencies in fourier space are not equal across dimensions." 
+ "Otherwise the frequencies in fourier space are not equal across " + "dimensions." ) image_size = shape[0] # assign to size variable as all dimensions are equal size @@ -567,7 +584,8 @@ def _create_tilt_weighted_wedge( ) tilt[:, :, image_size // 2] = ( np.concatenate( - ( # duplicate and flip the CTF around the 0 frequency; then concatenate to make it non-reduced + ( # duplicate and flip the CTF around the 0 frequency; + # then concatenate to make it non-reduced np.flip(ctf[:, 1 : 1 + image_size - ctf.shape[1]], axis=1), ctf, ), @@ -687,7 +705,8 @@ def ctf_1d(q): values = ctf_1d(k_range) zero_crossings = np.where(np.diff(np.sign(values)))[0] - # for overfocus the first crossing needs to be skipped, for example see: Yonekura et al. 2006 JSB + # for overfocus the first crossing needs to be skipped, + # for example see: Yonekura et al. 2006 JSB k_cutoff = ( k_range[zero_crossings[0]] if defocus > 0 else k_range[zero_crossings[1]] ) @@ -713,12 +732,14 @@ def radial_average( Parameters ---------- weights: npt.NDArray[float] - 3D array to be radially averaged: in fourier reduced form and with the origin in the corner. + 3D array to be radially averaged: in fourier reduced form and with the origin + in the corner. Returns ------- (q, mean): tuple[npt.NDArray[float], npt.NDArray[float]] - A tuple of two 1d numpy arrays. Their length equals half of largest input dimension. + A tuple of two 1d numpy arrays. Their length equals half of largest + input dimension. """ if len(weights.shape) not in [2, 3]: raise ValueError("Radial average calculation only works for 2d/3d arrays") @@ -729,7 +750,8 @@ def radial_average( q = np.arange(sampling_points) q_grid = np.floor( - # convert to radial indices in the fourier power spectrum, 0.5 is added to obtain the correct ring + # convert to radial indices in the fourier power spectrum, + # 0.5 is added to obtain the correct ring radial_reduced_grid(weights.shape, shape_is_reduced=True) * (sampling_points - 1) + 0.5 @@ -744,12 +766,14 @@ def radial_average( def power_spectrum_profile(image: npt.NDArray[float]) -> npt.NDArray[float]: - """Calculate the power spectrum for a real space array and then find the profile (radial average). + """Calculate the power spectrum for a real space array and then find the profile + (radial average). Parameters ---------- image: npt.NDArray[float] - 2D/3D array in real space for which the power spectrum profile needs to be calculated + 2D/3D array in real space for which the power spectrum profile needs to be + calculated Returns ------- @@ -774,9 +798,11 @@ def profile_to_weighting( Parameters ---------- profile: npt.NDArray[float] - power spectrum profile (or other 1d profile) to transform in a fourier space filter + power spectrum profile (or other 1d profile) to transform in a fourier space + filter shape: Union[tuple[int, int], tuple[int, int, int]] - 2D/3D array shape in real space for which the fourier reduced weights are calculated + 2D/3D array shape in real space for which the fourier reduced weights are + calculated Returns ------- diff --git a/tests/test00_parallel.py b/tests/test00_parallel.py index c14121ab..55beda2f 100644 --- a/tests/test00_parallel.py +++ b/tests/test00_parallel.py @@ -1,9 +1,10 @@ """ This file name explicitly starts with test00_ to ensure its run first during testing. -Other tests will run jobs on the GPU which keeps the main unittest process lingering on the GPU. 
When starting the
-parallel manager test on a GPU in exclusive process mode, the spawned process from the parallel manager will fail due
-the occupation from the other unittests. If the parallel manager is tested first, the spawned process is fully closed
-which will allow the remaining test to use the GPU.
+Other tests will run jobs on the GPU which keeps the main unittest process lingering on
+the GPU. When starting the parallel manager test on a GPU in exclusive process mode,
+the spawned process from the parallel manager will fail due to the occupation from the
+other unittests. If the parallel manager is tested first, the spawned process is fully
+closed, which will allow the remaining tests to use the GPU.
 """

 import unittest
@@ -98,8 +99,8 @@ def test_parallel_breaking(self):
         self.assertEqual(
             len(multiprocessing.active_children()),
             0,
-            msg="a process was still lingering after a parallel job with partially invalid resources "
-            "was started",
+            msg="a process was still lingering after a parallel job with partially "
+            "invalid resources was started",
         )
     else:  # pragma: no cover
         self.fail("This should have given a RuntimeError")
diff --git a/tests/test_tmjob.py b/tests/test_tmjob.py
index 4e4a0bdb..9db731ec 100644
--- a/tests/test_tmjob.py
+++ b/tests/test_tmjob.py
@@ -145,8 +145,9 @@ def tearDownClass(cls) -> None:
         TEST_SCORES.unlink()
         TEST_ANGLES.unlink()
         TEST_CUSTOM_ANGULAR_SEARCH.unlink()
-        # the whitening filter might not exist if the job with spectrum whitening failed, so the unlinking needs to
-        # allow this (with missing_ok=True) to ensure clean up of the test directory
+        # the whitening filter might not exist if the job with spectrum whitening
+        # failed, so the unlinking needs to allow this (with missing_ok=True) to ensure
+        # cleanup of the test directory
         TEST_WHITENING_FILTER.unlink(missing_ok=True)
         TEST_JOB_JSON.unlink()
         TEST_JOB_JSON_WHITENING.unlink()
@@ -308,7 +309,8 @@ def test_tm_job_weighting_options(self):
             score.shape, job.tomo_shape, msg="TMJob with all options failed"
         )

-        # run with only tilt weighting (in test_weights different options are tested for create_wedge)
+        # run with only tilt weighting
+        # (in test_weights different options are tested for create_wedge)
         job = TMJob(
             "0",
             10,
@@ -346,7 +348,8 @@ def test_tm_job_weighting_options(self):
             score.shape, job.tomo_shape, msg="TMJob with only band-pass failed"
         )

-        # run with only spectrum whitening (in test_weights the whitening filter is tested
+        # run with only spectrum whitening
+        # (in test_weights the whitening filter is tested)
         job = TMJob(
             "0",
             10,
@@ -381,17 +384,18 @@ def test_tm_job_weighting_options(self):
         self.assertNotEqual(
             whitening_filter.shape,
             new_whitening_filter.shape,
-            msg="After reducing the search region along the largest dimension the whitening filter "
-            "should have less sampling points",
+            msg="After reducing the search region along the largest dimension the "
+            "whitening filter should have fewer sampling points",
         )
         self.assertEqual(
             new_whitening_filter.shape,
             (max(job.search_size) // 2 + 1,),
-            msg="The whitening filter does not have the expected size, it should be equal (x // 2) + 1, "
-            "where x is the largest dimension of the search box.",
+            msg="The whitening filter does not have the expected size, it should "
+            "equal (x // 2) + 1, where x is the largest dimension of the search box.",
         )

-        # TMJob with none of these weighting options is tested in all other runs in this file.
+        # TMJob with none of these weighting options is tested in all other runs
+        # in this file.
def test_load_json_to_tmjob(self):
         # check base job loading
@@ -449,11 +453,13 @@ def test_custom_angular_search(self):
         )

     def test_tm_job_split_volume(self):
-        # Splitting the volume into smaller boxes than the template should not raise an error
+        # Splitting the volume into smaller boxes than the template
+        # should not raise an error
         _ = self.job.split_volume_search((10, 3, 2))
         # Reset
         self.job.sub_jobs = []
-        # Make sure that asking for more splits than pixels results in just a pixel number of jobs
+        # Make sure that asking for more splits than pixels results in
+        # only as many jobs as there are pixels
         with self.assertWarnsRegex(RuntimeWarning, "More splits than pixels"):
             self.job.split_volume_search((TOMO_SHAPE[0] + 42, 1, 1))
         self.assertEqual(len(self.job.sub_jobs), TOMO_SHAPE[0])
@@ -481,10 +487,12 @@ def test_tm_job_split_volume(self):
         self.assertEqual(ANGLE_ID, angle[ind])
         self.assertSequenceEqual(LOCATION, ind)

-        # Small difference in the edge regions of the split dimension. This is because the cross correlation function
-        # is not well defined in the boundary area, only a small part of the template is correlated here (and we are
-        # not really interested in it). Probably the inaccuracy in this area becomes more apparent when splitting
-        # into subvolumes due to a smaller number of sampling points in Fourier space.
+        # Small difference in the edge regions of the split dimension. This is because
+        # the cross correlation function is not well defined in the boundary area, only
+        # a small part of the template is correlated here (and we are not really
+        # interested in it). Probably the inaccuracy in this area becomes more apparent
+        # when splitting into subvolumes due to a smaller number of sampling points in
+        # Fourier space.
         ok_region = slice(TEMPLATE_SIZE // 2, -TEMPLATE_SIZE // 2)
         score_diff = np.abs(
             score[ok_region, ok_region, ok_region]
@@ -494,7 +502,8 @@ def test_tm_job_split_volume(self):
         self.assertAlmostEqual(
             score_diff, 0, places=1, msg="score diff should not be larger than 0.01"
         )
-        # There is some race condition that sometimes gives a different angle for 1 specific point
+        # There is some race condition that sometimes gives
+        # a different angle for 1 specific point.
# This point is deterministic but different per machine # We suspect it has something to do with the FFT padding # See https://github.com/SBC-Utrecht/pytom-match-pick/pull/163 @@ -504,7 +513,8 @@ def test_tm_job_split_volume(self): # read_mrc(TEST_ANGLES)[ok_region, ok_region, ok_region] # ).sum() - # self.assertAlmostEqual(angle_diff, 0, places=1, msg='angle diff should not change') + # self.assertAlmostEqual(angle_diff, 0, places=1, + # msg='angle diff should not change') # get search statistics before and after splitting split_stats = self.job.job_stats @@ -518,8 +528,8 @@ def test_tm_job_split_volume(self): split_stats["std"], reference_stats["std"], places=3, - msg="Standard deviation over template matching with subvolume splitting should be " - "almost identical.", + msg="Standard deviation over template matching with subvolume splitting " + "should be almost identical.", ) def test_splitting_with_tomogram_mask(self): @@ -529,7 +539,8 @@ def test_splitting_with_tomogram_mask(self): self.assertLess(len(job.sub_jobs), 10 * 10 * 10) def test_splitting_with_offsets(self): - # check if subjobs have correct offsets for the main job, the last sub job will have the largest errors + # check if subjobs have correct offsets for the main job, + # the last sub job will have the largest errors job = TMJob( "0", 10, @@ -545,7 +556,8 @@ def test_splitting_with_offsets(self): ) # split along each dimension and get only the last sub job last_sub_job = job.split_volume_search((2, 3, 2))[-1] - # Make sure the start of the data + size of the last subjob result in the correct size + # Make sure the start of the data + size of the last subjob + # result in the correct size final_size = [ i + j for i, j in zip(last_sub_job.whole_start, last_sub_job.sub_step) ] @@ -597,8 +609,8 @@ def test_tm_job_split_angles(self): split_stats["std"], reference_stats["std"], places=6, - msg="Standard deviation of template matching with angular search split should be almost " - "identical.", + msg="Standard deviation of template matching with angular search split " + "should be almost identical.", ) def test_extraction(self): @@ -664,7 +676,8 @@ def test_extraction(self): msg="Length of returned list should be 0 after applying mask where the " "object is not in the region of interest.", ) - # test if all masks are ignored if ignore_tomogram_mask=True and that a warning is raised + # test if all masks are ignored if ignore_tomogram_mask=True + # and that a warning is raised with self.assertLogs(level="WARNING") as cm: df, scores = extract_particles( job, @@ -687,7 +700,8 @@ def test_extraction(self): msg="We would expect some annotations if all tomogram masks are ignored", ) - # test mask that covers the particle and should override the one now attached to the job + # test mask that covers the particle + # and should override the one now attached to the job df, scores = extract_particles( job, 5, diff --git a/tests/test_weights.py b/tests/test_weights.py index ce0f448f..c1c9ef55 100644 --- a/tests/test_weights.py +++ b/tests/test_weights.py @@ -308,7 +308,8 @@ def test_ctf(self): ) self.assertTrue( np.sum((ctf_raw != ctf_cut) * 1) != 0, - msg="CTF should be different when cutting it off after the first zero crossing", + msg="CTF should be different when cutting it off after the first zero " + "crossing", ) def test_radial_average(self): @@ -363,9 +364,9 @@ def test_power_spectrum_profile(self): self.assertEqual( profile.shape, (max(self.volume_shape_irregular) // 2 + 1,), - msg="Power spectrum profile output shape should be a 
1-dimensional array with " - "length equal to max(input_shape) // 2 + 1, corresponding to largest sampling component " - "in Fourier space.", + msg="Power spectrum profile output shape should be a 1-dimensional array " + "with length equal to max(input_shape) // 2 + 1, corresponding to largest " + "sampling component in Fourier space.", ) def test_profile_to_weighting(self):