Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add E501 check #216

Merged
merged 2 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,6 @@ parallel = true

[tool.ruff]
line-length = 88

[tool.ruff.lint]
extend-select = ["E501"]
31 changes: 20 additions & 11 deletions src/pytom_tm/angles.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def angle_to_angle_list(
angle_diff: float
maximum difference (in degrees) for the angle list
sort_angles: bool, default True
sort the list, using python default angle_list.sort(), sorts first on Z1, then X, then Z2
sort the list, using python default angle_list.sort(), sorts first on Z1,
then X, then Z2
log_level: int, default logging.DEBUG
the log level to use when generating logs

Expand Down Expand Up @@ -67,14 +68,17 @@ def load_angle_list(
Parameters
----------
file_name: pathlib.Path
path to text file containing angular search, each line should contain 3 floats of anti-clockwise ZXZ
path to text file containing angular search, each line should contain 3 floats
of anti-clockwise ZXZ
sort_angles: bool, default True
sort the list, using python default angle_list.sort(), sorts first on Z1, then X, then Z2
sort the list, using python default angle_list.sort(), sorts first on Z1,
then X, then Z2

Returns
-------
angle_list: list[tuple[float, float, float]]
a list where each element is a tuple of 3 floats containing an anti-clockwise ZXZ Euler rotation in radians
a list where each element is a tuple of 3 floats containing an anti-clockwise
ZXZ Euler rotation in radians
"""
with open(str(file_name)) as fstream:
lines = fstream.readlines()
Expand All @@ -84,7 +88,8 @@ def load_angle_list(
"Invalid angle file provided, each line should have 3 ZXZ Euler angles!"
)
if sort_angles:
angle_list.sort() # angle list needs to be sorted otherwise symmetry reduction cannot be used!
# angle_list needs to be sorted otherwise symmetry reduction cannot be used!
angle_list.sort()
return angle_list


Expand All @@ -104,7 +109,8 @@ def get_angle_list(
or if a float:
maximum difference (in degrees) for the angle list
sort_angles: bool, default True
sort the list, using python default angle_list.sort(), sorts first on Z1, then X, then Z2
sort the list, using python default angle_list.sort(), sorts first on Z1,
then X, then Z2
symmetry: int, default 1
the returned list will only have Z2 angles [0, (2*pi/symmetry))
log_level: str, default 'DEBUG'
Expand All @@ -113,7 +119,8 @@ def get_angle_list(
Returns
-------
angle_list: list[tuple[float, float, float]]
a list where each element is a tuple of 3 floats containing an anti-clockwise ZXZ Euler rotation in radians
a list where each element is a tuple of 3 floats containing an anti-clockwise
ZXZ Euler rotation in radians
"""
log_level = logging.getLevelNamesMapping()[log_level]
out = None
Expand All @@ -134,7 +141,8 @@ def get_angle_list(
if possible_file_path.exists() and possible_file_path.suffix == ".txt":
logging.log(
log_level,
"Custom file provided for the angular search. Checking if it can be read...",
"Custom file provided for the angular search. "
"Checking if it can be read...",
)
out = load_angle_list(angle, sort_angles)

Expand All @@ -150,9 +158,10 @@ def convert_euler(
degrees_in: bool = True,
degrees_out: bool = True,
) -> tuple[float, float, float]:
"""Convert a single set of Euler angles from one Euler notation to another. This function makes use of
scipy.spatial.transform.Rotation meaning that capital letters (i.e. ZXZ) specify intrinsic rotations (commonly
used in cryo-EM) and small letters (i.e. zxz) specify extrinsic rotations.
"""Convert a single set of Euler angles from one Euler notation to another. This
function makes use of scipy.spatial.transform.Rotation meaning that capital letters
(i.e. ZXZ) specify intrinsic rotations (commonly used in cryo-EM) and small letters
(i.e. zxz) specify extrinsic rotations.

Parameters
----------
Expand Down
12 changes: 7 additions & 5 deletions src/pytom_tm/correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def std_under_mask(
mean: float,
mask_weight: Optional[float] = None,
) -> Union[float, cpt.NDArray[float]]:
"""Calculate standard deviation of array in the mask region. Uses mean_under_mask() to calculate the mean of
data**2 within the mask.
"""Calculate standard deviation of array in the mask region. Uses mean_under_mask()
to calculate the mean of data**2 within the mask.

data and mask can be cupy or numpy arrays.

Expand Down Expand Up @@ -72,8 +72,9 @@ def normalise(
mask: Optional[Union[npt.NDArray[float], cpt.NDArray[float]]] = None,
mask_weight: Optional[float] = None,
) -> Union[npt.NDArray[float], cpt.NDArray[float]]:
"""Normalise array by subtracting mean and dividing by standard deviation. If a mask is provided the array is
normalised with the mean and std calculated within the mask.
"""Normalise array by subtracting mean and dividing by standard deviation. If a mask
is provided the array is normalised with the mean and std calculated within the
mask.

data and mask can be cupy or numpy arrays.

Expand Down Expand Up @@ -105,7 +106,8 @@ def normalised_cross_correlation(
data2: Union[npt.NDArray[float], cpt.NDArray[float]],
mask: Optional[Union[npt.NDArray[float], cpt.NDArray[float]]] = None,
) -> Union[float, cpt.NDArray[float]]:
"""Calculate normalised cross correlation between two arrays. Optionally only in a masked region.
"""Calculate normalised cross correlation between two arrays. Optionally only in a
masked region.

data1, data2, and mask can be cupy or numpy arrays.

Expand Down
20 changes: 11 additions & 9 deletions src/pytom_tm/entry_points.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,19 +436,20 @@ def extract_candidates(argv=None):
type=pathlib.Path,
required=False,
action=CheckFileExists,
help="Here you can provide a mask for the extraction with dimensions (in pixels) equal to "
"the tomogram. All values in the mask that are smaller or equal to 0 will be "
"removed, all values larger than 0 are considered regions of interest. It can "
"be used to extract annotations only within a specific cellular region."
"If the job was run with a tomogram mask, this file will be used instead of the job mask",
help="Here you can provide a mask for the extraction with dimensions "
"(in pixels) equal to the tomogram. All values in the mask that are smaller or "
"equal to 0 will be removed, all values larger than 0 are considered regions "
"of interest. It can be used to extract annotations only within a specific "
"cellular region. If the job was run with a tomogram mask, this file will be "
"used instead of the job mask",
)
parser.add_argument(
"--ignore_tomogram_mask",
action="store_true",
default=False,
required=False,
help="Flag to ignore the input and TM job tomogram mask. "
"Useful if the scores mrc looks reasonable, but this finds 0 particles to extract",
help="Flag to ignore the input and TM job tomogram mask. Useful if the scores "
"mrc looks reasonable, but this finds 0 particles to extract",
)
parser.add_argument(
"-n",
Expand Down Expand Up @@ -687,8 +688,9 @@ def match_template(argv=None):
type=pathlib.Path,
required=False,
action=CheckFileExists,
help="Here you can provide a mask for matching with dimensions (in pixels) equal to "
"the tomogram. If a subvolume only has values <= 0 for this mask it will be skipped.",
help="Here you can provide a mask for matching with dimensions (in pixels) "
"equal to the tomogram. If a subvolume only has values <= 0 for this mask it "
"will be skipped.",
)

filter_group = parser.add_argument_group("Filter control")
Expand Down
47 changes: 28 additions & 19 deletions src/pytom_tm/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,24 +33,28 @@ def predict_tophat_mask(
create_plot: bool = True,
tophat_connectivity: int = 1,
) -> npt.NDArray[bool]:
"""This function gets as input a score map and returns a peak mask as determined with a tophat transform.
"""This function gets as input a score map and returns a peak mask as determined
with a tophat transform.

It does the following things:
- calculate a tophat transform using scipy.ndimage.white_tophat() and a kernel
ndimage.generate_binary_structure(rank=3, connectivity=1).
- calculate a histogram of the transformed score map and take its log to focus more on small values
- take second derivative of log(histogram) to find the region for fitting a Gaussian, where the second derivative
switches from negative to positive the background noise likely breaks
- use formula from excellent work of Rickgauer et al. (2017, eLife) which uses the error function to find the
likelihood of false positives on the background Gaussian distribution:
N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
- calculate a histogram of the transformed score map and take its log to focus more
on small values
- take second derivative of log(histogram) to find the region for fitting a
Gaussian, where the second derivative switches from negative to positive the
background noise likely breaks
- use formula from excellent work of Rickgauer et al. (2017, eLife) which uses the
error function to find the likelihood of false positives on the background
Gaussian distribution: N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2

Parameters
----------
score_volume: npt.NDArray[float]
template matching score map
output_path: Optional[pathlib.Path], default None
if provided (and plotting is available), write a figure of the fit to the output folder
if provided (and plotting is available), write a figure of the fit to the output
folder
n_false_positives: float, default 1.0
number of false positive for error function cutoff calculation
create_plot: bool, default True
Expand Down Expand Up @@ -111,7 +115,8 @@ def log_gauss(x, amp, mu, sigma): # log of gaussian for fitting
0
] # now go for accurate fit to log of gauss
search_space = coeff_log[0] / (coeff_log[2] * np.sqrt(2 * np.pi))
# formula Rickgauer et al. (2017, eLife): N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
# formula Rickgauer et al. (2017, eLife):
# N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
# we need to find theta (i.e. the cut-off)
cut_off = (
erfcinv((2 * n_false_positives) / search_space) * np.sqrt(2) * coeff_log[2]
Expand Down Expand Up @@ -162,10 +167,11 @@ def extract_particles(
cut_off: Optional[float]
manually override the automated score cut-off estimation, value between 0 and 1
n_false_positives: float, default 1.0
tune the number of false positives to be included for automated error function cut-off estimation:
should be a float > 0
tune the number of false positives to be included for automated error function
cut-off estimation: should be a float > 0
tomogram_mask_path: Optional[pathlib.Path]
path to a tomographic binary mask for extraction, will override job.tomogram_mask
path to a tomographic binary mask for extraction, will override
job.tomogram_mask
tophat_filter: bool
attempt to only select sharp peaks with the tophat filter
create_plot: bool, default True
Expand All @@ -183,7 +189,8 @@ def extract_particles(
Returns
-------
dataframe, scores: tuple[pd.DataFrame, list[float, ...]]
dataframe with annotations that can be written out as a STAR file and a list of the selected scores
dataframe with annotations that can be written out as a STAR file and a list of
the selected scores
"""

score_volume = read_mrc(job.output_dir.joinpath(f"{job.tomo_id}_scores.mrc"))
Expand Down Expand Up @@ -218,8 +225,9 @@ def extract_particles(
if tomogram_mask is not None:
if tomogram_mask.shape != job.tomo_shape:
raise ValueError(
"Tomogram mask does not have the same number of pixels as the tomogram.\n"
f"Tomogram mask shape: {tomogram_mask.shape}, tomogram shape: {job.tomo_shape}"
"Tomogram mask does not have the same number of pixels as the "
f"tomogram.\n Tomogram mask shape: {tomogram_mask.shape}, "
f"tomogram shape: {job.tomo_shape}"
)
slices = [
slice(origin, origin + size)
Expand All @@ -240,14 +248,15 @@ def extract_particles(
sigma = job.job_stats["std"]
search_space = job.job_stats["search_space"]
if cut_off is None:
# formula Rickgauer et al. (2017, eLife): N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
# formula Rickgauer et al. (2017, eLife):
# N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
# we need to find theta (i.e. the cut off)
cut_off = erfcinv((2 * n_false_positives) / search_space) * np.sqrt(2) * sigma
logging.info(f"cut off for particle extraction: {cut_off}")
elif cut_off < 0:
logging.warning(
"Provided extraction score cut-off is smaller than 0. Changing to 0 as that is smallest "
"allowed value."
"Provided extraction score cut-off is smaller than 0. Changing to 0 as "
"that is smallest allowed value."
)
cut_off = 0

Expand Down Expand Up @@ -293,7 +302,7 @@ def extract_particles(
rotation[2], # AnglePsi
lcc_max, # LCCmax
cut_off, # Extraction cut off
sigma, # Add sigma of template matching search, LCCmax can be divided by sigma to obtain SNR
sigma, # Add sigma of template matching search, LCCmax/sigma = SNR
pixel_size, # DetectorPixelSize
tomogram_id, # MicrographName
)
Expand Down
Loading