Skip to content

Commit

Permalink
add E501 check (#216)
Browse files Browse the repository at this point in the history
* add E501 check

* fix all too-long-lines
  • Loading branch information
sroet authored Aug 14, 2024
1 parent 1d7d238 commit e28e155
Show file tree
Hide file tree
Showing 17 changed files with 413 additions and 269 deletions.
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,6 @@ parallel = true

[tool.ruff]
line-length = 88

[tool.ruff.lint]
extend-select = ["E501"]
31 changes: 20 additions & 11 deletions src/pytom_tm/angles.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def angle_to_angle_list(
angle_diff: float
maximum difference (in degrees) for the angle list
sort_angles: bool, default True
sort the list, using python default angle_list.sort(), sorts first on Z1, then X, then Z2
sort the list, using python default angle_list.sort(), sorts first on Z1,
then X, then Z2
log_level: int, default logging.DEBUG
the log level to use when generating logs
Expand Down Expand Up @@ -67,14 +68,17 @@ def load_angle_list(
Parameters
----------
file_name: pathlib.Path
path to text file containing angular search, each line should contain 3 floats of anti-clockwise ZXZ
path to text file containing angular search, each line should contain 3 floats
of anti-clockwise ZXZ
sort_angles: bool, default True
sort the list, using python default angle_list.sort(), sorts first on Z1, then X, then Z2
sort the list, using python default angle_list.sort(), sorts first on Z1,
then X, then Z2
Returns
-------
angle_list: list[tuple[float, float, float]]
a list where each element is a tuple of 3 floats containing an anti-clockwise ZXZ Euler rotation in radians
a list where each element is a tuple of 3 floats containing an anti-clockwise
ZXZ Euler rotation in radians
"""
with open(str(file_name)) as fstream:
lines = fstream.readlines()
Expand All @@ -84,7 +88,8 @@ def load_angle_list(
"Invalid angle file provided, each line should have 3 ZXZ Euler angles!"
)
if sort_angles:
angle_list.sort() # angle list needs to be sorted otherwise symmetry reduction cannot be used!
# angle_list needs to be sorted otherwise symmetry reduction cannot be used!
angle_list.sort()
return angle_list


Expand All @@ -104,7 +109,8 @@ def get_angle_list(
or if a float:
maximum difference (in degrees) for the angle list
sort_angles: bool, default True
sort the list, using python default angle_list.sort(), sorts first on Z1, then X, then Z2
sort the list, using python default angle_list.sort(), sorts first on Z1,
then X, then Z2
symmetry: int, default 1
the returned list will only have Z2 angles [0, (2*pi/symmetry))
log_level: str, default 'DEBUG'
Expand All @@ -113,7 +119,8 @@ def get_angle_list(
Returns
-------
angle_list: list[tuple[float, float, float]]
a list where each element is a tuple of 3 floats containing an anti-clockwise ZXZ Euler rotation in radians
a list where each element is a tuple of 3 floats containing an anti-clockwise
ZXZ Euler rotation in radians
"""
log_level = logging.getLevelNamesMapping()[log_level]
out = None
Expand All @@ -134,7 +141,8 @@ def get_angle_list(
if possible_file_path.exists() and possible_file_path.suffix == ".txt":
logging.log(
log_level,
"Custom file provided for the angular search. Checking if it can be read...",
"Custom file provided for the angular search. "
"Checking if it can be read...",
)
out = load_angle_list(angle, sort_angles)

Expand All @@ -150,9 +158,10 @@ def convert_euler(
degrees_in: bool = True,
degrees_out: bool = True,
) -> tuple[float, float, float]:
"""Convert a single set of Euler angles from one Euler notation to another. This function makes use of
scipy.spatial.transform.Rotation meaning that capital letters (i.e. ZXZ) specify intrinsic rotations (commonly
used in cryo-EM) and small letters (i.e. zxz) specific extrinsic rotations.
"""Convert a single set of Euler angles from one Euler notation to another. This
function makes use of scipy.spatial.transform.Rotation meaning that capital letters
(i.e. ZXZ) specify intrinsic rotations (commonly used in cryo-EM) and small letters
    (i.e. zxz) specify extrinsic rotations.
Parameters
----------
Expand Down
12 changes: 7 additions & 5 deletions src/pytom_tm/correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def std_under_mask(
mean: float,
mask_weight: Optional[float] = None,
) -> Union[float, cpt.NDArray[float]]:
"""Calculate standard deviation of array in the mask region. Uses mean_under_mask() to calculate the mean of
data**2 within the mask.
"""Calculate standard deviation of array in the mask region. Uses mean_under_mask()
to calculate the mean of data**2 within the mask.
data and mask can be cupy or numpy arrays.
Expand Down Expand Up @@ -72,8 +72,9 @@ def normalise(
mask: Optional[Union[npt.NDArray[float], cpt.NDArray[float]]] = None,
mask_weight: Optional[float] = None,
) -> Union[npt.NDArray[float], cpt.NDArray[float]]:
"""Normalise array by subtracting mean and dividing by standard deviation. If a mask is provided the array is
normalised with the mean and std calculated within the mask.
"""Normalise array by subtracting mean and dividing by standard deviation. If a mask
is provided the array is normalised with the mean and std calculated within the
mask.
data and mask can be cupy or numpy arrays.
Expand Down Expand Up @@ -105,7 +106,8 @@ def normalised_cross_correlation(
data2: Union[npt.NDArray[float], cpt.NDArray[float]],
mask: Optional[Union[npt.NDArray[float], cpt.NDArray[float]]] = None,
) -> Union[float, cpt.NDArray[float]]:
"""Calculate normalised cross correlation between two arrays. Optionally only in a masked region.
"""Calculate normalised cross correlation between two arrays. Optionally only in a
masked region.
data1, data2, and mask can be cupy or numpy arrays.
Expand Down
20 changes: 11 additions & 9 deletions src/pytom_tm/entry_points.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,19 +436,20 @@ def extract_candidates(argv=None):
type=pathlib.Path,
required=False,
action=CheckFileExists,
help="Here you can provide a mask for the extraction with dimensions (in pixels) equal to "
"the tomogram. All values in the mask that are smaller or equal to 0 will be "
"removed, all values larger than 0 are considered regions of interest. It can "
"be used to extract annotations only within a specific cellular region."
"If the job was run with a tomogram mask, this file will be used instead of the job mask",
help="Here you can provide a mask for the extraction with dimensions "
"(in pixels) equal to the tomogram. All values in the mask that are smaller or "
"equal to 0 will be removed, all values larger than 0 are considered regions "
"of interest. It can be used to extract annotations only within a specific "
"cellular region. If the job was run with a tomogram mask, this file will be "
"used instead of the job mask",
)
parser.add_argument(
"--ignore_tomogram_mask",
action="store_true",
default=False,
required=False,
help="Flag to ignore the input and TM job tomogram mask. "
"Useful if the scores mrc looks reasonable, but this finds 0 particles to extract",
help="Flag to ignore the input and TM job tomogram mask. Useful if the scores "
"mrc looks reasonable, but this finds 0 particles to extract",
)
parser.add_argument(
"-n",
Expand Down Expand Up @@ -687,8 +688,9 @@ def match_template(argv=None):
type=pathlib.Path,
required=False,
action=CheckFileExists,
help="Here you can provide a mask for matching with dimensions (in pixels) equal to "
"the tomogram. If a subvolume only has values <= 0 for this mask it will be skipped.",
help="Here you can provide a mask for matching with dimensions (in pixels) "
"equal to the tomogram. If a subvolume only has values <= 0 for this mask it "
"will be skipped.",
)

filter_group = parser.add_argument_group("Filter control")
Expand Down
47 changes: 28 additions & 19 deletions src/pytom_tm/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,24 +33,28 @@ def predict_tophat_mask(
create_plot: bool = True,
tophat_connectivity: int = 1,
) -> npt.NDArray[bool]:
"""This function gets as input a score map and returns a peak mask as determined with a tophat transform.
"""This function gets as input a score map and returns a peak mask as determined
with a tophat transform.
It does the following things:
- calculate a tophat transform using scipy.ndimage.white_tophat() and a kernel
ndimage.generate_binary_structure(rank=3, connectivity=1).
- calculate a histogram of the transformed score map and take its log to focus more on small values
- take second derivative of log(histogram) to find the region for fitting a Gaussian, where the second derivative
switches from negative to positive the background noise likely breaks
- use formula from excellent work of Rickgauer et al. (2017, eLife) which uses the error function to find the
likelihood of false positives on the background Gaussian distribution:
N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
- calculate a histogram of the transformed score map and take its log to focus more
on small values
- take second derivative of log(histogram) to find the region for fitting a
Gaussian, where the second derivative switches from negative to positive the
background noise likely breaks
- use formula from excellent work of Rickgauer et al. (2017, eLife) which uses the
error function to find the likelihood of false positives on the background
Gaussian distribution: N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
Parameters
----------
score_volume: npt.NDArray[float]
template matching score map
output_path: Optional[pathlib.Path], default None
if provided (and plotting is available), write a figure of the fit to the output folder
if provided (and plotting is available), write a figure of the fit to the output
folder
n_false_positives: float, default 1.0
number of false positive for error function cutoff calculation
create_plot: bool, default True
Expand Down Expand Up @@ -111,7 +115,8 @@ def log_gauss(x, amp, mu, sigma): # log of gaussian for fitting
0
] # now go for accurate fit to log of gauss
search_space = coeff_log[0] / (coeff_log[2] * np.sqrt(2 * np.pi))
# formula Rickgauer et al. (2017, eLife): N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
# formula Rickgauer et al. (2017, eLife):
# N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
# we need to find theta (i.e. the cut-off)
cut_off = (
erfcinv((2 * n_false_positives) / search_space) * np.sqrt(2) * coeff_log[2]
Expand Down Expand Up @@ -162,10 +167,11 @@ def extract_particles(
cut_off: Optional[float]
manually override the automated score cut-off estimation, value between 0 and 1
n_false_positives: float, default 1.0
tune the number of false positives to be included for automated error function cut-off estimation:
should be a float > 0
tune the number of false positives to be included for automated error function
cut-off estimation: should be a float > 0
tomogram_mask_path: Optional[pathlib.Path]
path to a tomographic binary mask for extraction, will override job.tomogram_mask
path to a tomographic binary mask for extraction, will override
job.tomogram_mask
tophat_filter: bool
attempt to only select sharp peaks with the tophat filter
create_plot: bool, default True
Expand All @@ -183,7 +189,8 @@ def extract_particles(
Returns
-------
dataframe, scores: tuple[pd.DataFrame, list[float, ...]]
dataframe with annotations that can be written out as a STAR file and a list of the selected scores
dataframe with annotations that can be written out as a STAR file and a list of
the selected scores
"""

score_volume = read_mrc(job.output_dir.joinpath(f"{job.tomo_id}_scores.mrc"))
Expand Down Expand Up @@ -218,8 +225,9 @@ def extract_particles(
if tomogram_mask is not None:
if tomogram_mask.shape != job.tomo_shape:
raise ValueError(
"Tomogram mask does not have the same number of pixels as the tomogram.\n"
f"Tomogram mask shape: {tomogram_mask.shape}, tomogram shape: {job.tomo_shape}"
"Tomogram mask does not have the same number of pixels as the "
f"tomogram.\n Tomogram mask shape: {tomogram_mask.shape}, "
f"tomogram shape: {job.tomo_shape}"
)
slices = [
slice(origin, origin + size)
Expand All @@ -240,14 +248,15 @@ def extract_particles(
sigma = job.job_stats["std"]
search_space = job.job_stats["search_space"]
if cut_off is None:
# formula Rickgauer et al. (2017, eLife): N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
# formula Rickgauer et al. (2017, eLife):
# N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
# we need to find theta (i.e. the cut off)
cut_off = erfcinv((2 * n_false_positives) / search_space) * np.sqrt(2) * sigma
logging.info(f"cut off for particle extraction: {cut_off}")
elif cut_off < 0:
logging.warning(
"Provided extraction score cut-off is smaller than 0. Changing to 0 as that is smallest "
"allowed value."
"Provided extraction score cut-off is smaller than 0. Changing to 0 as "
"that is smallest allowed value."
)
cut_off = 0

Expand Down Expand Up @@ -293,7 +302,7 @@ def extract_particles(
rotation[2], # AnglePsi
lcc_max, # LCCmax
cut_off, # Extraction cut off
sigma, # Add sigma of template matching search, LCCmax can be divided by sigma to obtain SNR
sigma, # Add sigma of template matching search, LCCmax/sigma = SNR
pixel_size, # DetectorPixelSize
tomogram_id, # MicrographName
)
Expand Down
Loading

0 comments on commit e28e155

Please sign in to comment.