Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add E501 check #216

Merged
merged 2 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,6 @@ parallel = true

[tool.ruff]
line-length = 88

[tool.ruff.lint]
extend-select = ["E501"]
31 changes: 20 additions & 11 deletions src/pytom_tm/angles.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def angle_to_angle_list(
angle_diff: float
maximum difference (in degrees) for the angle list
sort_angles: bool, default True
sort the list, using python default angle_list.sort(), sorts first on Z1, then X, then Z2
sort the list, using python default angle_list.sort(), sorts first on Z1,
then X, then Z2
log_level: int, default logging.DEBUG
the log level to use when generating logs

Expand Down Expand Up @@ -67,14 +68,17 @@ def load_angle_list(
Parameters
----------
file_name: pathlib.Path
path to text file containing angular search, each line should contain 3 floats of anti-clockwise ZXZ
path to text file containing angular search, each line should contain 3 floats
of anti-clockwise ZXZ
sort_angles: bool, default True
sort the list, using python default angle_list.sort(), sorts first on Z1, then X, then Z2
sort the list, using python default angle_list.sort(), sorts first on Z1,
then X, then Z2

Returns
-------
angle_list: list[tuple[float, float, float]]
a list where each element is a tuple of 3 floats containing an anti-clockwise ZXZ Euler rotation in radians
a list where each element is a tuple of 3 floats containing an anti-clockwise
ZXZ Euler rotation in radians
"""
with open(str(file_name)) as fstream:
lines = fstream.readlines()
Expand All @@ -84,7 +88,8 @@ def load_angle_list(
"Invalid angle file provided, each line should have 3 ZXZ Euler angles!"
)
if sort_angles:
angle_list.sort() # angle list needs to be sorted otherwise symmetry reduction cannot be used!
# angle_list needs to be sorted otherwise symmetry reduction cannot be used!
angle_list.sort()
return angle_list


Expand All @@ -104,7 +109,8 @@ def get_angle_list(
or if a float:
maximum difference (in degrees) for the angle list
sort_angles: bool, default True
sort the list, using python default angle_list.sort(), sorts first on Z1, then X, then Z2
sort the list, using python default angle_list.sort(), sorts first on Z1,
then X, then Z2
symmetry: int, default 1
the returned list will only have Z2 angles [0, (2*pi/symmetry))
log_level: str, default 'DEBUG'
Expand All @@ -113,7 +119,8 @@ def get_angle_list(
Returns
-------
angle_list: list[tuple[float, float, float]]
a list where each element is a tuple of 3 floats containing an anti-clockwise ZXZ Euler rotation in radians
a list where each element is a tuple of 3 floats containing an anti-clockwise
ZXZ Euler rotation in radians
"""
log_level = logging.getLevelNamesMapping()[log_level]
out = None
Expand All @@ -134,7 +141,8 @@ def get_angle_list(
if possible_file_path.exists() and possible_file_path.suffix == ".txt":
logging.log(
log_level,
"Custom file provided for the angular search. Checking if it can be read...",
"Custom file provided for the angular search. "
"Checking if it can be read...",
)
out = load_angle_list(angle, sort_angles)

Expand All @@ -150,9 +158,10 @@ def convert_euler(
degrees_in: bool = True,
degrees_out: bool = True,
) -> tuple[float, float, float]:
"""Convert a single set of Euler angles from one Euler notation to another. This function makes use of
scipy.spatial.transform.Rotation meaning that capital letters (i.e. ZXZ) specify intrinsic rotations (commonly
used in cryo-EM) and small letters (i.e. zxz) specify extrinsic rotations.
"""Convert a single set of Euler angles from one Euler notation to another. This
function makes use of scipy.spatial.transform.Rotation meaning that capital letters
(i.e. ZXZ) specify intrinsic rotations (commonly used in cryo-EM) and small letters
(i.e. zxz) specify extrinsic rotations.

Parameters
----------
Expand Down
12 changes: 7 additions & 5 deletions src/pytom_tm/correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def std_under_mask(
mean: float,
mask_weight: Optional[float] = None,
) -> Union[float, cpt.NDArray[float]]:
"""Calculate standard deviation of array in the mask region. Uses mean_under_mask() to calculate the mean of
data**2 within the mask.
"""Calculate standard deviation of array in the mask region. Uses mean_under_mask()
to calculate the mean of data**2 within the mask.

data and mask can be cupy or numpy arrays.

Expand Down Expand Up @@ -72,8 +72,9 @@ def normalise(
mask: Optional[Union[npt.NDArray[float], cpt.NDArray[float]]] = None,
mask_weight: Optional[float] = None,
) -> Union[npt.NDArray[float], cpt.NDArray[float]]:
"""Normalise array by subtracting mean and dividing by standard deviation. If a mask is provided the array is
normalised with the mean and std calculated within the mask.
"""Normalise array by subtracting mean and dividing by standard deviation. If a mask
is provided the array is normalised with the mean and std calculated within the
mask.

data and mask can be cupy or numpy arrays.

Expand Down Expand Up @@ -105,7 +106,8 @@ def normalised_cross_correlation(
data2: Union[npt.NDArray[float], cpt.NDArray[float]],
mask: Optional[Union[npt.NDArray[float], cpt.NDArray[float]]] = None,
) -> Union[float, cpt.NDArray[float]]:
"""Calculate normalised cross correlation between two arrays. Optionally only in a masked region.
"""Calculate normalised cross correlation between two arrays. Optionally only in a
masked region.

data1, data2, and mask can be cupy or numpy arrays.

Expand Down
20 changes: 11 additions & 9 deletions src/pytom_tm/entry_points.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,19 +436,20 @@ def extract_candidates(argv=None):
type=pathlib.Path,
required=False,
action=CheckFileExists,
help="Here you can provide a mask for the extraction with dimensions (in pixels) equal to "
"the tomogram. All values in the mask that are smaller or equal to 0 will be "
"removed, all values larger than 0 are considered regions of interest. It can "
"be used to extract annotations only within a specific cellular region."
"If the job was run with a tomogram mask, this file will be used instead of the job mask",
help="Here you can provide a mask for the extraction with dimensions "
"(in pixels) equal to the tomogram. All values in the mask that are smaller or "
"equal to 0 will be removed, all values larger than 0 are considered regions "
"of interest. It can be used to extract annotations only within a specific "
"cellular region. If the job was run with a tomogram mask, this file will be "
"used instead of the job mask",
)
parser.add_argument(
"--ignore_tomogram_mask",
action="store_true",
default=False,
required=False,
help="Flag to ignore the input and TM job tomogram mask. "
"Useful if the scores mrc looks reasonable, but this finds 0 particles to extract",
help="Flag to ignore the input and TM job tomogram mask. Useful if the scores "
"mrc looks reasonable, but this finds 0 particles to extract",
)
parser.add_argument(
"-n",
Expand Down Expand Up @@ -687,8 +688,9 @@ def match_template(argv=None):
type=pathlib.Path,
required=False,
action=CheckFileExists,
help="Here you can provide a mask for matching with dimensions (in pixels) equal to "
"the tomogram. If a subvolume only has values <= 0 for this mask it will be skipped.",
help="Here you can provide a mask for matching with dimensions (in pixels) "
"equal to the tomogram. If a subvolume only has values <= 0 for this mask it "
"will be skipped.",
)

filter_group = parser.add_argument_group("Filter control")
Expand Down
47 changes: 28 additions & 19 deletions src/pytom_tm/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,24 +33,28 @@ def predict_tophat_mask(
create_plot: bool = True,
tophat_connectivity: int = 1,
) -> npt.NDArray[bool]:
"""This function gets as input a score map and returns a peak mask as determined with a tophat transform.
"""This function gets as input a score map and returns a peak mask as determined
with a tophat transform.

It does the following things:
- calculate a tophat transform using scipy.ndimage.white_tophat() and a kernel
ndimage.generate_binary_structure(rank=3, connectivity=1).
- calculate a histogram of the transformed score map and take its log to focus more on small values
- take second derivative of log(histogram) to find the region for fitting a Gaussian, where the second derivative
switches from negative to positive the background noise likely breaks
- use formula from excellent work of Rickgauer et al. (2017, eLife) which uses the error function to find the
likelihood of false positives on the background Gaussian distribution:
N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
- calculate a histogram of the transformed score map and take its log to focus more
on small values
- take second derivative of log(histogram) to find the region for fitting a
Gaussian, where the second derivative switches from negative to positive the
background noise likely breaks
- use formula from excellent work of Rickgauer et al. (2017, eLife) which uses the
error function to find the likelihood of false positives on the background
Gaussian distribution: N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2

Parameters
----------
score_volume: npt.NDArray[float]
template matching score map
output_path: Optional[pathlib.Path], default None
if provided (and plotting is available), write a figure of the fit to the output folder
if provided (and plotting is available), write a figure of the fit to the output
folder
n_false_positives: float, default 1.0
number of false positive for error function cutoff calculation
create_plot: bool, default True
Expand Down Expand Up @@ -111,7 +115,8 @@ def log_gauss(x, amp, mu, sigma): # log of gaussian for fitting
0
] # now go for accurate fit to log of gauss
search_space = coeff_log[0] / (coeff_log[2] * np.sqrt(2 * np.pi))
# formula Rickgauer et al. (2017, eLife): N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
# formula Rickgauer et al. (2017, eLife):
# N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
# we need to find theta (i.e. the cut-off)
cut_off = (
erfcinv((2 * n_false_positives) / search_space) * np.sqrt(2) * coeff_log[2]
Expand Down Expand Up @@ -162,10 +167,11 @@ def extract_particles(
cut_off: Optional[float]
manually override the automated score cut-off estimation, value between 0 and 1
n_false_positives: float, default 1.0
tune the number of false positives to be included for automated error function cut-off estimation:
should be a float > 0
tune the number of false positives to be included for automated error function
cut-off estimation: should be a float > 0
tomogram_mask_path: Optional[pathlib.Path]
path to a tomographic binary mask for extraction, will override job.tomogram_mask
path to a tomographic binary mask for extraction, will override
job.tomogram_mask
tophat_filter: bool
attempt to only select sharp peaks with the tophat filter
create_plot: bool, default True
Expand All @@ -183,7 +189,8 @@ def extract_particles(
Returns
-------
dataframe, scores: tuple[pd.DataFrame, list[float, ...]]
dataframe with annotations that can be written out as a STAR file and a list of the selected scores
dataframe with annotations that can be written out as a STAR file and a list of
the selected scores
"""

score_volume = read_mrc(job.output_dir.joinpath(f"{job.tomo_id}_scores.mrc"))
Expand Down Expand Up @@ -218,8 +225,9 @@ def extract_particles(
if tomogram_mask is not None:
if tomogram_mask.shape != job.tomo_shape:
raise ValueError(
"Tomogram mask does not have the same number of pixels as the tomogram.\n"
f"Tomogram mask shape: {tomogram_mask.shape}, tomogram shape: {job.tomo_shape}"
"Tomogram mask does not have the same number of pixels as the "
f"tomogram.\n Tomogram mask shape: {tomogram_mask.shape}, "
f"tomogram shape: {job.tomo_shape}"
)
slices = [
slice(origin, origin + size)
Expand All @@ -240,14 +248,15 @@ def extract_particles(
sigma = job.job_stats["std"]
search_space = job.job_stats["search_space"]
if cut_off is None:
# formula Rickgauer et al. (2017, eLife): N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
# formula Rickgauer et al. (2017, eLife):
# N**(-1) = erfc( theta / ( sigma * sqrt(2) ) ) / 2
# we need to find theta (i.e. the cut off)
cut_off = erfcinv((2 * n_false_positives) / search_space) * np.sqrt(2) * sigma
logging.info(f"cut off for particle extraction: {cut_off}")
elif cut_off < 0:
logging.warning(
"Provided extraction score cut-off is smaller than 0. Changing to 0 as that is smallest "
"allowed value."
"Provided extraction score cut-off is smaller than 0. Changing to 0 as "
"that is smallest allowed value."
)
cut_off = 0

Expand Down Expand Up @@ -293,7 +302,7 @@ def extract_particles(
rotation[2], # AnglePsi
lcc_max, # LCCmax
cut_off, # Extraction cut off
sigma, # Add sigma of template matching search, LCCmax can be divided by sigma to obtain SNR
sigma, # Add sigma of template matching search, LCCmax/sigma = SNR
pixel_size, # DetectorPixelSize
tomogram_id, # MicrographName
)
Expand Down
Loading