diff --git a/cellfinder/core/classify/classify.py b/cellfinder/core/classify/classify.py index 8ae1f5ff..37fc06cc 100644 --- a/cellfinder/core/classify/classify.py +++ b/cellfinder/core/classify/classify.py @@ -19,8 +19,8 @@ def main( signal_array: types.array, background_array: types.array, n_free_cpus: int, - voxel_sizes: Tuple[int, int, int], - network_voxel_sizes: Tuple[int, int, int], + voxel_sizes: Tuple[float, float, float], + network_voxel_sizes: Tuple[float, float, float], batch_size: int, cube_height: int, cube_width: int, @@ -35,6 +35,45 @@ def main( """ Parameters ---------- + + points: List of Cell objects + The potential cells to classify. + signal_array : numpy.ndarray or dask array + 3D array representing the signal data in z, y, x order. + background_array : numpy.ndarray or dask array + 3D array representing the signal data in z, y, x order. + n_free_cpus : int + How many CPU cores to leave free. + voxel_sizes : 3-tuple of floats + Size of your voxels in the z, y, and x dimensions. + network_voxel_sizes : 3-tuple of floats + Size of the pre-trained network's voxels in the z, y, and x dimensions. + batch_size : int + How many potential cells to classify at one time. The GPU/CPU + memory must be able to contain at once this many data cubes for + the models. For performance-critical applications, tune to maximize + memory usage without running out. Check your GPU/CPU memory to verify + it's not full. + cube_height: int + The height of the data cube centered on the cell used for + classification. Defaults to `50`. + cube_width: int + The width of the data cube centered on the cell used for + classification. Defaults to `50`. + cube_depth: int + The depth of the data cube centered on the cell used for + classification. Defaults to `20`. + trained_model : Optional[Path] + Trained model file path (home directory (default) -> pretrained + weights). + model_weights : Optional[Path] + Model weights path (home directory (default) -> pretrained + weights). + network_depth: str + The network depth to use during classification. Defaults to `"50"`. + max_workers: int + The number of sub-processes to use for data loading / processing. + Defaults to 8. callback : Callable[int], optional A callback function that is called during classification. Called with the batch number once that batch has been classified. diff --git a/cellfinder/core/detect/detect.py b/cellfinder/core/detect/detect.py index e7ba8e95..076b3ae5 100644 --- a/cellfinder/core/detect/detect.py +++ b/cellfinder/core/detect/detect.py @@ -49,10 +49,10 @@ def main( plane_directory: Optional[str] = None, batch_size: Optional[int] = None, torch_device: Optional[str] = None, - split_ball_xy_size: int = 3, - split_ball_z_size: int = 3, + split_ball_xy_size: float = 6, + split_ball_z_size: float = 15, split_ball_overlap_fraction: float = 0.8, - split_soma_diameter: int = 7, + n_splitting_iter: int = 10, *, callback: Optional[Callable[[int], None]] = None, ) -> List[Cell]: @@ -61,69 +61,74 @@ def main( Parameters ---------- - signal_array : numpy.ndarray - 3D array representing the signal data. - + signal_array : numpy.ndarray or dask array + 3D array representing the signal data in z, y, x order. start_plane : int - Index of the starting plane for detection. - + First plane index to process (inclusive, to process a subset of the + data). end_plane : int - Index of the ending plane for detection. - - voxel_sizes : Tuple[float, float, float] - Tuple of voxel sizes in each dimension (z, y, x). - + Last plane index to process (exclusive, to process a subset of the + data). + voxel_sizes : 3-tuple of floats + Size of your voxels in the z, y, and x dimensions (microns). soma_diameter : float - Diameter of the soma in physical units. - + The expected in-plane (xy) soma diameter (microns). max_cluster_size : float - Maximum size of a cluster in physical units. - + Largest detected cell cluster (in cubic um) where splitting + should be attempted. Clusters above this size will be labeled + as artifacts. ball_xy_size : float - Size of the XY ball used for filtering in physical units. - + 3d filter's in-plane (xy) filter ball size (microns). ball_z_size : float - Size of the Z ball used for filtering in physical units. - + 3d filter's axial (z) filter ball size (microns). ball_overlap_fraction : float - Fraction of overlap allowed between balls. - + 3d filter's fraction of the ball filter needed to be filled by + foreground voxels, centered on a voxel, to retain the voxel. soma_spread_factor : float - Spread factor for soma size. - + Cell spread factor for determining the largest cell volume before + splitting up cell clusters. Structures with spherical volume of + diameter `soma_spread_factor * soma_diameter` or less will not be + split. n_free_cpus : int - Number of free CPU cores available for parallel processing. - + How many CPU cores to leave free. log_sigma_size : float - Size of the sigma for the log filter. - + Gaussian filter width (as a fraction of soma diameter) used during + 2d in-plane Laplacian of Gaussian filtering. n_sds_above_mean_thresh : float - Number of standard deviations above the mean threshold. - + Intensity threshold (the number of standard deviations above + the mean) of the filtered 2d planes used to mark pixels as + foreground or background. outlier_keep : bool, optional Whether to keep outliers during detection. Defaults to False. - artifact_keep : bool, optional Whether to keep artifacts during detection. Defaults to False. - save_planes : bool, optional Whether to save the planes during detection. Defaults to False. - plane_directory : str, optional Directory path to save the planes. Defaults to None. - - batch_size : int, optional - The number of planes to process in each batch. Defaults to 1. - For CPU, there's no benefit for a larger batch size. Only a memory - usage increase. For CUDA, the larger the batch size the better the - performance. Until it fills up the GPU memory - after which it - becomes slower. - + batch_size: int + The number of planes of the original data volume to process at + once. The GPU/CPU memory must be able to contain this many planes + for all the filters. For performance-critical applications, tune to + maximize memory usage without running out. Check your GPU/CPU memory + to verify it's not full. torch_device : str, optional The device on which to run the computation. If not specified (None), "cuda" will be used if a GPU is available, otherwise "cpu". You can also manually specify "cuda" or "cpu". - + split_ball_xy_size: float + Similar to `ball_xy_size`, except the value to use for the 3d + filter during cluster splitting. + split_ball_z_size: float + Similar to `ball_z_size`, except the value to use for the 3d filter + during cluster splitting. + split_ball_overlap_fraction: float + Similar to `ball_overlap_fraction`, except the value to use for the + 3d filter during cluster splitting. + n_splitting_iter: int + The number of iterations to run the 3d filtering on a cluster. Each + iteration reduces the cluster size by the voxels not retained in + the previous iteration. callback : Callable[int], optional A callback function that is called every time a plane has finished being processed. Called with the plane number that has finished. @@ -131,7 +136,7 @@ def main( Returns ------- List[Cell] - List of detected cells. + List of detected cell candidates. """ start_time = datetime.now() if torch_device is None: @@ -187,19 +192,15 @@ def main( plane_directory=plane_directory, batch_size=batch_size, torch_device=torch_device, + n_splitting_iter=n_splitting_iter, ) # replicate the settings specific to splitting, before we access anything # of the original settings, causing cached properties kwargs = dataclasses.asdict(settings) - kwargs["ball_z_size_um"] = split_ball_z_size * settings.z_pixel_size - kwargs["ball_xy_size_um"] = ( - split_ball_xy_size * settings.in_plane_pixel_size - ) + kwargs["ball_z_size_um"] = split_ball_z_size + kwargs["ball_xy_size_um"] = split_ball_xy_size kwargs["ball_overlap_fraction"] = split_ball_overlap_fraction - kwargs["soma_diameter_um"] = ( - split_soma_diameter * settings.in_plane_pixel_size - ) # always run on cpu because copying to gpu overhead is likely slower than # any benefit for detection on smallish volumes kwargs["torch_device"] = "cpu" diff --git a/cellfinder/core/detect/filters/plane/plane_filter.py b/cellfinder/core/detect/filters/plane/plane_filter.py index 82434d16..20fabc27 100644 --- a/cellfinder/core/detect/filters/plane/plane_filter.py +++ b/cellfinder/core/detect/filters/plane/plane_filter.py @@ -39,7 +39,7 @@ class TileProcessor: Number of standard deviations above the mean threshold to use for determining whether a voxel is bright. log_sigma_size : float - Size of the sigma for the gaussian filter. + Size of the Gaussian sigma for the Laplacian of Gaussian filtering. soma_diameter : float Diameter of the soma in voxels. torch_device: str diff --git a/cellfinder/core/detect/filters/setup_filters.py b/cellfinder/core/detect/filters/setup_filters.py index 15db72ff..1cf13ee8 100644 --- a/cellfinder/core/detect/filters/setup_filters.py +++ b/cellfinder/core/detect/filters/setup_filters.py @@ -80,23 +80,28 @@ class DetectionSettings: voxel_sizes: Tuple[float, float, float] = (1.0, 1.0, 1.0) """ - Tuple of voxel sizes in each dimension (z, y, x). We use this to convert - from `um` to pixel sizes. + Tuple of voxel sizes (microns) in each dimension (z, y, x). We use this + to convert from `um` to pixel sizes. """ soma_spread_factor: float = 1.4 - """Spread factor for soma size - how much it may stretch in the images.""" + """ + Cell spread factor for determining the largest cell volume before + splitting up cell clusters. Structures with spherical volume of + diameter `soma_spread_factor * soma_diameter` or less will not be + split. + """ soma_diameter_um: float = 16 """ - Diameter of a typical soma in um. Bright areas larger than this will be - split. + Diameter of a typical soma in-plane (xy) in microns. """ max_cluster_size_um3: float = 100_000 """ - Maximum size of a cluster (bright area) that will be processed, in um. - Larger bright areas are skipped as artifacts. + Largest detected cell cluster (in cubic um) where splitting + should be attempted. Clusters above this size will be labeled + as artifacts. """ ball_xy_size_um: float = 6 @@ -116,17 +121,21 @@ class DetectionSettings: ball_overlap_fraction: float = 0.6 """ - Fraction of overlap between a bright area and the spherical kernel, - for the area to be considered a single ball. + Fraction of the 3d ball filter needed to be filled by foreground voxels, + centered on a voxel, to retain the voxel. """ log_sigma_size: float = 0.2 - """Size of the sigma for the 2d Gaussian filter.""" + """ + Gaussian filter width (as a fraction of soma diameter) used during + 2d in-plane Laplacian of Gaussian filtering. + """ n_sds_above_mean_thresh: float = 10 """ - Number of standard deviations above the mean intensity to use for a - threshold to define bright areas. Below it, it's not considered bright. + Intensity threshold (the number of standard deviations above + the mean) of the filtered 2d planes used to mark pixels as + foreground or background. """ outlier_keep: bool = False @@ -191,6 +200,8 @@ class DetectionSettings: """ During the structure splitting phase we iteratively shrink the bright areas and re-filter with the 3d filter. This is the number of iterations to do. + Each iteration reduces the cluster size by the voxels not retained in the + previous iteration. This is a maximum because we also stop if there are no more structures left during any iteration. diff --git a/cellfinder/core/detect/filters/volume/ball_filter.py b/cellfinder/core/detect/filters/volume/ball_filter.py index 8bc9729b..f36d2aef 100644 --- a/cellfinder/core/detect/filters/volume/ball_filter.py +++ b/cellfinder/core/detect/filters/volume/ball_filter.py @@ -78,11 +78,11 @@ class BallFilter: ---------- plane_height, plane_width : int Height/width of the planes. - ball_xy_size : int - Diameter of the spherical kernel in the x/y dimensions. - ball_z_size : int - Diameter of the spherical kernel in the z dimension. - Equal to the number of planes stacked to filter + ball_xy_size : float + Diameter of the spherical kernel (in microns) in the x/y dimensions. + ball_z_size : float + Diameter of the spherical kernel in the z dimension in microns. + Determines the number of planes stacked to filter the central plane of the stack. overlap_fraction : float The fraction of pixels within the spherical kernel that diff --git a/cellfinder/core/main.py b/cellfinder/core/main.py index 5f7e39d1..7ff3418b 100644 --- a/cellfinder/core/main.py +++ b/cellfinder/core/main.py @@ -1,34 +1,33 @@ import os from typing import Callable, List, Optional, Tuple -import numpy as np from brainglobe_utils.cells.cells import Cell -from cellfinder.core import logger +from cellfinder.core import logger, types from cellfinder.core.download.download import model_type from cellfinder.core.train.train_yaml import depth_type def main( - signal_array: np.ndarray, - background_array: np.ndarray, - voxel_sizes: Tuple[int, int, int], + signal_array: types.array, + background_array: types.array, + voxel_sizes: Tuple[float, float, float], start_plane: int = 0, end_plane: int = -1, trained_model: Optional[os.PathLike] = None, model_weights: Optional[os.PathLike] = None, model: model_type = "resnet50_tv", - batch_size: int = 64, + classification_batch_size: int = 64, n_free_cpus: int = 2, - network_voxel_sizes: Tuple[int, int, int] = (5, 1, 1), - soma_diameter: int = 16, - ball_xy_size: int = 6, - ball_z_size: int = 15, + network_voxel_sizes: Tuple[float, float, float] = (5, 1, 1), + soma_diameter: float = 16, + ball_xy_size: float = 6, + ball_z_size: float = 15, ball_overlap_fraction: float = 0.6, log_sigma_size: float = 0.2, n_sds_above_mean_thresh: float = 10, soma_spread_factor: float = 1.4, - max_cluster_size: int = 100000, + max_cluster_size: float = 100000, cube_width: int = 50, cube_height: int = 50, cube_depth: int = 20, @@ -36,8 +35,12 @@ def main( skip_detection: bool = False, skip_classification: bool = False, detected_cells: List[Cell] = None, - classification_batch_size: Optional[int] = None, + detection_batch_size: Optional[int] = None, torch_device: Optional[str] = None, + split_ball_xy_size: float = 6, + split_ball_z_size: float = 15, + split_ball_overlap_fraction: float = 0.8, + n_splitting_iter: int = 10, *, detect_callback: Optional[Callable[[int], None]] = None, classify_callback: Optional[Callable[[int], None]] = None, @@ -46,6 +49,105 @@ def main( """ Parameters ---------- + signal_array : numpy.ndarray or dask array + 3D array representing the signal data in z, y, x order. + background_array : numpy.ndarray or dask array + 3D array representing the signal data in z, y, x order. + voxel_sizes : 3-tuple of floats + Size of your voxels in the z, y, and x dimensions (microns). + start_plane : int + First plane index to process (inclusive, to process a subset of the + data). + end_plane : int + Last plane index to process (exclusive, to process a subset of the + data). + trained_model : Optional[Path] + Trained model file path (home directory (default) -> pretrained + weights). + model_weights : Optional[Path] + Model weights path (home directory (default) -> pretrained + weights). + model: str + Type of model to use. Defaults to `"resnet50_tv"`. + classification_batch_size : int + How many potential cells to classify at one time. The GPU/CPU + memory must be able to contain at once this many data cubes for + the models. For performance-critical applications, tune to maximize + memory usage without running out. Check your GPU/CPU memory to verify + it's not full. + n_free_cpus : int + How many CPU cores to leave free. + network_voxel_sizes : 3-tuple of floats + Size of the pre-trained network's voxels (microns) in the z, y, and x + dimensions. + soma_diameter : float + The expected in-plane (xy) soma diameter (microns). + ball_xy_size : float + 3d filter's in-plane (xy) filter ball size (microns). + ball_z_size : float + 3d filter's axial (z) filter ball size (microns). + ball_overlap_fraction : float + 3d filter's fraction of the ball filter needed to be filled by + foreground voxels, centered on a voxel, to retain the voxel. + log_sigma_size : float + Gaussian filter width (as a fraction of soma diameter) used during + 2d in-plane Laplacian of Gaussian filtering. + n_sds_above_mean_thresh : float + Intensity threshold (the number of standard deviations above + the mean) of the filtered 2d planes used to mark pixels as + foreground or background. + soma_spread_factor : float + Cell spread factor for determining the largest cell volume before + splitting up cell clusters. Structures with spherical volume of + diameter `soma_spread_factor * soma_diameter` or less will not be + split. + max_cluster_size : float + Largest detected cell cluster (in cubic um) where splitting + should be attempted. Clusters above this size will be labeled + as artifacts. + cube_width: int + The width of the data cube centered on the cell used for + classification. Defaults to `50`. + cube_height: int + The height of the data cube centered on the cell used for + classification. Defaults to `50`. + cube_depth: int + The depth of the data cube centered on the cell used for + classification. Defaults to `20`. + network_depth: str + The network depth to use during classification. Defaults to `"50"`. + skip_detection : bool + If selected, the detection step is skipped and instead we get the + detected cells from the cell layer below (from a previous + detection run or import). + skip_classification : bool + If selected, the classification step is skipped and all cells from + the detection stage are added. + detected_cells: Optional list of Cell objects. + If specified, the cells to use during classification. + detection_batch_size: int + The number of planes of the original data volume to process at + once. The GPU/CPU memory must be able to contain this many planes + for all the filters. For performance-critical applications, tune + to maximize memory usage without running out. Check your GPU/CPU + memory to verify it's not full. + torch_device : str, optional + The device on which to run the computation. If not specified (None), + "cuda" will be used if a GPU is available, otherwise "cpu". + You can also manually specify "cuda" or "cpu". + split_ball_xy_size: float + Similar to `ball_xy_size`, except the value to use for the 3d + filter during cluster splitting. + split_ball_z_size: float + Similar to `ball_z_size`, except the value to use for the 3d filter + during cluster splitting. + split_ball_overlap_fraction: float + Similar to `ball_overlap_fraction`, except the value to use for the + 3d filter during cluster splitting. + n_splitting_iter: int + The number of iterations to run the 3d filtering on a cluster. Each + iteration reduces the cluster size by the voxels not retained in + the previous iteration. detect_callback : Callable[int], optional Called every time a plane has finished being processed during the detection stage. Called with the plane number that has finished. @@ -76,9 +178,13 @@ def main( n_free_cpus, log_sigma_size, n_sds_above_mean_thresh, - batch_size=classification_batch_size, + batch_size=detection_batch_size, torch_device=torch_device, callback=detect_callback, + split_ball_z_size=split_ball_z_size, + split_ball_xy_size=split_ball_xy_size, + split_ball_overlap_fraction=split_ball_overlap_fraction, + n_splitting_iter=n_splitting_iter, ) if detect_finished_callback is not None: @@ -101,7 +207,7 @@ def main( n_free_cpus, voxel_sizes, network_voxel_sizes, - batch_size, + classification_batch_size, cube_height, cube_width, cube_depth, diff --git a/cellfinder/napari/detect/detect.py b/cellfinder/napari/detect/detect.py index fcb01ce5..2503d412 100644 --- a/cellfinder/napari/detect/detect.py +++ b/cellfinder/napari/detect/detect.py @@ -244,18 +244,19 @@ def widget( detection_options, skip_detection: bool, soma_diameter: float, + log_sigma_size: float, + n_sds_above_mean_thresh: float, ball_xy_size: float, ball_z_size: float, ball_overlap_fraction: float, - log_sigma_size: float, - n_sds_above_mean_thresh: int, + detection_batch_size: int, soma_spread_factor: float, - max_cluster_size: int, + max_cluster_size: float, classification_options, skip_classification: bool, use_pre_trained_weights: bool, trained_model: Optional[Path], - batch_size: int, + classification_batch_size: int, misc_options, start_plane: int, end_plane: int, @@ -271,43 +272,60 @@ def widget( Parameters ---------- voxel_size_z : float - Size of your voxels in the axial dimension + Size of your voxels in the axial dimension (microns) voxel_size_y : float - Size of your voxels in the y direction (top to bottom) + Size of your voxels in the y direction (top to bottom) (microns) voxel_size_x : float - Size of your voxels in the x direction (left to right) + Size of your voxels in the x direction (left to right) (microns) skip_detection : bool If selected, the detection step is skipped and instead we get the detected cells from the cell layer below (from a previous detection run or import) soma_diameter : float - The expected in-plane soma diameter (microns) + The expected in-plane (xy) soma diameter (microns) + log_sigma_size : float + Gaussian filter width (as a fraction of soma diameter) used during + 2d in-plane Laplacian of Gaussian filtering + n_sds_above_mean_thresh : float + Intensity threshold (the number of standard deviations above + the mean) of the filtered 2d planes used to mark pixels as + foreground or background ball_xy_size : float - Elliptical morphological in-plane filter size (microns) + 3d filter's in-plane (xy) filter ball size (microns) ball_z_size : float - Elliptical morphological axial filter size (microns) + 3d filter's axial (z) filter ball size (microns) ball_overlap_fraction : float - Fraction of the morphological filter needed to be filled - to retain a voxel - log_sigma_size : float - Laplacian of Gaussian filter width (as a fraction of soma diameter) - n_sds_above_mean_thresh : int - Cell intensity threshold (as a multiple of noise above the mean) + 3d filter's fraction of the ball filter needed to be filled by + foreground voxels, centered on a voxel, to retain the voxel + detection_batch_size: int + The number of planes of the original data volume to process at + once. The GPU/CPU memory must be able to contain this many planes + for all the filters. For performance-critical applications, tune + to maximize memory usage without + running out. Check your GPU/CPU memory to verify it's not full soma_spread_factor : float - Cell spread factor (for splitting up cell clusters) - max_cluster_size : int - Largest putative cell cluster (in cubic um) where splitting - should be attempted - use_pre_trained_weights : bool - Select to use pre-trained model weights - batch_size : int - How many points to classify at one time + Cell spread factor for determining the largest cell volume before + splitting up cell clusters. Structures with spherical volume of + diameter `soma_spread_factor * soma_diameter` or less will not be + split + max_cluster_size : float + Largest detected cell cluster (in cubic um) where splitting + should be attempted. Clusters above this size will be labeled + as artifacts skip_classification : bool If selected, the classification step is skipped and all cells from the detection stage are added + use_pre_trained_weights : bool + Select to use pre-trained model weights trained_model : Optional[Path] Trained model file path (home directory (default) -> pretrained weights) + classification_batch_size : int + How many potential cells to classify at one time. The GPU/CPU + memory must be able to contain at once this many data cubes for + the models. For performance-critical applications, tune to + maximize memory usage without running + out. Check your GPU/CPU memory to verify it's not full start_plane : int First plane to process (to process a subset of the data) end_plane : int @@ -373,6 +391,7 @@ def widget( n_sds_above_mean_thresh, soma_spread_factor, max_cluster_size, + detection_batch_size, ) if use_pre_trained_weights: @@ -381,7 +400,7 @@ def widget( skip_classification, use_pre_trained_weights, trained_model, - batch_size, + classification_batch_size, ) if analyse_local: diff --git a/cellfinder/napari/detect/detect_containers.py b/cellfinder/napari/detect/detect_containers.py index 5d8cdb7a..5a130853 100644 --- a/cellfinder/napari/detect/detect_containers.py +++ b/cellfinder/napari/detect/detect_containers.py @@ -68,9 +68,10 @@ class DetectionInputs(InputContainer): ball_z_size: float = 15 ball_overlap_fraction: float = 0.6 log_sigma_size: float = 0.2 - n_sds_above_mean_thresh: int = 10 + n_sds_above_mean_thresh: float = 10 soma_spread_factor: float = 1.4 - max_cluster_size: int = 100000 + max_cluster_size: float = 100000 + detection_batch_size: int = 1 def as_core_arguments(self) -> dict: return super().as_core_arguments() @@ -97,14 +98,17 @@ def widget_representation(cls) -> dict: "n_sds_above_mean_thresh", custom_label="Threshold" ), soma_spread_factor=cls._custom_widget( - "soma_spread_factor", custom_label="Cell spread" + "soma_spread_factor", custom_label="Split cell spread" ), max_cluster_size=cls._custom_widget( "max_cluster_size", - custom_label="Max cluster", + custom_label="Split max cluster", min=0, max=10000000, ), + detection_batch_size=cls._custom_widget( + "detection_batch_size", custom_label="Batch size (detection)" + ), ) @@ -115,7 +119,7 @@ class ClassificationInputs(InputContainer): skip_classification: bool = False use_pre_trained_weights: bool = True trained_model: Optional[Path] = Path.home() - batch_size: int = 64 + classification_batch_size: int = 64 def as_core_arguments(self) -> dict: args = super().as_core_arguments() @@ -133,7 +137,10 @@ def widget_representation(cls) -> dict: skip_classification=dict( value=cls.defaults()["skip_classification"] ), - batch_size=dict(value=cls.defaults()["batch_size"]), + classification_batch_size=dict( + value=cls.defaults()["classification_batch_size"], + label="Batch size (classification)", + ), )