Commit
docs: update docstrings
raylim committed Nov 1, 2023
1 parent 44a6f50 commit 1150031
Showing 9 changed files with 348 additions and 56 deletions.
27 changes: 24 additions & 3 deletions src/luna/pathology/cli/dsa_upload.py
@@ -117,9 +117,27 @@ def upload_annotation_to_dsa(
insecure: bool = False,
storage_options: dict = {},
):
uuids = []
"""Upload annotation to DSA
Upload a JSON annotation file as a new annotation to the image in the DSA collection.
Args:
dsa_endpoint_url (string): DSA API endpoint e.g. http://localhost:8080/api/v1
slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
annotation_column (string): annotation column of slide_manifest containing the dsa url
collection_name (string): name of the collection in DSA
image_filename (string): name of the image file in DSA e.g. 123.svs. If not specified, infer from annotation_file_urlpath
username (string): DSA username (defaults to environment variable DSA_USERNAME)
password (string): DSA password (defaults to environment variable DSA_PASSWORD)
force (bool): upload even if annotation with same name exists for the slide
insecure (bool): insecure ssl
storage_options (dict): options to pass to reading functions
Returns:
DataFrame[SlideSchema]: slide manifest
"""
for slide in slide_manifest.itertuples(name="Slide"):
uuids += _upload_annotation_to_dsa(
uuids = _upload_annotation_to_dsa(
dsa_endpoint_url,
slide[annotation_column],
collection_name,
@@ -130,7 +148,10 @@ def upload_annotation_to_dsa(
insecure,
storage_options,
)
return uuids
slide_manifest.at[
slide.Index, annotation_column.replace("url", "uuid")
] = uuids[0]
return slide_manifest


def _upload_annotation_to_dsa(
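A minimal usage sketch of the updated upload_annotation_to_dsa API, assuming a slide manifest produced upstream by slide_etl; the annotation column and collection names are hypothetical, and the endpoint is the docstring's example.

from luna.pathology.cli.dsa_upload import upload_annotation_to_dsa

slide_manifest = ...  # DataFrame[SlideSchema] produced upstream by slide_etl
slide_manifest = upload_annotation_to_dsa(
    dsa_endpoint_url="http://localhost:8080/api/v1",   # docstring example endpoint
    slide_manifest=slide_manifest,
    annotation_column="regional_annotation_url",       # hypothetical column of DSA annotation JSON URLs
    collection_name="demo-collection",                  # hypothetical DSA collection
)
# Per the updated code, the matching "regional_annotation_uuid" column now holds
# the UUID of the uploaded annotation for each slide; username and password
# default to the DSA_USERNAME / DSA_PASSWORD environment variables.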
182 changes: 161 additions & 21 deletions src/luna/pathology/cli/dsa_viz.py

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions src/luna/pathology/cli/extract_shape_features.py
@@ -43,6 +43,10 @@ def cli(
slide_mask_urlpath (str): URL/path to slide mask (*.tif)
label_cols (List[str]): list of labels that correspond to those in slide_mask_urlpath
output_urlpath (str): output URL/path prefix
include_smaller_regions (bool): include the smaller regions (not just the largest)
storage_options (dict): storage options to pass to read functions
output_storage_options (dict): storage options to pass to write functions
local_config (str): local config YAML file
Returns:
dict: output .tif path and the number of shapes for which features were generated
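A hedged sketch of calling the extract_shape_features cli entrypoint documented above. Only the parameters visible in this hunk are shown; the hunk starts mid-docstring, so the full signature may require further arguments, and all paths and label names are hypothetical.

from luna.pathology.cli.extract_shape_features import cli as extract_shape_features

result = extract_shape_features(
    slide_mask_urlpath="s3://bucket/masks/123.tif",   # hypothetical *.tif slide mask
    label_cols=["tumor", "stroma"],                   # labels matching the mask values
    output_urlpath="s3://bucket/shape_features/123",  # hypothetical output prefix
    include_smaller_regions=False,                    # keep only the largest region per label
)
# Per the docstring, the return value is a dict with the output .tif path and
# the number of shapes for which features were generated.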
31 changes: 28 additions & 3 deletions src/luna/pathology/cli/extract_tile_shape_features.py
@@ -135,7 +135,6 @@ def cli(

def extract_tile_shape_features(
slide_manifest: DataFrame[SlideSchema],
slide_urlpath: str,
output_urlpath: str,
resize_factor: int = 16,
detection_probability_threshold: Optional[float] = None,
@@ -161,6 +160,27 @@
"solidity",
],
):
"""Extracts shape and spatial features (HIF features) from a slide mask.
Args:
slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
output_urlpath (str): output URL/path
resize_factor (int): factor to downsample slide image
detection_probability_threshold (Optional[float]): detection probability threshold
statistical_descriptors (str): statistical descriptors to calculate. One of All, Quantiles, Stats, or Density
cellular_features (str): cellular features to include. One of All, Nucleus, Cell, Cytoplasm, or Membrane
property_type (str): properties to include. One of All, Geometric, or Stain
include_smaller_regions (bool): include smaller regions in output
label_cols (List[str]): list of score columns to use for the classification. Tile is classified as the column with the max score
storage_options (dict): storage options to pass to reading functions
output_storage_options (dict): storage options to pass to writing functions
local_config (str): local config yaml file
objects_column (str): slide manifest column name with StarDist GeoJSON URLs
properties (List[str]): properties to extract
Returns:
DataFrame[SlideSchema]: slide manifest
"""
client = get_or_create_dask_client()

futures = []
Expand Down Expand Up @@ -225,16 +245,21 @@ def __extract_tile_shape_features(
"""Extracts shape and spatial features (HIF features) from a slide mask.
Args:
objects (Union[str, gpd.GeoDataFrame]): URL/path to slide (tiffslide supported formats)
tiles (Union[str, pd.DataFrame]): URL/path to object file (geopandas supported formats)
objects_urlpath (str): URL/path to object file (geopandas supported formats)
tiles_urlpath (str): URL/path to tiles manifest (parquet)
slide_urlpath (str): URL/path to slide (tiffslide supported formats)
output_urlpath (str): output URL/path
resize_factor (int): factor to downsample slide image
detection_probability_threshold (Optional[float]): detection
probability threshold
slide_id (str): Slide ID to add to dataframes
statistical_descriptors (StatisticalDescriptors): statistical descriptors to calculate
cellular_features (CellularFeatures): cellular features to include
property_type (PropertyType): properties to include
include_smaller_regions (bool): include smaller regions
label_cols (List[str]): list of score columns to use for the classification. Tile is classified as the column with the max score
storage_options (dict): storage options to pass to reading functions
output_storage_options (dict): storage options to pass to writing functions
properties (List[str]): list of whole slide image properties to
extract. Needs to be parquet compatible (numeric).
Returns:
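A minimal sketch of the manifest-level extract_tile_shape_features call documented above, assuming a slide manifest from slide_etl whose rows already reference tiles and StarDist GeoJSON objects; the output path is hypothetical and the remaining keyword arguments keep the defaults visible in the signature.

from luna.pathology.cli.extract_tile_shape_features import extract_tile_shape_features

slide_manifest = ...  # DataFrame[SlideSchema] from slide_etl, tiles and objects populated
slide_manifest = extract_tile_shape_features(
    slide_manifest,
    output_urlpath="s3://bucket/tile_shape_features",  # hypothetical output prefix
)
# Remaining arguments keep the defaults shown in the signature (resize_factor=16,
# the default objects_column, etc.); the documented return value is the slide manifest.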
45 changes: 35 additions & 10 deletions src/luna/pathology/cli/infer_tile_labels.py
@@ -16,6 +16,7 @@
from tqdm import tqdm

from luna.common.dask import configure_dask_client, get_or_create_dask_client
from luna.common.models import SlideSchema
from luna.common.utils import get_config, make_temp_directory, save_metadata, timed
from luna.pathology.analysis.ml import (
HDF5Dataset,
@@ -54,8 +55,9 @@ def cli(
Args:
slide_urlpath (str): url/path to slide image (virtual slide formats compatible with TiffSlide, .svs, .tif, .scn, ...)
tiles_urlpath (str): path to a slide-tile manifest file (.tiles.csv)
tile_size (int): size of tiles to use (at the requested magnification)
tile_size (Optional[int]): size of tiles to use (at the requested magnification)
filter_query (str): pandas query by which to filter tiles based on their various tissue detection scores
requested_magnification (Optional[int]): Magnification scale at which to perform computation
torch_model_repo_or_dir (str): repository root name like (namespace/repo) at github.com to serve torch.hub models. Or path to a local model (e.g. msk-mind/luna-ml)
model_name (str): torch hub model name (a nn.Module at the repo repo_name)
num_cores (int): Number of cores to use for CPU parallelization
Expand All @@ -72,6 +74,7 @@ def cli(
dict: metadata
"""
config = get_config(vars())
configure_dask_client(**config["dask_options"])

if not config["slide_urlpath"] and not config["tiles_urlpath"]:
raise fire.core.FireError("Specify either tiles_urlpath or slide_urlpath")
@@ -130,7 +133,7 @@ def cli(


def infer_tile_labels(
slide_manifest: DataFrame,
slide_manifest: DataFrame[SlideSchema],
tile_size: Optional[int] = None,
filter_query: str = "",
thumbnail_magnification: Optional[int] = None,
@@ -142,13 +145,35 @@ def infer_tile_labels(
output_urlpath: str = ".",
kwargs: dict = {},
use_gpu: bool = False,
dask_options: dict = {},
insecure: bool = False,
storage_options: dict = {},
output_storage_options: dict = {},
) -> pd.DataFrame:
) -> DataFrame[SlideSchema]:
"""Run inference using a model and transform definition (either local or using torch.hub)
Decorates existing tiles manifests with additional columns corresponding to class prediction/scores from the model
Args:
slide_manifest (DataFrame): slide manifest from slide_etl
tile_size (Optional[int]): size of tiles to use (at the requested magnification)
filter_query (str): pandas query by which to filter tiles based on their various tissue detection scores
thumbnail_magnification (Optional[int]): Magnification scale at which to detect tissue
tile_magnification (Optional[int]): Magnification scale at which to generate tiles
torch_model_repo_or_dir (str): repository root name like (namespace/repo) at github.com to serve torch.hub models. Or path to a local model (e.g. msk-mind/luna-ml)
model_name (str): torch hub model name (a nn.Module at the repo repo_name)
num_cores (int): Number of cores to use for CPU parallelization
batch_size (int): size in batch dimension to chunk inference (8-256 recommended, depending on memory usage)
output_urlpath (str): output/working directory
kwargs (dict): additional keywords to pass to model initialization
use_gpu (bool): use GPU if available
insecure (bool): insecure SSL
storage_options (dict): storage options to pass to reading functions
output_storage_options (dict): storage options to pass to writing functions
Returns:
DataFrame[SlideSchema]: slide manifest
"""
client = get_or_create_dask_client()
configure_dask_client(**dask_options)

if "tiles_url" not in slide_manifest.columns:
if tile_size is None:
@@ -221,20 +246,20 @@ def __infer_tile_labels(
Args:
tiles_urlpath (str): path to a slide-tile manifest file (.tiles.parquet)
tile_size (int): size of tiles to use (at the requested magnification)
filter_query (str): pandas query by which to filter tiles based on their various tissue detection scores
requested_magnification (Optional[int]): Magnification scale at which to perform computation
slide_id (str): slide ID
output_urlpath (str): output/working directory
torch_model_repo_or_dir (str): repository root name like (namespace/repo) at github.com to serve torch.hub models. Or path to a local model (e.g. msk-mind/luna-ml)
model_name (str): torch hub model name (a nn.Module at the repo repo_name)
num_cores (int): Number of cores to use for CPU parallelization
batch_size (int): size in batch dimension to chunk inference (8-256 recommended, depending on memory usage)
output_urlpath (str): output/working directory
kwargs (dict): additional keywords to pass to model initialization
use_gpu (bool): use GPU if available
insecure (bool): insecure SSL
storage_options (dict): storage options to pass to reading functions
output_storage_options (dict): storage options to pass to writing functions
Returns:
pd.DataFrame: augmented tiles dataframe
dict: metadata
"""
if insecure:
ssl._create_default_https_context = ssl._create_unverified_context
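A hedged usage sketch for infer_tile_labels as documented above. The slide manifest is assumed to come from slide_etl; the torch.hub repo string follows the msk-mind/luna-ml example in the docstring, while the model name and output path are hypothetical.

from luna.pathology.cli.infer_tile_labels import infer_tile_labels

slide_manifest = ...  # DataFrame[SlideSchema] from slide_etl
slide_manifest = infer_tile_labels(
    slide_manifest,
    tile_size=256,                               # used if tiles_url is not yet populated
    torch_model_repo_or_dir="msk-mind/luna-ml",  # torch.hub repo (docstring example)
    model_name="tissue_tile_net",                # hypothetical nn.Module in that repo
    batch_size=64,                               # 8-256 recommended per the docstring
    output_urlpath="s3://bucket/tile_inference", # hypothetical output prefix
    use_gpu=True,
)
# Tile manifests referenced by the returned slide manifest gain class
# prediction/score columns from the model.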
63 changes: 53 additions & 10 deletions src/luna/pathology/cli/run_stardist_cell_detection.py
@@ -46,7 +46,7 @@ def stardist_simple_cli(
local_config (str): local config yaml file
Returns:
pd.DataFrame: metadata about function call
dict: metadata about function call
"""

config = get_config(vars())
Expand Down Expand Up @@ -79,7 +79,29 @@ def stardist_simple(
storage_options: dict,
output_storage_options: dict,
annotation_column: str = "stardist_geojson_url",
) -> pd.DataFrame:
) -> DataFrame[SlideSchema]:
"""Run stardist using qupath CLI on slides in a slide manifest from
slide_etl. URIs to resulting GeoJSON will be stored in a specified column
of the returned slide manifest.
Args:
slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
cell_expansion_size (float): size in pixels to expand cell cytoplasm
image_type (str): qupath image type (BRIGHTFIELD_H_DAB)
output_urlpath (str): output url/path
debug_opts (str): debug options passed as arguments to groovy script
num_cores (int): Number of cores to use for CPU parallelization
image (str): docker/singularity image
use_singularity (bool): use singularity instead of docker
max_heap_size (str): maximum heap size to pass to java options
storage_options (dict): storage options to pass to reading functions
output_storage_options (dict): storage options to pass to writing functions
annotation_column (str): name of column in resulting slide manifest to store GeoJSON URIs
Returns:
DataFrame[SlideSchema]: slide manifest
"""

client = get_or_create_dask_client()

futures = []
@@ -122,8 +144,10 @@ def __stardist_simple(
max_heap_size: str,
storage_options: dict,
output_storage_options: dict,
) -> pd.DataFrame:
"""Run stardist using qupath CLI
) -> dict:
"""Run stardist using qupath CLI on slides in a slide manifest from
slide_etl. URIs to resulting GeoJSON will be stored in a specified column
of the returned slide manifest.
Args:
slide_urlpath (str): path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
@@ -139,7 +163,7 @@
output_storage_options (dict): storage options to pass to writing functions
Returns:
pd.DataFrame: cell detections
dict: run metadata
"""
fs, slide_path = fsspec.core.url_to_fs(slide_urlpath, **storage_options)
ofs, output_path = fsspec.core.url_to_fs(output_urlpath, **output_storage_options)
@@ -228,21 +252,22 @@ def stardist_cell_lymphocyte_cli(
max_heap_size: str = "64G",
storage_options: dict = {},
output_storage_options: dict = {},
):
) -> dict:
"""Run stardist using qupath CLI
Args:
slide_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
output_urlpath (str): output url/path
num_cores (int): Number of cores to use for CPU parallelization
use_gpu (bool): use GPU
image (str): docker/singularity image
use_singularity (bool): use singularity instead of docker
max_heap_size (str): maximum heap size to pass to java options
storage_options (dict): storage options to pass to reading functions
output_storage_options (dict): storage options to pass to writing functions
Returns:
pd.DataFrame: cell detections
dict: run metadata
"""
config = get_config(vars())
slide_id = Path(config["slide_urlpath"]).stem
@@ -272,7 +297,24 @@ def stardist_cell_lymphocyte(
storage_options: dict = {},
output_storage_options: dict = {},
annotation_column: str = "lymphocyte_geojson_url",
):
) -> DataFrame[SlideSchema]:
"""Run stardist using qupath CLI
Args:
slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
output_urlpath (str): output url/path
num_cores (int): Number of cores to use for CPU parallelization
use_gpu (bool): use GPU
image (str): docker/singularity image
use_singularity (bool): use singularity instead of docker
max_heap_size (str): maximum heap size to pass to java options
storage_options (dict): storage options to pass to reading functions
output_storage_options (dict): storage options to pass to writing functions
annotation_column (str): name of column in resulting slide manifest to store GeoJSON URIs
Returns:
DataFrame[SlideSchema]: slide manifest
"""
client = get_or_create_dask_client()

futures = []
@@ -313,20 +355,21 @@ def __stardist_cell_lymphocyte(
max_heap_size: str = "64G",
storage_options: dict = {},
output_storage_options: dict = {},
) -> pd.DataFrame:
) -> dict:
"""Run stardist using qupath CLI
Args:
slide_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
output_urlpath (str): output url/path
num_cores (int): Number of cores to use for CPU parallelization
use_gpu (bool): use GPU
image (str): docker/singularity image
use_singularity (bool): use singularity instead of docker
max_heap_size (str): maximum heap size to pass to java options
storage_options (dict): storage options to pass to reading functions
Returns:
pd.DataFrame: cell detections
dict: run metadata
"""
fs, slide_path = fsspec.core.url_to_fs(slide_urlpath, **storage_options)
ofs, output_path = fsspec.core.url_to_fs(output_urlpath, **output_storage_options)
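A hedged end-to-end sketch of the two manifest-level StarDist helpers documented above. The slide manifest is assumed to come from slide_etl; the docker image tag, heap size, and output paths are hypothetical, and stardist_simple is shown with every argument because this hunk does not show defaults for it.

from luna.pathology.cli.run_stardist_cell_detection import (
    stardist_cell_lymphocyte,
    stardist_simple,
)

slide_manifest = ...  # DataFrame[SlideSchema] from slide_etl

# Cell detection; stores GeoJSON URIs in "stardist_geojson_url" by default.
slide_manifest = stardist_simple(
    slide_manifest,
    cell_expansion_size=8.0,                  # pixels of cytoplasm expansion
    image_type="BRIGHTFIELD_H_DAB",           # qupath image type from the docstring
    output_urlpath="s3://bucket/stardist",    # hypothetical output prefix
    debug_opts="",
    num_cores=8,
    image="mskmind/qupath-stardist:latest",   # hypothetical docker/singularity image
    use_singularity=False,
    max_heap_size="32G",
    storage_options={},
    output_storage_options={},
)

# Lymphocyte classification; stores GeoJSON URIs in "lymphocyte_geojson_url" by default.
slide_manifest = stardist_cell_lymphocyte(
    slide_manifest,
    output_urlpath="s3://bucket/stardist_lymphocyte",  # hypothetical output prefix
    num_cores=8,
    use_gpu=False,
    image="mskmind/qupath-stardist:latest",            # hypothetical image tag
    use_singularity=False,
)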
