Commit
docs: update docstrings
raylim committed Nov 1, 2023
1 parent 44a6f50 commit 1150031
Showing 9 changed files with 348 additions and 56 deletions.
27 changes: 24 additions & 3 deletions src/luna/pathology/cli/dsa_upload.py
@@ -117,9 +117,27 @@ def upload_annotation_to_dsa(
insecure: bool = False,
storage_options: dict = {},
):
uuids = []
"""Upload annotation to DSA
Upload a JSON annotation file as a new annotation to the image in the DSA collection.
Args:
dsa_endpoint_url (string): DSA API endpoint e.g. http://localhost:8080/api/v1
slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
annotation_column (string): annotation column of slide_manifest containing the dsa url
collection_name (string): name of the collection in DSA
image_filename (string): name of the image file in DSA e.g. 123.svs. If not specified, infer from annotation_file_urlpath
username (string): DSA username (defaults to environment variable DSA_USERNAME)
password (string): DSA password (defaults to environment variable DSA_PASSWORD)
force (bool): upload even if annotation with same name exists for the slide
insecure (bool): insecure ssl
storage_options (dict): options to pass to reading functions
Returns:
DataFrame[SlideSchema]: slide manifest
"""
for slide in slide_manifest.itertuples(name="Slide"):
uuids += _upload_annotation_to_dsa(
uuids = _upload_annotation_to_dsa(
dsa_endpoint_url,
slide[annotation_column],
collection_name,
@@ -130,7 +148,10 @@ def upload_annotation_to_dsa(
insecure,
storage_options,
)
return uuids
slide_manifest.at[
slide.Index, annotation_column.replace("url", "uuid")
] = uuids[0]
return slide_manifest


def _upload_annotation_to_dsa(
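A minimal usage sketch of the updated upload_annotation_to_dsa API, assuming a slide manifest produced upstream by slide_etl; the annotation column and collection names are hypothetical, and the endpoint is the docstring's example.

from luna.pathology.cli.dsa_upload import upload_annotation_to_dsa

slide_manifest = ...  # DataFrame[SlideSchema] produced upstream by slide_etl
slide_manifest = upload_annotation_to_dsa(
    dsa_endpoint_url="http://localhost:8080/api/v1",   # docstring example endpoint
    slide_manifest=slide_manifest,
    annotation_column="regional_annotation_url",       # hypothetical column of DSA annotation JSON URLs
    collection_name="demo-collection",                  # hypothetical DSA collection
)
# Per the updated code, the matching "regional_annotation_uuid" column now holds
# the UUID of the uploaded annotation for each slide; username and password
# default to the DSA_USERNAME / DSA_PASSWORD environment variables.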
182 changes: 161 additions & 21 deletions src/luna/pathology/cli/dsa_viz.py

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions src/luna/pathology/cli/extract_shape_features.py
@@ -43,6 +43,10 @@ def cli(
slide_mask_urlpath (str): URL/path to slide mask (*.tif)
label_cols (List[str]): list of labels that correspond to those in slide_mask_urlpath
output_urlpath (str): output URL/path prefix
include_smaller_regions (bool): include the smaller regions (not just the largest)
storage_options (dict): storage options to pass to read functions
output_storage_options (dict): storage options to pass to write functions
local_config (str): local config YAML file
Returns:
dict: output .tif path and the number of shapes for which features were generated
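A hedged sketch of calling the extract_shape_features cli entrypoint documented above. Only the parameters visible in this hunk are shown; the hunk starts mid-docstring, so the full signature may require further arguments, and all paths and label names are hypothetical.

from luna.pathology.cli.extract_shape_features import cli as extract_shape_features

result = extract_shape_features(
    slide_mask_urlpath="s3://bucket/masks/123.tif",   # hypothetical *.tif slide mask
    label_cols=["tumor", "stroma"],                   # labels matching the mask values
    output_urlpath="s3://bucket/shape_features/123",  # hypothetical output prefix
    include_smaller_regions=False,                    # keep only the largest region per label
)
# Per the docstring, the return value is a dict with the output .tif path and
# the number of shapes for which features were generated.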
31 changes: 28 additions & 3 deletions src/luna/pathology/cli/extract_tile_shape_features.py
@@ -135,7 +135,6 @@ def cli(

def extract_tile_shape_features(
slide_manifest: DataFrame[SlideSchema],
slide_urlpath: str,
output_urlpath: str,
resize_factor: int = 16,
detection_probability_threshold: Optional[float] = None,
@@ -161,6 +160,27 @@
"solidity",
],
):
"""Extracts shape and spatial features (HIF features) from a slide mask.
Args:
slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
output_urlpath (str): output URL/path
resize_factor (int): factor to downsample slide image
detection_probability_threshold (Optional[float]): detection probability threshold
statistical_descriptors (str): statistical descriptors to calculate. One of All, Quantiles, Stats, or Density
cellular_features (str): cellular features to include. One of All, Nucleus, Cell, Cytoplasm, or Membrane
property_type (str): properties to include. One of All, Geometric, or Stain
include_smaller_regions (bool): include smaller regions in output
label_cols (List[str]): list of score columns to use for the classification. Tile is classified as the column with the max score
storage_options (dict): storage options to pass to reading functions
output_storage_options (dict): storage options to pass to writing functions
local_config (str): local config yaml file
objects_column (str): slide manifest column name with StarDist GeoJSON URLs
properties (List[str]): properties to extract
Returns:
DataFrame[SlideSchema]: slide manifest
"""
client = get_or_create_dask_client()

futures = []
Expand Down Expand Up @@ -225,16 +245,21 @@ def __extract_tile_shape_features(
"""Extracts shape and spatial features (HIF features) from a slide mask.
Args:
objects (Union[str, gpd.GeoDataFrame]): URL/path to slide (tiffslide supported formats)
tiles (Union[str, pd.DataFrame]): URL/path to object file (geopandas supported formats)
objects_urlpath (str): URL/path to object file (geopandas supported formats)
tiles_urlpath (str): URL/path to tiles manifest (parquet)
slide_urlpath (str): URL/path to slide (tiffslide supported formats)
output_urlpath (str): output URL/path
resize_factor (int): factor to downsample slide image
detection_probability_threshold (Optional[float]): detection
probability threshold
slide_id (str): Slide ID to add to dataframes
statistical_descriptors (StatisticalDescriptors): statistical descriptors to calculate
cellular_features (CellularFeatures): cellular features to include
property_type (PropertyType): properties to include
include_smaller_regions (bool): include smaller regions
label_cols (List[str]): list of score columns to use for the classification. Tile is classified as the column with the max score
storage_options (dict): storage options to pass to reading functions
output_storage_options (dict): storage options to pass to writing functions
properties (List[str]): list of whole slide image properties to
extract. Needs to be parquet compatible (numeric).
Returns:
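A minimal sketch of the manifest-level extract_tile_shape_features call documented above, assuming a slide manifest from slide_etl whose rows already reference tiles and StarDist GeoJSON objects; the output path is hypothetical and the remaining keyword arguments keep the defaults visible in the signature.

from luna.pathology.cli.extract_tile_shape_features import extract_tile_shape_features

slide_manifest = ...  # DataFrame[SlideSchema] from slide_etl, tiles and objects populated
slide_manifest = extract_tile_shape_features(
    slide_manifest,
    output_urlpath="s3://bucket/tile_shape_features",  # hypothetical output prefix
)
# Remaining arguments keep the defaults shown in the signature (resize_factor=16,
# the default objects_column, etc.); the documented return value is the slide manifest.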
45 changes: 35 additions & 10 deletions src/luna/pathology/cli/infer_tile_labels.py
@@ -16,6 +16,7 @@
from tqdm import tqdm

from luna.common.dask import configure_dask_client, get_or_create_dask_client
from luna.common.models import SlideSchema
from luna.common.utils import get_config, make_temp_directory, save_metadata, timed
from luna.pathology.analysis.ml import (
HDF5Dataset,
@@ -54,8 +55,9 @@ def cli(
Args:
slide_urlpath (str): url/path to slide image (virtual slide formats compatible with TiffSlide, .svs, .tif, .scn, ...)
tiles_urlpath (str): path to a slide-tile manifest file (.tiles.csv)
tile_size (int): size of tiles to use (at the requested magnification)
tile_size (Optional[int]): size of tiles to use (at the requested magnification)
filter_query (str): pandas query by which to filter tiles based on their various tissue detection scores
requested_magnification (Optional[int]): Magnification scale at which to perform computation
torch_model_repo_or_dir (str): repository root name like (namespace/repo) at github.com to serve torch.hub models. Or path to a local model (e.g. msk-mind/luna-ml)
model_name (str): torch hub model name (a nn.Module at the repo repo_name)
num_cores (int): Number of cores to use for CPU parallelization
Expand All @@ -72,6 +74,7 @@ def cli(
dict: metadata
"""
config = get_config(vars())
configure_dask_client(**config["dask_options"])

if not config["slide_urlpath"] and not config["tiles_urlpath"]:
raise fire.core.FireError("Specify either tiles_urlpath or slide_urlpath")
@@ -130,7 +133,7 @@ def cli(


def infer_tile_labels(
slide_manifest: DataFrame,
slide_manifest: DataFrame[SlideSchema],
tile_size: Optional[int] = None,
filter_query: str = "",
thumbnail_magnification: Optional[int] = None,
@@ -142,13 +145,35 @@ def infer_tile_labels(
output_urlpath: str = ".",
kwargs: dict = {},
use_gpu: bool = False,
dask_options: dict = {},
insecure: bool = False,
storage_options: dict = {},
output_storage_options: dict = {},
) -> pd.DataFrame:
) -> DataFrame[SlideSchema]:
"""Run inference using a model and transform definition (either local or using torch.hub)
Decorates existing tiles manifests with additional columns corresponding to class prediction/scores from the model
Args:
slide_manifest (DataFrame): slide manifest from slide_etl
tile_size (Optional[int]): size of tiles to use (at the requested magnification)
filter_query (str): pandas query by which to filter tiles based on their various tissue detection scores
thumbnail_magnification (Optional[int]): Magnification scale at which to detect tissue
tile_magnification (Optional[int]): Magnification scale at which to generate tiles
torch_model_repo_or_dir (str): repository root name like (namespace/repo) at github.com to serve torch.hub models. Or path to a local model (e.g. msk-mind/luna-ml)
model_name (str): torch hub model name (a nn.Module at the repo repo_name)
num_cores (int): Number of cores to use for CPU parallelization
batch_size (int): size in batch dimension to chunk inference (8-256 recommended, depending on memory usage)
output_urlpath (str): output/working directory
kwargs (dict): additional keywords to pass to model initialization
use_gpu (bool): use GPU if available
insecure (bool): insecure SSL
storage_options (dict): storage options to pass to reading functions
output_storage_options (dict): storage options to pass to writing functions
Returns:
DataFrame[SlideSchema]: slide manifest
"""
client = get_or_create_dask_client()
configure_dask_client(**dask_options)

if "tiles_url" not in slide_manifest.columns:
if tile_size is None:
@@ -221,20 +246,20 @@ def __infer_tile_labels(
Args:
tiles_urlpath (str): path to a slide-tile manifest file (.tiles.parquet)
tile_size (int): size of tiles to use (at the requested magnification)
filter_query (str): pandas query by which to filter tiles based on their various tissue detection scores
requested_magnification (Optional[int]): Magnification scale at which to perform computation
slide_id (str): slide ID
output_urlpath (str): output/working directory
torch_model_repo_or_dir (str): repository root name like (namespace/repo) at github.com to serve torch.hub models. Or path to a local model (e.g. msk-mind/luna-ml)
model_name (str): torch hub model name (a nn.Module at the repo repo_name)
num_cores (int): Number of cores to use for CPU parallelization
batch_size (int): size in batch dimension to chunk inference (8-256 recommended, depending on memory usage)
output_urlpath (str): output/working directory
kwargs (dict): additional keywords to pass to model initialization
use_gpu (bool): use GPU if available
insecure (bool): insecure SSL
storage_options (dict): storage options to pass to reading functions
output_storage_options (dict): storage options to pass to writing functions
Returns:
pd.DataFrame: augmented tiles dataframe
dict: metadata
"""
if insecure:
ssl._create_default_https_context = ssl._create_unverified_context
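A hedged usage sketch for infer_tile_labels as documented above. The slide manifest is assumed to come from slide_etl; the torch.hub repo string follows the msk-mind/luna-ml example in the docstring, while the model name and output path are hypothetical.

from luna.pathology.cli.infer_tile_labels import infer_tile_labels

slide_manifest = ...  # DataFrame[SlideSchema] from slide_etl
slide_manifest = infer_tile_labels(
    slide_manifest,
    tile_size=256,                               # used if tiles_url is not yet populated
    torch_model_repo_or_dir="msk-mind/luna-ml",  # torch.hub repo (docstring example)
    model_name="tissue_tile_net",                # hypothetical nn.Module in that repo
    batch_size=64,                               # 8-256 recommended per the docstring
    output_urlpath="s3://bucket/tile_inference", # hypothetical output prefix
    use_gpu=True,
)
# Tile manifests referenced by the returned slide manifest gain class
# prediction/score columns from the model.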
63 changes: 53 additions & 10 deletions src/luna/pathology/cli/run_stardist_cell_detection.py
@@ -46,7 +46,7 @@ def stardist_simple_cli(
local_config (str): local config yaml file
Returns:
pd.DataFrame: metadata about function call
dict: metadata about function call
"""

config = get_config(vars())
Expand Down Expand Up @@ -79,7 +79,29 @@ def stardist_simple(
storage_options: dict,
output_storage_options: dict,
annotation_column: str = "stardist_geojson_url",
) -> pd.DataFrame:
) -> DataFrame[SlideSchema]:
"""Run stardist using qupath CLI on slides in a slide manifest from
slide_etl. URIs to resulting GeoJSON will be stored in a specified column
of the returned slide manifest.
Args:
slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
cell_expansion_size (float): size in pixels to expand cell cytoplasm
image_type (str): qupath image type (BRIGHTFIELD_H_DAB)
output_urlpath (str): output url/path
debug_opts (str): debug options passed as arguments to groovy script
num_cores (int): Number of cores to use for CPU parallelization
image (str): docker/singularity image
use_singularity (bool): use singularity instead of docker
max_heap_size (str): maximum heap size to pass to java options
storage_options (dict): storage options to pass to reading functions
output_storage_options (dict): storage options to pass to writing functions
annotation_column (str): name of column in resulting slide manifest to store GeoJSON URIs
Returns:
DataFrame[SlideSchema]: slide manifest
"""

client = get_or_create_dask_client()

futures = []
@@ -122,8 +144,10 @@ def __stardist_simple(
max_heap_size: str,
storage_options: dict,
output_storage_options: dict,
) -> pd.DataFrame:
"""Run stardist using qupath CLI
) -> dict:
"""Run stardist using qupath CLI on slides in a slide manifest from
slide_etl. URIs to resulting GeoJSON will be stored in a specified column
of the returned slide manifest.
Args:
slide_urlpath (str): path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
@@ -139,7 +163,7 @@
output_storage_options (dict): storage options to pass to writing functions
Returns:
pd.DataFrame: cell detections
dict: run metadata
"""
fs, slide_path = fsspec.core.url_to_fs(slide_urlpath, **storage_options)
ofs, output_path = fsspec.core.url_to_fs(output_urlpath, **output_storage_options)
@@ -228,21 +252,22 @@ def stardist_cell_lymphocyte_cli(
max_heap_size: str = "64G",
storage_options: dict = {},
output_storage_options: dict = {},
):
) -> dict:
"""Run stardist using qupath CLI
Args:
slide_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
output_urlpath (str): output url/path
num_cores (int): Number of cores to use for CPU parallelization
use_gpu (bool): use GPU
image (str): docker/singularity image
use_singularity (bool): use singularity instead of docker
max_heap_size (str): maximum heap size to pass to java options
storage_options (dict): storage options to pass to reading functions
output_storage_options (dict): storage options to pass to writing functions
Returns:
pd.DataFrame: cell detections
dict: run metadata
"""
config = get_config(vars())
slide_id = Path(config["slide_urlpath"]).stem
@@ -272,7 +297,24 @@ def stardist_cell_lymphocyte(
storage_options: dict = {},
output_storage_options: dict = {},
annotation_column: str = "lymphocyte_geojson_url",
):
) -> DataFrame[SlideSchema]:
"""Run stardist using qupath CLI
Args:
slide_manifest (DataFrame[SlideSchema]): slide manifest from slide_etl
output_urlpath (str): output url/path
num_cores (int): Number of cores to use for CPU parallelization
use_gpu (bool): use GPU
image (str): docker/singularity image
use_singularity (bool): use singularity instead of docker
max_heap_size (str): maximum heap size to pass to java options
storage_options (dict): storage options to pass to reading functions
output_storage_options (dict): storage options to pass to writing functions
annotation_column (str): name of column in resulting slide manifest to store GeoJSON URIs
Returns:
DataFrame[SlideSchema]: slide manifest
"""
client = get_or_create_dask_client()

futures = []
@@ -313,20 +355,21 @@ def __stardist_cell_lymphocyte(
max_heap_size: str = "64G",
storage_options: dict = {},
output_storage_options: dict = {},
) -> pd.DataFrame:
) -> dict:
"""Run stardist using qupath CLI
Args:
slide_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
output_urlpath (str): output url/path
num_cores (int): Number of cores to use for CPU parallelization
use_gpu (bool): use GPU
image (str): docker/singularity image
use_singularity (bool): use singularity instead of docker
max_heap_size (str): maximum heap size to pass to java options
storage_options (dict): storage options to pass to reading functions
Returns:
pd.DataFrame: cell detections
dict: run metadata
"""
fs, slide_path = fsspec.core.url_to_fs(slide_urlpath, **storage_options)
ofs, output_path = fsspec.core.url_to_fs(output_urlpath, **output_storage_options)
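A hedged end-to-end sketch of the two manifest-level StarDist helpers documented above. The slide manifest is assumed to come from slide_etl; the docker image tag, heap size, and output paths are hypothetical, and stardist_simple is shown with every argument because this hunk does not show defaults for it.

from luna.pathology.cli.run_stardist_cell_detection import (
    stardist_cell_lymphocyte,
    stardist_simple,
)

slide_manifest = ...  # DataFrame[SlideSchema] from slide_etl

# Cell detection; stores GeoJSON URIs in "stardist_geojson_url" by default.
slide_manifest = stardist_simple(
    slide_manifest,
    cell_expansion_size=8.0,                  # pixels of cytoplasm expansion
    image_type="BRIGHTFIELD_H_DAB",           # qupath image type from the docstring
    output_urlpath="s3://bucket/stardist",    # hypothetical output prefix
    debug_opts="",
    num_cores=8,
    image="mskmind/qupath-stardist:latest",   # hypothetical docker/singularity image
    use_singularity=False,
    max_heap_size="32G",
    storage_options={},
    output_storage_options={},
)

# Lymphocyte classification; stores GeoJSON URIs in "lymphocyte_geojson_url" by default.
slide_manifest = stardist_cell_lymphocyte(
    slide_manifest,
    output_urlpath="s3://bucket/stardist_lymphocyte",  # hypothetical output prefix
    num_cores=8,
    use_gpu=False,
    image="mskmind/qupath-stardist:latest",            # hypothetical image tag
    use_singularity=False,
)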
