From 321b7db9ee53f028c170028d653141a59854be8a Mon Sep 17 00:00:00 2001
From: Matt Cai <matt.cai@gmail.com>
Date: Thu, 7 May 2020 15:51:08 -0700
Subject: [PATCH 1/3] outline of rnascope hiplex pipeline example

---
 examples/pipelines/rnascope_hiplex.py | 139 ++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 examples/pipelines/rnascope_hiplex.py

diff --git a/examples/pipelines/rnascope_hiplex.py b/examples/pipelines/rnascope_hiplex.py
new file mode 100644
index 000000000..bde3cbab9
--- /dev/null
+++ b/examples/pipelines/rnascope_hiplex.py
@@ -0,0 +1,139 @@
+"""
+.. _RNAScope_processing_example:
+
+Single Field of View for RNAScope HiPlex
+=========================================
+
+This notebook walks through a work flow that analyzes one field of view of _____ from
+ACDBio, using the starfish package.
+
+This example processes an experiment with a single round from a single field of view of RNAScope
+HiPlex data taken from ____. These data
+
+The data consist of 15 images from 3 round, 5 channels, and 1 z-plane. Each image is
+(2048, 2048) (y, x).
+"""
+
+from typing import Optional, Tuple
+from IPython import get_ipython
+
+import starfish
+import starfish.data
+from starfish import FieldOfView, DecodedIntensityTable
+from starfish.types import TraceBuildingStrategies
+
+# equivalent to %gui qt
+ipython = get_ipython()
+ipython.magic("gui qt5")
+
+
+###################################################################################################
+# Define image filters
+# --------------------
+# The 3d smFISH workflow run by the Allen runs a bandpass filter to remove high and low frequency
+# signal and blurs over z with a 1-pixel gaussian to smooth the signal over the z-axis.
+#
+# low-intensity signal is (stringently) clipped from the images before and after these filters.
+
+# bandpass filter to remove cellular background and camera noise
+bandpass = starfish.image.Filter.Bandpass(lshort=.5, llong=7, threshold=0.0)
+
+# gaussian blur to smooth z-axis
+glp = starfish.image.Filter.GaussianLowPass(
+    sigma=(1, 0, 0),
+    is_volume=True
+)
+
+# pre-filter clip to remove low-intensity background signal
+clip1 = starfish.image.Filter.Clip(p_min=50, p_max=100)
+
+# post-filter clip to eliminate all but the highest-intensity peaks
+clip2 = starfish.image.Filter.Clip(p_min=99, p_max=100, is_volume=True)
+
+###################################################################################################
+# Define a spot detection method
+# ------------------------------
+# Spots are detected using a spot finder based on trackpy's locate method, which identifies
+# local intensity maxima, and spots are matched to the gene they represent by looking them up in a
+# codebook that records which (round, channel) matches which gene target.
+
+tlmpf = starfish.spots.FindSpots.TrackpyLocalMaxPeakFinder(
+    spot_diameter=5,  # must be odd integer
+    min_mass=0.02,
+    max_size=2,  # this is max radius
+    separation=7,
+    preprocess=False,
+    percentile=10,  # this is irrelevant when min_mass, spot_diameter, and max_size are set properly
+    verbose=True,
+    is_volume=True,
+)
+
+###################################################################################################
+# Construct the pipeline
+# ----------------------
+
+def processing_pipeline(
+    experiment: starfish.Experiment,
+    fov_name: str,
+    n_processes: Optional[int]=None
+) -> Tuple[starfish.ImageStack, starfish.IntensityTable]:
+    """Process a single field of view of an experiment
+
+    Parameters
+    ----------
+    experiment : starfish.Experiment
+        starfish experiment containing fields of view to analyze
+    fov_name : str
+        name of the field of view to process
+    n_processes : int
+
+    Returns
+    -------
+    starfish.IntensityTable :
+        decoded IntensityTable containing spots matched to the genes they are hybridized against
+    """
+
+    all_intensities = list()
+    codebook = experiment.codebook
+
+    print("Loading images...")
+    images = enumerate(experiment[fov_name].get_images(FieldOfView.PRIMARY_IMAGES))
+
+    decoder = starfish.spots.DecodeSpots.PerRoundMaxChannel(
+        codebook=codebook,
+        trace_building_strategy=TraceBuildingStrategies.SEQUENTIAL
+    )
+
+    for image_number, primary_image in images:
+        print(f"Filtering image {image_number}...")
+        filter_kwargs = dict(
+            in_place=True,
+            verbose=True,
+            n_processes=n_processes
+        )
+        clip1.run(primary_image, **filter_kwargs)
+        bandpass.run(primary_image, **filter_kwargs)
+        glp.run(primary_image, **filter_kwargs)
+        clip2.run(primary_image, **filter_kwargs)
+
+        print("Calling spots...")
+        spots = tlmpf.run(primary_image)
+        print("Decoding spots...")
+        decoded_intensities = decoder.run(spots=spots)
+        all_intensities.append(decoded_intensities)
+
+    decoded = DecodedIntensityTable.concatenate_intensity_tables(all_intensities)
+    decoded = decoded[decoded["total_intensity"] > .025]
+
+    return primary_image, decoded
+
+###################################################################################################
+# Load data, run pipeline, display results
+# ----------------------------------------
+
+experiment = starfish.data.allen_smFISH(use_test_data=True)
+
+image, intensities = processing_pipeline(experiment, fov_name='fov_001')
+
+# uncomment the below line to visualize the output with the spot calls.
+# viewer = starfish.display(image, intensities)
\ No newline at end of file

From 0e3c332a3cfa63231abbdcf3c213ba696ccd524e Mon Sep 17 00:00:00 2001
From: Matt Cai <matt.cai@gmail.com>
Date: Wed, 29 Jul 2020 20:15:42 -0700
Subject: [PATCH 2/3] added current state of rnascope_hiplex.py pipeline in
 gallery along with feedback from ACDBio.

---
 examples/pipelines/rnascope_hiplex.py | 618 ++++++++++++++++++++++----
 1 file changed, 521 insertions(+), 97 deletions(-)

diff --git a/examples/pipelines/rnascope_hiplex.py b/examples/pipelines/rnascope_hiplex.py
index bde3cbab9..5c9d5f993 100644
--- a/examples/pipelines/rnascope_hiplex.py
+++ b/examples/pipelines/rnascope_hiplex.py
@@ -2,138 +2,562 @@
 .. _RNAScope_processing_example:
 
 Single Field of View for RNAScope HiPlex
-=========================================
+========================================
 
-This notebook walks through a work flow that analyzes one field of view of _____ from
-ACDBio, using the starfish package.
+ACDBio has a 12-gene HiPlex assay that uses 3 rounds of fluorescence imaging with 4 fluorescence RNA
+channels and 1 DAPI-stained nuclei channel. In the example data provided by ACDBio, these 15 images
+are saved as single-plane RGB tiffs with filenames that indicate the round and channel. A key for
+mapping images to genes is included in list.txt. This vignette will demonstrate how to format the
+data into SpaceTx format, register the images into the same coordinate system, and process them
+into cell x gene expression matrices using modules contained in starfish.
 
-This example processes an experiment with a single round from a single field of view of RNAScope
-HiPlex data taken from ____. These data
+Feedback from ACDBio:
+* Blob detection needs improvement in:
+  * Low-contrast regions - skip background subtraction and enhance contrast instead
+  * Clustered regions - use pre-defined spot size to break up clusters and count as single spots
+  * High-intensity saturated regions - use pre-defined spot size and intensity to to resolve
+  saturated regions as single spots
+  * Airy disks - either remove airy disks prior to spot finding or modify spot finding algorithm
+  to account for airy disk pattern
+* Image registration could be improved with rotation (low priority)
+* Cell Segmentation (watershed)
+  * sum intensities instead of max
+  * connect nuclear fragments with morphological operation
+* Expression Matrix
+  * Summary of data like dot size, intensity, etc - this is in the DecodedIntensityTable object
+  * Further analysis e.g. normalization - not in the scope of starfish
 
-The data consist of 15 images from 3 round, 5 channels, and 1 z-plane. Each image is
-(2048, 2048) (y, x).
+* Show blob results overlaid on raw
+
+* overlay_spot_calls() wrong coordinates?
 """
 
-from typing import Optional, Tuple
-from IPython import get_ipython
+###################################################################################################
+# Convert RGB images to greyscale images
+# --------------------------------------
+# starfish and the SpaceTx format expect greyscale images so the RGB data must first be converted.
+# This can be done in many tools, such as MATLAB and ImageJ. Here we chose to use Python Imaging
+# Library (PIL):
 
-import starfish
-import starfish.data
-from starfish import FieldOfView, DecodedIntensityTable
-from starfish.types import TraceBuildingStrategies
+import os
+import numpy as np
+from shutil import copy2
+from PIL import Image
 
-# equivalent to %gui qt
-ipython = get_ipython()
-ipython.magic("gui qt5")
 
+def convert_rgb_to_greyscale(input_dir, output_dir):
+    """
+    Original example data is stored in a directory named 'original_data' and consists of 15 RGB
+    images and 1 text file.
+    Copy everything into a new `output_dir` and convert any RGB tiffs to greyscale.
 
-###################################################################################################
-# Define image filters
-# --------------------
-# The 3d smFISH workflow run by the Allen runs a bandpass filter to remove high and low frequency
-# signal and blurs over z with a 1-pixel gaussian to smooth the signal over the z-axis.
-#
-# low-intensity signal is (stringently) clipped from the images before and after these filters.
+    └── original_data
+            ├── list.txt
+            ├── R1_DAPI.tif
+            ├── R1_Probe T1.tif
+            ├── R1_Probe T2.tif
+            ├── R1_Probe T3.tif
+            ├── R1_Probe T4.tif
+            ├── R2_DAPI.tif
+            ├── R2_Probe T5.tif
+            ├── R2_Probe T6.tif
+            ├── ...
 
-# bandpass filter to remove cellular background and camera noise
-bandpass = starfish.image.Filter.Bandpass(lshort=.5, llong=7, threshold=0.0)
+    """
 
-# gaussian blur to smooth z-axis
-glp = starfish.image.Filter.GaussianLowPass(
-    sigma=(1, 0, 0),
-    is_volume=True
-)
+    abs_input_dir = os.path.abspath(input_dir)
+    abs_output_dir = os.path.abspath(output_dir)
+    os.makedirs(abs_output_dir, exist_ok=True)
 
-# pre-filter clip to remove low-intensity background signal
-clip1 = starfish.image.Filter.Clip(p_min=50, p_max=100)
+    for file in os.listdir(abs_input_dir):
+        if file.endswith(".txt"):
+            copy2(os.path.join(abs_input_dir, file), abs_output_dir)
+        elif file.endswith(".tif"):
+            rgb_array = np.asarray(Image.open(os.path.join(abs_input_dir, file)))
+            img = Image.fromarray(np.amax(rgb_array,
+                                          axis=2))  # take the max rgb value rather than ITU-R
+            # 601-2 luma transform from PIL
+            img.save(os.path.join(abs_output_dir, file), format='tiff')
 
-# post-filter clip to eliminate all but the highest-intensity peaks
-clip2 = starfish.image.Filter.Clip(p_min=99, p_max=100, is_volume=True)
+
+convert_rgb_to_greyscale('original_data', 'RNAScope_grayscale')
 
 ###################################################################################################
-# Define a spot detection method
+# Formatting into SpaceTx Format
 # ------------------------------
-# Spots are detected using a spot finder based on trackpy's locate method, which identifies
-# local intensity maxima, and spots are matched to the gene they represent by looking them up in a
-# codebook that records which (round, channel) matches which gene target.
-
-tlmpf = starfish.spots.FindSpots.TrackpyLocalMaxPeakFinder(
-    spot_diameter=5,  # must be odd integer
-    min_mass=0.02,
-    max_size=2,  # this is max radius
-    separation=7,
-    preprocess=False,
-    percentile=10,  # this is irrelevant when min_mass, spot_diameter, and max_size are set properly
-    verbose=True,
-    is_volume=True,
-)
+# Now that all the images are greyscale, we can use the TileFetcher interface and
+# write_experiment_json() function to load the images and save in SpaceTx Format. We also define a
+# build_codebook() function that turns the gene list.txt into a codebook.json, which is needed by
+# experiment.json.
 
-###################################################################################################
-# Construct the pipeline
-# ----------------------
+import functools
+import os
+from typing import Mapping, Tuple, Union
+
+import numpy as np
+from skimage.io import imread
+from slicedimage import ImageFormat
+
+from starfish import Codebook
+from starfish.experiment.builder import FetchedTile, TileFetcher, write_experiment_json
+from starfish.types import Axes, Coordinates, Features
+
+
+@functools.lru_cache(maxsize=1)
+def cached_read_fn(file_path) -> np.ndarray:
+    return imread(file_path)
+
+
+def ch_to_Tnum(ch_label: int, round_label: int):
+    """
+    The file naming scheme for channel doesn't start over at '1' for each round.
+    Instead it continues counting from the previous round.
+    Convert channel int used by SpaceTx format to T-number used in filenames.
+    """
+
+    return 4 * (round_label) + (ch_label + 1)
+
+
+class RNATile(FetchedTile):
+
+    def __init__(
+            self,
+            file_path: str
+    ) -> None:
+        """Parser for an RNAScope tile.
+
+        Parameters
+        ----------
+        file_path : str
+            location of the tiff
+        coordinates :
+            the coordinates for the selected RNAScope tile, extracted from the metadata
+        """
+        self.file_path = file_path
+
+        # dummy coordinates must match shape
+        self._coordinates = {
+            Coordinates.X: (0.0, 0.1120),
+            Coordinates.Y: (0.0, 0.0539),
+            Coordinates.Z: (0.0, 0.0001),
+        }
+
+    @property
+    def shape(self) -> Mapping[Axes, int]:
+        return {Axes.Y: 539, Axes.X: 1120}  # hard coded for these datasets.
+
+    @property
+    def coordinates(self):
+        return self._coordinates  # noqa
+
+    def tile_data(self) -> np.ndarray:
+        return cached_read_fn(self.file_path)  # slice out the correct z-plane
+
+
+class RNADAPITile(FetchedTile):
+
+    def __init__(
+            self,
+            file_path: str
+    ) -> None:
+        """Parser for an RNAScope tile.
 
-def processing_pipeline(
-    experiment: starfish.Experiment,
-    fov_name: str,
-    n_processes: Optional[int]=None
-) -> Tuple[starfish.ImageStack, starfish.IntensityTable]:
-    """Process a single field of view of an experiment
+        Parameters
+        ----------
+        file_path : str
+            location of the tiff
+        coordinates :
+            the coordinates for the selected RNAScope tile, extracted from the metadata
+        """
+        self.file_path = file_path
+
+        # dummy coordinates must match shape
+        self._coordinates = {
+            Coordinates.X: (0.0, 0.1120),
+            Coordinates.Y: (0.0, 0.0539),
+            Coordinates.Z: (0.0, 0.0001),
+        }
+
+    @property
+    def shape(self) -> Mapping[Axes, int]:
+        return {Axes.Y: 539, Axes.X: 1120}  # hard coded for these datasets.
+
+    @property
+    def coordinates(self):
+        return self._coordinates  # noqa
+
+    def tile_data(self) -> np.ndarray:
+        return cached_read_fn(self.file_path)  # slice out the correct z-plane
+
+
+class RNATileFetcher(TileFetcher):
+
+    def __init__(self, input_dir: str) -> None:
+        """Implement a TileFetcher for RNAScope data.
+
+        This TileFetcher constructs spaceTx format for a stitched tissue slice, where
+        `input_dir` is a directory containing .tif files with the following structure:
+
+        └── RNAScope_greyscale
+            ├── R1_DAPI.tif
+            ├── R1_Probe T1.tif
+            ├── R1_Probe T2.tif
+            ├── R1_Probe T3.tif
+            ├── R1_Probe T4.tif
+            ├── R2_DAPI.tif
+            ├── R2_Probe T5.tif
+            ├── R2_Probe T6.tif
+            ├── ...
+
+        Notes
+        -----
+        - The spatial organization of the fields of view are not known so they are filled by
+        dummy coordinates
+        """
+
+        self.input_dir = input_dir
+        self.num_z = 1
+
+    def get_tile(
+            self, fov_id: int, round_label: int, ch_label: int, zplane_label: int) -> FetchedTile:
+        filename = f"R{(round_label + 1)}_Probe T{ch_to_Tnum(ch_label, round_label)}.tif"
+        return RNATile(os.path.join(self.input_dir, filename))
+
+
+class RNADAPITileFetcher(TileFetcher):
+
+    def __init__(self, input_dir: str) -> None:
+        """Implement a TileFetcher for dapi auxiliary images of RNAScope experiment.
+
+        Every round has a DAPI image, which can be used for image registration and cell segmentation
+
+        """
+        self.input_dir = input_dir
+
+    def get_tile(
+            self, fov_id: int, round_label: int, ch_label: int, zplane_label: int) -> FetchedTile:
+        filename = f"R{(round_label + 1)}_DAPI.tif"
+        return RNADAPITile(os.path.join(self.input_dir, filename))
+
+
+def build_codebook(gene_file: str, codebook_json: str) -> Codebook:
+    """Writes a codebook json file that a experiment.json file can point to.
+
+    Data provided by ACDBio included a file with genes listed in order from T1-T12 starting from
+    line 3.
+    Codebook will map 3 rounds and 4 channels to genes.
 
     Parameters
     ----------
-    experiment : starfish.Experiment
-        starfish experiment containing fields of view to analyze
-    fov_name : str
-        name of the field of view to process
-    n_processes : int
+    gene_list : str
+        Parse text file with genes listed in order of Tnum
+    codebook_json : str
+        Save a codebook to json using SpaceTx Format.
 
     Returns
     -------
-    starfish.IntensityTable :
-        decoded IntensityTable containing spots matched to the genes they are hybridized against
+    Codebook :
+        Codebook object in SpaceTx format.
+    """
+    mappings = list()
+    rnd = 0
+    ch = 0
+    with open(gene_file, 'r') as gene_list:
+        for line_num, line in enumerate(gene_list):
+            if line_num > 1:
+                mappings.append({Features.CODEWORD: [
+                    {Axes.ROUND.value: rnd, Axes.CH.value: ch, Features.CODE_VALUE: 1}],
+                                 Features.TARGET: line.strip()})
+                ch = ch + 1
+                if ch > 3:
+                    rnd = rnd + 1
+                    ch = 0
+
+    Codebook.from_code_array(mappings).to_json(codebook_json)
+    return Codebook.from_code_array(mappings)
+
+
+def format_experiment(input_dir, output_dir) -> None:
+    """CLI entrypoint for spaceTx format construction for RNAScope data
+
+    Parameters
+    ----------
+    input_dir : str
+        directory containing input data. See TileFetcher classes for expected directory structures.
+    output_dir : str
+        directory that 2-d images and starfish metadata will be written to.
     """
+    abs_output_dir = os.path.abspath(output_dir)
+    abs_input_dir = os.path.abspath(input_dir)
+    os.makedirs(abs_output_dir, exist_ok=True)
 
-    all_intensities = list()
-    codebook = experiment.codebook
+    primary_tile_fetcher = RNATileFetcher(abs_input_dir)
+    dapi_tile_fetcher = RNADAPITileFetcher(abs_input_dir)
 
-    print("Loading images...")
-    images = enumerate(experiment[fov_name].get_images(FieldOfView.PRIMARY_IMAGES))
+    # This is hardcoded for this example data set
+    primary_image_dimensions: Mapping[Union[str, Axes], int] = {
+        Axes.ROUND: 3,
+        Axes.CH: 4,
+        Axes.ZPLANE: 1,
+    }
 
-    decoder = starfish.spots.DecodeSpots.PerRoundMaxChannel(
-        codebook=codebook,
-        trace_building_strategy=TraceBuildingStrategies.SEQUENTIAL
+    aux_images_dimensions: Mapping[str, Mapping[Union[str, Axes], int]] = {
+        "nuclei": {
+            Axes.ROUND: 3,
+            Axes.CH: 1,
+            Axes.ZPLANE: 1,
+        },
+    }
+
+    write_experiment_json(
+        path=output_dir,
+        fov_count=1,
+        tile_format=ImageFormat.TIFF,
+        primary_image_dimensions=primary_image_dimensions,
+        aux_name_to_dimensions=aux_images_dimensions,
+        primary_tile_fetcher=primary_tile_fetcher,
+        aux_tile_fetcher={"nuclei": dapi_tile_fetcher},
+        dimension_order=(Axes.ROUND, Axes.CH, Axes.ZPLANE)
     )
 
-    for image_number, primary_image in images:
-        print(f"Filtering image {image_number}...")
-        filter_kwargs = dict(
-            in_place=True,
-            verbose=True,
-            n_processes=n_processes
-        )
-        clip1.run(primary_image, **filter_kwargs)
-        bandpass.run(primary_image, **filter_kwargs)
-        glp.run(primary_image, **filter_kwargs)
-        clip2.run(primary_image, **filter_kwargs)
+    build_codebook(os.path.join(abs_input_dir, 'list.txt'),
+                   os.path.join(abs_output_dir, 'codebook.json'))
+
+
+format_experiment('/Users/mcai/RNAScope HiPlex/RNAScope_grayscale', 'starfish_format')
+
+###################################################################################################
+# starfish pipeline
+# -----------------
+# To process the images with starfish first load the experiment: codebook, primary images, and DAPI
+# images.
+
+from starfish import Experiment, FieldOfView
+
+# from starfish import Experiment, FieldOfView
+experiment = Experiment.from_json('https://d26bikfyahveg8.cloudfront.net/RNAScope/HiPlex_formatted/experiment.json')
+codebook = experiment.codebook
+imgs = experiment["fov_000"].get_image(FieldOfView.PRIMARY_IMAGES) # primary images contain fluorescence RNA spots
+dapi = experiment["fov_000"].get_image('nuclei') # dapi images conatain nuclei
+
+###################################################################################################
+# Visually check images look correct.
+
+# Uncomment code below to visualize with napari
+#%gui qt
+#from starfish import display
+
+#viewer = display(imgs)
+
+###################################################################################################
+# Image Registration
+# --------------------
+# Images need to be registered. Here we use dapi to learn the translation transform of images
+# relative to the first round, and then apply it to imgs.
+
+%matplotlib inline
+
+import matplotlib
+from starfish.util.plot import diagnose_registration
+from starfish.types import Axes
+
+# Visualize alignment of dapi images across rounds
+matplotlib.rcParams["figure.dpi"] = 250
+diagnose_registration(dapi, {Axes.ROUND:0}, {Axes.ROUND:1}, {Axes.ROUND:2})
+
+###################################################################################################
+# We use FFT cross-correlation to find the translational shift with precision of 1/1000th of a
+# pixel. No scaling or rotation is computed.
+
+from starfish.image import LearnTransform
+
+learn_translation = LearnTransform.Translation(reference_stack=dapi.sel({Axes.ROUND: 0}), axes=Axes.ROUND, upsampling=1000)
+transforms_list = learn_translation.run(dapi)
+
+###################################################################################################
+# Now we apply the learned transform to dapi images and confirm registration was successful.
+
+from starfish.image import ApplyTransform
+
+warp = ApplyTransform.Warp()
+registered_dapi = warp.run(dapi, transforms_list=transforms_list, in_place=False)
+diagnose_registration(registered_dapi, {Axes.ROUND:0}, {Axes.ROUND:1}, {Axes.ROUND:2})
+
+###################################################################################################
+# Due to a bug, we need to repeat learn_translation.run before applying transform to primary images.
+
+transforms_list = learn_translation.run(dapi)
+registered_imgs = warp.run(imgs, transforms_list=transforms_list, in_place=False)
+
+###################################################################################################
+# Signal Enhancement and Background Reduction
+# -------------------------------------------
+# Representative raw primary images from each channel show RNA spots have Airy pattern in channels 1
+# and 2 with faint concentric rings around discs. Channels 3 and 4 have more background, lower SNR.
+
+import matplotlib.pyplot as plt
+from starfish.util.plot import imshow_plane
+
+f, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, figsize=(14, 4.5))
+imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 0, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax1, title='Ch 1')
+imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 1, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax2, title='Ch 2')
+imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 2, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax3, title='Ch 3')
+imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 3, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax4, title='Ch 4')
+f.suptitle('raw primary images')
+f.tight_layout()
+
+###################################################################################################
+# The background can be removed with a white-tophat filter. The kernel has a radius larger than
+# an RNA spot, so only foreground larger than RNA is removed.
+
+from starfish.image import Filter
+
+# white tophat filter
+whitetophat = Filter.WhiteTophat(masking_radius=3, is_volume=False)
+wth_imgs = whitetophat.run(registered_imgs, in_place=False)
 
-        print("Calling spots...")
-        spots = tlmpf.run(primary_image)
-        print("Decoding spots...")
-        decoded_intensities = decoder.run(spots=spots)
-        all_intensities.append(decoded_intensities)
+f, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, figsize=(14, 4.5))
+imshow_plane(wth_imgs, sel={Axes.ROUND: 0, Axes.CH: 0, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax1, title='Ch 1')
+imshow_plane(wth_imgs, sel={Axes.ROUND: 0, Axes.CH: 1, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax2, title='Ch 2')
+imshow_plane(wth_imgs, sel={Axes.ROUND: 0, Axes.CH: 2, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax3, title='Ch 3')
+imshow_plane(wth_imgs, sel={Axes.ROUND: 0, Axes.CH: 3, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax4, title='Ch 4')
+f.suptitle('white tophat filtered')
+f.tight_layout()
 
-    decoded = DecodedIntensityTable.concatenate_intensity_tables(all_intensities)
-    decoded = decoded[decoded["total_intensity"] > .025]
+###################################################################################################
+# Spot Finding with BlobDetector
+# ------------------------------
+# Since each gene is detected in an independent (round, channel), run BlobDetector without a
+# reference_image. This will find spots in every (round, channel) image.
+#
+# Run with is_volume=True because of issue #1870
+
+from starfish.spots import FindSpots
+
+bd = FindSpots.BlobDetector(
+    min_sigma=0.5,
+    max_sigma=2,
+    num_sigma=9,
+    threshold=0.1,
+    is_volume=True,
+    overlap=0.1,
+    measurement_type='mean',
+)
+bd_spots = bd.run(image_stack=wth_imgs)
+
+###################################################################################################
+# To check the SpotFindingResults with filtered fluorescence image, we first need to separate the
+# results by (round, channel) and convert to numpy array. Here we store the arrays in a list.
+
+# save SpotAttributes for each (round,channel) as numpy array in list
+bd_spots_numpy = list()
+for rnd in bd_spots.round_labels:
+    for ch in bd_spots.ch_labels:
+        bd_spots_numpy.append(bd_spots[{Axes.CH:ch, Axes.ROUND:rnd}].spot_attrs.data[['z', 'y', 'x']].to_numpy())
+
+###################################################################################################
+# We can interactively visualize the SpotFindingResults for each (round, channel) as a separate
+# points layer in napari on top of the ImageStack. This is the ideal way to explore results and
+# find errors.
+
+# display found spots for each (round,channel) as a layer in napari
+viewer = display(stack=wth_imgs)
+layer_index = 0
+for rnd in bd_spots.round_labels:
+    for ch in bd_spots.ch_labels:
+        viewer.add_points(data=bd_spots_numpy[layer_index], symbol='ring', face_color='red', size=5,
+                          name=f'r: {rnd}, ch: {ch}', visible=False)
+        layer_index = layer_index + 1
+
+###################################################################################################
+# For readability of this notebook in printed format, here are the SpotFindingResults plotted in
+# a figure.
+
+f, ((ax1, ax2, ax3, ax4), (ax5, ax6, ax7, ax8)) = plt.subplots(nrows = 2, ncols=4, figsize=(14, 9))
+imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 0, Axes.X: (200,400),
+                                   Axes.Y: (250, 450)}, ax=ax1, title='Ch 1')
+imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 1, Axes.X: (200,400),
+                                   Axes.Y: (250, 450)}, ax=ax2, title='Ch 2')
+imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 2, Axes.X: (200,400),
+                                   Axes.Y: (250, 450)}, ax=ax3, title='Ch 3')
+imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 3, Axes.X: (200,400),
+                                   Axes.Y: (250, 450)}, ax=ax4, title='Ch 4')
+layer_index = 0
+for ax in [ax5, ax6, ax7, ax8]:
+    ax.scatter(bd_spots_numpy[layer_index][:,2], bd_spots_numpy[layer_index][:,1], s=3,
+               facecolors='none', edgecolors='r')
+    ax.set_xlim([200, 400])
+    ax.set_ylim([250, 450])
+    ax.invert_yaxis()
+    ax.set_aspect('equal')
+    layer_index = layer_index + 1
+f.suptitle('blobdetector spots found')
+f.tight_layout()
 
-    return primary_image, decoded
+###################################################################################################
+# Decoding Spots
+# --------------
+# For RNAScope, decoding is simply labeling each spot with the gene that corresponds to the (round,
+# channel) it was found in. Therefore, we can use SimpleLookupDecoder here.
+
+from starfish.spots import DecodeSpots
+
+decoder = DecodeSpots.SimpleLookupDecoder(codebook=experiment.codebook)
+decoded_intensities = decoder.run(spots=bd_spots)
+
+###################################################################################################
+# Watershed Segmentation of Cells
+# -------------------------------
+# To then partition RNA spots into single cells, we first need to segment the image into regions
+# that are defined with binary masks. Here we will use intensity thresholds, connected component
+# analysis, and watershed to do the image segmentation.
+#
+# We take advantage of the cell background in the unfiltered primary images and treat it as a
+# "cell stain" image. starfish's Segment.Watershed uses the dapi image to seed a watershed of the
+# "cell stain" image, resulting in binary masks that define the area regions of the cells.
+
+from starfish.image import Segment
+
+# set parameters
+dapi_thresh = .15  # global threshold value for nuclei images
+stain_thresh = .16  # global threshold value for primary images
+min_dist = 17  # minimum distance (pixels) between nuclei distance transformed peaks
+
+seg = Segment.Watershed(
+    nuclei_threshold=dapi_thresh,
+    input_threshold=stain_thresh,
+    min_distance=min_dist
+)
+
+# masks is BinaryMaskCollection for downstream steps
+masks = seg.run(registered_imgs, registered_dapi)
+
+# display intermediate images and result
+seg.show()
 
 ###################################################################################################
-# Load data, run pipeline, display results
-# ----------------------------------------
+# Assign Spots to Cells
+# ---------------------
+# Once the image is segmented into single cells, assigning RNA spots is simple.
+# AssignTargets.Label labels each spot with the cell mask it is located in (or 'nan' if not
+# located in a cell). Then the DecodedIntensityTable can be transformed to an ExpressionMatrix
+# and viewed as a DataFrame or saved for downstream analysis.
 
-experiment = starfish.data.allen_smFISH(use_test_data=True)
+from starfish.spots import AssignTargets
 
-image, intensities = processing_pipeline(experiment, fov_name='fov_001')
+# Assign spots to cells by labeling each spot with cell_id
+al = AssignTargets.Label()
+labeled = al.run(masks, decoded_intensities)
+
+# Filter out spots that are not located in any cell mask
+labeled_filtered = labeled[labeled.cell_id != 'nan']
+
+# Transform to expression matrix and show first 3 genes
+mat = labeled_filtered.to_expression_matrix()
+mat.to_pandas()
+
+###################################################################################################
+# Save ExpressionMatrix as .h5ad-format AnnData
+# ---------------------------------------------
 
-# uncomment the below line to visualize the output with the spot calls.
-# viewer = starfish.display(image, intensities)
\ No newline at end of file
+mat.save_anndata('RNAScope_HiPlex_Example.h5ad')
\ No newline at end of file

From 505a4f6774ffff80a77685ba119ce55da6893aa4 Mon Sep 17 00:00:00 2001
From: Matt Cai <matt.cai@gmail.com>
Date: Wed, 29 Jul 2020 20:48:03 -0700
Subject: [PATCH 3/3] fixed so it would build docs

---
 examples/pipelines/rnascope_hiplex.py | 46 +++++++++++++++++----------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/examples/pipelines/rnascope_hiplex.py b/examples/pipelines/rnascope_hiplex.py
index 5c9d5f993..f102332de 100644
--- a/examples/pipelines/rnascope_hiplex.py
+++ b/examples/pipelines/rnascope_hiplex.py
@@ -2,7 +2,7 @@
 .. _RNAScope_processing_example:
 
 Single Field of View for RNAScope HiPlex
-========================================
+=========================================
 
 ACDBio has a 12-gene HiPlex assay that uses 3 rounds of fluorescence imaging with 4 fluorescence RNA
 channels and 1 DAPI-stained nuclei channel. In the example data provided by ACDBio, these 15 images
@@ -12,19 +12,29 @@
 into cell x gene expression matrices using modules contained in starfish.
 
 Feedback from ACDBio:
+
 * Blob detection needs improvement in:
+
   * Low-contrast regions - skip background subtraction and enhance contrast instead
+
   * Clustered regions - use pre-defined spot size to break up clusters and count as single spots
-  * High-intensity saturated regions - use pre-defined spot size and intensity to to resolve
-  saturated regions as single spots
-  * Airy disks - either remove airy disks prior to spot finding or modify spot finding algorithm
-  to account for airy disk pattern
+
+  * High-intensity saturated regions - use pre-defined spot size and intensity to resolve saturated regions as single spots
+
+  * Airy disks - either remove airy disks prior to spot finding or modify spot finding algorithm to account for airy disk pattern
+
 * Image registration could be improved with rotation (low priority)
+
 * Cell Segmentation (watershed)
+
   * sum intensities instead of max
+
   * connect nuclear fragments with morphological operation
+
 * Expression Matrix
+
   * Summary of data like dot size, intensity, etc - this is in the DecodedIntensityTable object
+
   * Further analysis e.g. normalization - not in the scope of starfish
 
 * Show blob results overlaid on raw
@@ -79,8 +89,8 @@ def convert_rgb_to_greyscale(input_dir, output_dir):
             # 601-2 luma transform from PIL
             img.save(os.path.join(abs_output_dir, file), format='tiff')
 
-
-convert_rgb_to_greyscale('original_data', 'RNAScope_grayscale')
+# Uncomment to run
+#convert_rgb_to_greyscale('original_data', 'RNAScope_grayscale')
 
 ###################################################################################################
 # Formatting into SpaceTx Format
@@ -324,7 +334,8 @@ def format_experiment(input_dir, output_dir) -> None:
                    os.path.join(abs_output_dir, 'codebook.json'))
 
 
-format_experiment('/Users/mcai/RNAScope HiPlex/RNAScope_grayscale', 'starfish_format')
+# Uncomment to run
+#format_experiment('/Users/mcai/RNAScope HiPlex/RNAScope_grayscale', 'starfish_format')
 
 ###################################################################################################
 # starfish pipeline
@@ -355,7 +366,7 @@ def format_experiment(input_dir, output_dir) -> None:
 # Images need to be registered. Here we use dapi to learn the translation transform of images
 # relative to the first round, and then apply it to imgs.
 
-%matplotlib inline
+#%matplotlib inline
 
 import matplotlib
 from starfish.util.plot import diagnose_registration
@@ -461,13 +472,14 @@ def format_experiment(input_dir, output_dir) -> None:
 # find errors.
 
 # display found spots for each (round,channel) as a layer in napari
-viewer = display(stack=wth_imgs)
-layer_index = 0
-for rnd in bd_spots.round_labels:
-    for ch in bd_spots.ch_labels:
-        viewer.add_points(data=bd_spots_numpy[layer_index], symbol='ring', face_color='red', size=5,
-                          name=f'r: {rnd}, ch: {ch}', visible=False)
-        layer_index = layer_index + 1
+#viewer = display(stack=wth_imgs)
+#layer_index = 0
+#for rnd in bd_spots.round_labels:
+#    for ch in bd_spots.ch_labels:
+#        viewer.add_points(data=bd_spots_numpy[layer_index], symbol='ring', face_color='red',
+#                size=5,
+#                          name=f'r: {rnd}, ch: {ch}', visible=False)
+#        layer_index = layer_index + 1
 
 ###################################################################################################
 # For readability of this notebook in printed format, here are the SpotFindingResults plotted in
@@ -560,4 +572,4 @@ def format_experiment(input_dir, output_dir) -> None:
 # Save ExpressionMatrix as .h5ad-format AnnData
 # ---------------------------------------------
 
-mat.save_anndata('RNAScope_HiPlex_Example.h5ad')
\ No newline at end of file
+#mat.save_anndata('RNAScope_HiPlex_Example.h5ad')
\ No newline at end of file