From 321b7db9ee53f028c170028d653141a59854be8a Mon Sep 17 00:00:00 2001 From: Matt Cai Date: Thu, 7 May 2020 15:51:08 -0700 Subject: [PATCH 1/3] outline of rnascope hiplex pipeline example --- examples/pipelines/rnascope_hiplex.py | 139 ++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 examples/pipelines/rnascope_hiplex.py diff --git a/examples/pipelines/rnascope_hiplex.py b/examples/pipelines/rnascope_hiplex.py new file mode 100644 index 000000000..bde3cbab9 --- /dev/null +++ b/examples/pipelines/rnascope_hiplex.py @@ -0,0 +1,139 @@ +""" +.. _RNAScope_processing_example: + +Single Field of View for RNAScope HiPlex +========================================= + +This notebook walks through a work flow that analyzes one field of view of _____ from +ACDBio, using the starfish package. + +This example processes an experiment with a single round from a single field of view of RNAScope +HiPlex data taken from ____. These data + +The data consist of 15 images from 3 round, 5 channels, and 1 z-plane. Each image is +(2048, 2048) (y, x). +""" + +from typing import Optional, Tuple +from IPython import get_ipython + +import starfish +import starfish.data +from starfish import FieldOfView, DecodedIntensityTable +from starfish.types import TraceBuildingStrategies + +# equivalent to %gui qt +ipython = get_ipython() +ipython.magic("gui qt5") + + +################################################################################################### +# Define image filters +# -------------------- +# The 3d smFISH workflow run by the Allen runs a bandpass filter to remove high and low frequency +# signal and blurs over z with a 1-pixel gaussian to smooth the signal over the z-axis. +# +# low-intensity signal is (stringently) clipped from the images before and after these filters. + +# bandpass filter to remove cellular background and camera noise +bandpass = starfish.image.Filter.Bandpass(lshort=.5, llong=7, threshold=0.0) + +# gaussian blur to smooth z-axis +glp = starfish.image.Filter.GaussianLowPass( + sigma=(1, 0, 0), + is_volume=True +) + +# pre-filter clip to remove low-intensity background signal +clip1 = starfish.image.Filter.Clip(p_min=50, p_max=100) + +# post-filter clip to eliminate all but the highest-intensity peaks +clip2 = starfish.image.Filter.Clip(p_min=99, p_max=100, is_volume=True) + +################################################################################################### +# Define a spot detection method +# ------------------------------ +# Spots are detected using a spot finder based on trackpy's locate method, which identifies +# local intensity maxima, and spots are matched to the gene they represent by looking them up in a +# codebook that records which (round, channel) matches which gene target. + +tlmpf = starfish.spots.FindSpots.TrackpyLocalMaxPeakFinder( + spot_diameter=5, # must be odd integer + min_mass=0.02, + max_size=2, # this is max radius + separation=7, + preprocess=False, + percentile=10, # this is irrelevant when min_mass, spot_diameter, and max_size are set properly + verbose=True, + is_volume=True, +) + +################################################################################################### +# Construct the pipeline +# ---------------------- + +def processing_pipeline( + experiment: starfish.Experiment, + fov_name: str, + n_processes: Optional[int]=None +) -> Tuple[starfish.ImageStack, starfish.IntensityTable]: + """Process a single field of view of an experiment + + Parameters + ---------- + experiment : starfish.Experiment + starfish experiment containing fields of view to analyze + fov_name : str + name of the field of view to process + n_processes : int + + Returns + ------- + starfish.IntensityTable : + decoded IntensityTable containing spots matched to the genes they are hybridized against + """ + + all_intensities = list() + codebook = experiment.codebook + + print("Loading images...") + images = enumerate(experiment[fov_name].get_images(FieldOfView.PRIMARY_IMAGES)) + + decoder = starfish.spots.DecodeSpots.PerRoundMaxChannel( + codebook=codebook, + trace_building_strategy=TraceBuildingStrategies.SEQUENTIAL + ) + + for image_number, primary_image in images: + print(f"Filtering image {image_number}...") + filter_kwargs = dict( + in_place=True, + verbose=True, + n_processes=n_processes + ) + clip1.run(primary_image, **filter_kwargs) + bandpass.run(primary_image, **filter_kwargs) + glp.run(primary_image, **filter_kwargs) + clip2.run(primary_image, **filter_kwargs) + + print("Calling spots...") + spots = tlmpf.run(primary_image) + print("Decoding spots...") + decoded_intensities = decoder.run(spots=spots) + all_intensities.append(decoded_intensities) + + decoded = DecodedIntensityTable.concatenate_intensity_tables(all_intensities) + decoded = decoded[decoded["total_intensity"] > .025] + + return primary_image, decoded + +################################################################################################### +# Load data, run pipeline, display results +# ---------------------------------------- + +experiment = starfish.data.allen_smFISH(use_test_data=True) + +image, intensities = processing_pipeline(experiment, fov_name='fov_001') + +# uncomment the below line to visualize the output with the spot calls. +# viewer = starfish.display(image, intensities) \ No newline at end of file From 0e3c332a3cfa63231abbdcf3c213ba696ccd524e Mon Sep 17 00:00:00 2001 From: Matt Cai Date: Wed, 29 Jul 2020 20:15:42 -0700 Subject: [PATCH 2/3] added current state of rnascope_hiplex.py pipeline in gallery along with feedback from ACDBio. --- examples/pipelines/rnascope_hiplex.py | 618 ++++++++++++++++++++++---- 1 file changed, 521 insertions(+), 97 deletions(-) diff --git a/examples/pipelines/rnascope_hiplex.py b/examples/pipelines/rnascope_hiplex.py index bde3cbab9..5c9d5f993 100644 --- a/examples/pipelines/rnascope_hiplex.py +++ b/examples/pipelines/rnascope_hiplex.py @@ -2,138 +2,562 @@ .. _RNAScope_processing_example: Single Field of View for RNAScope HiPlex -========================================= +======================================== -This notebook walks through a work flow that analyzes one field of view of _____ from -ACDBio, using the starfish package. +ACDBio has a 12-gene HiPlex assay that uses 3 rounds of fluorescence imaging with 4 fluorescence RNA +channels and 1 DAPI-stained nuclei channel. In the example data provided by ACDBio, these 15 images +are saved as single-plane RGB tiffs with filenames that indicate the round and channel. A key for +mapping images to genes is included in list.txt. This vignette will demonstrate how to format the +data into SpaceTx format, register the images into the same coordinate system, and process them +into cell x gene expression matrices using modules contained in starfish. -This example processes an experiment with a single round from a single field of view of RNAScope -HiPlex data taken from ____. These data +Feedback from ACDBio: +* Blob detection needs improvement in: + * Low-contrast regions - skip background subtraction and enhance contrast instead + * Clustered regions - use pre-defined spot size to break up clusters and count as single spots + * High-intensity saturated regions - use pre-defined spot size and intensity to to resolve + saturated regions as single spots + * Airy disks - either remove airy disks prior to spot finding or modify spot finding algorithm + to account for airy disk pattern +* Image registration could be improved with rotation (low priority) +* Cell Segmentation (watershed) + * sum intensities instead of max + * connect nuclear fragments with morphological operation +* Expression Matrix + * Summary of data like dot size, intensity, etc - this is in the DecodedIntensityTable object + * Further analysis e.g. normalization - not in the scope of starfish -The data consist of 15 images from 3 round, 5 channels, and 1 z-plane. Each image is -(2048, 2048) (y, x). +* Show blob results overlaid on raw + +* overlay_spot_calls() wrong coordinates? """ -from typing import Optional, Tuple -from IPython import get_ipython +################################################################################################### +# Convert RGB images to greyscale images +# -------------------------------------- +# starfish and the SpaceTx format expect greyscale images so the RGB data must first be converted. +# This can be done in many tools, such as MATLAB and ImageJ. Here we chose to use Python Imaging +# Library (PIL): -import starfish -import starfish.data -from starfish import FieldOfView, DecodedIntensityTable -from starfish.types import TraceBuildingStrategies +import os +import numpy as np +from shutil import copy2 +from PIL import Image -# equivalent to %gui qt -ipython = get_ipython() -ipython.magic("gui qt5") +def convert_rgb_to_greyscale(input_dir, output_dir): + """ + Original example data is stored in a directory named 'original_data' and consists of 15 RGB + images and 1 text file. + Copy everything into a new `output_dir` and convert any RGB tiffs to greyscale. -################################################################################################### -# Define image filters -# -------------------- -# The 3d smFISH workflow run by the Allen runs a bandpass filter to remove high and low frequency -# signal and blurs over z with a 1-pixel gaussian to smooth the signal over the z-axis. -# -# low-intensity signal is (stringently) clipped from the images before and after these filters. + └── original_data + ├── list.txt + ├── R1_DAPI.tif + ├── R1_Probe T1.tif + ├── R1_Probe T2.tif + ├── R1_Probe T3.tif + ├── R1_Probe T4.tif + ├── R2_DAPI.tif + ├── R2_Probe T5.tif + ├── R2_Probe T6.tif + ├── ... -# bandpass filter to remove cellular background and camera noise -bandpass = starfish.image.Filter.Bandpass(lshort=.5, llong=7, threshold=0.0) + """ -# gaussian blur to smooth z-axis -glp = starfish.image.Filter.GaussianLowPass( - sigma=(1, 0, 0), - is_volume=True -) + abs_input_dir = os.path.abspath(input_dir) + abs_output_dir = os.path.abspath(output_dir) + os.makedirs(abs_output_dir, exist_ok=True) -# pre-filter clip to remove low-intensity background signal -clip1 = starfish.image.Filter.Clip(p_min=50, p_max=100) + for file in os.listdir(abs_input_dir): + if file.endswith(".txt"): + copy2(os.path.join(abs_input_dir, file), abs_output_dir) + elif file.endswith(".tif"): + rgb_array = np.asarray(Image.open(os.path.join(abs_input_dir, file))) + img = Image.fromarray(np.amax(rgb_array, + axis=2)) # take the max rgb value rather than ITU-R + # 601-2 luma transform from PIL + img.save(os.path.join(abs_output_dir, file), format='tiff') -# post-filter clip to eliminate all but the highest-intensity peaks -clip2 = starfish.image.Filter.Clip(p_min=99, p_max=100, is_volume=True) + +convert_rgb_to_greyscale('original_data', 'RNAScope_grayscale') ################################################################################################### -# Define a spot detection method +# Formatting into SpaceTx Format # ------------------------------ -# Spots are detected using a spot finder based on trackpy's locate method, which identifies -# local intensity maxima, and spots are matched to the gene they represent by looking them up in a -# codebook that records which (round, channel) matches which gene target. - -tlmpf = starfish.spots.FindSpots.TrackpyLocalMaxPeakFinder( - spot_diameter=5, # must be odd integer - min_mass=0.02, - max_size=2, # this is max radius - separation=7, - preprocess=False, - percentile=10, # this is irrelevant when min_mass, spot_diameter, and max_size are set properly - verbose=True, - is_volume=True, -) +# Now that all the images are greyscale, we can use the TileFetcher interface and +# write_experiment_json() function to load the images and save in SpaceTx Format. We also define a +# build_codebook() function that turns the gene list.txt into a codebook.json, which is needed by +# experiment.json. -################################################################################################### -# Construct the pipeline -# ---------------------- +import functools +import os +from typing import Mapping, Tuple, Union + +import numpy as np +from skimage.io import imread +from slicedimage import ImageFormat + +from starfish import Codebook +from starfish.experiment.builder import FetchedTile, TileFetcher, write_experiment_json +from starfish.types import Axes, Coordinates, Features + + +@functools.lru_cache(maxsize=1) +def cached_read_fn(file_path) -> np.ndarray: + return imread(file_path) + + +def ch_to_Tnum(ch_label: int, round_label: int): + """ + The file naming scheme for channel doesn't start over at '1' for each round. + Instead it continues counting from the previous round. + Convert channel int used by SpaceTx format to T-number used in filenames. + """ + + return 4 * (round_label) + (ch_label + 1) + + +class RNATile(FetchedTile): + + def __init__( + self, + file_path: str + ) -> None: + """Parser for an RNAScope tile. + + Parameters + ---------- + file_path : str + location of the tiff + coordinates : + the coordinates for the selected RNAScope tile, extracted from the metadata + """ + self.file_path = file_path + + # dummy coordinates must match shape + self._coordinates = { + Coordinates.X: (0.0, 0.1120), + Coordinates.Y: (0.0, 0.0539), + Coordinates.Z: (0.0, 0.0001), + } + + @property + def shape(self) -> Mapping[Axes, int]: + return {Axes.Y: 539, Axes.X: 1120} # hard coded for these datasets. + + @property + def coordinates(self): + return self._coordinates # noqa + + def tile_data(self) -> np.ndarray: + return cached_read_fn(self.file_path) # slice out the correct z-plane + + +class RNADAPITile(FetchedTile): + + def __init__( + self, + file_path: str + ) -> None: + """Parser for an RNAScope tile. -def processing_pipeline( - experiment: starfish.Experiment, - fov_name: str, - n_processes: Optional[int]=None -) -> Tuple[starfish.ImageStack, starfish.IntensityTable]: - """Process a single field of view of an experiment + Parameters + ---------- + file_path : str + location of the tiff + coordinates : + the coordinates for the selected RNAScope tile, extracted from the metadata + """ + self.file_path = file_path + + # dummy coordinates must match shape + self._coordinates = { + Coordinates.X: (0.0, 0.1120), + Coordinates.Y: (0.0, 0.0539), + Coordinates.Z: (0.0, 0.0001), + } + + @property + def shape(self) -> Mapping[Axes, int]: + return {Axes.Y: 539, Axes.X: 1120} # hard coded for these datasets. + + @property + def coordinates(self): + return self._coordinates # noqa + + def tile_data(self) -> np.ndarray: + return cached_read_fn(self.file_path) # slice out the correct z-plane + + +class RNATileFetcher(TileFetcher): + + def __init__(self, input_dir: str) -> None: + """Implement a TileFetcher for RNAScope data. + + This TileFetcher constructs spaceTx format for a stitched tissue slice, where + `input_dir` is a directory containing .tif files with the following structure: + + └── RNAScope_greyscale + ├── R1_DAPI.tif + ├── R1_Probe T1.tif + ├── R1_Probe T2.tif + ├── R1_Probe T3.tif + ├── R1_Probe T4.tif + ├── R2_DAPI.tif + ├── R2_Probe T5.tif + ├── R2_Probe T6.tif + ├── ... + + Notes + ----- + - The spatial organization of the fields of view are not known so they are filled by + dummy coordinates + """ + + self.input_dir = input_dir + self.num_z = 1 + + def get_tile( + self, fov_id: int, round_label: int, ch_label: int, zplane_label: int) -> FetchedTile: + filename = f"R{(round_label + 1)}_Probe T{ch_to_Tnum(ch_label, round_label)}.tif" + return RNATile(os.path.join(self.input_dir, filename)) + + +class RNADAPITileFetcher(TileFetcher): + + def __init__(self, input_dir: str) -> None: + """Implement a TileFetcher for dapi auxiliary images of RNAScope experiment. + + Every round has a DAPI image, which can be used for image registration and cell segmentation + + """ + self.input_dir = input_dir + + def get_tile( + self, fov_id: int, round_label: int, ch_label: int, zplane_label: int) -> FetchedTile: + filename = f"R{(round_label + 1)}_DAPI.tif" + return RNADAPITile(os.path.join(self.input_dir, filename)) + + +def build_codebook(gene_file: str, codebook_json: str) -> Codebook: + """Writes a codebook json file that a experiment.json file can point to. + + Data provided by ACDBio included a file with genes listed in order from T1-T12 starting from + line 3. + Codebook will map 3 rounds and 4 channels to genes. Parameters ---------- - experiment : starfish.Experiment - starfish experiment containing fields of view to analyze - fov_name : str - name of the field of view to process - n_processes : int + gene_list : str + Parse text file with genes listed in order of Tnum + codebook_json : str + Save a codebook to json using SpaceTx Format. Returns ------- - starfish.IntensityTable : - decoded IntensityTable containing spots matched to the genes they are hybridized against + Codebook : + Codebook object in SpaceTx format. + """ + mappings = list() + rnd = 0 + ch = 0 + with open(gene_file, 'r') as gene_list: + for line_num, line in enumerate(gene_list): + if line_num > 1: + mappings.append({Features.CODEWORD: [ + {Axes.ROUND.value: rnd, Axes.CH.value: ch, Features.CODE_VALUE: 1}], + Features.TARGET: line.strip()}) + ch = ch + 1 + if ch > 3: + rnd = rnd + 1 + ch = 0 + + Codebook.from_code_array(mappings).to_json(codebook_json) + return Codebook.from_code_array(mappings) + + +def format_experiment(input_dir, output_dir) -> None: + """CLI entrypoint for spaceTx format construction for RNAScope data + + Parameters + ---------- + input_dir : str + directory containing input data. See TileFetcher classes for expected directory structures. + output_dir : str + directory that 2-d images and starfish metadata will be written to. """ + abs_output_dir = os.path.abspath(output_dir) + abs_input_dir = os.path.abspath(input_dir) + os.makedirs(abs_output_dir, exist_ok=True) - all_intensities = list() - codebook = experiment.codebook + primary_tile_fetcher = RNATileFetcher(abs_input_dir) + dapi_tile_fetcher = RNADAPITileFetcher(abs_input_dir) - print("Loading images...") - images = enumerate(experiment[fov_name].get_images(FieldOfView.PRIMARY_IMAGES)) + # This is hardcoded for this example data set + primary_image_dimensions: Mapping[Union[str, Axes], int] = { + Axes.ROUND: 3, + Axes.CH: 4, + Axes.ZPLANE: 1, + } - decoder = starfish.spots.DecodeSpots.PerRoundMaxChannel( - codebook=codebook, - trace_building_strategy=TraceBuildingStrategies.SEQUENTIAL + aux_images_dimensions: Mapping[str, Mapping[Union[str, Axes], int]] = { + "nuclei": { + Axes.ROUND: 3, + Axes.CH: 1, + Axes.ZPLANE: 1, + }, + } + + write_experiment_json( + path=output_dir, + fov_count=1, + tile_format=ImageFormat.TIFF, + primary_image_dimensions=primary_image_dimensions, + aux_name_to_dimensions=aux_images_dimensions, + primary_tile_fetcher=primary_tile_fetcher, + aux_tile_fetcher={"nuclei": dapi_tile_fetcher}, + dimension_order=(Axes.ROUND, Axes.CH, Axes.ZPLANE) ) - for image_number, primary_image in images: - print(f"Filtering image {image_number}...") - filter_kwargs = dict( - in_place=True, - verbose=True, - n_processes=n_processes - ) - clip1.run(primary_image, **filter_kwargs) - bandpass.run(primary_image, **filter_kwargs) - glp.run(primary_image, **filter_kwargs) - clip2.run(primary_image, **filter_kwargs) + build_codebook(os.path.join(abs_input_dir, 'list.txt'), + os.path.join(abs_output_dir, 'codebook.json')) + + +format_experiment('/Users/mcai/RNAScope HiPlex/RNAScope_grayscale', 'starfish_format') + +################################################################################################### +# starfish pipeline +# ----------------- +# To process the images with starfish first load the experiment: codebook, primary images, and DAPI +# images. + +from starfish import Experiment, FieldOfView + +# from starfish import Experiment, FieldOfView +experiment = Experiment.from_json('https://d26bikfyahveg8.cloudfront.net/RNAScope/HiPlex_formatted/experiment.json') +codebook = experiment.codebook +imgs = experiment["fov_000"].get_image(FieldOfView.PRIMARY_IMAGES) # primary images contain fluorescence RNA spots +dapi = experiment["fov_000"].get_image('nuclei') # dapi images conatain nuclei + +################################################################################################### +# Visually check images look correct. + +# Uncomment code below to visualize with napari +#%gui qt +#from starfish import display + +#viewer = display(imgs) + +################################################################################################### +# Image Registration +# -------------------- +# Images need to be registered. Here we use dapi to learn the translation transform of images +# relative to the first round, and then apply it to imgs. + +%matplotlib inline + +import matplotlib +from starfish.util.plot import diagnose_registration +from starfish.types import Axes + +# Visualize alignment of dapi images across rounds +matplotlib.rcParams["figure.dpi"] = 250 +diagnose_registration(dapi, {Axes.ROUND:0}, {Axes.ROUND:1}, {Axes.ROUND:2}) + +################################################################################################### +# We use FFT cross-correlation to find the translational shift with precision of 1/1000th of a +# pixel. No scaling or rotation is computed. + +from starfish.image import LearnTransform + +learn_translation = LearnTransform.Translation(reference_stack=dapi.sel({Axes.ROUND: 0}), axes=Axes.ROUND, upsampling=1000) +transforms_list = learn_translation.run(dapi) + +################################################################################################### +# Now we apply the learned transform to dapi images and confirm registration was successful. + +from starfish.image import ApplyTransform + +warp = ApplyTransform.Warp() +registered_dapi = warp.run(dapi, transforms_list=transforms_list, in_place=False) +diagnose_registration(registered_dapi, {Axes.ROUND:0}, {Axes.ROUND:1}, {Axes.ROUND:2}) + +################################################################################################### +# Due to a bug, we need to repeat learn_translation.run before applying transform to primary images. + +transforms_list = learn_translation.run(dapi) +registered_imgs = warp.run(imgs, transforms_list=transforms_list, in_place=False) + +################################################################################################### +# Signal Enhancement and Background Reduction +# ------------------------------------------- +# Representative raw primary images from each channel show RNA spots have Airy pattern in channels 1 +# and 2 with faint concentric rings around discs. Channels 3 and 4 have more background, lower SNR. + +import matplotlib.pyplot as plt +from starfish.util.plot import imshow_plane + +f, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, figsize=(14, 4.5)) +imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 0, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax1, title='Ch 1') +imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 1, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax2, title='Ch 2') +imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 2, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax3, title='Ch 3') +imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 3, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax4, title='Ch 4') +f.suptitle('raw primary images') +f.tight_layout() + +################################################################################################### +# The background can be removed with a white-tophat filter. The kernel has a radius larger than +# an RNA spot, so only foreground larger than RNA is removed. + +from starfish.image import Filter + +# white tophat filter +whitetophat = Filter.WhiteTophat(masking_radius=3, is_volume=False) +wth_imgs = whitetophat.run(registered_imgs, in_place=False) - print("Calling spots...") - spots = tlmpf.run(primary_image) - print("Decoding spots...") - decoded_intensities = decoder.run(spots=spots) - all_intensities.append(decoded_intensities) +f, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, figsize=(14, 4.5)) +imshow_plane(wth_imgs, sel={Axes.ROUND: 0, Axes.CH: 0, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax1, title='Ch 1') +imshow_plane(wth_imgs, sel={Axes.ROUND: 0, Axes.CH: 1, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax2, title='Ch 2') +imshow_plane(wth_imgs, sel={Axes.ROUND: 0, Axes.CH: 2, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax3, title='Ch 3') +imshow_plane(wth_imgs, sel={Axes.ROUND: 0, Axes.CH: 3, Axes.X: (200,400), Axes.Y: (250,450)}, ax=ax4, title='Ch 4') +f.suptitle('white tophat filtered') +f.tight_layout() - decoded = DecodedIntensityTable.concatenate_intensity_tables(all_intensities) - decoded = decoded[decoded["total_intensity"] > .025] +################################################################################################### +# Spot Finding with BlobDetector +# ------------------------------ +# Since each gene is detected in an independent (round, channel), run BlobDetector without a +# reference_image. This will find spots in every (round, channel) image. +# +# Run with is_volume=True because of issue #1870 + +from starfish.spots import FindSpots + +bd = FindSpots.BlobDetector( + min_sigma=0.5, + max_sigma=2, + num_sigma=9, + threshold=0.1, + is_volume=True, + overlap=0.1, + measurement_type='mean', +) +bd_spots = bd.run(image_stack=wth_imgs) + +################################################################################################### +# To check the SpotFindingResults with filtered fluorescence image, we first need to separate the +# results by (round, channel) and convert to numpy array. Here we store the arrays in a list. + +# save SpotAttributes for each (round,channel) as numpy array in list +bd_spots_numpy = list() +for rnd in bd_spots.round_labels: + for ch in bd_spots.ch_labels: + bd_spots_numpy.append(bd_spots[{Axes.CH:ch, Axes.ROUND:rnd}].spot_attrs.data[['z', 'y', 'x']].to_numpy()) + +################################################################################################### +# We can interactively visualize the SpotFindingResults for each (round, channel) as a separate +# points layer in napari on top of the ImageStack. This is the ideal way to explore results and +# find errors. + +# display found spots for each (round,channel) as a layer in napari +viewer = display(stack=wth_imgs) +layer_index = 0 +for rnd in bd_spots.round_labels: + for ch in bd_spots.ch_labels: + viewer.add_points(data=bd_spots_numpy[layer_index], symbol='ring', face_color='red', size=5, + name=f'r: {rnd}, ch: {ch}', visible=False) + layer_index = layer_index + 1 + +################################################################################################### +# For readability of this notebook in printed format, here are the SpotFindingResults plotted in +# a figure. + +f, ((ax1, ax2, ax3, ax4), (ax5, ax6, ax7, ax8)) = plt.subplots(nrows = 2, ncols=4, figsize=(14, 9)) +imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 0, Axes.X: (200,400), + Axes.Y: (250, 450)}, ax=ax1, title='Ch 1') +imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 1, Axes.X: (200,400), + Axes.Y: (250, 450)}, ax=ax2, title='Ch 2') +imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 2, Axes.X: (200,400), + Axes.Y: (250, 450)}, ax=ax3, title='Ch 3') +imshow_plane(registered_imgs, sel={Axes.ROUND: 0, Axes.CH: 3, Axes.X: (200,400), + Axes.Y: (250, 450)}, ax=ax4, title='Ch 4') +layer_index = 0 +for ax in [ax5, ax6, ax7, ax8]: + ax.scatter(bd_spots_numpy[layer_index][:,2], bd_spots_numpy[layer_index][:,1], s=3, + facecolors='none', edgecolors='r') + ax.set_xlim([200, 400]) + ax.set_ylim([250, 450]) + ax.invert_yaxis() + ax.set_aspect('equal') + layer_index = layer_index + 1 +f.suptitle('blobdetector spots found') +f.tight_layout() - return primary_image, decoded +################################################################################################### +# Decoding Spots +# -------------- +# For RNAScope, decoding is simply labeling each spot with the gene that corresponds to the (round, +# channel) it was found in. Therefore, we can use SimpleLookupDecoder here. + +from starfish.spots import DecodeSpots + +decoder = DecodeSpots.SimpleLookupDecoder(codebook=experiment.codebook) +decoded_intensities = decoder.run(spots=bd_spots) + +################################################################################################### +# Watershed Segmentation of Cells +# ------------------------------- +# To then partition RNA spots into single cells, we first need to segment the image into regions +# that are defined with binary masks. Here we will use intensity thresholds, connected component +# analysis, and watershed to do the image segmentation. +# +# We take advantage of the cell background in the unfiltered primary images and treat it as a +# "cell stain" image. starfish's Segment.Watershed uses the dapi image to seed a watershed of the +# "cell stain" image, resulting in binary masks that define the area regions of the cells. + +from starfish.image import Segment + +# set parameters +dapi_thresh = .15 # global threshold value for nuclei images +stain_thresh = .16 # global threshold value for primary images +min_dist = 17 # minimum distance (pixels) between nuclei distance transformed peaks + +seg = Segment.Watershed( + nuclei_threshold=dapi_thresh, + input_threshold=stain_thresh, + min_distance=min_dist +) + +# masks is BinaryMaskCollection for downstream steps +masks = seg.run(registered_imgs, registered_dapi) + +# display intermediate images and result +seg.show() ################################################################################################### -# Load data, run pipeline, display results -# ---------------------------------------- +# Assign Spots to Cells +# --------------------- +# Once the image is segmented into single cells, assigning RNA spots is simple. +# AssignTargets.Label labels each spot with the cell mask it is located in (or 'nan' if not +# located in a cell). Then the DecodedIntensityTable can be transformed to an ExpressionMatrix +# and viewed as a DataFrame or saved for downstream analysis. -experiment = starfish.data.allen_smFISH(use_test_data=True) +from starfish.spots import AssignTargets -image, intensities = processing_pipeline(experiment, fov_name='fov_001') +# Assign spots to cells by labeling each spot with cell_id +al = AssignTargets.Label() +labeled = al.run(masks, decoded_intensities) + +# Filter out spots that are not located in any cell mask +labeled_filtered = labeled[labeled.cell_id != 'nan'] + +# Transform to expression matrix and show first 3 genes +mat = labeled_filtered.to_expression_matrix() +mat.to_pandas() + +################################################################################################### +# Save ExpressionMatrix as .h5ad-format AnnData +# --------------------------------------------- -# uncomment the below line to visualize the output with the spot calls. -# viewer = starfish.display(image, intensities) \ No newline at end of file +mat.save_anndata('RNAScope_HiPlex_Example.h5ad') \ No newline at end of file From 505a4f6774ffff80a77685ba119ce55da6893aa4 Mon Sep 17 00:00:00 2001 From: Matt Cai Date: Wed, 29 Jul 2020 20:48:03 -0700 Subject: [PATCH 3/3] fixed so it would build docs --- examples/pipelines/rnascope_hiplex.py | 46 +++++++++++++++++---------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/examples/pipelines/rnascope_hiplex.py b/examples/pipelines/rnascope_hiplex.py index 5c9d5f993..f102332de 100644 --- a/examples/pipelines/rnascope_hiplex.py +++ b/examples/pipelines/rnascope_hiplex.py @@ -2,7 +2,7 @@ .. _RNAScope_processing_example: Single Field of View for RNAScope HiPlex -======================================== +========================================= ACDBio has a 12-gene HiPlex assay that uses 3 rounds of fluorescence imaging with 4 fluorescence RNA channels and 1 DAPI-stained nuclei channel. In the example data provided by ACDBio, these 15 images @@ -12,19 +12,29 @@ into cell x gene expression matrices using modules contained in starfish. Feedback from ACDBio: + * Blob detection needs improvement in: + * Low-contrast regions - skip background subtraction and enhance contrast instead + * Clustered regions - use pre-defined spot size to break up clusters and count as single spots - * High-intensity saturated regions - use pre-defined spot size and intensity to to resolve - saturated regions as single spots - * Airy disks - either remove airy disks prior to spot finding or modify spot finding algorithm - to account for airy disk pattern + + * High-intensity saturated regions - use pre-defined spot size and intensity to resolve saturated regions as single spots + + * Airy disks - either remove airy disks prior to spot finding or modify spot finding algorithm to account for airy disk pattern + * Image registration could be improved with rotation (low priority) + * Cell Segmentation (watershed) + * sum intensities instead of max + * connect nuclear fragments with morphological operation + * Expression Matrix + * Summary of data like dot size, intensity, etc - this is in the DecodedIntensityTable object + * Further analysis e.g. normalization - not in the scope of starfish * Show blob results overlaid on raw @@ -79,8 +89,8 @@ def convert_rgb_to_greyscale(input_dir, output_dir): # 601-2 luma transform from PIL img.save(os.path.join(abs_output_dir, file), format='tiff') - -convert_rgb_to_greyscale('original_data', 'RNAScope_grayscale') +# Uncomment to run +#convert_rgb_to_greyscale('original_data', 'RNAScope_grayscale') ################################################################################################### # Formatting into SpaceTx Format @@ -324,7 +334,8 @@ def format_experiment(input_dir, output_dir) -> None: os.path.join(abs_output_dir, 'codebook.json')) -format_experiment('/Users/mcai/RNAScope HiPlex/RNAScope_grayscale', 'starfish_format') +# Uncomment to run +#format_experiment('/Users/mcai/RNAScope HiPlex/RNAScope_grayscale', 'starfish_format') ################################################################################################### # starfish pipeline @@ -355,7 +366,7 @@ def format_experiment(input_dir, output_dir) -> None: # Images need to be registered. Here we use dapi to learn the translation transform of images # relative to the first round, and then apply it to imgs. -%matplotlib inline +#%matplotlib inline import matplotlib from starfish.util.plot import diagnose_registration @@ -461,13 +472,14 @@ def format_experiment(input_dir, output_dir) -> None: # find errors. # display found spots for each (round,channel) as a layer in napari -viewer = display(stack=wth_imgs) -layer_index = 0 -for rnd in bd_spots.round_labels: - for ch in bd_spots.ch_labels: - viewer.add_points(data=bd_spots_numpy[layer_index], symbol='ring', face_color='red', size=5, - name=f'r: {rnd}, ch: {ch}', visible=False) - layer_index = layer_index + 1 +#viewer = display(stack=wth_imgs) +#layer_index = 0 +#for rnd in bd_spots.round_labels: +# for ch in bd_spots.ch_labels: +# viewer.add_points(data=bd_spots_numpy[layer_index], symbol='ring', face_color='red', +# size=5, +# name=f'r: {rnd}, ch: {ch}', visible=False) +# layer_index = layer_index + 1 ################################################################################################### # For readability of this notebook in printed format, here are the SpotFindingResults plotted in @@ -560,4 +572,4 @@ def format_experiment(input_dir, output_dir) -> None: # Save ExpressionMatrix as .h5ad-format AnnData # --------------------------------------------- -mat.save_anndata('RNAScope_HiPlex_Example.h5ad') \ No newline at end of file +#mat.save_anndata('RNAScope_HiPlex_Example.h5ad') \ No newline at end of file