
Commit

Merge pull request hubmapconsortium#57 from hubmapconsortium/pennycuda/fix-antibodies-util

Remove references to defunct antibodies util
pennycuda authored Aug 7, 2024
2 parents 2e574a8 + d855591 commit 0fadb51
Showing 12 changed files with 134 additions and 22 deletions.
111 changes: 103 additions & 8 deletions bin/convert_to_ometiff.py
@@ -10,7 +10,6 @@
import yaml
from aicsimageio import AICSImage
from aicsimageio.writers.ome_tiff_writer import OmeTiffWriter
-from antibodies_tsv_util import antibodies_tsv_util as antb_tools
from ome_types.model import AnnotationRef, Map, MapAnnotation, StructuredAnnotationList
from tifffile import TiffFile

@@ -25,6 +24,104 @@
"nucleus_boundaries",
]
TIFF_FILE_NAMING_PATTERN = re.compile(r"^R\d{3}_X(\d{3})_Y(\d{3})\.tif")
metadata_filename_pattern = re.compile(r"^[0-9A-Fa-f]{32}antibodies\.tsv$")
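As a quick illustration (not part of the commit), the two compiled patterns above accept filenames like the invented ones below:

# Invented filenames, for illustration only.
m = TIFF_FILE_NAMING_PATTERN.match("R001_X003_Y004.tif")
assert m is not None and m.groups() == ("003", "004")  # captures the X/Y tile indices
# 32 hex characters (a dataset ID) directly followed by "antibodies.tsv":
assert metadata_filename_pattern.match("0123456789abcdef0123456789abcdefantibodies.tsv")
assert metadata_filename_pattern.match("antibodies.tsv") is None  # no ID prefix, no match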


def find_antibodies_meta(input_dir: Path) -> Optional[Path]:
    """
    Finds and returns the first antibodies.tsv metadata file for a HuBMAP dataset.
    Does not check whether the dataset ID (32 hex characters) matches
    the directory name, nor whether there might be multiple metadata files.
    """
    # possible_dirs = [input_dir, input_dir / "extras"]
    metadata_filename_pattern = re.compile(r"^[0-9A-Za-z\-_]*antibodies\.tsv$")
    found_files = []
    for dirpath, dirnames, filenames in walk(input_dir):
        for filename in filenames:
            if metadata_filename_pattern.match(filename):
                found_files.append(Path(dirpath) / filename)

    if len(found_files) == 0:
        logger.warning("No antibodies.tsv file found")
        antb_path = None
    else:
        antb_path = found_files[0]
    return antb_path
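A minimal usage sketch for find_antibodies_meta; the directory and filename below are hypothetical and not taken from the repository:

# Assume /data/some_dataset/extras/HBM123-demo-antibodies.tsv exists (hypothetical).
antb_path = find_antibodies_meta(Path("/data/some_dataset"))
if antb_path is None:
    print("proceeding without antibody metadata")
else:
    print(f"using antibody metadata from {antb_path}")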


def sort_by_cycle(antb_path: Path):
    """
    Sorts antibodies.tsv by cycle and channel number, since the original TSV
    is not guaranteed to be in that order.
    """
    df = pd.read_table(antb_path)
    cycle_channel_pattern = re.compile(r"cycle(?P<cycle>\d+)_ch(?P<channel>\d+)", re.IGNORECASE)
    searches = [cycle_channel_pattern.search(v) for v in df["channel_id"]]
    cycles = [int(s.group("cycle")) for s in searches]
    channels = [int(s.group("channel")) for s in searches]
    df.index = [cycles, channels]
    df = df.sort_index()
    return df
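A self-contained sketch of sort_by_cycle on an invented two-row antibodies.tsv (column names follow the code above; the temp file exists only so the function can read a Path, and pathlib/pandas imports are assumed as elsewhere in this file):

import tempfile

tsv = (
    "channel_id\tantibody_name\n"
    "cycle2_ch1\tAnti-CD8 antibody\n"
    "cycle1_ch3\tAnti-CD45 antibody\n"
)
with tempfile.NamedTemporaryFile("w", suffix="antibodies.tsv", delete=False) as f:
    f.write(tsv)
sorted_df = sort_by_cycle(Path(f.name))
# Rows come back ordered by (cycle, channel): cycle1_ch3 first, then cycle2_ch1.
assert sorted_df["channel_id"].tolist() == ["cycle1_ch3", "cycle2_ch1"]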


def get_ch_info_from_antibodies_meta(df: pd.DataFrame) -> Optional[pd.DataFrame]:
    """
    Adds a "target" column containing the cleaned analyte name that will
    replace the provider channel name.
    """
    # df = df.set_index("channel_id", inplace=False)
    antb_names = df["antibody_name"].to_list()
    antb_targets = [get_analyte_name(antb) for antb in antb_names]
    df["target"] = antb_targets
    return df


def get_analyte_name(antibody_name: str) -> str:
    """
    Strips unnecessary prefixes and suffixes (e.g. "Anti-", " antibody")
    from an antibody name taken from antibodies.tsv.
    """
    antb = re.sub(r"Anti-", "", antibody_name)
    antb = re.sub(r"\s+antibody", "", antb)
    return antb
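For example (invented names, not from any dataset), get_analyte_name behaves as follows:

assert get_analyte_name("Anti-CD45 antibody") == "CD45"
assert get_analyte_name("DAPI") == "DAPI"  # names without the prefix/suffix pass through unchanged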


def create_original_channel_names_df(channelList: List[str]) -> pd.DataFrame:
    """
    Creates a dataframe with the original channel names, cycle numbers, and channel numbers.
    """
    # Separate channel and cycle info from channel names and remove "orig"
    cyc_ch_pattern = re.compile(r"cyc(\d+)_ch(\d+)_orig(.*)")
    og_ch_names_df = pd.DataFrame(channelList, columns=["Original_Channel_Name"])
    og_ch_names_df[["Cycle", "Channel", "channel_name"]] = og_ch_names_df[
        "Original_Channel_Name"
    ].str.extract(cyc_ch_pattern)
    og_ch_names_df["Cycle"] = pd.to_numeric(og_ch_names_df["Cycle"])
    og_ch_names_df["Channel"] = pd.to_numeric(og_ch_names_df["Channel"])
    og_ch_names_df["channel_id"] = (
        "cycle"
        + og_ch_names_df["Cycle"].astype(str)
        + "_ch"
        + og_ch_names_df["Channel"].astype(str)
    )

    return og_ch_names_df
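A small check (illustrative only) with channel names in the cyc{N}_ch{M}_orig{name} convention produced by add_cycle_channel_numbers:

og_df = create_original_channel_names_df(["cyc1_ch1_origDAPI", "cyc1_ch2_origCD45"])
assert og_df["channel_id"].tolist() == ["cycle1_ch1", "cycle1_ch2"]
assert og_df["channel_name"].tolist() == ["DAPI", "CD45"]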


def replace_provider_ch_names_with_antb(
    og_ch_names_df: pd.DataFrame, antibodies_df: pd.DataFrame
) -> List[str]:
    """
    Uses the cycle and channel mapping to replace each provider channel name with
    the corresponding name from antibodies.tsv, keeping the original name where
    no mapping exists.
    """
    updated_channel_names = []
    mapping = map_cycles_and_channels(antibodies_df)
    for i in og_ch_names_df.index:
        channel_id = og_ch_names_df.at[i, "channel_id"].lower()
        original_name = og_ch_names_df.at[i, "channel_name"]
        target = mapping.get(channel_id, None)
        if target is not None:
            updated_channel_names.append(target)
        else:
            updated_channel_names.append(original_name)
    return updated_channel_names
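map_cycles_and_channels is not shown in this diff; as an assumption only, a mapping of the shape it would need to return can be built from the "channel_id" and "target" columns created above (hypothetical sketch, not the committed implementation):

def map_cycles_and_channels_sketch(antibodies_df: pd.DataFrame) -> dict:
    # e.g. {"cycle1_ch1": "DAPI", "cycle1_ch2": "CD45"}; keys are lower-cased to
    # match the .lower() lookup in replace_provider_ch_names_with_antb.
    return {
        str(ch_id).lower(): target
        for ch_id, target in zip(antibodies_df["channel_id"], antibodies_df["target"])
    }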


def generate_sa_ch_info(
@@ -280,16 +377,14 @@ def check_dir_is_empty(dir_path: Path):

segmentationFileList = collect_tiff_file_list(cytometryTileDir, TIFF_FILE_NAMING_PATTERN)
extractFileList = collect_tiff_file_list(extractDir, TIFF_FILE_NAMING_PATTERN)
-antb_path = antb_tools.find_antibodies_meta(args.input_data_dir)
+antb_path = find_antibodies_meta(args.input_data_dir)

lateral_resolution = get_lateral_resolution(args.cytokit_config)
-df = antb_tools.sort_by_cycle(antb_path)
-antb_info = antb_tools.get_ch_info_from_antibodies_meta(df)
+df = sort_by_cycle(antb_path)
+antb_info = get_ch_info_from_antibodies_meta(df)
extractChannelNames = collect_expressions_extract_channels(extractFileList[0])
-original_ch_names_df = antb_tools.create_original_channel_names_df(extractChannelNames)
-updated_channel_names = antb_tools.replace_provider_ch_names_with_antb(
-    original_ch_names_df, antb_info
-)
+original_ch_names_df = create_original_channel_names_df(extractChannelNames)
+updated_channel_names = replace_provider_ch_names_with_antb(original_ch_names_df, antb_info)

# Create segmentation mask OME-TIFFs
if segmentationFileList:
26 changes: 22 additions & 4 deletions bin/dataset_info/collect_dataset_info_old.py
@@ -13,8 +13,6 @@
from pprint import pprint
from typing import Dict, List, Optional, Tuple

-from antibodies_tsv_util import antibodies_tsv_util as antb_tools

sys.path.append("/opt")
from pipeline_utils.dataset_listing import get_tile_dtype, get_tile_shape

@@ -29,6 +27,26 @@
logger.addHandler(handler)


def add_cycle_channel_numbers(channel_names: List[str]) -> List[str]:
    """
    Prefixes each channel name with cycle and channel info (cyc{N}_ch{M}_orig{name})
    during the collect-dataset-info step. Replaces a similar function that appended
    a number to the end of duplicate channel names.
    """
    new_names = []
    cycle_count = 1
    channel_count = 1

    for original_name in channel_names:
        new_name = f"cyc{cycle_count}_ch{channel_count}_orig{original_name}"
        new_names.append(new_name)

        channel_count += 1
        if channel_count > 4:  # Assuming 4 channels per cycle, modify accordingly
            channel_count = 1
            cycle_count += 1

    return new_names
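An illustrative call with invented channel names, assuming the default of four channels per cycle:

names = ["DAPI", "CD4", "CD8", "Blank", "DAPI"]
assert add_cycle_channel_numbers(names) == [
    "cyc1_ch1_origDAPI", "cyc1_ch2_origCD4", "cyc1_ch3_origCD8",
    "cyc1_ch4_origBlank", "cyc2_ch1_origDAPI",
]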


def find_files(
base_directory: Path,
filename: str,
@@ -444,7 +462,7 @@ def standardize_metadata(directory: Path, num_concurrent_tasks: int):

# If there are identical channel names, make them unique by adding
# incremental numbers to the end.
-channelNames = antb_tools.add_cycle_channel_numbers(channelNames)
+channelNames = add_cycle_channel_numbers(channelNames)
print(channelNames)

datasetInfo["channel_names"] = channelNames
@@ -459,7 +477,7 @@
for row in csvreader:
ch_names_qc.append(row[0])
qc_vals.append(row[1].strip())
-unique_qc_ch_names = antb_tools.add_cycle_channel_numbers(ch_names_qc)
+unique_qc_ch_names = add_cycle_channel_numbers(ch_names_qc)
for i, ch in enumerate(unique_qc_ch_names):
channel_names_qc_pass[ch] = [qc_vals[i]]
else:
1 change: 0 additions & 1 deletion environment.yml
@@ -23,4 +23,3 @@ dependencies:
- opencv-contrib-python-headless>4.0,<5.0
- pint==0.22
- jsonschema==4.19.0
-  - git+https://github.com/hubmapconsortium/antibodies-tsv-util.git
2 changes: 1 addition & 1 deletion steps/illumination_first_stitching/best_focus.cwl
@@ -3,7 +3,7 @@ class: CommandLineTool

requirements:
DockerRequirement:
-dockerPull: hubmap/codex-scripts
+dockerPull: hubmap/codex-scripts:latest
dockerOutputDirectory: "/output"

baseCommand: ["python", "/opt/best_focus/run_best_focus_selection.py"]
@@ -4,7 +4,7 @@ label: Collect dataset info for Cytokit

requirements:
DockerRequirement:
-dockerPull: hubmap/codex-scripts
+dockerPull: hubmap/codex-scripts:latest

baseCommand: ["python", "/opt/dataset_info/run_collection.py"]

2 changes: 1 addition & 1 deletion steps/illumination_first_stitching/create_yaml_config.cwl
@@ -4,7 +4,7 @@ label: Create Cytokit experiment config

requirements:
DockerRequirement:
-dockerPull: hubmap/codex-scripts
+dockerPull: hubmap/codex-scripts:latest

baseCommand: ["python", "/opt/create_cytokit_config.py"]

2 changes: 1 addition & 1 deletion steps/illumination_first_stitching/first_stitching.cwl
@@ -3,7 +3,7 @@ class: CommandLineTool

requirements:
DockerRequirement:
-dockerPull: hubmap/codex-scripts
+dockerPull: hubmap/codex-scripts:latest
dockerOutputDirectory: "/output"

baseCommand: ["python", "/opt/codex_stitching/run_stitching.py"]
@@ -3,7 +3,7 @@ class: CommandLineTool

requirements:
DockerRequirement:
-dockerPull: hubmap/codex-scripts
+dockerPull: hubmap/codex-scripts:latest
dockerOutputDirectory: "/output"

baseCommand: ["python", "/opt/illumination_correction/run_illumination_correction.py"]
2 changes: 1 addition & 1 deletion steps/illumination_first_stitching/slicing.cwl
@@ -3,7 +3,7 @@ class: CommandLineTool

requirements:
DockerRequirement:
-dockerPull: hubmap/codex-scripts
+dockerPull: hubmap/codex-scripts:latest
dockerOutputDirectory: "/output"

baseCommand: ["python", "/opt/slicing/run_slicing.py"]
2 changes: 1 addition & 1 deletion steps/ometiff_second_stitching/background_subtraction.cwl
@@ -3,7 +3,7 @@ class: CommandLineTool

requirements:
DockerRequirement:
-dockerPull: hubmap/codex-scripts
+dockerPull: hubmap/codex-scripts:latest
dockerOutputDirectory: "/output"

baseCommand: ["python", "/opt/background_subtraction/run_background_subtraction.py"]
2 changes: 1 addition & 1 deletion steps/ometiff_second_stitching/ome_tiff_creation.cwl
@@ -4,7 +4,7 @@ label: Create OME-TIFF versions of Cytokit segmentation and extract results

requirements:
DockerRequirement:
-dockerPull: hubmap/codex-scripts
+dockerPull: hubmap/codex-scripts:latest

baseCommand: ["python", "/opt/convert_to_ometiff.py"]

2 changes: 1 addition & 1 deletion steps/ometiff_second_stitching/second_stitching.cwl
@@ -3,7 +3,7 @@ class: CommandLineTool

requirements:
DockerRequirement:
-dockerPull: hubmap/codex-scripts
+dockerPull: hubmap/codex-scripts:latest
dockerOutputDirectory: /output

baseCommand: ["python", "/opt/codex_stitching/secondary_stitcher/secondary_stitcher_runner.py"]