From 466f34d4f5de6ab2fddb34493ed71a8b187e8a84 Mon Sep 17 00:00:00 2001 From: JoshuaHess12 <46495012+JoshuaHess12@users.noreply.github.com> Date: Fri, 23 Jul 2021 11:17:32 -0400 Subject: [PATCH] Attempted fix for Issues 26 and 28 by writing CSVs to different files (#29) * Attempted fix for MCMICRO Issues 26 and 28 write csvs to different files * uncouple CellIDs in each mask --- .gitignore | 2 ++ SingleCellDataExtraction.py | 68 ++++++++++++++++++------------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/.gitignore b/.gitignore index a81c8ee..24594d9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +.DS_Store + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/SingleCellDataExtraction.py b/SingleCellDataExtraction.py index 98e3103..5d019de 100644 --- a/SingleCellDataExtraction.py +++ b/SingleCellDataExtraction.py @@ -10,6 +10,7 @@ import skimage.measure as measure from pathlib import Path import csv +from scipy.spatial import KDTree def MaskChannel(mask_loaded,image_loaded_z): @@ -119,7 +120,6 @@ def PrepareData(image,z): #Return the objects return image_loaded_z - def MaskZstack(masks_loaded,image,channel_names_loaded): """This function will extract the stats for each cell mask through each channel in the input image @@ -130,8 +130,7 @@ def MaskZstack(masks_loaded,image,channel_names_loaded): #Get the names of the keys for the masks dictionary mask_names = list(masks_loaded.keys()) - #Get the CellIDs for this dataset by using only a single mask (first mask) - IDs = pd.DataFrame(MaskIDs(masks_loaded[mask_names[0]])) + #Create empty dictionary to store channel results per mask dict_of_chan = {m_name: [] for m_name in mask_names} #Get the z channel and the associated channel name from list of channel names @@ -148,41 +147,32 @@ def MaskZstack(masks_loaded,image,channel_names_loaded): #Iterate through the rest of the masks to modify names of channels and convert to data table for nm in mask_names: - #Check if this is the first mask - if nm == mask_names[0]: - #Create channel names for this mask - new_names = [channel_names_loaded[i]+"_"+str(nm) for i in range(len(channel_names_loaded))] - #Convert the channel names list and the list of intensity values to a dictionary and combine with CellIDs and XY - dict_of_chan[nm] = pd.concat([IDs,pd.DataFrame(dict(zip(new_names,dict_of_chan[nm])))],axis=1) - #Get the name of the columns in the dataframe so we can reorder to histoCAT convention - cols = list(dict_of_chan[nm].columns.values) - #Reorder the list (Move xy position to end with spatial information) - cols.append(cols.pop(cols.index("X_centroid"))) - cols.append(cols.pop(cols.index("Y_centroid"))) - cols.append(cols.pop(cols.index("column_centroid"))) - cols.append(cols.pop(cols.index("row_centroid"))) - cols.append(cols.pop(cols.index("Area"))) - cols.append(cols.pop(cols.index("MajorAxisLength"))) - cols.append(cols.pop(cols.index("MinorAxisLength"))) - cols.append(cols.pop(cols.index("Eccentricity"))) - cols.append(cols.pop(cols.index("Solidity"))) - cols.append(cols.pop(cols.index("Extent"))) - cols.append(cols.pop(cols.index("Orientation"))) - #Reindex the dataframe with new order - dict_of_chan[nm] = dict_of_chan[nm].reindex(columns=cols) - #Otherwise, add no spatial information - else: - #Create channel names for this mask - new_names = [channel_names_loaded[i]+"_"+str(nm) for i in range(len(channel_names_loaded))] - #Use the above information to mask z stack - dict_of_chan[nm] = pd.DataFrame(dict(zip(new_names,dict_of_chan[nm]))) + #Get the CellIDs for this dataset by using only a single mask (first mask) + IDs = pd.DataFrame(MaskIDs(masks_loaded[nm])) + #Convert the channel names list and the list of intensity values to a dictionary and combine with CellIDs and XY + dict_of_chan[nm] = pd.concat([IDs,pd.DataFrame(dict(zip(channel_names_loaded,dict_of_chan[nm])))],axis=1) + #Get the name of the columns in the dataframe so we can reorder to histoCAT convention + cols = list(dict_of_chan[nm].columns.values) + #Reorder the list (Move xy position to end with spatial information) + cols.append(cols.pop(cols.index("X_centroid"))) + cols.append(cols.pop(cols.index("Y_centroid"))) + cols.append(cols.pop(cols.index("column_centroid"))) + cols.append(cols.pop(cols.index("row_centroid"))) + cols.append(cols.pop(cols.index("Area"))) + cols.append(cols.pop(cols.index("MajorAxisLength"))) + cols.append(cols.pop(cols.index("MinorAxisLength"))) + cols.append(cols.pop(cols.index("Eccentricity"))) + cols.append(cols.pop(cols.index("Solidity"))) + cols.append(cols.pop(cols.index("Extent"))) + cols.append(cols.pop(cols.index("Orientation"))) + #Reindex the dataframe with new order + dict_of_chan[nm] = dict_of_chan[nm].reindex(columns=cols) #Concatenate all data from all masks to return - dat = pd.concat([dict_of_chan[nm] for nm in mask_names],axis=1) + #dat = pd.concat([dict_of_chan[nm] for nm in mask_names],axis=1) #Return the dataframe - return dat - + return dict_of_chan def ExtractSingleCells(masks,image,channel_names,output): """Function for extracting single cell information from input @@ -247,7 +237,15 @@ def ExtractSingleCells(masks,image,channel_names,output): im_full_name = os.path.basename(image) im_name = im_full_name.split('.')[0] - scdata_z.to_csv(str(Path(os.path.join(str(output),str(im_name+".csv")))),index=False) + + # iterate through each mask and export csv with mask name as suffix + for k,v in scdata_z.items(): + # export the csv for this mask name + scdata_z[k].to_csv( + str(Path(os.path.join(str(output), + str(im_name+"_{}"+".csv").format(k)))), + index=False + ) def MultiExtractSingleCells(masks,image,channel_names,output):