Adds isolation in 2d to segmentation file. #76

Draft
wants to merge 4 commits into base: dev

Changes from 2 commits
51 changes: 45 additions & 6 deletions tobac/segmentation.py
@@ -7,7 +7,7 @@ def segmentation_2D(features,field,dxy,threshold=3e-3,target='maximum',level=Non
return segmentation(features,field,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance)


def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto'):
def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',ISO_dilate=8):
"""
Function performing watershedding for an individual timestep of the data

@@ -25,13 +25,15 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
method: string
flag determining the algorithm to use (currently watershedding implemented)
max_distance: float
maximum distance from a marker allowed to be classified as belonging to that cell
ISO_dilate: int
Member
Why is 8 the default here? I'm not opposed to the value, I'm just curious.
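For context, a toy illustration of what the default implies (made-up mask, not part of this diff): with disk(8) every labelled feature is dilated by up to 8 grid cells, so a feature is flagged as having a neighbor whenever another feature's pixels fall within roughly 8 cells of its own.

import numpy as np
from skimage.morphology import dilation, disk

mask = np.zeros((20, 30), dtype=int)  # toy labelled segmentation mask
mask[8:12, 5:10] = 1                  # feature 1
mask[8:12, 14:19] = 2                 # feature 2, four empty columns away
dilated = dilation(mask, disk(8))     # each label grows by up to 8 grid cells
print(np.unique(mask[dilated == 2]))  # [0 1 2] -> feature 2 has feature 1 as a neighbor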

radius (in grid cells) of the structuring element used to dilate each feature's segmented area when determining isolation. Default is 8

Output:
segmentation_out: iris.cube.Cube
cloud mask, 0 outside and integer numbers according to track inside the clouds
features_out: pandas.DataFrame
feature dataframe including the number of cells (2D or 3D) in the segmented area/volume of the feature at the timestep
"""
# The location of watershed within skimage submodules changes with v0.19, but I've kept both for backward compatibility for now
try:
@@ -40,6 +42,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
from skimage.morphology import watershed
# from skimage.segmentation import random_walker
from scipy.ndimage import distance_transform_edt
from skimage.morphology import dilation, disk
from copy import deepcopy
import numpy as np

@@ -120,6 +123,33 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu
else:
raise ValueError('unknown method, must be watershed')

# create isolation flags, currently only available for 2D tracking
if field_in.ndim==2:
Member
Do we want to make this entirely optional? Is running the isolation here necessary for everyone, or would it be better to have a parameter in place to turn it on/off?

Member
I guess the question here ultimately is: what's the performance penalty?
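One way the opt-in could look, sketched here only to make the suggestion concrete; the helper name and the compute_isolation keyword are hypothetical, not part of this PR:

def add_isolation_columns(segmentation_mask, features_out, ISO_dilate=8, compute_isolation=False):
    # Hypothetical sketch: skip the extra dilation pass unless explicitly requested,
    # so users who do not need the isolation columns pay no performance penalty.
    if not compute_isolation:
        return features_out
    from skimage.morphology import dilation, disk
    segmask2 = dilation(segmentation_mask, disk(ISO_dilate))
    # ... neighbor counting over segmask2 as in the loop below ...
    return features_out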

nobj = features_in['feature'].values
nobj_len = len(nobj)
iso = np.empty(nobj_len, dtype='bool')
iso[:] = False
num_obj_around = np.zeros(nobj_len, dtype = 'int')
segmask2 = dilation(segmentation_mask,disk(ISO_dilate)) # dilate each feature by ISO_dilate grid cells (formerly used a square footprint)
for iso_id in np.arange(nobj_len):
if iso_id == 0:
continue
obj_ind = np.where(segmask2 == nobj[iso_id])
objects = np.unique(segmentation_mask[obj_ind])
if len(objects) >= 3: # one label is always 0 (background) and one is the feature itself; any additional labels are neighboring features
iso[iso_id] = True
num_obj_around[iso_id] = len(objects)-1 # subtract the background (0) label included in the count

features_out['isolated'] = iso
features_out['num_objects'] = num_obj_around

# remove everything from the individual masks that is more than max_distance_pixel away from the markers
if max_distance is not None:
D=distance_transform_edt((markers==0).astype(int))
@@ -139,7 +169,7 @@ def segmentation_timestep(field_in,features_in,dxy,threshold=3e-3,target='maximu

return segmentation_out,features_out

def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto'):
def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,method='watershed',max_distance=None,vertical_coord='auto',ISO_dilate=8):
"""
Function using watershedding or random walker to determine cloud volumes associated with tracked updrafts

@@ -162,13 +192,22 @@ def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,m
max_distance: float
Maximum distance from a marker allowed to be classified as belonging to that cell

ISO_dilate: int
radius (in grid cells) of the structuring element used to dilate each feature's segmented area when determining isolation.
Default is 8


Output:
segmentation_out: iris.cube.Cube
Cloud mask, 0 outside and integer numbers according to track inside the cloud
"""
import pandas as pd
from iris.cube import CubeList

features['isolated'] = None
features['num_objects'] = None
logging.debug('ISO_dilate: '+str(ISO_dilate))

logging.info('Start watershedding 3D')

# check input for right dimensions:
@@ -186,7 +225,7 @@ def segmentation(features,field,dxy,threshold=3e-3,target='maximum',level=None,m
for i,field_i in enumerate(field_time):
time_i=field_i.coord('time').units.num2date(field_i.coord('time').points[0])
features_i=features.loc[features['time']==time_i]
segmentation_out_i,features_out_i=segmentation_timestep(field_i,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord)
segmentation_out_i,features_out_i=segmentation_timestep(field_i,features_i,dxy,threshold=threshold,target=target,level=level,method=method,max_distance=max_distance,vertical_coord=vertical_coord,ISO_dilate=ISO_dilate)
segmentation_out_list.append(segmentation_out_i)
features_out_list.append(features_out_i)
logging.debug('Finished segmentation for '+time_i.strftime('%Y-%m-%d_%H:%M:%S'))
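For reference, a minimal usage sketch of the new keyword. It assumes features (a pandas.DataFrame from feature detection) and field (an iris.cube.Cube) are already in scope, the dxy and threshold values are placeholders, and segmentation returns the merged mask together with the updated feature dataframe, as segmentation_timestep does per timestep:

from tobac.segmentation import segmentation

mask, features_out = segmentation(features, field, dxy=1000.0, threshold=3e-3, ISO_dilate=8)
print(features_out[['feature', 'isolated', 'num_objects']].head())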