# flowcam_data_processor.py
import cv2
import numpy as np
from os import listdir, path
import pandas as pd
from typing import List, Tuple
class FlowcamDataProcessor():
""" Process flb or lst and image collage files in a directory
Given a directory this class looks for a flb or lst file
# Arguments
flowcam_files_dir: A String
Directory containing files generated by FlowCam, this
should contain one or more image collage files (tif) and
one or more flb or lst files.
(flb and lst files generated by FlowCam are identical formats)
"""
def __init__(self, flowcam_files_dir: path) -> None:
self.flowcam_files_dir = flowcam_files_dir
def parse_lst_or_flb_file(self, file_content: List[str],
target: int) -> pd.DataFrame:
""" Parses a flb or lst file created by FlowCam, and creates
a panda dataframe for all the geometrics in the file.
Arguments:
file_content {List[str]} -- [description]
target {int} -- [description]
Returns:
pd.DataFrame -- [description]
"""
# thinking in terms of a csv file, there are column names
col_names: List = []
# and column values for each sample
col_values: List = []
# ignore the first two lines of the file
for line in file_content[2:]:
# if line contains only one '|' then it is a column name
if line.count('|') == 1:
col_names.append(line.split('|')[0])
# else each line contains numerous features for each particle
else:
features = line.split('|')
col_values.append(features)
# put data into a panda dataframe for easier future analysis
df: pd.DataFrame = pd.DataFrame(col_values)
df.columns = col_names
# target is the particle class id
df['_target'] = target
return df
def process_lst_or_flb_files(self, target: int = 0) -> pd.DataFrame:
"""Given flowcam files in a directory, extract data from
the flb or lst files and store the data in a pandas dataframe
Keyword Arguments:
target {int} -- [description] (default: {0})
Returns:
pd.DataFrame -- [description]
"""
# in case there is more than one lst file,
# create list to store processed data from each one
dataframes = []
# Iterate through the files generated by FlowCam
for idx, filename in enumerate(listdir(self.flowcam_files_dir)):
# search for the lst/flb file containing info for all the samples
if filename.endswith(".lst") or filename.endswith(".flb"):
# if found, instruct Python to open the file
with open(path.join(self.flowcam_files_dir, filename)) as file:
# extract all content from the file
file_content = file.readlines()
# and then pass content to function for processing
df = self.parse_lst_or_flb_file(file_content, target)
dataframes.append(df)
df = pd.concat(dataframes, sort=True)
# dataframe contains information for all samples in this class (target)
return df
def snip_images(self,
df: pd.DataFrame,
desired_image_size: Tuple[int, int],
resize: bool = True) -> np.array:
"""For each row in the dataframe, this finds the particle within
it's relevant image collage and "snips" it out. Optionally these
images will be resized to a uniform size.
Arguments:
df {pd.DataFrame} -- [description]
desired_image_size {Tuple[int, int]} -- [description]
Keyword Arguments:
resize {bool} -- [description] (default: {True})
Returns:
np.array -- 4d array containing all segmented images
"""
images_data: List = []
for index, row in df.iterrows():
image_collage_path = path.join(
self.flowcam_files_dir, row['collage_file'])
im = cv2.imread(image_collage_path)
# snip out the particle from the image collage
im = im[
int(row['image_y']):int(row['image_y'])+int(row['image_h']),
int(row['image_x']):int(row['image_x'])+int(row['image_w'])]
if resize:
# make all images have portrait orientation
# if width > height
(h, w) = im.shape[:2]
if w > h:
# calculate the center of the image
im = cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE)
im = cv2.resize(im, desired_image_size)
im = im / 255
images_data.append(im)
images_data: np.array = np.array(images_data)
return images_data