# ==============================================================================
# Based on the work by B. Shi et al.:
# Fingerspelling recognition in the wild with iterative visual attention
# ==============================================================================
import os
import json

import torch
import cv2 as cv
import numpy as np
from torch.utils.data import Dataset


class ChicagoFSWild(Dataset):
    """
    Chicago Fingerspelling in the Wild Data Sets.
    For more information and downloads go to:
    https://ttic.uchicago.edu/~klivescu/ChicagoFSWild.htm#overview
    """

    def __init__(self, split, img_dir, fcsv, vocab_map, transform,
                 img_size=224, map_size=14, lambda_x=None, scale_x=None):
        """
        :param split: the dataset split; only 'test' is supported.
        :param img_dir: the dataset RGB root directory.
        :param fcsv: the CSV filename of the dataset split.
        :param vocab_map: maps label characters to integers.
        :param transform: image transformations.
        :param img_size: model image input size.
        :param map_size: the prior map size, equal to the size of the CNN
            output feature map.
        :param lambda_x: the JSON filename with the per-sample padding and
            box annotations of the dataset split.
        :param scale_x: the zooming factor to apply to the samples, as a
            string in {'1', '2', '3', '4'}.
        """
        assert split == 'test', 'Unknown split: %s' % split
        self.split = split
        self.img_dir = img_dir
        self.fcsv = fcsv
        self.vocab_map = vocab_map
        self.transform = transform
        self.img_size = img_size
        self.map_size = map_size
        with open(lambda_x, 'r') as f:
            self.lambda_x = json.load(f)
        assert scale_x in ['1', '2', '3', '4'], \
            'Invalid value for `scale_x` parameter: %s' % scale_x
        self.scale_x = scale_x
        self._parse()

    def _parse(self):
        with open(self.fcsv, 'r') as fo:
            lns = fo.readlines()
        print('%d %s samples' % (len(lns), self.split))
        self.imdirs, self.labels, self.n_frames = [], [], []
        for ln in lns:
            imdir, label, nframes = ln.strip().split(',')
            self.imdirs.append(imdir)
            self.labels.append(label)
            self.n_frames.append(int(nframes))

    def __len__(self):
        return len(self.imdirs)

    def __getitem__(self, idx):
        """Loads a sample video at the scale specified by the `scale_x` instance attribute."""
        subdir = self.imdirs[idx]
        label = [self.vocab_map[c] for c in self.labels[idx]]
        fnames = [str(i).zfill(4) + '.jpg' for i in range(1, self.n_frames[idx] + 1)]
        pad = self.lambda_x[subdir]['pad']
        l_pad, u_pad, r_pad, d_pad = pad['l'], pad['u'], pad['r'], pad['d']
        # boxes are stored in polar-like (center, radius) coordinates
        x0, y0, x1, y1 = self.to_cartesian_coord(self.lambda_x[subdir][self.scale_x])
        imgs, grays = [], []
        for fname in fnames:
            rgb = cv.imread(os.path.join(self.img_dir, subdir, fname))
            rgb = cv.cvtColor(rgb, cv.COLOR_BGR2RGB)
            expand_rgb = cv.copyMakeBorder(rgb, u_pad, d_pad, l_pad, r_pad,
                                           cv.BORDER_CONSTANT, value=(0, 0, 0))
            patch_rgb = expand_rgb[y0 + u_pad: y1 + u_pad, x0 + l_pad: x1 + l_pad]
            patch_rgb = cv.resize(patch_rgb, (self.img_size, self.img_size))
            patch_gray = cv.cvtColor(patch_rgb, cv.COLOR_RGB2GRAY)
            imgs.append(patch_rgb)
            grays.append(patch_gray)
        imgs, gray = np.stack(imgs), np.stack(grays)[..., np.newaxis]
        sample = {'imgs': imgs, 'gray': gray, 'label': label}
        return self.transform(sample)

    @staticmethod
    def to_cartesian_coord(polar_coord):
        """Convert a square box from polar-like (center, radius) coordinates
        to cartesian corner coordinates."""
        cx, cy, r = polar_coord['cx'], polar_coord['cy'], polar_coord['r']
        return [cx - r, cy - r, cx + r, cy + r]
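
# Layout of the `lambda_x` JSON file, inferred from the accesses above
# (illustrative, not an official schema): one entry per sequence
# sub-directory, holding the frame padding and one center/radius box per
# zooming factor:
#
#   {"<sequence_subdir>": {"pad": {"l": ..., "u": ..., "r": ..., "d": ...},
#                          "1": {"cx": ..., "cy": ..., "r": ...},
#                          "2": {...}, "3": {...}, "4": {...}}}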


class ToTensor(object):
    """Convert sample to tensors."""

    def __call__(self, sample):
        # swap the color axis: DxHxWxC => DxCxHxW
        imgs = torch.from_numpy(sample['imgs'])
        imgs = imgs.transpose(2, 3).transpose(1, 2)
        sample['imgs'] = imgs
        if 'gray' in sample:
            sample['gray'] = torch.from_numpy(sample['gray'])
        if 'maps' in sample:
            sample['maps'] = torch.from_numpy(sample['maps'])
        if 'label' in sample:
            sample['label'] = torch.IntTensor(sample['label'])
        return sample


class Normalize(object):
    """Normalize sample RGB images."""

    def __init__(self, mean, std):
        self.mean = torch.FloatTensor(mean).view(1, 3, 1, 1)
        self.std = torch.FloatTensor(std).view(1, 3, 1, 1)

    def __call__(self, sample):
        sample['imgs'] = (sample['imgs'] / 255.0 - self.mean) / self.std
        return sample


class PriorToMap(object):
    """Resize the sample's attention priors to the CNN feature map size."""

    def __init__(self, map_size):
        self.map_size = map_size

    def __call__(self, sample):
        priors = sample['priors']
        maps = [cv.resize(prior, (self.map_size, self.map_size)) for prior in priors]
        sample['maps'] = np.stack(maps, axis=0)
        return sample
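
# Note: PriorToMap expects a `priors` entry (a sequence of HxW float arrays)
# that ChicagoFSWild itself does not produce; it is meant for the attention
# priors computed during the iterative zooming loop of Shi et al.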


class Batchify(object):
    """Unsqueezes sample's tensors to insert a batch dimension of 1."""

    def __call__(self, sample):
        sample['imgs'] = sample['imgs'].unsqueeze(dim=0)
        sample['maps'] = sample['maps'].unsqueeze(dim=0)
        return sample
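

if __name__ == '__main__':
    # Minimal usage sketch (illustrative only): the paths, vocabulary and
    # normalization statistics below are placeholders, not values shipped
    # with this file.
    from torchvision import transforms

    vocab_map = {c: i for i, c in enumerate('abcdefghijklmnopqrstuvwxyz')}
    transform = transforms.Compose([
        ToTensor(),
        # ImageNet statistics, assuming an ImageNet-pretrained backbone.
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    dataset = ChicagoFSWild(split='test',
                            img_dir='path/to/ChicagoFSWild-Frames',
                            fcsv='path/to/test.csv',
                            vocab_map=vocab_map,
                            transform=transform,
                            lambda_x='path/to/lambda_test.json',
                            scale_x='1')
    sample = dataset[0]
    print(sample['imgs'].shape, sample['label'])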