"""Model definition."""
from torch import nn
from transforms import GroupMultiScaleCrop
from transforms import GroupRandomHorizontalFlip
import torchvision
class Flatten(nn.Module):
    """Flattens all dimensions after the batch dimension."""

    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, x):
        return x.view(x.size(0), -1)


class Model(nn.Module):
    def __init__(self, num_class, num_segments, representation,
                 base_model='resnet152'):
        super(Model, self).__init__()
        self._representation = representation  # e.g. 'mv' or 'residual'
        self.num_segments = num_segments

        print(("""
Initializing model:
    base model: {}.
    input_representation: {}.
    num_class: {}.
    num_segments: {}.
        """.format(base_model, self._representation, num_class, self.num_segments)))

        self._prepare_base_model(base_model)
        self._prepare_tsn(num_class)

    def _prepare_tsn(self, num_class):
        # Replace the final fully connected layer with a classifier for num_class classes.
        feature_dim = getattr(self.base_model, 'fc').in_features
        setattr(self.base_model, 'fc', nn.Linear(feature_dim, num_class))

        if self._representation == 'mv':
            # Motion vectors have 2 channels, so the first conv layer is replaced accordingly.
            setattr(self.base_model, 'conv1',
                    nn.Conv2d(2, 64,
                              kernel_size=(7, 7),
                              stride=(2, 2),
                              padding=(3, 3),
                              bias=False))
            self.data_bn = nn.BatchNorm2d(2)
        if self._representation == 'residual':
            # Residuals keep 3 channels but are batch-normalized before the base model.
            self.data_bn = nn.BatchNorm2d(3)

    def _prepare_base_model(self, base_model):
        if 'resnet' in base_model:
            self.base_model = getattr(torchvision.models, base_model)(pretrained=True)
            self._input_size = 224
        else:
            raise ValueError('Unknown base model: {}'.format(base_model))

    def forward(self, input):
        # Fold the segment dimension into the batch dimension:
        # (batch, segments, C, H, W) -> (batch * segments, C, H, W).
        input = input.view((-1, ) + input.size()[-3:])
        if self._representation in ['mv', 'residual']:
            input = self.data_bn(input)

        base_out = self.base_model(input)
        return base_out

    @property
    def crop_size(self):
        return self._input_size

    @property
    def scale_size(self):
        return self._input_size * 256 // 224

    def get_augmentation(self):
        # Motion vectors and residuals use a slightly smaller set of crop scales.
        if self._representation in ['mv', 'residual']:
            scales = [1, .875, .75]
        else:
            scales = [1, .875, .75, .66]

        print('Augmentation scales:', scales)
        return torchvision.transforms.Compose(
            [GroupMultiScaleCrop(self._input_size, scales),
             GroupRandomHorizontalFlip(is_mv=(self._representation == 'mv'))])
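

# --- Minimal usage sketch (not part of the original file) ---
# Hedged illustration of how Model might be instantiated and exercised. The class
# count (101), segment count (3), and random input tensor are illustrative
# assumptions, and constructing the model downloads pretrained ResNet-152
# weights via torchvision.
if __name__ == '__main__':
    import torch

    model = Model(num_class=101, num_segments=3, representation='mv')

    # Dummy batch shaped (batch, segments, channels, height, width);
    # the motion-vector ('mv') representation has 2 channels.
    dummy = torch.randn(1, 3, 2, model.crop_size, model.crop_size)
    with torch.no_grad():
        out = model(dummy)
    print(out.shape)  # (batch * segments, num_class) -> torch.Size([3, 101])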