-- LRCN.lua (forked from garythung/torch-lrcn)
require 'nn'
require 'LSTM'  -- repo-local module; registers the nn.LSTM layer used below

local utils = require 'util.utils'
-- Convenience layers

-- Conv -> optional spatial batch norm -> ReLU -> 2D max pool.
-- Note: poolPad is passed to SpatialMaxPooling as the pooling stride (its args are kW, kH, dW, dH).
function convBatchNormReluPool(model, inputLayers, hiddenLayers, cnnKernel, cnnStride, cnnPad, batchnorm, poolKernel, poolPad)
  model:add(nn.SpatialConvolution(inputLayers, hiddenLayers, cnnKernel, cnnKernel, cnnStride, cnnStride, cnnPad, cnnPad))
  if batchnorm == 1 then
    model:add(nn.SpatialBatchNormalization(hiddenLayers))
  end
  model:add(nn.ReLU())
  model:add(nn.SpatialMaxPooling(poolKernel, poolKernel, poolPad, poolPad))
end
-- Conv -> ReLU.
function convRelu(model, inputLayers, hiddenLayers, cnnKernel, cnnStride, cnnPad)
  model:add(nn.SpatialConvolution(inputLayers, hiddenLayers, cnnKernel, cnnKernel, cnnStride, cnnStride, cnnPad, cnnPad))
  model:add(nn.ReLU())
end
-- Conv -> ReLU -> 2D max pool (poolPad again used as the pooling stride).
function convReluPool(model, inputLayers, hiddenLayers, cnnKernel, cnnStride, cnnPad, poolKernel, poolPad)
  model:add(nn.SpatialConvolution(inputLayers, hiddenLayers, cnnKernel, cnnKernel, cnnStride, cnnStride, cnnPad, cnnPad))
  model:add(nn.ReLU())
  model:add(nn.SpatialMaxPooling(poolKernel, poolKernel, poolPad, poolPad))
end
--[[
Construct an LRCN with specific parameters.
]]--
function LRCN(kwargs)
  assert(kwargs ~= nil)
  local batchnorm = utils.getKwarg(kwargs, 'batchnorm')
  local dropout = utils.getKwarg(kwargs, 'dropout')
  local scaledHeight = utils.getKwarg(kwargs, 'scaledHeight')
  local scaledWidth = utils.getKwarg(kwargs, 'scaledWidth')
  local seqLength = utils.getKwarg(kwargs, 'seqLength')
  local numClasses = utils.getKwarg(kwargs, 'numClasses')
  local numChannels = utils.getKwarg(kwargs, 'numChannels')
  local lstmHidden = utils.getKwarg(kwargs, 'lstmHidden')

  -- Should use about 3.5 GB of VRAM. The commented-out values next to each layer size are AlexNet's hidden layer sizes.
  local cnn = {}
  cnn.stride = 1
  cnn.kernel1 = 7
  cnn.pad1 = 3
  cnn.kernel2 = 5
  cnn.pad2 = 2
  cnn.kernel3 = 3
  cnn.pad3 = 1
  cnn.numHidden1 = 64 -- 96
  cnn.numHidden2 = 96 -- 256
  cnn.numHidden3 = 128 -- 384
  cnn.numHidden4 = 128 -- 384
  cnn.numHidden5 = 196 -- 256
  cnn.numHidden6 = 320 -- 4096
  cnn.poolKernel = 2
  cnn.poolPad = 2
  cnn.reductionFactor = (1/2) ^ 3 -- three 2x2 stride 2 pool layers = (1/2)^3; set this accordingly if you change the network!
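  -- Worked example (the 240x320 input size is an assumption for illustration, not a repo default):
  -- with scaledHeight = 240 and scaledWidth = 320, three 2x2 stride-2 pools shrink the feature
  -- maps to 240 * (1/8) = 30 by 320 * (1/8) = 40, so the flattened vector fed to the FC layer
  -- below has cnn.numHidden5 * 30 * 40 = 196 * 1200 = 235200 elements.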

  local lstm = {}
  lstm.numHidden = lstmHidden -- 256 default

  -- CNN similar to AlexNet; this version only has one FC layer
  local model = nn.Sequential()
  convBatchNormReluPool(model, numChannels, cnn.numHidden1, cnn.kernel1, cnn.stride, cnn.pad1, batchnorm, cnn.poolKernel, cnn.poolPad)
  convBatchNormReluPool(model, cnn.numHidden1, cnn.numHidden2, cnn.kernel2, cnn.stride, cnn.pad2, batchnorm, cnn.poolKernel, cnn.poolPad)
  convRelu(model, cnn.numHidden2, cnn.numHidden3, cnn.kernel3, cnn.stride, cnn.pad3)
  convRelu(model, cnn.numHidden3, cnn.numHidden4, cnn.kernel3, cnn.stride, cnn.pad3)
  convReluPool(model, cnn.numHidden4, cnn.numHidden5, cnn.kernel3, cnn.stride, cnn.pad3, cnn.poolKernel, cnn.poolPad)
  model:add(nn.View(cnn.numHidden5 * scaledWidth*cnn.reductionFactor * scaledHeight*cnn.reductionFactor))
  model:add(nn.Linear(cnn.numHidden5 * scaledWidth*cnn.reductionFactor * scaledHeight*cnn.reductionFactor, cnn.numHidden6))
  model:add(nn.ReLU())
  if dropout > 0 then
    model:add(nn.Dropout(dropout))
  end

  -- Reshape for LSTM; N items x T sequence length x H hidden size
  model:add(nn.View(-1, seqLength, cnn.numHidden6))
  model:add(nn.LSTM(cnn.numHidden6, lstm.numHidden))
  model:add(nn.View(-1, lstm.numHidden))
  if dropout > 0 then
    model:add(nn.Dropout(dropout))
  end

  model:add(nn.Linear(lstm.numHidden, numClasses))
  model:add(nn.LogSoftMax())

  return model
end
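
--[[
Example usage (a minimal sketch; the kwarg values below are illustrative assumptions,
not defaults shipped with this file -- in practice they come from the training script's options):

require 'LRCN'  -- executing this file defines the global LRCN constructor

local model = LRCN{
  batchnorm = 1,       -- enable SpatialBatchNormalization after the first two convs
  dropout = 0.5,       -- > 0 inserts nn.Dropout after the FC layer and after the LSTM
  scaledHeight = 240,  -- frames assumed resized to 240 x 320
  scaledWidth = 320,
  seqLength = 16,      -- frames per video clip
  numClasses = 101,    -- e.g. UCF-101
  numChannels = 3,
  lstmHidden = 256,
}

-- Input: a (N * seqLength) x numChannels x scaledHeight x scaledWidth tensor of frames.
-- Output: (N * seqLength) x numClasses log-probabilities, one prediction per frame.
]]--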