# model.jl
using Knet: @diff, Param, value, grad, params
using Knet: sigm, tanh, softmax

# network dimensions
const in_size = 10
const hiddens = (15,)          # hidden layer sizes; note the trailing comma makes this a 1-tuple, not an Int
const out_size = 10
const memory_size = 5
const turing_hiddens = (8,)    # TODO: controller hidden sizes, not used yet
mutable struct Layer
    # read attention weights (input / state / memory)
    wri::Param; wrs::Param; wrm::Param
    # write attention weights
    wwi::Param; wws::Param; wwm::Param
    # keep (input) gate weights
    wki::Param; wks::Param; wkm::Param
    # forget gate weights
    wfi::Param; wfs::Param; wfm::Param
    # intermediate (candidate) state weights
    wii::Param; wis::Param; wim::Param
    # show (output) gate weights
    wsi::Param; wss::Param; wsm::Param
    # intermediate memory projection
    w::Param
end
Layer(in_size, layer_size) =
    begin
        # read attention
        wri = Param(randn(in_size, memory_size))
        wrs = Param(randn(layer_size, memory_size))
        wrm = Param(randn(memory_size, memory_size))
        # write attention
        wwi = Param(randn(in_size, memory_size))
        wws = Param(randn(layer_size, memory_size))
        wwm = Param(randn(memory_size, memory_size))
        # keep gate
        wki = Param(randn(in_size, layer_size))
        wks = Param(randn(layer_size, layer_size))
        wkm = Param(randn(memory_size, layer_size))
        # forget gate
        wfi = Param(randn(in_size, layer_size))
        wfs = Param(randn(layer_size, layer_size))
        wfm = Param(randn(memory_size, layer_size))
        # intermediate (candidate) state
        wii = Param(randn(in_size, layer_size))
        wis = Param(randn(layer_size, layer_size))
        wim = Param(randn(memory_size, layer_size))
        # show gate
        wsi = Param(randn(in_size, layer_size))
        wss = Param(randn(layer_size, layer_size))
        wsm = Param(randn(memory_size, layer_size))
        # intermediate memory projection
        w = Param(randn(layer_size, memory_size))
        # TODO: hidden params will be added here.
        Layer(wri,wrs,wrm, wwi,wws,wwm, wki,wks,wkm, wfi,wfs,wfm, wii,wis,wim, wsi,wss,wsm, w)
    end
(layer::Layer)(in, state, memory) =
    begin
        # attention distributions over the memory slots
        read  = softmax(in * layer.wri + state * layer.wrs + memory * layer.wrm)
        write = softmax(in * layer.wwi + state * layer.wws + memory * layer.wwm)
        attn_m = read .* memory                  # memory attended by the read head
        # LSTM-style gates, each conditioned on input, state, and attended memory
        keep   = sigm.(in * layer.wki + state * layer.wks + attn_m * layer.wkm)
        forget = sigm.(in * layer.wfi + state * layer.wfs + attn_m * layer.wfm)
        interm = tanh.(in * layer.wii + state * layer.wis + attn_m * layer.wim)
        show   = sigm.(in * layer.wsi + state * layer.wss + attn_m * layer.wsm)
        interm_m = tanh.(interm * layer.w)       # candidate memory content
        state  = forget .* state + keep .* interm
        out    = show .* tanh.(state)
        memory += write .* interm_m              # write head adds to the memory
        (out, state, memory)
    end
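# Minimal smoke test for a single layer, assuming 1-row (batch size 1)
# matrices throughout, as the zero_states/zero_memory defaults below suggest.
# The `demo_*` names are illustrative only, not part of this model.
#
# demo_layer = Layer(in_size, hiddens[1])
# demo_in    = randn(1, in_size)
# demo_state = zeros(1, hiddens[1])
# demo_mem   = zeros(1, memory_size)
# out, st, mem = demo_layer(demo_in, demo_state, demo_mem)
# @assert size(out) == (1, hiddens[1]) && size(mem) == (1, memory_size)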
make() =
    begin
        model = []
        hm_layers = length(hiddens) + 1
        for i in 1:hm_layers
            # pick this layer's input/output sizes (renamed so the loop index isn't shadowed)
            if i == 1
                in_sz, out_sz = in_size, hiddens[1]
            elseif i == hm_layers
                in_sz, out_sz = hiddens[end], out_size
            else
                in_sz, out_sz = hiddens[i-1], hiddens[i]
            end
            push!(model, Layer(in_sz, out_sz))
        end
        model
    end
# initial (zero) hidden state for every layer, plus a zero memory bank
zero_states = [zeros(1, l_size) for l_size in (hiddens..., out_size)]
zero_memory = zeros(1, memory_size)
prop(model, data; states=zero_states, memory=zero_memory) =
    begin
        response = []
        for timestep in data
            neu_states = []
            # thread the input up through the layers; the memory is shared by all layers
            for (layer, state) in zip(model, states)
                timestep, state, memory = layer(timestep, state, memory)
                push!(neu_states, state)
            end
            states = neu_states
            push!(response, timestep)
        end
        (response, states, memory)
    end
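# Sketch of a full forward pass, assuming the input is a sequence of
# 1×in_size rows. `seq` is a hypothetical placeholder, not part of this file.
#
# model = make()
# seq = [randn(1, in_size) for _ in 1:4]
# response, states, memory = prop(model, seq)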
# summed squared error over a pair of equal-length sequences
loss(seq1, seq2) = sum([sum((s1 - s2).^2) for (s1, s2) in zip(seq1, seq2)])
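# Sketch of one gradient step with Knet's tape, using the @diff/params/grad/value
# imports above. `target` and the learning rate `lr` are assumptions; a real
# setup would likely use one of Knet's optimizers instead of this manual SGD.
#
# lr = 0.01
# tape = @diff begin
#     response, _, _ = prop(model, seq)
#     loss(response, target)
# end
# for p in params(model)
#     g = grad(tape, p)
#     g === nothing || (value(p) .-= lr .* g)   # in-place update of the raw weights
# end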