-
Notifications
You must be signed in to change notification settings - Fork 6
/
flow_network.py
205 lines (175 loc) · 9.05 KB
/
flow_network.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
from __future__ import division
import network
import utils
import flows
import tensorflow as tf
import numpy as np
# TODO write and use inverse softplus function to reduce redundancy of
# rewriting.
class flow_network(network.model_base):
"""flow_network is a subclass of model_base for networks with normalizing
flows on top.
The flow networks consists of an MLP which is used to parameterize the
target distribution, whose form is defined by a normalizing flow.
"""
def __init__(self, n_flows=1, predict_var=False, input_dependent=True,
r_mag_W=0., r_mag_alpha=0., r_mag_beta=0., r_mag_z_0=0., **kwargs):
"""initialize a network with normalizing flows.
Args:
n_flows: number of stages in the normalizing flows.
input_dependent: if the parameter of the flow should be a
function of the input.
"""
### Determine number of outputs based on number of flows and types
self.input_dependent = input_dependent
if kwargs.has_key('noise_dim') and kwargs['noise_dim'] != 0:
assert input_dependent
# 1 output for predicting mean, 2 if also variance. Then add an
# additional 3 paramters for each of input-dependent the radial flows
n_outputs = 2 if predict_var else 1
if input_dependent: n_outputs += 3*n_flows
network.model_base.__init__(self, n_outputs=n_outputs, **kwargs)
print "network outputs.shape", self.outputs.shape
### Construct likelihood using normalizing flows
# In this case, the likelihood is defined by our normalizing flow.
additional_params = self.construct_flow(
self.outputs, self.y, n_flows+1, predict_var, input_dependent
)
self.nlog_ls, self.nlog_l = self.likelihood(self.y)
tf.summary.histogram("nlog_ls",self.nlog_ls)
tf.summary.scalar("nlog_l",self.nlog_l)
# If we are doing a 2-stage training, we will only optimize wrt this
# full set of parameters in the 2nd stage.
self.all_params.extend(additional_params)
### Construct Cost (likelihood and regularizers)
self.cost = self.nlog_l
if r_mag_W != 0.:
self.add_weight_decay(r_mag_W)
if r_mag_alpha != 0 or r_mag_beta != 0 or r_mag_z_0 !=0:
self.add_flow_regularization(r_mag_alpha, r_mag_beta, r_mag_z_0)
tf.summary.scalar("cost", self.cost)
### Create 2 optimizer stages
self.construct_optimizer()
def construct_flow(self, outputs, y, n_flows, predict_var,
input_dependent=False):
"""construct_flow builds and links together the normalizing flow and
establishes the log likelihood of samples.
args:
outputs: the outputs of the neural network which we will use to
parameterize the flows.
y: the placeholder tensor for the outputs to be passed through the
flow.
n_flows: number of stages in the flow.
predict_var: true is predicting slope of first flow
input_dependent: True if predicting the variance of flows
after the first one.
Returns:
new parameters of flows (i.e. those not defined as outputs of
the network) and the negative log likelihoods
"""
self.flows, flow_params = [], []
# check for correct number of input dimensions.
if input_dependent or (self.noise_dim != 0):
assert outputs.shape[-1] == (n_flows-1)*3 + (2 if predict_var else 1)
else:
assert outputs.shape[-1] == 2 if predict_var else 1
out_idx = 0 # keep track of which output we are working with.
with tf.name_scope("Normalizing_Flows"):
with tf.variable_scope('network'):
## Construct first flow, a Linear Flow
b = outputs[:, :,out_idx]; out_idx += 1
tf.summary.histogram("LinearFlow_b",b)
if predict_var:
log_m = outputs[:, :, out_idx]; out_idx += 1
else:
log_m = tf.get_variable('log_m', shape=[],
initializer=tf.constant_initializer(0.0))
### Add this to the core set of parameters
self.params.append(log_m)
flow_params.append(log_m)
m = tf.exp(log_m,name="linear_flow_slope")
if predict_var:
tf.summary.histogram("LinearFlow_m",m)
else:
tf.summary.scalar("LinearFlow_m",m)
self.flows.append(flows.LinearFlow(b=b, m=m))
print "b.shape", self.flows[-1].b.shape
print "m.shape", self.flows[-1].m.shape
## Construct the Subsequent Flows
for f_i in range(1, n_flows):
if input_dependent or (self.noise_dim != 0):
z_i = outputs[:, :,out_idx]; out_idx += 1
beta_raw = outputs[:, :,out_idx]; out_idx += 1
alpha_raw = outputs[:, :,out_idx]; out_idx += 1
else:
z_i = tf.get_variable('z_%d'%f_i,
initializer=tf.constant(np.random.normal(size=[1],scale=0.25).astype(np.float32)))
beta_raw = tf.get_variable('beta_%d'%f_i, shape=[1],
initializer=tf.constant_initializer(0.0))
alpha_raw = tf.get_variable('alpha_%d'%f_i, shape=[1],
initializer=tf.constant_initializer(0.0))
flow_params.extend([z_i, beta_raw, alpha_raw])
# In the 1D regression case, it does not make sense ot
# thread the z_0's through previous flows, so prev=None
flow = flows.RadialFlow(z_i, alpha_raw, beta_raw,
prev=None)
self.flows.append(flow)
if input_dependent or (self.noise_dim != 0):
tf.summary.histogram("flow_%d_zi"%f_i,flow.z_0)
tf.summary.histogram("flow_%d_beta"%f_i,flow.beta)
tf.summary.histogram("flow_%d_alpha"%f_i,flow.alpha)
else:
tf.summary.scalar("flow_%d_zi"%f_i,flow.z_0[0])
tf.summary.scalar("flow_%d_beta"%f_i,flow.beta[0])
tf.summary.scalar("flow_%d_alpha"%f_i,flow.alpha[0])
## Check that every output has been used
assert out_idx == outputs.shape[-1]
return flow_params
def likelihood(self, y):
### Link the stages of the flow together. The zs are ordered from the
# base distribution to the observation distribution.
print "y.shape", y.shape
y = tf.stack([y]*self.n_samples)
zs, log_dz0_dy = flows.link(self.flows, y[:, :,0])
# Consider the observed values mapped through flows and make histogram.
self.z_0 = zs[-1]
print "z_0.shape", self.z_0.shape
print "log_dz0_dy.shape", log_dz0_dy.shape
print "y_std.shape", self.y_std.shape
tf.summary.histogram("z_0", self.z_0)
# Define the base distribution that will be warped as unit gaussian
dist = tf.contrib.distributions.Normal(loc=0., scale=1.)
print "log_prob.shape",dist.log_prob(self.z_0)
# Calculate the negative log likelihood
self.log_dz0_dy = log_dz0_dy
self.base_log_prob = dist.log_prob(self.z_0)
nlog_ls = -(log_dz0_dy+dist.log_prob(self.z_0) - tf.log(self.y_std[0]))
### Do log sum exp, if only 1 sample this merely cuts the first dimension.
nlog_ls = -tf.reduce_logsumexp(-nlog_ls, axis=0)
nlog_ls += tf.log(float(self.n_samples))
nlog_l = tf.reduce_mean(nlog_ls)
return nlog_ls, nlog_l
def add_flow_regularization(self, r_mag_alpha, r_mag_beta, r_mag_z_0):
"""add_flow_regularization adds regularization terms on the flows to
the objective.
Args:
r_mag_alpha: penalty on 1/alpha
r_mag_beta: L2 norm penalty on beta
r_mag_z_0: L2 norm penaty on z_0
"""
with tf.name_scope("regularization"):
## Add regularization to Normalizing Flow
self.z_0_cost, self.beta_cost, self.alpha_cost = 0., 0., 0.
for flow in self.flows[1:]:
self.alpha_cost += tf.reduce_mean(1./flow.alpha)*r_mag_alpha
self.beta_cost += tf.reduce_mean(flow.beta**2)*r_mag_beta
self.z_0_cost += tf.reduce_mean(flow.z_0**2)*r_mag_z_0
tf.summary.scalar('alpha_cost', self.alpha_cost)
tf.summary.scalar('beta_cost', self.beta_cost)
tf.summary.scalar('z_0_cost', self.z_0_cost)
self.cost += self.beta_cost + self.z_0_cost + self.alpha_cost
if len(self.flows) > 1:
self.regularizers.update({
"Radius":self.z_0_cost, "Magnitude":self.beta_cost,
"Alpha":self.alpha_cost
})