# mog_network.py
from __future__ import division
from __future__ import print_function
import network
import tensorflow as tf
import numpy as np


class mixture_density_network(network.model_base):
    """mixture_density_network is a subclass of model_base for networks
    with a mixture of Gaussians likelihood model.

    The mixing proportions are defined by a softmax-transformed output.
    The means and variances may also be network outputs (see
    input_dependent).
    """
    def __init__(self, n_components=1, input_dependent=False,
                 r_mag_W=0., **kwargs):
        """Initialize a network with a mixture of Gaussians likelihood.

        Args:
            n_components: number of mixture components.
            input_dependent: if True, the mixing proportions, means, and
                variances are predicted from the input; otherwise they are
                free variables shared across all inputs.
            r_mag_W: weight-decay regularization magnitude.
        """
        ### Determine the number of outputs from the number of mixture
        ### components.
        self.input_dependent = input_dependent
        self.n_components = n_components
        # 1 output for predicting the offset (shift).
        # If input dependent, also predict a mixing proportion, mean, and
        # variance for each mixture component.
        n_outputs = 1
        if self.input_dependent:
            n_outputs += self.n_components * 3
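        # e.g. n_components=3 with input_dependent=True gives
        # n_outputs = 1 + 3 * 3 = 10: one shift plus a (theta, log_sigma,
        # mu) triple per component.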
        network.model_base.__init__(self, n_outputs=n_outputs, **kwargs)
        print("network outputs.shape", self.outputs.shape)
        ### Construct the likelihood from the mixture of Gaussians.
        additional_params = self.construct_mog(
            self.outputs, input_dependent
        )
        self.nlog_ls, self.nlog_l = self.likelihood(self.y)
        tf.summary.histogram("nlog_ls", self.nlog_ls)
        tf.summary.scalar("nlog_l", self.nlog_l)
        # If we are doing 2-stage training, we only optimize wrt this
        # full set of parameters in the 2nd stage.
        self.all_params.extend(additional_params)
        ### Construct the cost (likelihood plus regularizers).
        self.cost = self.nlog_l
        self.add_weight_decay(r_mag_W)
        tf.summary.scalar("cost", self.cost)
        ### Set up the two optimizer stages.
        self.construct_optimizer()

    def construct_mog(self, outputs, input_dependent=False):
        """construct_mog builds the mixture of Gaussians and establishes
        its parameterization.

        Args:
            outputs: the outputs of the neural network, used to
                parameterize the mixture when input_dependent is True.
            input_dependent: True if the mixture components and proportions
                should be a function of the input.
        Returns:
            The new parameters of the MoG (i.e. those not defined as
            outputs of the network).
        """
        mog_params = []
        # Check for the correct number of output dimensions.
        if input_dependent:
            assert outputs.shape[-1] == self.n_components * 3 + 1
        else:
            assert outputs.shape[-1] == 1
        out_idx = 0  # keep track of which output we are working with.
        self.shift = outputs[0, :, out_idx:out_idx + 1]
        out_idx += 1
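        # Layout of the outputs along the last axis when
        # input_dependent=True (K = n_components); out_idx walks through:
        #   [ shift | theta_raw (K) | log_sigmas (K) | mus (K) ]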
        with tf.name_scope("Mixture_of_Gaussians"):
            with tf.variable_scope('network'):
                if input_dependent:
                    # Get the mixing proportions, variances, and means as
                    # slices of the network outputs.
                    theta_raw = outputs[:, :, out_idx:out_idx + self.n_components]
                    out_idx += self.n_components
                    self.theta = tf.nn.softmax(theta_raw)
                    log_sigmas = outputs[:, :, out_idx:out_idx + self.n_components]
                    out_idx += self.n_components
                    self.sigmas = tf.exp(log_sigmas, name="sigmas")
                    self.mus = outputs[:, :, out_idx:out_idx + self.n_components]
                    out_idx += self.n_components
                    for k in range(self.n_components):
                        tf.summary.histogram("Gaussian_%d_sigma" % k,
                                             self.sigmas[:, :, k])
                        tf.summary.histogram("Gaussian_%d_proportion" % k,
                                             self.theta[:, :, k])
                        tf.summary.histogram("Gaussian_%d_mus" % k,
                                             self.mus[:, :, k])
                else:
                    theta_raw = tf.get_variable(
                        'theta', shape=[self.n_components],
                        initializer=tf.constant_initializer(0.0))
                    self.theta = tf.nn.softmax(theta_raw)
                    log_sigmas = tf.get_variable(
                        'log_sigmas', shape=[self.n_components],
                        initializer=tf.constant_initializer(0.0))
                    self.sigmas = tf.exp(log_sigmas, name="sigmas")
                    self.mus = tf.get_variable('mus', initializer=tf.constant(
                        np.random.normal(size=[self.n_components],
                                         scale=0.75).astype(np.float32)))
                    ### Add these to the core set of parameters.
                    self.params.extend([theta_raw, log_sigmas, self.mus])
                    mog_params.extend([theta_raw, log_sigmas, self.mus])
                    for k in range(self.n_components):
                        tf.summary.scalar("Gaussian_%d_proportion" % k,
                                          self.theta[k])
                        tf.summary.scalar("Gaussian_%d_sigma" % k,
                                          self.sigmas[k])
                        tf.summary.scalar("Gaussian_%d_mu" % k, self.mus[k])
        ## Check that every output has been used.
        assert out_idx == outputs.shape[-1]
        return mog_params
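
    # `likelihood` below evaluates the resulting predictive density,
    #   p(y | x) = (1 / y_std) * sum_k theta_k * N(y - shift | mu_k, sigma_k^2)
    # with theta = softmax(theta_raw) and sigma_k = exp(log_sigma_k).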
    def likelihood(self, y):
        """Construct the likelihood of the observations under the MoG."""
        dist = tf.contrib.distributions.Normal(loc=self.mus, scale=self.sigmas)
        print("y.shape", y.shape)
        # Tile the shifted observations across the mixture components so
        # they broadcast against the per-component distributions.
        if len(y.shape) == 2:
            obs = tf.transpose([y[:, 0] - self.shift[:, 0]] * self.n_components)
        else:
            obs = tf.transpose([y[:, 0] - self.shift[:, 0]] * self.n_components,
                               [1, 2, 0])
        likelihoods = dist.prob(obs)
        likelihoods = likelihoods * self.theta
        print("likelihoods shape (pre reduce):", likelihoods.shape)
        likelihoods = tf.reduce_sum(likelihoods, axis=-1, keep_dims=True)
        print("likelihoods shape (post reduce):", likelihoods.shape)
        # Negative log likelihood, with a change-of-variables correction
        # for the standardization of y by y_std.
        nlog_ls = -(tf.log(likelihoods) - tf.log(self.y_std))
        nlog_l = tf.reduce_mean(nlog_ls)
        return nlog_ls, nlog_l
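

# ----------------------------------------------------------------------------
# A minimal NumPy sketch of the per-sample negative log likelihood computed
# by `likelihood` above, for the input-independent case with scalar targets.
# This is illustrative only: `mog_nll` and its argument names are
# hypothetical, not part of this module's API.
def mog_nll(y, shift, theta, mus, sigmas, y_std=1.0):
    """Per-sample NLL of standardized targets y under a 1-D MoG.

    Args:
        y: [N] standardized targets.
        shift: [N] per-input offsets subtracted from y.
        theta: [K] mixing proportions (assumed to sum to 1).
        mus, sigmas: [K] component means and standard deviations.
        y_std: standard deviation used to standardize y; corrects the
            density via a change of variables back to the original scale.
    """
    obs = (y - shift)[:, None]                                   # [N, 1]
    z = (obs - mus[None, :]) / sigmas[None, :]                   # [N, K]
    comp = np.exp(-0.5 * z ** 2) / (sigmas[None, :] * np.sqrt(2 * np.pi))
    lik = np.sum(theta[None, :] * comp, axis=-1)                 # [N]
    return -(np.log(lik) - np.log(y_std))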