#=
Implementation of variational-inference Bayesian neural networks. This is largely a port of the Python
Autograd example. A small neural network is implemented by hand, optimisation and gradients are handled
by Flux.jl, and the variational posterior uses the mean-field assumption with a fixed Gaussian form
(a fully factorised Gaussian over the network weights).
=#
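#=
For reference: black-box mean-field VI of this form maximises a Monte-Carlo estimate of the
evidence lower bound (ELBO),

    ELBO(mu, log_sigma) ~= (1/S) * sum_s logprob(w_s) + H[q],   w_s = mu .+ exp.(log_sigma) .* eps_s,   eps_s ~ N(0, I),

where logprob is the unnormalised log joint defined below, q is the diagonal Gaussian with
parameters (mu, exp.(log_sigma)), and H[q] its entropy. The concrete estimator (the `objective`
passed to `train_bayesian_neural_network`) is implemented in BlackboxSVI.jl and is not shown in this file.
=#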
module BayesVI
using Flux
using Distributions
using Plots
using OrderedCollections
using Random
using OMEinsum
using LinearAlgebra
using BenchmarkTools
include("./BlackboxSVI.jl")
using .BlackboxSVI
#= Function to specify and produce the Bayesian neural network.
Input:
layer_sizes = vector of integers specifying the number of nodes in each layer, e.g. [1, 20, 20, 1].
L2_reg = a single float specifying the L2 regularisation constant (strength of the weight prior).
noise_variance = observation noise variance used in the Gaussian likelihood.
nonlinearity = function used as the non-linearity between layers.
Output:
num_weights = total number of weights (and biases) in the produced neural network.
predictions = forward-pass function.
logprob = log joint probability function.
ensemble_prediction = function returning the mean prediction over weight sets sampled from the posterior.
A minimal usage sketch follows the function definition below.
=#
function make_neural_network_functions(layer_sizes, L2_reg, noise_variance, nonlinearity)
shapes = collect(zip(layer_sizes[1:end-1], layer_sizes[2:end]))
num_weights = sum((m+1)*n for (m, n) in shapes)
function unpack_layers(weights)
num_weights_sets = size(weights)[1]
W = OrderedDict()
b = OrderedDict()
i=1
for (m, n) in shapes
W[i] = reshape(weights[:, 1:m*n], (num_weights_sets, m, n))
b[i] = reshape(weights[:, m*n+1:(m+1)*n], (num_weights_sets, 1, n))
# Drop the (m+1)*n columns consumed by this layer (1-based indexing).
weights = weights[:, (m+1)*n+1:end]
i += 1
end
return (W, b)
end
# Forward pass: returns predictions for every weight sample (one model per sample) drawn from the posterior.
# inputs dimensions: observations x features
# weights dimensions: weight samples x num_weights
function predictions(weights, inputs)
inputs = reshape(inputs, 1, size(inputs)...)
params = unpack_layers(weights)
Ws = params[1]
bs = params[2]
# Tile the inputs once per weight sample along the first (sample) dimension.
inputs = repeat(inputs, size(weights, 1), 1, 1)
# Go through all layers. (W, b) holds every posterior sample of the weights and biases for one layer.
for j in range(1, length(Ws))
W = Ws[j]
b = bs[j]
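# einsum indices: m = weight sample, n = observation, d = layer input dim, o = layer output dim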
outputs = ein"mnd,mdo->mno"(inputs, W) .+ b
inputs = nonlinearity(outputs)
if j == length(Ws)
return outputs
end
end
end
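# Average the forward passes of `number_of_ensemble` weight sets drawn from the variational posterior.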
function ensemble_prediction(params, inputs, number_of_ensemble)
mean_vals, log_std = unpack_params(params)
samples = randn(number_of_ensemble, length(mean_vals)) .* exp.(log_std)' .+ mean_vals'
mean(predictions(samples, inputs), dims=1)
end
# Returns a vector of length mc_samples containing the unnormalised log joint probability (log prior + log likelihood) for each weight sample.
# weights = mc_samples x num_weights (non-variational weights)
# output = vector of log joint values of length mc_samples
function logprob(weights, inputs, targets)
log_prior = -L2_reg * sum(weights.^2, dims=2)
preds = predictions(weights, inputs)
log_lik = -sum((preds .- targets').^2, dims=2)[:,1]./noise_variance
return log_prior + log_lik
end
return num_weights, predictions, logprob, ensemble_prediction
end
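#= Usage sketch (hypothetical hyperparameters; nothing here is prescribed by the original code):
    num_weights, predictions, logprob, ensemble_prediction =
        make_neural_network_functions([1, 20, 20, 1], 0.1, 0.01, rbf)
    # `predictions(weights, inputs)` expects `weights` of size (weight samples x num_weights) and
    # `inputs` of size (observations x features); it returns (samples x observations x outputs).
    # `logprob(weights, inputs, targets)` returns one unnormalised log joint value per weight sample.
=#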
# Draw a set of weights from the variational posterior via the reparameterisation weights = means .+ exp.(log_stds) .* eps, eps ~ N(0, I).
function sample_posteriors(variational_parameters)
means, log_stds = unpack_params(variational_parameters)
#length(means) is equivalent to the number of weights in the (non-variational) neural network
return randn(1, length(means)) .* exp.(log_stds)' .+ means'
end
# Initialise variational parameters randomly.
function initialise_variational_parameters(num_weights)
init_mean = randn(num_weights)
init_log_std = -5 * ones(num_weights)
return vcat(init_mean, init_log_std)
end
#=
RBF activation function.
=#
function rbf(x)
exp.(-x.^2)
end
#=
Linear activation function.
=#
function linear(x)
x
end
#=
Trains the Bayesian neural network by gradient descent on the variational objective (gradients via Flux).
Inputs:
epochs: number of training iterations.
learning_rate: the learning rate for the ADAM optimiser.
num_weights: number of weights in the (non-variational) model; the variational parameter vector has length 2 * num_weights.
objective: variational objective (the negative ELBO) produced by the black-box VI library.
Output:
param_hist: history of the variational parameters throughout training.
elbos: ELBO value at each iteration.
A minimal usage sketch follows the function definition below.
=#
function train_bayesian_neural_network(epochs, learning_rate, num_weights, objective)
init_var_params = initialise_variational_parameters(num_weights)
param_hist = Vector{Vector{Float64}}()
opt = ADAM(learning_rate)
elbos = zeros(epochs)
for i in range(1,epochs)
Flux.Optimise.update!(opt, init_var_params, gradient(objective, (init_var_params))[1])
push!(param_hist, copy(init_var_params))
elbos[i] = -objective(init_var_params)
end
return param_hist, elbos
end
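#= Usage sketch (illustrative only): `objective(var_params)` should map the flat variational
parameter vector [means; log_stds] to the negative ELBO. A dummy quadratic stands in for the real
objective from BlackboxSVI.jl purely to show the call:
    dummy_objective(var_params) = sum(abs2, var_params)
    param_hist, elbos = train_bayesian_neural_network(50, 0.01, num_weights, dummy_objective)
=#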
# Sample posteriors and plot predictions.
function sample_and_plot(inputs, targets, init_var_params, number_of_models, prediction_fn, title_name="", ylims=(-3, 3), xlims=(-8, 8), xmin=-8, xmax=8, xrange=150)
plot_inputs = collect(LinRange(xmin, xmax, xrange))
plot_inputs = reshape(plot_inputs, (length(plot_inputs), 1))
scatter(inputs, targets, label="")
for i in range(1, number_of_models)
sample_weights = sample_posteriors(init_var_params)
outs = prediction_fn(sample_weights, plot_inputs)
plot!(plot_inputs, outs[1, :, 1], ylim=ylims, xlim=xlims, size=(700,400), label="", title=title_name)
end
end
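# Example (assumes `inputs`, `targets`, trained `var_params`, and the `predictions` closure are in scope):
# sample_and_plot(inputs, targets, var_params, 10, predictions, "Posterior samples")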
#=
Convenience method for visualising drawn samples as a GIF. It returns a GIF showing the observed data together with the predictions
of the sampled models, and is only used for variational inference results.
Inputs:
param_hist: history of variational parameters over training.
number_of_models: number of whole neural networks to sample from the variational posterior per frame.
The remaining parameters control the plot.
Output:
a GIF animation showing how the predictions of models sampled from the posterior change over training.
=#
function animate_variational_params(inputs, targets, param_hist, number_of_models, prediction_fn, ylims=(-3, 3), xlims=(-8, 8), xmin=-8, xmax=8, xrange=150)
epochs = length(param_hist)
anim = @animate for i in range(1, epochs)
title="Iteration: "*string(i)*"/$epochs - Bayesian Neural Network"
sample_and_plot(inputs, targets, param_hist[i], number_of_models, prediction_fn, title, ylims, xlims, xmin, xmax, xrange)
end
gif(anim, fps=20)
end
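# Example (same assumptions as above; draws 5 networks per frame over the whole training history):
# animate_variational_params(inputs, targets, param_hist, 5, predictions)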
export unpack_params
export black_box_variational_inference
export make_neural_network_functions
export sample_posteriors
export initialise_variational_parameters
export sample_and_plot
export rbf
export linear
export train_bayesian_neural_network
export animate_variational_params
end