utils.py
import torch
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self, dimensions):
        '''
        :param dimensions: sizes of the layers, e.g. [input, hidden, ..., output]
        '''
        super(MLP, self).__init__()
        self.layers = nn.ModuleList()
        for i in range(len(dimensions) - 1):
            self.layers.append(nn.Linear(dimensions[i], dimensions[i + 1]))

    def forward(self, x):
        # ReLU after every layer except the last, which stays linear
        for layer in self.layers[:-1]:
            x = F.relu(layer(x))
        x = self.layers[-1](x)
        return x

    def initialize(self):
        '''
        Initialize weights using Glorot (also known as Xavier)
        initialization; biases are set to a small constant.
        '''
        def initialize_weights(m):
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.constant_(m.bias, 0.01)
        self.apply(initialize_weights)
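
# A minimal usage sketch of MLP (the sizes below are illustrative
# assumptions, not from the original file):
#   net = MLP([4, 64, 64, 2])
#   net.initialize()
#   out = net(torch.randn(1, 4))
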
class DQNWithPrior(nn.Module):
    def __init__(self, dimensions, scale=5):
        '''
        :param dimensions: dimensions of the neural network
        :param scale: weight on the prior network's output

        Builds a prior network with immutable weights and a
        difference network whose weights will be learnt.
        '''
        super(DQNWithPrior, self).__init__()
        self.f_diff = MLP(dimensions)
        self.f_prior = MLP(dimensions)
        self.scale = scale

    def forward(self, x):
        '''
        :param x: input to the network
        :return: f_diff(x) + scale * f_prior(x)

        Performs the forward pass of the network.
        '''
        return self.f_diff(x) + self.scale * self.f_prior(x)

    def initialize(self):
        '''
        Initialize both networks using Glorot (also known as Xavier)
        initialization, then freeze f_prior.
        '''
        self.f_prior.initialize()
        # Freeze the prior: disable gradients in addition to eval mode,
        # so its weights can never be updated
        for p in self.f_prior.parameters():
            p.requires_grad_(False)
        self.f_prior.eval()
        self.f_diff.initialize()

    def parameters(self, recurse: bool = True):
        '''
        :param recurse: whether to include parameters of submodules
        :return: only the learnable parameters, i.e. those of f_diff;
                 f_prior is excluded so optimizers never update it
        '''
        return self.f_diff.parameters(recurse)
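

# A minimal usage sketch of DQNWithPrior (the dimensions, scale, learning
# rate and placeholder loss below are illustrative assumptions, not from
# the original file):
if __name__ == '__main__':
    model = DQNWithPrior(dimensions=[4, 64, 64, 2], scale=5)
    model.initialize()
    # The overridden parameters() hands the optimizer only f_diff's weights,
    # so the prior network stays fixed throughout training
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    x = torch.randn(8, 4)          # a batch of 8 four-dimensional states
    q_values = model(x)            # shape: (8, 2)
    loss = q_values.pow(2).mean()  # placeholder loss just to exercise backward()
    loss.backward()
    optimizer.step()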