Skip to content

Commit

Permalink
Update u, V, and the rho network together
Browse files Browse the repository at this point in the history
  • Loading branch information
LovelyBuggies committed Sep 15, 2022
1 parent 65e4f89 commit 2a08ec3
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 42 deletions.
40 changes: 5 additions & 35 deletions MFG_VI.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,14 @@
from value_iteration_ddpg import train_ddpg
from utils import get_rho_from_u
from utils import plot_3d
import pandas as pd

if __name__ == '__main__':
n_cell = 32
T_terminal = 1
d = np.array([
0.8000,
0.7999,
0.7998,
0.7993,
0.7981,
0.7950,
0.7881,
0.7739,
0.7478,
0.7041,
0.6386,
0.5509,
0.4474,
0.3426,
0.2558,
0.2065,
0.2065,
0.2558,
0.3426,
0.4474,
0.5509,
0.6386,
0.7041,
0.7478,
0.7739,
0.7881,
0.7950,
0.7981,
0.7993,
0.7998,
0.7999,
0.8000
])
u, rho = train_ddpg(n_cell, T_terminal, d, 800)
data = pd.read_csv('data_rho_sep.csv')
rho = np.array(data.iloc[:, 1:len(data.iloc[0, :])])
d = rho[:, 0]
u, rho = train_ddpg(n_cell, T_terminal, d, 3000)


File renamed without changes.
File renamed without changes.
24 changes: 17 additions & 7 deletions value_iteration_ddpg.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ def forward(self, x):
return self.model(torch.from_numpy(x).float())


def train_rho_network(n_cell, T_terminal, rho, rho_network, rho_optimizer):
def train_rho_network_one_step(n_cell, T_terminal, rho, rho_network, rho_optimizer):
truths, keys = list(), list()
for i in range(len(rho)):
for j in range(len(rho[0])):
truths.append(rho[i, j])
keys.append(np.array([i, j]) / n_cell)

for _ in range(2000):
for _ in range(1):
truths = torch.tensor(truths, requires_grad=True)
preds = torch.reshape(rho_network(np.array(keys)), (1, len(truths)))
loss = (truths - preds).abs().mean()
Expand All @@ -79,26 +79,36 @@ def train_ddpg(n_cell, T_terminal, d, iterations):

rho_network = RhoNetwork(2)
rho_optimizer = torch.optim.Adam(rho_network.parameters(), lr=1e-3)
rho_network = train_rho_network(n_cell, T_terminal, rho, rho_network, rho_optimizer)
rho_network = train_rho_network_one_step(n_cell, T_terminal, rho, rho_network, rho_optimizer)

for it in range(iterations):
# train V
for t in range(T):
for i in range(n_cell):
rho_i_t = float(rho_network.forward(np.array([i, t]) / n_cell))
u[i, t] = (V[i, t + 1] - V[i + 1, t + 1]) / delta_T + 1 - rho_i_t
# switch lwr, non-sep, sep
# u[i, t] = 1 - rho_i_t
# u[i, t] = (V[i, t + 1] - V[i + 1, t + 1]) / delta_T + 1 - rho_i_t
u[i, t] = (V[i, t + 1] - V[i + 1, t + 1]) / delta_T + 1

u[i, t] = min(max(u[i, t], 0), 1)
V[i, t] = delta_T * (0.5 * u[i, t] ** 2 + rho_i_t * u[i, t] - u[i, t]) + (1 - u[i, t]) * V[
# V[i, t] = delta_T * 0.5 * (1 - rho_i_t - u[i, t]) ** 2 + (1 - u[i, t]) * V[
# i, t + 1] + u[i, t] * V[i + 1, t + 1]
# V[i, t] = delta_T * (0.5 * u[i, t] ** 2 + rho_i_t * u[i, t] - u[i, t]) + (1 - u[i, t]) * V[
# i, t + 1] + u[i, t] * V[i + 1, t + 1]
V[i, t] = delta_T * (0.5 * u[i, t] ** 2 + rho_i_t - u[i, t]) + (1 - u[i, t]) * V[
i, t + 1] + u[i, t] * V[i + 1, t + 1]



V[-1, :] = V[0, :].copy()

u_hist.append(u)
u = np.array(u_hist).mean(axis=0)
rho = get_rho_from_u(u, d)
rho_network = train_rho_network(n_cell, T_terminal, rho, rho_network, rho_optimizer)
rho_network = train_rho_network_one_step(n_cell, T_terminal, rho, rho_network, rho_optimizer)

if it % 10 == 0 and it != 0:
if it % 50 == 0 and it != 0:
plot_3d(32, 1, u, f"./fig/u/{it}.png") # show without fp
plot_3d(32, 1, rho, f"./fig/rho/{it}.png") # show with fp on rho

Expand Down

0 comments on commit 2a08ec3

Please sign in to comment.