Update the u, V, and rho networks together

LovelyBuggies · Sep 15, 2022 · 2a08ec3 · 2a08ec3
1 parent 65e4f89
commit 2a08ec3
Show file tree

Hide file tree

Showing 4 changed files with 22 additions and 42 deletions.
diff --git a/MFG_VI.py b/MFG_VI.py
@@ -2,44 +2,14 @@
 from value_iteration_ddpg import train_ddpg
 from utils import get_rho_from_u
 from utils import plot_3d
+import pandas as pd
 
 if __name__ == '__main__':
     n_cell = 32
     T_terminal = 1
-    d = np.array([
-        0.8000,
-        0.7999,
-        0.7998,
-        0.7993,
-        0.7981,
-        0.7950,
-        0.7881,
-        0.7739,
-        0.7478,
-        0.7041,
-        0.6386,
-        0.5509,
-        0.4474,
-        0.3426,
-        0.2558,
-        0.2065,
-        0.2065,
-        0.2558,
-        0.3426,
-        0.4474,
-        0.5509,
-        0.6386,
-        0.7041,
-        0.7478,
-        0.7739,
-        0.7881,
-        0.7950,
-        0.7981,
-        0.7993,
-        0.7998,
-        0.7999,
-        0.8000
-    ])
-    u, rho = train_ddpg(n_cell, T_terminal, d, 800)
+    data = pd.read_csv('data_rho_sep.csv')
+    rho = np.array(data.iloc[:, 1:len(data.iloc[0, :])])
+    d = rho[:, 0]
+    u, rho = train_ddpg(n_cell, T_terminal, d, 3000)
 
 
diff --git a/data_rho_lwr_new.csv → data_rho_lwr.csv b/data_rho_lwr_new.csv → data_rho_lwr.csv
diff --git a/data_rho_sep_new.csv → data_rho_sep.csv b/data_rho_sep_new.csv → data_rho_sep.csv
diff --git a/value_iteration_ddpg.py b/value_iteration_ddpg.py
@@ -50,14 +50,14 @@ def forward(self, x):
         return self.model(torch.from_numpy(x).float())
 
 
-def train_rho_network(n_cell, T_terminal, rho, rho_network, rho_optimizer):
+def train_rho_network_one_step(n_cell, T_terminal, rho, rho_network, rho_optimizer):
     truths, keys = list(), list()
     for i in range(len(rho)):
         for j in range(len(rho[0])):
             truths.append(rho[i, j])
             keys.append(np.array([i, j]) / n_cell)
 
-    for _ in range(2000):
+    for _ in range(1):
         truths = torch.tensor(truths, requires_grad=True)
         preds = torch.reshape(rho_network(np.array(keys)), (1, len(truths)))
         loss = (truths - preds).abs().mean()
@@ -79,26 +79,36 @@ def train_ddpg(n_cell, T_terminal, d, iterations):
 
     rho_network = RhoNetwork(2)
     rho_optimizer = torch.optim.Adam(rho_network.parameters(), lr=1e-3)
-    rho_network = train_rho_network(n_cell, T_terminal, rho, rho_network, rho_optimizer)
+    rho_network = train_rho_network_one_step(n_cell, T_terminal, rho, rho_network, rho_optimizer)
 
     for it in range(iterations):
         # train V
         for t in range(T):
             for i in range(n_cell):
                 rho_i_t = float(rho_network.forward(np.array([i, t]) / n_cell))
-                u[i, t] = (V[i, t + 1] - V[i + 1, t + 1]) / delta_T + 1 - rho_i_t
+                # switch model (lwr / non-sep / sep): keep exactly one u update and its matching V update active, in that order below
+                # u[i, t] = 1 - rho_i_t
+                # u[i, t] = (V[i, t + 1] - V[i + 1, t + 1]) / delta_T + 1 - rho_i_t
+                u[i, t] = (V[i, t + 1] - V[i + 1, t + 1]) / delta_T + 1
+
                 u[i, t] = min(max(u[i, t], 0), 1)
-                V[i, t] = delta_T * (0.5 * u[i, t] ** 2 + rho_i_t * u[i, t] - u[i, t]) + (1 - u[i, t]) * V[
+                # V[i, t] = delta_T * 0.5 * (1 - rho_i_t - u[i, t]) ** 2 + (1 - u[i, t]) * V[
+                #     i, t + 1] + u[i, t] * V[i + 1, t + 1]
+                # V[i, t] = delta_T * (0.5 * u[i, t] ** 2 + rho_i_t * u[i, t] - u[i, t]) + (1 - u[i, t]) * V[
+                #     i, t + 1] + u[i, t] * V[i + 1, t + 1]
+                V[i, t] = delta_T * (0.5 * u[i, t] ** 2 + rho_i_t - u[i, t]) + (1 - u[i, t]) * V[
                     i, t + 1] + u[i, t] * V[i + 1, t + 1]
 
+
+
         V[-1, :] = V[0, :].copy()
 
         u_hist.append(u)
         u = np.array(u_hist).mean(axis=0)
         rho = get_rho_from_u(u, d)
-        rho_network = train_rho_network(n_cell, T_terminal, rho, rho_network, rho_optimizer)
+        rho_network = train_rho_network_one_step(n_cell, T_terminal, rho, rho_network, rho_optimizer)
 
-        if it % 10 == 0 and it != 0:
+        if it % 50 == 0 and it != 0:
             plot_3d(32, 1, u, f"./fig/u/{it}.png")  # show without fp
             plot_3d(32, 1, rho, f"./fig/rho/{it}.png")  # show with fp on rho