bmeyers · bmeyers · Mar 19, 2019 · Mar 19, 2019 · Mar 19, 2019 · Mar 20, 2019
diff --git a/Siobhan_scratch_testing.ipynb b/Siobhan_scratch_testing.ipynb
diff --git a/scripts/run_ddpg.py b/scripts/run_ddpg.py
@@ -6,7 +6,8 @@
 from virtual_microgrids.algorithms import DDPG
 
 if __name__ == '__main__':
-    config = get_config('Six_Bus_POC', algorithm='DDPG')
+    config = get_config('rural_1', algorithm='DDPG')
+    # config = get_config('Six_Bus_MVP2', algorithm='DDPG')
     env = NetModel(config=config)
     # train model
     model = DDPG(env, config)

diff --git a/virtual_microgrids/algorithms/ddpg.py b/virtual_microgrids/algorithms/ddpg.py
@@ -180,7 +180,7 @@ def train(self):
                                            self.config.reasonable_max_episodes*self.config.max_ep_steps)
         critic_lr_schedule = LinearSchedule(self.config.critic_learning_rate_start, self.config.critic_learning_rate_end,
                                             self.config.reasonable_max_episodes*self.config.max_ep_steps)
-        noise_schedule = LogSchedule(0.5, 0.0001, self.config.reasonable_max_episodes*self.config.max_ep_steps)
+        noise_schedule = LogSchedule(1.0, 0.001, self.config.reasonable_max_episodes*self.config.max_ep_steps)
 
         # noise_schedule = LinearSchedule(0.5, 0.01, self.config.reasonable_max_episodes*self.config.max_ep_steps)
 
@@ -199,6 +199,7 @@ def train(self):
 
             best_r = 0.0
             best_reward_logical = None
+            optimal_action = None
 
             soc_track = np.zeros((self.config.max_ep_steps, self.env.net.storage.shape[0]))
             p_track = np.zeros((self.config.max_ep_steps, self.env.net.storage.shape[0]))
@@ -252,6 +253,7 @@ def train(self):
                 if done:
                     if ep_reward > best_ep_reward:
                         best_ep_reward = ep_reward
+                        optimal_action = a
                     total_rewards.append(ep_reward)
                     ep_ave_max_q /= j
                     ave_max_q.append(ep_ave_max_q)
@@ -276,8 +278,10 @@ def train(self):
 
                 msg2 = "Max single reward: "+str(best_r)
                 msg3 = "Max reward happened on lines: "+str(best_reward_logical)
+                msg4 = "The optimal action was: "+str(optimal_action)
                 end = "\n--------------------------------------------------------"
                 self.logger.info(msg2)
+                self.logger.info(msg4)
                 self.logger.info(msg3 + end)
 
                 fig, ax = plt.subplots(nrows=3, sharex=True)

diff --git a/virtual_microgrids/configs/six_bus_mvp1.py b/virtual_microgrids/configs/six_bus_mvp1.py
@@ -44,4 +44,4 @@ def __init__(self, use_baseline, actor):
 
         # reward function
         self.reward_epsilon = 0.001
-        self.cont_reward_lambda = 0.1
+        self.cont_reward_lambda = 0.1
diff --git a/virtual_microgrids/configs/standard_lv_network.py b/virtual_microgrids/configs/standard_lv_network.py
@@ -1,7 +1,9 @@
 import numpy as np
 from pandapower.networks import create_synthetic_voltage_control_lv_network as mknet
+from virtual_microgrids.configs.config_base import ConfigBase
 
-class StandardLVNetwork(object):
+
+class StandardLVNetwork(ConfigBase):
     """The configurations for using any of the standard low voltage (LV) test networks shipped with pandapower.
 
     Options in this set up include choices to remove the generation and load elements built in to the test network, and
@@ -18,38 +20,10 @@ class StandardLVNetwork(object):
     """
     def __init__(self, env_name, use_baseline, actor):
         self.env_name = env_name
+        super().__init__(use_baseline, actor, self.env_name)
 
-        # output config
-        baseline_str = 'baseline' if use_baseline else 'no_baseline'
-        self.output_path = "results/{}-{}-{}/".format(self.env_name, baseline_str, actor)
-        self.model_output = self.output_path + "model.weights/"
-        self.log_path = self.output_path + "log.txt"
-        self.plot_output = self.output_path + "scores.png"
-        self.record_path = self.output_path
-        self.record_freq = 5
-        self.summary_freq = 1
-        self.summary_freq2 = 1000
-
-        # model and training - general
-        self.gamma                  = 0.9  # the discount factor
-
-        # model and training config - PG
-        self.num_batches            = 500  # number of batches trained on
-        self.batch_size             = 1000  # number of steps used to compute each policy update
-        self.max_ep_len             = 60  # maximum episode length
-        self.learning_rate          = 3e-2
-        self.use_baseline           = use_baseline
-        self.normalize_advantage    = True
-
-        # model and training config - DDPG
-        self.tau                    = 0.001
-        self.reward_epsilon = 0.001
-        self.actor_learning_rate    = 1e-3
-        self.critic_learning_rate   = 1e-2
-        self.buffer_size            = 1e6
-        self.minibatch_size         = 64
-        self.max_episodes           = 500
-        self.max_ep_steps           = self.max_ep_len
+        self.reasonable_max_episodes = 1000
+        self.max_episodes = 2000
 
         self.remove_q = True
         self.clear_loads_sgen = False
@@ -65,38 +39,62 @@ def __init__(self, env_name, use_baseline, actor):
             self.static_feeds = {}
         else:
             self.static_feeds = self.static_feeds_new.copy()
+
+        n = self.max_ep_len + 1
         net = mknet(network_class=env_name)
         if not self.clear_loads_sgen:
             if net.load.shape[0] > 0:
                 for idx, row in net.load.iterrows():
-                    self.static_feeds[row['bus']] = row['p_kw'] * np.ones(self.max_ep_len)
+                    if row['bus'] in self.static_feeds:
+                        self.static_feeds[row['bus']].update({0: row['p_kw'] * np.ones(n)})
+                    else:
+                        self.static_feeds[row['bus']] = {0: row['p_kw'] * np.ones(n)}
             if net.sgen.shape[0] > 0:
                 for idx, row in net.sgen.iterrows():
-                    self.static_feeds[row['bus']] = row['p_kw'] * np.ones(self.max_ep_len)
+                    if row['bus'] in self.static_feeds:
+                        self.static_feeds[row['bus']].update({1: row['p_kw'] * np.ones(n)})
+                    else:
+                        self.static_feeds[row['bus']] = {1: row['p_kw'] * np.ones(n)}
 
-        self.battery_locations = None  # Specify specific locations, or can pick options for random generation:
+        self.battery_locations = [19, 23, 24, 22, 8, 10]  # Specific bus locations; set to None to use the options for random generation below:
         self.percent_battery_buses = 0.5  # How many of the buses should be assigned batteries
         self.batteries_on_leaf_nodes_only = True
 
         # Action space
-        self.gen_p_min = -50.0
+        self.gen_p_min = -10.0
         self.gen_p_max = 0.0
-        self.storage_p_min = -50.0
-        self.storage_p_max = 50.0
+        self.storage_p_min = -20.0
+        self.storage_p_max = 20.0
 
-        # Generation
-        self.gen_locations = [4]
-        self.gen_max_p_kw = [20.0]
+        # Generation
+        self.gen_locations = None
+        # self.gen_locations = [4]
+        self.gen_max_p_kw = 20.0
 
         self.init_soc = 0.5
-        self.energy_capacity = 20.0
-
-        # parameters for the policy and baseline models
-        self.n_layers               = 1
-        self.layer_size             = 16
-        self.activation             = None
-
-        # since we start new episodes for each batch
-        assert self.max_ep_len <= self.batch_size
-        if self.max_ep_len < 0:
-            self.max_ep_len = self.batch_size
+        self.energy_capacity = 50.0
+
+        # state space
+        self.with_soc = False
+
+        # reward function
+        self.reward_epsilon = 0.01
+        self.cont_reward_lambda = 0.1
+
+        self.moving = True
+        self.randomize_env = True  # If this is true, keep the storage buses fixed (set battery_locations explicitly)
+
+        if self.moving:
+            for bus, feed in self.static_feeds.items():
+                if isinstance(feed, dict):
+                    for idx, feed2 in feed.items():
+                        a = np.random.uniform(-1, 1)
+                        scale = np.random.uniform(0.5, 2)
+                        feed2 += a * np.sin(2 * np.pi * np.arange(n) * scale / n)
+                else:
+                    a = np.random.uniform(-1, 1)
+                    scale = np.random.uniform(0.5, 2)
+                    feed += a * np.sin(2 * np.pi * np.arange(n) * scale / n)
+
+        self.n_layers = 2
+        self.layer_size = 128
diff --git a/virtual_microgrids/powerflow/network_generation.py b/virtual_microgrids/powerflow/network_generation.py
@@ -122,12 +122,21 @@ def six_bus(vn_high=20, vn_low=0.4, length_km=0.03, std_type='NAYY 4x50 SE', bat
     else:
         if len(static_feeds) > 0:
             for key, val in static_feeds.items():
-                init_flow = val[0]
-                print('init_flow: ', init_flow, 'at bus: ', key)
-                if init_flow > 0:
-                    pp.create_load(net, bus=key, p_kw=init_flow, q_kvar=0)
+                if isinstance(val, dict):
+                    for key2, val2 in val.items():
+                        init_flow = val2[0]
+                        print('init_flow: ', init_flow, 'at bus: ', key)
+                        if init_flow > 0:
+                            pp.create_load(net, bus=key, p_kw=init_flow, q_kvar=0)
+                        else:
+                            pp.create_sgen(net, bus=key, p_kw=init_flow, q_kvar=0)
                 else:
-                    pp.create_sgen(net, bus=key, p_kw=init_flow, q_kvar=0)
+                    init_flow = val[0]
+                    print('init_flow: ', init_flow, 'at bus: ', key)
+                    if init_flow > 0:
+                        pp.create_load(net, bus=key, p_kw=init_flow, q_kvar=0)
+                    else:
+                        pp.create_sgen(net, bus=key, p_kw=init_flow, q_kvar=0)
 
     return net
 
@@ -186,6 +195,7 @@ def standard_lv(env_name, remove_q=True, static_feeds_new=None, clear_loads_sgen
             applied_battery_locations = np.random.choice(net.bus.shape[0],
                                                          int(percent_battery_buses * net.bus.shape[0]), replace=False)
     if len(applied_battery_locations) > 0:
+        print('Storage applied on nodes: ', applied_battery_locations)
         num_batteries = len(applied_battery_locations)
         for idx, bus_number in enumerate(applied_battery_locations):
             energy_capacity_here = energy_capacity
@@ -209,14 +219,24 @@ def standard_lv(env_name, remove_q=True, static_feeds_new=None, clear_loads_sgen
     if static_feeds_new is None:
         print('No loads or generation added to network')
     else:
+        print('Didnt think static_feeds_new was None')
         if len(static_feeds_new) > 0:
             for key, val in static_feeds_new.items():
-                init_flow = val[0]
-                print('init_flow: ', init_flow, 'at bus: ', key)
-                if init_flow > 0:
-                    pp.create_load(net, bus=key, p_kw=init_flow, q_kvar=0)
+                if isinstance(val, dict):
+                    for key2, val2 in val.items():
+                        init_flow = val2[0]
+                        print('init_flow: ', init_flow, 'at bus: ', key)
+                        if init_flow > 0:
+                            pp.create_load(net, bus=key, p_kw=init_flow, q_kvar=0)
+                        else:
+                            pp.create_sgen(net, bus=key, p_kw=init_flow, q_kvar=0)
                 else:
-                    pp.create_sgen(net, bus=key, p_kw=init_flow, q_kvar=0)
+                    init_flow = val[0]
+                    print('init_flow: ', init_flow, 'at bus: ', key)
+                    if init_flow > 0:
+                        pp.create_load(net, bus=key, p_kw=init_flow, q_kvar=0)
+                    else:
+                        pp.create_sgen(net, bus=key, p_kw=init_flow, q_kvar=0)
 
     #  Name buses for plotting
     for i in range(net.bus.name.shape[0]):

diff --git a/virtual_microgrids/powerflow/pp_network.py b/virtual_microgrids/powerflow/pp_network.py
@@ -45,9 +45,13 @@ def __init__(self, config=None, env_name='Six_Bus_POC', baseline=True,
         self.graph = Graph(len(self.net.bus))
         for idx, entry in self.net.line.iterrows():
             self.graph.addEdge(entry.from_bus, entry.to_bus)
+        for idx, entry in self.net.trafo.iterrows():
+            self.graph.addEdge(entry.hv_bus, entry.lv_bus)
+
         self.current_state = None
         self.last_state = None
 
+
     def reset(self):
         """Reset the network and reward values back to how they were initialized."""
         if not self.config.randomize_env:
@@ -76,11 +80,19 @@ def step(self, p_set):
         new_loads = pd.Series(data=None, index=self.net.load.bus)
         new_sgens = pd.Series(data=None, index=self.net.sgen.bus)
         for bus, feed in self.config.static_feeds.items():
-            p_new = feed[self.time]
-            if p_new > 0:
-                new_loads[bus] = p_new
+            if isinstance(feed, dict):
+                for idx, feed2 in feed.items():
+                    p_new = feed2[self.time]
+                    if p_new > 0:
+                        new_loads[bus] = p_new
+                    else:
+                        new_sgens[bus] = p_new
             else:
-                new_sgens[bus] = p_new
+                p_new = feed[self.time]
+                if p_new > 0:
+                    new_loads[bus] = p_new
+                else:
+                    new_sgens[bus] = p_new
         self.update_loads(new_p=new_loads.values)
         self.update_static_generation(new_p=new_sgens.values)
         # Update controllable resources
@@ -301,7 +313,7 @@ def calculate_reward(self, eps=0.001, type=4):
             line_flow_values = np.maximum(np.abs(self.net.res_line.p_to_kw),
                                           np.abs(self.net.res_line.p_from_kw)) - self.net.res_line.pl_kw
             self.reward_val -= self.config.cont_reward_lambda * np.sum(np.minimum(np.abs(line_flow_values),
-                                                                                  1.0*np.ones(np.shape(line_flow_values)[0])))
+                                                                                  3.0*np.ones(np.shape(line_flow_values)[0])))
         # Costs for running batteries
         cap_costs = self.net.storage.cap_cost
         max_e = self.net.storage.max_e_kwh
@@ -313,8 +325,9 @@ def calculate_reward(self, eps=0.001, type=4):
         return self.reward_val
 
 if __name__ == "__main__":
-    env1 = NetModel(env_name='Six_Bus_POC')
-    #env1 = NetModel(env_name='Six_Bus_MVP3')
-    env1.config.reward_epsilon = 0.1
-    env1.reset()
-    env1.step([-0.02, -0.02])
+    env1 = NetModel(env_name='rural_1')  # 'Six_Bus_POC')
+    # env1.config.reward_epsilon = 0.1
+    # env1.reset()
+    env1.step([-20.17500389, -20.46192559, -19.49983787,  19.80725726, 20.07191253, 20.18946847])
+    # env1.step([-0.02, -0.02])
+