From 82e6688c5c59333161de20bf58ff5bc91730f55c Mon Sep 17 00:00:00 2001
From: sven1977
Date: Tue, 15 Oct 2019 08:37:33 +0200
Subject: [PATCH] Fixed test case for Breakout learning with DDDQN.

---
 README.md                                     |  2 +-
 ...WORKING___YAY_dddqn_breakout_learning.json | 76 -------------------
 .../configs/dddqn_breakout_learning.json      |  2 +-
 .../test_dddqn_long_tasks.py                  | 41 +---------
 4 files changed, 4 insertions(+), 117 deletions(-)
 delete mode 100644 surreal/tests/algos/configs/WORKING___YAY_dddqn_breakout_learning.json

diff --git a/README.md b/README.md
index af67f8d..74354de 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,7 @@ algorithm is 40 lines at its core - not counting comments, import statements and
 self.memory.add_records(dict(s=self.s, a=self.a, r=r, s_=s_, t=t))  # s_ = s'; t = is s' terminal?
 ```
 
-[![asdsd](https://img.shields.io/badge/python-3.7-orange.svg)](https://www.python.org/downloads/release/python-374/)
+
 
 ```
 # Handle ε-greedy exploration (decay).
diff --git a/surreal/tests/algos/configs/WORKING___YAY_dddqn_breakout_learning.json b/surreal/tests/algos/configs/WORKING___YAY_dddqn_breakout_learning.json
deleted file mode 100644
index 77078ed..0000000
--- a/surreal/tests/algos/configs/WORKING___YAY_dddqn_breakout_learning.json
+++ /dev/null
@@ -1,76 +0,0 @@
-{
-    "gamma": 0.99,
-
-    "n_step": 4,
-
-    "memory_capacity": 100000,  # this is too high, makes PC (currently 24Gb RAM) crash
-    "memory_batch_size": 256,
-
-    "epsilon": {
-        "type": "linear-decay",
-        "from": 1.0,
-        "to": 0.0,  # <- this is probably a bad idea, should be 0.1, but did learn anyway
-        "end_time_percentage": 0.1
-    },
-
-    "q_network": [
-        {
-            "name": "conv2d",
-            "filters": 16,
-            "kernel_size": 8,
-            "strides": 4,
-            "padding": "same",
-            "activation": "relu"
-        },
-        {
-            "name": "conv2d",
-            "filters": 32,
-            "kernel_size": 4,
-            "strides": 2,
-            "padding": "same",
-            "activation": "relu"
-        },
-        {
-            "name": "conv2d",
-            "filters": 256,
-            "kernel_size": 11,
-            "strides": 1,
-            "padding": "valid",
-            "activation": "relu"
-        },
-        {
-            "name": "flatten"
-        },
-        {
-            "name": "dense",
-            "units": 512,
-            "activation": "relu"
-        }
-    ],
-
-    "dueling_a_network": [
-        {
-            "name": "dense",
-            "units": 64,
-            "activation": "relu"
-        }
-    ],
-
-    "dueling_v_network": [
-        {
-            "name": "dense",
-            "units": 64,
-            "activation": "relu"
-        }
-    ],
-
-    "update_frequency": 32,
-    "update_after": "when-memory-ready",
-    "sync_frequency": 10000,
-
-    "optimizer": {
-        "type": "adam",
-        "learning_rate": 0.0002,
-        "clip_norm": 40
-    }
-}
diff --git a/surreal/tests/algos/configs/dddqn_breakout_learning.json b/surreal/tests/algos/configs/dddqn_breakout_learning.json
index 7ac1838..4b90ffd 100644
--- a/surreal/tests/algos/configs/dddqn_breakout_learning.json
+++ b/surreal/tests/algos/configs/dddqn_breakout_learning.json
@@ -3,7 +3,7 @@
 
     "n_step": 4,
 
-    "memory_capacity": 75000,
+    "memory_capacity": 101376,
     "memory_batch_size": 256,
 
     "epsilon": {
diff --git a/surreal/tests/algos/learning_up_to_1d/test_dddqn_long_tasks.py b/surreal/tests/algos/learning_up_to_1d/test_dddqn_long_tasks.py
index 4c4061b..ff26c5c 100644
--- a/surreal/tests/algos/learning_up_to_1d/test_dddqn_long_tasks.py
+++ b/surreal/tests/algos/learning_up_to_1d/test_dddqn_long_tasks.py
@@ -87,48 +87,11 @@ def action_map(a):
 
         env.terminate()
 
-    def test_learning_on_breakout_WORKING___YAY(self):
-        # Create an Env object.
-        env = OpenAIGymEnv(
-            "Breakout-v4", actors=16, fire_after_reset=True, episodic_life=True, max_num_noops_after_reset=8
-        )
-
-        preprocessor = Preprocessor(
-            ImageCrop(x=5, y=29, width=150, height=167),
-            GrayScale(keepdims=True),
-            ImageResize(width=84, height=84, interpolation="bilinear"),
-            lambda inputs_: ((inputs_ / 128) - 1.0).astype(np.float32),  # simple preprocessor: [0,255] to [-1.0,1.0]
-            Sequence(sequence_length=4, adddim=False)
-        )
-        # Create a DQN2015Config.
-        config = DDDQNConfig.make(
-            "../configs/WORKING___YAY_dddqn_breakout_learning.json",
-            preprocessor=preprocessor,
-            state_space=env.actors[0].state_space,
-            action_space=env.actors[0].action_space
-        )
-        # Create an Algo object.
-        algo = DDDQN(config=config, name="my-dddqn")
-
-        # Point actor(s) to the algo.
-        for actor in env.actors:
-            actor.set_algo(algo)
-
-        # Run and wait for env to complete.
-        env.run(actor_time_steps=10000000, sync=True, render=debug.RenderEnvInLearningTests)
-
-        # Check last n episode returns.
-        n = 10
-        mean_last_n = np.mean(env.historic_episodes_returns[-n:])
-        print("Avg return over last {} episodes: {}".format(n, mean_last_n))
-        self.assertTrue(mean_last_n > 150.0)
-
-        env.terminate()
-
     def test_learning_on_breakout(self):
         # Create an Env object.
         env = OpenAIGymEnv(
-            "Breakout-v4", actors=1, fire_after_reset=True, episodic_life=True, max_num_noops_after_reset=8
+            "Breakout-v4", actors=16, fire_after_reset=True, episodic_life=True, max_num_noops_after_reset=8,
+            frame_skip=(2, 5)
         )
 
         preprocessor = Preprocessor(
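
Side note on the two config values touched above. The new memory_capacity of 101376 divides evenly by both the memory_batch_size (101376 = 396 × 256) and the 16 actors (6336 transitions per actor), which is presumably why it replaced the round 75000. The deleted JSON's "epsilon" block also documents the exploration schedule: a linear decay from 1.0 to 0.0 over the first 10% of training. Below is a minimal, illustrative sketch of that schedule and the ε-greedy action pick it drives; the helper names `linear_decay_epsilon` and `epsilon_greedy_action` are assumptions for illustration, not surreal's actual API.

```
import random

def linear_decay_epsilon(t, max_t, eps_from=1.0, eps_to=0.0, end_time_percentage=0.1):
    # Linearly anneal epsilon from eps_from to eps_to over the first
    # end_time_percentage fraction of max_t time steps, then hold at eps_to.
    end_t = max_t * end_time_percentage
    if t >= end_t:
        return eps_to
    return eps_from + (eps_to - eps_from) * (t / end_t)

def epsilon_greedy_action(q_values, epsilon):
    # With probability epsilon explore (uniform random action),
    # otherwise exploit (argmax over the Q-values).
    if random.random() < epsilon:
        return random.randrange(len(q_values))
    return max(range(len(q_values)), key=lambda a: q_values[a])
```

With eps_to=0.0, exploration stops entirely after the first 10% of the run; the deleted config's own comment ("should be 0.1, but did learn anyway") already flags that as a risky choice.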