Fixed test case for Breakout learning with DDDQN.

sven1977 committed Oct 15, 2019
1 parent f190fe6 commit 82e6688

Showing 4 changed files with 4 additions and 117 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -27,7 +27,7 @@ algorithm is 40 lines at its core - not counting comments, import statements and
self.memory.add_records(dict(s=self.s, a=self.a, r=r, s_=s_, t=t)) # s_ = s'; t = is s' terminal?
```

[![asdsd](https://img.shields.io/badge/python-3.7-orange.svg)](https://www.python.org/downloads/release/python-374/)


```
# Handle ε-greedy exploration (decay).
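The README excerpt above cuts off at the ε-greedy exploration step. For orientation, here is a minimal sketch of ε-greedy action selection with linear decay; the helper names and decay constants are illustrative assumptions, not the repository's actual code:

```
import numpy as np

def decayed_epsilon(t, start=1.0, end=0.1, decay_steps=100_000):
    # Anneal epsilon linearly from `start` to `end` over `decay_steps` time steps
    # (constants assumed for illustration).
    frac = min(t / decay_steps, 1.0)
    return start + frac * (end - start)

def epsilon_greedy(q_values, epsilon):
    # Explore with probability epsilon, otherwise act greedily on the Q-estimates.
    if np.random.random() < epsilon:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))
```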

This file was deleted.

2 changes: 1 addition & 1 deletion surreal/tests/algos/configs/dddqn_breakout_learning.json
@@ -3,7 +3,7 @@

"n_step": 4,

- "memory_capacity": 75000,
+ "memory_capacity": 101376,
"memory_batch_size": 256,
"memory_batch_size": 256,

"epsilon": {
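The enlarged memory works together with the config's n_step=4 setting. As a reference point, the standard n-step double-DQN target such settings imply looks roughly like this; names and the exact bootstrap are assumptions, not surreal's update code:

```
def n_step_target(rewards, gamma, q_target_next, a_star, terminal):
    # rewards: [r_t, ..., r_{t+n-1}] pulled from the replay memory (n = n_step).
    # Bootstrap with the target network's value of the online network's argmax
    # action a_star in s_{t+n} (the "double" part of double DQN); skip the
    # bootstrap if the n-step transition ended in a terminal state.
    ret = 0.0 if terminal else q_target_next[a_star]
    for r in reversed(rewards):
        ret = r + gamma * ret
    return ret
```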
41 changes: 2 additions & 39 deletions surreal/tests/algos/learning_up_to_1d/test_dddqn_long_tasks.py
@@ -87,48 +87,11 @@ def action_map(a):

        env.terminate()

-    def test_learning_on_breakout_WORKING___YAY(self):
-        # Create an Env object.
-        env = OpenAIGymEnv(
-            "Breakout-v4", actors=16, fire_after_reset=True, episodic_life=True, max_num_noops_after_reset=8
-        )
-
-        preprocessor = Preprocessor(
-            ImageCrop(x=5, y=29, width=150, height=167),
-            GrayScale(keepdims=True),
-            ImageResize(width=84, height=84, interpolation="bilinear"),
-            lambda inputs_: ((inputs_ / 128) - 1.0).astype(np.float32),  # simple preprocessor: [0,255] to [-1.0,1.0]
-            Sequence(sequence_length=4, adddim=False)
-        )
-        # Create a DDDQNConfig.
-        config = DDDQNConfig.make(
-            "../configs/WORKING___YAY_dddqn_breakout_learning.json",
-            preprocessor=preprocessor,
-            state_space=env.actors[0].state_space,
-            action_space=env.actors[0].action_space
-        )
-        # Create an Algo object.
-        algo = DDDQN(config=config, name="my-dddqn")
-
-        # Point actor(s) to the algo.
-        for actor in env.actors:
-            actor.set_algo(algo)
-
-        # Run and wait for env to complete.
-        env.run(actor_time_steps=10000000, sync=True, render=debug.RenderEnvInLearningTests)
-
-        # Check last n episode returns.
-        n = 10
-        mean_last_n = np.mean(env.historic_episodes_returns[-n:])
-        print("Avg return over last {} episodes: {}".format(n, mean_last_n))
-        self.assertTrue(mean_last_n > 150.0)
-
-        env.terminate()
-
    def test_learning_on_breakout(self):
        # Create an Env object.
        env = OpenAIGymEnv(
-            "Breakout-v4", actors=1, fire_after_reset=True, episodic_life=True, max_num_noops_after_reset=8
+            "Breakout-v4", actors=16, fire_after_reset=True, episodic_life=True, max_num_noops_after_reset=8,
+            frame_skip=(2, 5)
        )

        preprocessor = Preprocessor(
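The surviving test's substantive change is the scale-up from 1 to 16 actors plus an explicit frame_skip=(2, 5). By gym's Atari convention, a tuple frame skip repeats each action for a random number of emulator frames drawn uniformly from [low, high). A minimal illustration of that behavior, as a hypothetical wrapper function rather than surreal's implementation:

```
import numpy as np

def step_with_random_frame_skip(env, action, low=2, high=5, rng=np.random):
    # Repeat `action` for k emulator frames, with k drawn uniformly from
    # {low, ..., high - 1}, summing rewards and stopping early on episode end.
    k = rng.randint(low, high)
    total_reward, obs, done, info = 0.0, None, False, {}
    for _ in range(k):
        obs, reward, done, info = env.step(action)
        total_reward += reward
        if done:
            break
    return obs, total_reward, done, info
```

Randomizing the skip length injects stochasticity into an otherwise deterministic emulator, a common Atari trick to keep agents from memorizing fixed action sequences.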
