From 82e6688c5c59333161de20bf58ff5bc91730f55c Mon Sep 17 00:00:00 2001
From: sven1977
Date: Tue, 15 Oct 2019 08:37:33 +0200
Subject: [PATCH] Fixed test case for Breakout learning with DDDQN.

---
 README.md                                     |  2 +-
 ...WORKING___YAY_dddqn_breakout_learning.json | 76 -------------------
 .../configs/dddqn_breakout_learning.json      |  2 +-
 .../test_dddqn_long_tasks.py                  | 41 +---------
 4 files changed, 4 insertions(+), 117 deletions(-)
 delete mode 100644 surreal/tests/algos/configs/WORKING___YAY_dddqn_breakout_learning.json

diff --git a/README.md b/README.md
index af67f8d..74354de 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,7 @@ algorithm is 40 lines at its core - not counting comments, import statements and
 self.memory.add_records(dict(s=self.s, a=self.a, r=r, s_=s_, t=t))  # s_ = s'; t = is s' terminal?
 ```
 
-[![asdsd](https://img.shields.io/badge/python-3.7-orange.svg)](https://www.python.org/downloads/release/python-374/)
+
 
 ```
 # Handle ε-greedy exploration (decay).
diff --git a/surreal/tests/algos/configs/WORKING___YAY_dddqn_breakout_learning.json b/surreal/tests/algos/configs/WORKING___YAY_dddqn_breakout_learning.json
deleted file mode 100644
index 77078ed..0000000
--- a/surreal/tests/algos/configs/WORKING___YAY_dddqn_breakout_learning.json
+++ /dev/null
@@ -1,76 +0,0 @@
-{
-    "gamma": 0.99,
-
-    "n_step": 4,
-
-    "memory_capacity": 100000,  # this is too high, makes PC (currently 24Gb RAM) crash
-    "memory_batch_size": 256,
-
-    "epsilon": {
-        "type": "linear-decay",
-        "from": 1.0,
-        "to": 0.0,  # <- this is probably a bad idea, should be 0.1, but did learn anyway
-        "end_time_percentage": 0.1
-    },
-
-    "q_network": [
-        {
-            "name": "conv2d",
-            "filters": 16,
-            "kernel_size": 8,
-            "strides": 4,
-            "padding": "same",
-            "activation": "relu"
-        },
-        {
-            "name": "conv2d",
-            "filters": 32,
-            "kernel_size": 4,
-            "strides": 2,
-            "padding": "same",
-            "activation": "relu"
-        },
-        {
-            "name": "conv2d",
-            "filters": 256,
-            "kernel_size": 11,
-            "strides": 1,
-            "padding": "valid",
-            "activation": "relu"
-        },
-        {
-            "name": "flatten"
-        },
-        {
-            "name": "dense",
-            "units": 512,
-            "activation": "relu"
-        }
-    ],
-
-    "dueling_a_network": [
-        {
-            "name": "dense",
-            "units": 64,
-            "activation": "relu"
-        }
-    ],
-
-    "dueling_v_network": [
-        {
-            "name": "dense",
-            "units": 64,
-            "activation": "relu"
-        }
-    ],
-
-    "update_frequency": 32,
-    "update_after": "when-memory-ready",
-    "sync_frequency": 10000,
-
-    "optimizer": {
-        "type": "adam",
-        "learning_rate": 0.0002,
-        "clip_norm": 40
-    }
-}
diff --git a/surreal/tests/algos/configs/dddqn_breakout_learning.json b/surreal/tests/algos/configs/dddqn_breakout_learning.json
index 7ac1838..4b90ffd 100644
--- a/surreal/tests/algos/configs/dddqn_breakout_learning.json
+++ b/surreal/tests/algos/configs/dddqn_breakout_learning.json
@@ -3,7 +3,7 @@
 
     "n_step": 4,
 
-    "memory_capacity": 75000,
+    "memory_capacity": 101376,
     "memory_batch_size": 256,
 
     "epsilon": {
diff --git a/surreal/tests/algos/learning_up_to_1d/test_dddqn_long_tasks.py b/surreal/tests/algos/learning_up_to_1d/test_dddqn_long_tasks.py
index 4c4061b..ff26c5c 100644
--- a/surreal/tests/algos/learning_up_to_1d/test_dddqn_long_tasks.py
+++ b/surreal/tests/algos/learning_up_to_1d/test_dddqn_long_tasks.py
@@ -87,48 +87,11 @@ def action_map(a):
 
         env.terminate()
 
-    def test_learning_on_breakout_WORKING___YAY(self):
-        # Create an Env object.
-        env = OpenAIGymEnv(
-            "Breakout-v4", actors=16, fire_after_reset=True, episodic_life=True, max_num_noops_after_reset=8
-        )
-
-        preprocessor = Preprocessor(
-            ImageCrop(x=5, y=29, width=150, height=167),
-            GrayScale(keepdims=True),
-            ImageResize(width=84, height=84, interpolation="bilinear"),
-            lambda inputs_: ((inputs_ / 128) - 1.0).astype(np.float32),  # simple preprocessor: [0,255] to [-1.0,1.0]
-            Sequence(sequence_length=4, adddim=False)
-        )
-        # Create a DQN2015Config.
-        config = DDDQNConfig.make(
-            "../configs/WORKING___YAY_dddqn_breakout_learning.json",
-            preprocessor=preprocessor,
-            state_space=env.actors[0].state_space,
-            action_space=env.actors[0].action_space
-        )
-        # Create an Algo object.
-        algo = DDDQN(config=config, name="my-dddqn")
-
-        # Point actor(s) to the algo.
-        for actor in env.actors:
-            actor.set_algo(algo)
-
-        # Run and wait for env to complete.
-        env.run(actor_time_steps=10000000, sync=True, render=debug.RenderEnvInLearningTests)
-
-        # Check last n episode returns.
-        n = 10
-        mean_last_n = np.mean(env.historic_episodes_returns[-n:])
-        print("Avg return over last {} episodes: {}".format(n, mean_last_n))
-        self.assertTrue(mean_last_n > 150.0)
-
-        env.terminate()
-
     def test_learning_on_breakout(self):
         # Create an Env object.
         env = OpenAIGymEnv(
-            "Breakout-v4", actors=1, fire_after_reset=True, episodic_life=True, max_num_noops_after_reset=8
+            "Breakout-v4", actors=16, fire_after_reset=True, episodic_life=True, max_num_noops_after_reset=8,
+            frame_skip=(2, 5)
         )
 
         preprocessor = Preprocessor(
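
Side note on the two config values touched above. The new memory_capacity of 101376 divides evenly by both the memory_batch_size (101376 = 396 × 256) and the 16 actors (6336 transitions per actor), which is presumably why it replaced the round 75000. The deleted JSON's "epsilon" block also documents the exploration schedule: a linear decay from 1.0 to 0.0 over the first 10% of training. Below is a minimal, illustrative sketch of that schedule and the ε-greedy action pick it drives; the helper names `linear_decay_epsilon` and `epsilon_greedy_action` are assumptions for illustration, not surreal's actual API.

```
import random

def linear_decay_epsilon(t, max_t, eps_from=1.0, eps_to=0.0, end_time_percentage=0.1):
    # Linearly anneal epsilon from eps_from to eps_to over the first
    # end_time_percentage fraction of max_t time steps, then hold at eps_to.
    end_t = max_t * end_time_percentage
    if t >= end_t:
        return eps_to
    return eps_from + (eps_to - eps_from) * (t / end_t)

def epsilon_greedy_action(q_values, epsilon):
    # With probability epsilon explore (uniform random action),
    # otherwise exploit (argmax over the Q-values).
    if random.random() < epsilon:
        return random.randrange(len(q_values))
    return max(range(len(q_values)), key=lambda a: q_values[a])
```

With eps_to=0.0, exploration stops entirely after the first 10% of the run; the deleted config's own comment ("should be 0.1, but did learn anyway") already flags that as a risky choice.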