added SAC hyperparams to config.json
yannbouteiller committed Dec 6, 2021
1 parent 2260537 commit c959672
Showing 3 changed files with 23 additions and 23 deletions.
README.md: 4 changes (2 additions & 2 deletions)
@@ -1,7 +1,7 @@
# TMRL

-TrackMania through Reinforcement Learning (`tmrl`) consists of a Real-Time Gym environment and a distributed framework for training your AIs in real time.
-It is demonstrated on the TrackMania 2020 and Trackmania Nations Forever video games.
+TrackMania through Reinforcement Learning (`tmrl`) consists of a Gym environment and a distributed framework for training AIs in real time.
+It is demonstrated on the TrackMania 2020 video game.

![example](readme/img/video_lidar.gif)

setup.py: 6 changes (3 additions & 3 deletions)
@@ -13,7 +13,7 @@
sys.exit('Sorry, Python < 3.7 is not supported. We use dataclasses that have been introduced in 3.7.')


-RESOURCES_URL = "https://github.com/trackmania-rl/tmrl/releases/download/v0.0.1/resources.zip"
+RESOURCES_URL = "https://github.com/trackmania-rl/tmrl/releases/download/v0.0.2/resources.zip"


def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
@@ -111,13 +111,13 @@ def url_retrieve(url: str, outfile: Path, overwrite: bool = False):

setup(
name='tmrl',
-version='0.0.1',
+version='0.0.2',
description='Autonomous racing in Trackmania',
long_description=README,
long_description_content_type='text/markdown',
keywords='reinforcement learning, trackmania, self driving, roborace',
url='https://github.com/trackmania-rl/tmrl',
-download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.0.1.tar.gz',
+download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.0.2.tar.gz',
author='Yann Bouteiller, Edouard Geze',
author_email='[email protected], [email protected]',
license='MIT',
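The body of `url_retrieve` is collapsed in this view. For context, here is a minimal sketch of what such a helper typically does, assuming it downloads the file at `url` to `outfile` and skips files that already exist unless `overwrite` is set (the actual implementation in setup.py may differ):

```python
from pathlib import Path
from urllib.request import urlopen

def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
    # Do nothing if the file is already on disk and overwriting is disabled.
    if outfile.exists() and not overwrite:
        return
    # Ensure the parent directory exists, then write the downloaded bytes.
    outfile.parent.mkdir(parents=True, exist_ok=True)
    with urlopen(url) as response:
        outfile.write_bytes(response.read())
```

With the version bump above, this helper would fetch the v0.0.2 `resources.zip` on a fresh install.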
tmrl/config/config_objects.py: 36 changes (18 additions & 18 deletions)
@@ -102,14 +102,14 @@
SAC_Agent,
device='cuda' if cfg.PRAGMA_CUDA_TRAINING else 'cpu',
Model=partial(TRAIN_MODEL, act_buf_len=cfg.ACT_BUF_LEN),
-lr_actor=0.0003,
-lr_critic=0.00005, # 0.0001 # default 0.0003
-lr_entropy=0.0003,
-gamma=0.995, # default and best tmnf so far: 0.99
-polyak=0.995, # 0.999 # default 0.995
-learn_entropy_coef=False, # False for SAC v2 with no temperature autotuning
-target_entropy=-7.0, # None for automatic
-alpha=1.0 / 2.7) # best: 1 / 2.5 # inverse of reward scale
+lr_actor=cfg.TMRL_CONFIG["LR_ACTOR"],
+lr_critic=cfg.TMRL_CONFIG["LR_CRITIC"],
+lr_entropy=cfg.TMRL_CONFIG["LR_ENTROPY"],
+gamma=cfg.TMRL_CONFIG["GAMMA"],
+polyak=cfg.TMRL_CONFIG["POLYAK"],
+learn_entropy_coef=cfg.TMRL_CONFIG["LEARN_ENTROPY_COEF"], # False for SAC v2 with no temperature autotuning
+target_entropy=cfg.TMRL_CONFIG["TARGET_ENTROPY"], # None for automatic
+alpha=cfg.TMRL_CONFIG["ALPHA"]) # inverse of reward scale

# TRAINER: =====================================================

@@ -127,20 +127,20 @@ def sac_v2_entropy_scheduler(agent, epoch):
TrainingOffline,
Env=partial(UntouchedGymEnv, id="rtgym:real-time-gym-v0", gym_kwargs={"config": CONFIG_DICT}),
Memory=MEMORY,
-memory_size=1000000,
-batchsize=256, # RTX3080: 256 up to 1024
-epochs=10000, # 400
-rounds=10, # 10
-steps=1000, # 1000
-update_model_interval=1000,
-update_buffer_interval=1000,
-max_training_steps_per_env_step=4.0, # 1.0
+memory_size=cfg.TMRL_CONFIG["MEMORY_SIZE"],
+batchsize=cfg.TMRL_CONFIG["BATCH_SIZE"], # RTX3080: 256 up to 1024
+epochs=cfg.TMRL_CONFIG["MAX_EPOCHS"], # 400
+rounds=cfg.TMRL_CONFIG["ROUNDS_PER_EPOCH"], # 10
+steps=cfg.TMRL_CONFIG["TRAINING_STEPS_PER_ROUND"], # 1000
+update_model_interval=cfg.TMRL_CONFIG["UPDATE_MODEL_INTERVAL"],
+update_buffer_interval=cfg.TMRL_CONFIG["UPDATE_BUFFER_INTERVAL"],
+max_training_steps_per_env_step=cfg.TMRL_CONFIG["MAX_TRAINING_STEPS_PER_ENVIRONMENT_STEP"], # 1.0
profiling=cfg.PROFILE_TRAINER,
Agent=AGENT,
agent_scheduler=None, # sac_v2_entropy_scheduler
-start_training=1000) # set this > 0 to start from an existing policy (fills the buffer up to this number of samples before starting training)
+start_training=cfg.TMRL_CONFIG["ENVIRONMENT_STEPS_BEFORE_TRAINING"]) # set this > 0 to start from an existing policy (fills the buffer up to this number of samples before starting training)
else: # images
TRAINER = partial(
TrainingOffline,
Env=partial(UntouchedGymEnv, id="rtgym:real-time-gym-v0", gym_kwargs={"config": CONFIG_DICT}),
Memory=MEMORY,
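Since this commit reads every SAC and trainer hyperparameter from `cfg.TMRL_CONFIG`, the accompanying `config.json` presumably gains one key per lookup above. A hypothetical fragment, using the previously hardcoded values as defaults (`ALPHA` approximates the old inline `1.0 / 2.7`; the actual file shipped in `resources.zip` may differ):

```json
{
  "LR_ACTOR": 0.0003,
  "LR_CRITIC": 0.00005,
  "LR_ENTROPY": 0.0003,
  "GAMMA": 0.995,
  "POLYAK": 0.995,
  "LEARN_ENTROPY_COEF": false,
  "TARGET_ENTROPY": -7.0,
  "ALPHA": 0.37,
  "MEMORY_SIZE": 1000000,
  "BATCH_SIZE": 256,
  "MAX_EPOCHS": 10000,
  "ROUNDS_PER_EPOCH": 10,
  "TRAINING_STEPS_PER_ROUND": 1000,
  "UPDATE_MODEL_INTERVAL": 1000,
  "UPDATE_BUFFER_INTERVAL": 1000,
  "MAX_TRAINING_STEPS_PER_ENVIRONMENT_STEP": 4.0,
  "ENVIRONMENT_STEPS_BEFORE_TRAINING": 1000
}
```

Centralizing these values in config.json lets a run be retuned without editing tmrl/config/config_objects.py.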
