From 11b6d29e6cb8074bbc8472ea57478f729e6510a5 Mon Sep 17 00:00:00 2001
From: Yann Bouteiller
Date: Mon, 6 Dec 2021 18:26:40 -0500
Subject: [PATCH] More parameters moved to config.json

---
 README.md                       |  8 ++++----
 readme/Install.md               |  2 +-
 readme/get_started.md           | 18 +++++++++++-------
 setup.py                        |  2 +-
 tmrl/config/config_constants.py | 17 ++++++++---------
 tmrl/config/config_objects.py   | 32 ++++++++++++++------------------
 6 files changed, 39 insertions(+), 40 deletions(-)

diff --git a/README.md b/README.md
index 6d2a6ca..1602f7b 100644
--- a/README.md
+++ b/README.md
@@ -49,7 +49,7 @@ _(note: you can only use the MLP so far)_.
 ### Developer features:
 * **Real-Time Gym environment:**
-`tmrl` comes with real-time Gym environment based on [rtgym](https://pypi.org/project/rtgym/). Once `tmrl` is installed, it is possible to use this environment in your own training framework.
+`tmrl` comes with a real-time Gym environment based on [rtgym](https://pypi.org/project/rtgym/). Once `tmrl` is installed, it is easy to use this environment in your own training framework.
 More information [here](#gym-environment).
 
 * **Distributed training:**
 Our training framework is based on a single-server / multiple-clients architecture.
@@ -57,8 +57,8 @@ It enables collecting samples locally on one or several computers and training d
 Find out more [here](#distant-training-architecture).
 
 * **Real-time training:**
-Our policies are trained in real-time, with no insider access to the game: we do not pause the simulation to collect samples nor in order to compute actions.
-As such, our approach can easily be extended to other video games, and to real-world robotic applications.
+Policies are trained in real-time, with no insider access to the game: we do not pause the simulation to collect samples nor in order to compute actions.
+As such, the framework can easily be extended to other video games, and to real-world robotic applications.
 Find out more [here](#real-time-gym-framework).
 
 * **Flexible framework:**
@@ -77,7 +77,7 @@ Detailed installation instructions are provided [here](readme/Install.md).
 
 ## Getting started
 
-Full guidance toward testing pre-trained weights, as well as a tutorial to train, test, and fine-tune your own models,
+Full guidance toward setting up the environment, testing pre-trained weights, as well as a tutorial to train, test, and fine-tune your own models,
 are provided at [this link](readme/get_started.md).
 
 ## Gym environment
diff --git a/readme/Install.md b/readme/Install.md
index 3d92542..43c1168 100644
--- a/readme/Install.md
+++ b/readme/Install.md
@@ -70,7 +70,7 @@ In particular, you may want to adapt the following entries:
 - `PORT_TRAINER` and `PORT_ROLLOUT` need to be forwarded on the `server` if not running on localhost
 - `WANDB_PROJECT`, `WANDB_ENTITY` and `WANDB_KEY` can be replaced by your own [wandb](https://wandb.ai/site) credentials for monitoring training
 
-You can delete the content of all folders (but not the folders) whenever you like (except `config.json`, a default version is provided in `resources` if you delete this).
+You can delete the content of all folders (but not the folders themselves) whenever you like (except `config.json`, a default version is provided in `resources` if you delete this).
 If at some point you want to do a clean install, delete the whole `TmrlData` folder and pip install `tmrl` again.
 
 _NB: when pip uninstalling `tmrl`, the `TmrlData` folder is not deleted._
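Note: the `config.json` entries referenced in the Install.md hunk above are fields of `TmrlData\config\config.json`. As a rough sketch (the values below are illustrative placeholders, not the shipped defaults; see the default file provided in `resources` for the exact schema), the networking and wandb part of the file could look like:

```json
{
  "PORT_TRAINER": 55555,
  "PORT_ROLLOUT": 55556,
  "WANDB_PROJECT": "tmrl",
  "WANDB_ENTITY": "your-wandb-entity",
  "WANDB_KEY": "your-wandb-api-key"
}
```

As stated in the hunk, `PORT_TRAINER` and `PORT_ROLLOUT` must be forwarded on the machine running the server whenever it is not on localhost.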
diff --git a/readme/get_started.md b/readme/get_started.md
index 4d04539..2a9ee6d 100644
--- a/readme/get_started.md
+++ b/readme/get_started.md
@@ -14,8 +14,8 @@ You can test our pre-trained AI directly in TrackMania by following these steps (
 - Launch TrackMania 2020
 - In case the OpenPlanet menu is showing in the top part of the screen, hide it using the `f3` key
 - Launch the `tmrl-test` track. This can be done by selecting `create > map editor > edit a map > tmrl-test > select map` and hitting the green flag.
-- Set the game in windowed mode. To do this, bring the cursor to the top of the screen and a drop-down menu will show. Hit the windowed icon.
-- Bring the TrackMania window to the top-left corner of the screen. On Windows10, it should automatically fit to a quarter of the screen.
+- Set the game in windowed mode. To do this, bring the cursor to the top of the screen and a drop-down menu will show. Hit the windowed icon.
+- Bring the TrackMania window to the top-left corner of the screen. On Windows10, it should automatically fit to a quarter of the screen _(NB: this requires a `1920x1080` screen resolution, otherwise, see [troubleshooting](#troubleshooting))_.
 - Enter the cockpit view by hitting the `3` key (the car must be hidden; press the key several times if the car is still visible).
 - Hide the ghost by pressing the `g` key.
 
@@ -37,9 +37,11 @@
 You should now see the car drive autonomously.
 If you get an error saying that communication was refused, try reloading the `TMRL grab data` script in the OpenPlanet menu.
 
 If you see many warnings saying that time-steps time out, this means that your computer struggles at running the AI and trackmania in parallel.
-Try reducing the trackmania graphics to the minimum (in particular, try setting the maximum fps to 30, but not much less than this, because screenshots are captured at 20 fps).
+Try reducing the trackmania graphics to the minimum (in particular, try setting the maximum fps to 30, but not much less than this, because screenshots are captured at 20 fps)
+_(NB: seeing these warnings once at each environment reset is normal, this is because we purposefully sleep when the car is waiting for the green light)._
 
-In the `Graphics` tab of the settings, ensure that the resolution is 958 (width) * 488 (height) pixels.
+In the `Graphics` tab of the TM20 settings, ensure that the resolution is 958 (width) * 488 (height) pixels.
+If your screen resolution is not `1920x1080`, you will probably need to place the window manually. You may use the `--check-environment` option described later to precisely place the window.
 
 ## Train your own self-driving AI
@@ -57,12 +59,12 @@ _(Instructions for TrackMania 2020 using LIDAR)_
 ```
 - Press `e` to start recording
 - Complete the track
-- Check that your reward and environment work correctly:
+- Check that your reward and environment work correctly:
   - Execute:
   ```shell
   python -m tmrl --check-environment
   ```
-  - Control the car manually. You should see the LIDAR and rewards printed.
+  - Control the car manually. You should see the LIDAR and rewards.
   - Press `CTRL + C` to exit.
 - Open 3 terminals and put them where they do not overlap with the trackmania window.
 
@@ -78,8 +80,10 @@ python -m tmrl --trainer
 python -m tmrl --worker
 ```
 
+_(Note: you may want to run these commands on separate computers instead, for instance if the trainer is located on a remote HPC computer.
+You can adapt `TmrlData\config\config.json` for this matter)_
 
 During training, make sure you don't see too many 'timestep timeouts' in the worker terminal.
-If you do, this means that your GPU is not powerful enough, and you should use distant training instead of localhost training (see `TmrlData\config\config.json`).
+If you do, this means that your GPU is not powerful enough, and you should use remote training instead of localhost training (see `TmrlData\config\config.json`).
 
 With an RTX3080 on a distant machine as trainer and one local machine as worker/server, it takes approximately 5 hours for the car to understand how to take a turn correctly.
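Note: the added remark about running the three commands on separate computers can be pictured as follows; the machine split is only an example, and the server address and ports come from `TmrlData\config\config.json` rather than from command-line arguments:

```shell
# machine 1: central server (ports forwarded if it is not on localhost)
python -m tmrl --server

# machine 2: trainer (e.g., a remote computer with a powerful GPU)
python -m tmrl --trainer

# machine 3: the computer running TrackMania (rollout worker)
python -m tmrl --worker
```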
diff --git a/setup.py b/setup.py
index ed74aa2..7d28c6b 100644
--- a/setup.py
+++ b/setup.py
@@ -78,7 +78,7 @@ def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
         'imageio-ffmpeg',
         'pandas',
         'gym',
-        'rtgym',
+        'rtgym>=0.5',
         'pyyaml',
         'wandb',
         'requests',
diff --git a/tmrl/config/config_constants.py b/tmrl/config/config_constants.py
index 9a518a9..4b7ce45 100644
--- a/tmrl/config/config_constants.py
+++ b/tmrl/config/config_constants.py
@@ -20,8 +20,6 @@
 BUFFERS_MAXLEN = TMRL_CONFIG["BUFFERS_MAXLEN"]  # Maximum length of the local buffers for RolloutWorkers, Server and TrainerInterface
 RW_MAX_SAMPLES_PER_EPISODE = TMRL_CONFIG["RW_MAX_SAMPLES_PER_EPISODE"]  # If this number of timesteps is reached, the RolloutWorker will reset the episode
 
-PRAGMA_TM2020_TMNF = TMRL_CONFIG["TM2020"]  # True if TM2020, False if TMNF
-PRAGMA_LIDAR = True  # True if Lidar, False if images
 PRAGMA_RNN = False  # True to use an RNN, False to use an MLP
 
 PRAGMA_CUDA_TRAINING = TMRL_CONFIG["CUDA_TRAINING"]  # True if CUDA, False if CPU (trainer)
@@ -40,29 +38,30 @@
 # ENVIRONMENT: =======================================================
 
+ENV_CONFIG = TMRL_CONFIG["ENV"]
+RTGYM_INTERFACE = ENV_CONFIG["RTGYM_INTERFACE"]
+PRAGMA_TM2020_TMNF = RTGYM_INTERFACE.startswith("TM20")  # True if TM2020, False if TMNF
+PRAGMA_LIDAR = RTGYM_INTERFACE.endswith("LIDAR")  # True if Lidar, False if images
 LIDAR_BLACK_THRESHOLD = [55, 55, 55]  # [88, 88, 88] for tiny road, [55, 55, 55] FOR BASIC ROAD
 REWARD_END_OF_TRACK = 0  # bonus reward at the end of the track
 CONSTANT_PENALTY = 0  # should be <= 0 : added to the reward at each time step
-SLEEP_TIME_AT_RESET = 1.5  # 1.5 to start in a Markov state with the lidar, 0.0 for saving replays
-ACT_BUF_LEN = 2
-IMG_HIST_LEN = 4  # 4 without RNN, 1 with RNN
+SLEEP_TIME_AT_RESET = ENV_CONFIG["SLEEP_TIME_AT_RESET"]  # 1.5 to start in a Markov state with the lidar
+IMG_HIST_LEN = ENV_CONFIG["IMG_HIST_LEN"]  # 4 without RNN, 1 with RNN
+ACT_BUF_LEN = ENV_CONFIG["RTGYM_CONFIG"]["act_buf_len"]
 
 # DEBUGGING AND BENCHMARKING: ===================================
 
 CRC_DEBUG = False  # Only for checking the consistency of the custom networking methods, set it to False otherwise. Caution: difficult to handle if reset transitions are collected.
 CRC_DEBUG_SAMPLES = 100  # Number of samples collected in CRC_DEBUG mode
 PROFILE_TRAINER = False  # Will profile each epoch in the Trainer when True
-BENCHMARK = False  # The environment will be benchmarked when this is True
 SYNCHRONIZE_CUDA = False  # Set to True for profiling, False otherwise
 
 # FILE SYSTEM: =================================================
 
-# PATH_FILE = Path(__file__)  # TODO: this won't work with PyPI or normal install
-# logging.debug(f" PATH_FILE:{PATH_FILE}")
 PATH_DATA = TMRL_FOLDER
 logging.debug(f" PATH_DATA:{PATH_DATA}")
 
-MODEL_HISTORY = 10  # 0 for not saving history, x for saving model history every x new model received by RolloutWorker
+MODEL_HISTORY = TMRL_CONFIG["SAVE_MODEL_EVERY"]  # 0 for not saving history, x for saving model history every x new model received by RolloutWorker
 MODEL_PATH_WORKER = str(WEIGHTS_FOLDER / (RUN_NAME + ".pth"))
 MODEL_PATH_SAVE_HISTORY = str(WEIGHTS_FOLDER / (RUN_NAME + "_"))
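Note: with this change, `config_constants.py` expects an `ENV` section and a `SAVE_MODEL_EVERY` entry in `config.json`. A minimal sketch, assuming illustrative values taken from the old hard-coded defaults and an interface name chosen so that the `startswith("TM20")` / `endswith("LIDAR")` checks select TM2020 with LIDAR:

```json
{
  "SAVE_MODEL_EVERY": 10,
  "ENV": {
    "RTGYM_INTERFACE": "TM20LIDAR",
    "SLEEP_TIME_AT_RESET": 1.5,
    "IMG_HIST_LEN": 4,
    "RTGYM_CONFIG": {
      "act_buf_len": 2
    }
  }
}
```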
diff --git a/tmrl/config/config_objects.py b/tmrl/config/config_objects.py
index b1e5fe1..f6817a6 100644
--- a/tmrl/config/config_objects.py
+++ b/tmrl/config/config_objects.py
@@ -43,16 +43,9 @@
 CONFIG_DICT = rtgym.DEFAULT_CONFIG_DICT
 CONFIG_DICT["interface"] = INT
-CONFIG_DICT["time_step_duration"] = 0.05
-CONFIG_DICT["start_obs_capture"] = 0.04  # /!\ lidar capture takes 0.03s
-CONFIG_DICT["time_step_timeout_factor"] = 1.0
-CONFIG_DICT["ep_max_length"] = np.inf
-CONFIG_DICT["real_time"] = True
-CONFIG_DICT["async_threading"] = True
-CONFIG_DICT["act_in_obs"] = True  # ACT_IN_OBS
-CONFIG_DICT["act_buf_len"] = cfg.ACT_BUF_LEN
-CONFIG_DICT["benchmark"] = cfg.BENCHMARK
-CONFIG_DICT["wait_on_done"] = True
+CONFIG_DICT_MODIFIERS = cfg.ENV_CONFIG["RTGYM_CONFIG"]
+for k, v in CONFIG_DICT_MODIFIERS.items():
+    CONFIG_DICT[k] = v
 
 # to compress a sample before sending it over the local network/Internet:
 SAMPLE_COMPRESSOR = get_local_buffer_sample if cfg.PRAGMA_LIDAR else get_local_buffer_sample_tm20_imgs
 
@@ -84,6 +77,8 @@
 
 # ALGORITHM: ===================================================
 
+ALG_CONFIG = cfg.TMRL_CONFIG["ALG"]
+
 if cfg.PRAGMA_DCAC:  # DCAC
     AGENT = partial(
         DCAC_Agent,
@@ -98,18 +93,19 @@
         reward_scale=2.0,  # 2.0, # default: 5.0, best tmnf so far: 0.1, best tm20 so far: 2.0
         entropy_scale=1.0)  # default: 1.0), # default: 1.0
 else:  # SAC
+    assert ALG_CONFIG["ALGORITHM"] == "SAC"
     AGENT = partial(
         SAC_Agent,
         device='cuda' if cfg.PRAGMA_CUDA_TRAINING else 'cpu',
         Model=partial(TRAIN_MODEL, act_buf_len=cfg.ACT_BUF_LEN),
-        lr_actor=cfg.TMRL_CONFIG["LR_ACTOR"],
-        lr_critic=cfg.TMRL_CONFIG["LR_CRITIC"],
-        lr_entropy=cfg.TMRL_CONFIG["LR_ENTROPY"],
-        gamma=cfg.TMRL_CONFIG["GAMMA"],
-        polyak=cfg.TMRL_CONFIG["POLYAK"],
-        learn_entropy_coef=cfg.TMRL_CONFIG["LEARN_ENTROPY_COEF"],  # False for SAC v2 with no temperature autotuning
-        target_entropy=cfg.TMRL_CONFIG["TARGET_ENTROPY"],  # None for automatic
-        alpha=cfg.TMRL_CONFIG["ALPHA"])  # inverse of reward scale
+        lr_actor=ALG_CONFIG["LR_ACTOR"],
+        lr_critic=ALG_CONFIG["LR_CRITIC"],
+        lr_entropy=ALG_CONFIG["LR_ENTROPY"],
+        gamma=ALG_CONFIG["GAMMA"],
+        polyak=ALG_CONFIG["POLYAK"],
+        learn_entropy_coef=ALG_CONFIG["LEARN_ENTROPY_COEF"],  # False for SAC v2 with no temperature autotuning
+        target_entropy=ALG_CONFIG["TARGET_ENTROPY"],  # None for automatic
+        alpha=ALG_CONFIG["ALPHA"])  # inverse of reward scale
 
 # TRAINER: =====================================================
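Note: `config_objects.py` now copies every key of `ENV["RTGYM_CONFIG"]` into rtgym's `DEFAULT_CONFIG_DICT` and reads the SAC hyperparameters from an `ALG` section. A sketch of the corresponding `config.json` entries is given below; the rtgym values mirror the hard-coded lines removed above, while the `ALG` values are illustrative placeholders (JSON has no literal for `np.inf`, so `ep_max_length` has to be encoded as a finite number or however the default file in `resources` chooses to represent it):

```json
{
  "ENV": {
    "RTGYM_CONFIG": {
      "time_step_duration": 0.05,
      "start_obs_capture": 0.04,
      "time_step_timeout_factor": 1.0,
      "ep_max_length": 1000,
      "real_time": true,
      "async_threading": true,
      "act_in_obs": true,
      "act_buf_len": 2,
      "benchmark": false,
      "wait_on_done": true
    }
  },
  "ALG": {
    "ALGORITHM": "SAC",
    "LR_ACTOR": 0.0003,
    "LR_CRITIC": 0.0003,
    "LR_ENTROPY": 0.0003,
    "GAMMA": 0.99,
    "POLYAK": 0.995,
    "LEARN_ENTROPY_COEF": false,
    "TARGET_ENTROPY": null,
    "ALPHA": 0.5
  }
}
```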