From 11b6d29e6cb8074bbc8472ea57478f729e6510a5 Mon Sep 17 00:00:00 2001
From: Yann Bouteiller
Date: Mon, 6 Dec 2021 18:26:40 -0500
Subject: [PATCH] More parameters moved to config.json

---
 README.md                       |  8 ++++----
 readme/Install.md               |  2 +-
 readme/get_started.md           | 18 +++++++++++-------
 setup.py                        |  2 +-
 tmrl/config/config_constants.py | 17 ++++++++---------
 tmrl/config/config_objects.py   | 32 ++++++++++++++------------------
 6 files changed, 39 insertions(+), 40 deletions(-)

diff --git a/README.md b/README.md
index 6d2a6ca..1602f7b 100644
--- a/README.md
+++ b/README.md
@@ -49,7 +49,7 @@ _(note: you can only use the MLP so far)_.
 ### Developer features:
 * **Real-Time Gym environment:**
-`tmrl` comes with real-time Gym environment based on [rtgym](https://pypi.org/project/rtgym/). Once `tmrl` is installed, it is possible to use this environment in your own training framework.
+`tmrl` comes with a real-time Gym environment based on [rtgym](https://pypi.org/project/rtgym/). Once `tmrl` is installed, it is easy to use this environment in your own training framework.
 More information [here](#gym-environment).
 
 * **Distributed training:**
 Our training framework is based on a single-server / multiple-clients architecture.
@@ -57,8 +57,8 @@ It enables collecting samples locally on one or several computers and training d
 Find out more [here](#distant-training-architecture).
 
 * **Real-time training:**
-Our policies are trained in real-time, with no insider access to the game: we do not pause the simulation to collect samples nor in order to compute actions.
-As such, our approach can easily be extended to other video games, and to real-world robotic applications.
+Policies are trained in real-time, with no insider access to the game: we do not pause the simulation to collect samples nor in order to compute actions.
+As such, the framework can easily be extended to other video games, and to real-world robotic applications.
 Find out more [here](#real-time-gym-framework).
 
 * **Flexible framework:**
@@ -77,7 +77,7 @@ Detailed installation instructions are provided [here](readme/Install.md).
 
 ## Getting started
 
-Full guidance toward testing pre-trained weights, as well as a tutorial to train, test, and fine-tune your own models,
+Full guidance toward setting up the environment, testing pre-trained weights, as well as a tutorial to train, test, and fine-tune your own models,
 are provided at [this link](readme/get_started.md).
 
 ## Gym environment
diff --git a/readme/Install.md b/readme/Install.md
index 3d92542..43c1168 100644
--- a/readme/Install.md
+++ b/readme/Install.md
@@ -70,7 +70,7 @@ In particular, you may want to adapt the following entries:
 - `PORT_TRAINER` and `PORT_ROLLOUT` need to be forwarded on the `server` if not running on localhost
 - `WANDB_PROJECT`, `WANDB_ENTITY` and `WANDB_KEY` can be replaced by your own [wandb](https://wandb.ai/site) credentials for monitoring training
 
-You can delete the content of all folders (but not the folders) whenever you like (except `config.json`, a default version is provided in `resources` if you delete this).
+You can delete the content of all folders (but not the folders themselves) whenever you like (except `config.json`, a default version is provided in `resources` if you delete this).
 If at some point you want to do a clean install, delete the whole `TmrlData` folder and pip install `tmrl` again.
 
 _NB: when pip uninstalling `tmrl`, the `TmrlData` folder is not deleted._
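Note: the `config.json` entries referenced in the Install.md hunk above are fields of `TmrlData\config\config.json`. As a rough sketch (the values below are illustrative placeholders, not the shipped defaults; see the default file provided in `resources` for the exact schema), the networking and wandb part of the file could look like:

```json
{
  "PORT_TRAINER": 55555,
  "PORT_ROLLOUT": 55556,
  "WANDB_PROJECT": "tmrl",
  "WANDB_ENTITY": "your-wandb-entity",
  "WANDB_KEY": "your-wandb-api-key"
}
```

As stated in the hunk, `PORT_TRAINER` and `PORT_ROLLOUT` must be forwarded on the machine running the server whenever it is not on localhost.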
diff --git a/readme/get_started.md b/readme/get_started.md
index 4d04539..2a9ee6d 100644
--- a/readme/get_started.md
+++ b/readme/get_started.md
@@ -14,8 +14,8 @@ You can test our pre-trained AI directly in TrackMania by following these steps (
 - Launch TrackMania 2020
 - In case the OpenPlanet menu is showing in the top part of the screen, hide it using the `f3` key
 - Launch the `tmrl-test` track. This can be done by selecting `create > map editor > edit a map > tmrl-test > select map` and hitting the green flag.
-- Set the game in windowed mode. To do this, bring the cursor to the top of the screen and a drop-down menu will show. Hit the windowed icon.
-- Bring the TrackMania window to the top-left corner of the screen. On Windows10, it should automatically fit to a quarter of the screen.
+- Set the game in windowed mode. To do this, bring the cursor to the top of the screen and a drop-down menu will show. Hit the windowed icon.
+- Bring the TrackMania window to the top-left corner of the screen. On Windows10, it should automatically fit to a quarter of the screen _(NB: this requires a `1920x1080` screen resolution, otherwise, see [troubleshooting](#troubleshooting))_.
 - Enter the cockpit view by hitting the `3` key (the car must be hidden; press the key several times if the car is still visible).
 - Hide the ghost by pressing the `g` key.
 
@@ -37,9 +37,11 @@
 You should now see the car drive autonomously.
 If you get an error saying that communication was refused, try reloading the `TMRL grab data` script in the OpenPlanet menu.
 
 If you see many warnings saying that time-steps time out, this means that your computer struggles at running the AI and trackmania in parallel.
-Try reducing the trackmania graphics to the minimum (in particular, try setting the maximum fps to 30, but not much less than this, because screenshots are captured at 20 fps).
+Try reducing the trackmania graphics to the minimum (in particular, try setting the maximum fps to 30, but not much less than this, because screenshots are captured at 20 fps)
+_(NB: seeing these warnings once at each environment reset is normal, this is because we purposefully sleep when the car is waiting for the green light)._
 
-In the `Graphics` tab of the settings, ensure that the resolution is 958 (width) * 488 (height) pixels.
+In the `Graphics` tab of the TM20 settings, ensure that the resolution is 958 (width) * 488 (height) pixels.
+If your screen resolution is not `1920x1080`, you will probably need to place the window manually. You may use the `--check-environment` option described later to precisely place the window.
 
 ## Train your own self-driving AI
@@ -57,12 +59,12 @@ _(Instructions for TrackMania 2020 using LIDAR)_
 ```
 - Press `e` to start recording
 - Complete the track
-- Check that your reward and environment work correctly:
+- Check that your reward and environment work correctly:
   - Execute:
   ```shell
   python -m tmrl --check-environment
   ```
-  - Control the car manually. You should see the LIDAR and rewards printed.
+  - Control the car manually. You should see the LIDAR and rewards.
   - Press `CTRL + C` to exit.
 - Open 3 terminals and put them where they do not overlap with the trackmania window.
 
@@ -78,8 +80,10 @@ python -m tmrl --trainer
 python -m tmrl --worker
 ```
 
+_(Note: you may want to run these commands on separate computers instead, for instance if the trainer is located on a remote HPC computer.
+You can adapt `TmrlData\config\config.json` for this matter)_
 
 During training, make sure you don't see too many 'timestep timeouts' in the worker terminal.
-If you do, this means that your GPU is not powerful enough, and you should use distant training instead of localhost training (see `TmrlData\config\config.json`).
+If you do, this means that your GPU is not powerful enough, and you should use remote training instead of localhost training (see `TmrlData\config\config.json`).
 
 With an RTX3080 on a distant machine as trainer and one local machine as worker/server, it takes approximately 5 hours for the car to understand how to take a turn correctly.
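Note: the added remark about running the three commands on separate computers can be pictured as follows; the machine split is only an example, and the server address and ports come from `TmrlData\config\config.json` rather than from command-line arguments:

```shell
# machine 1: central server (ports forwarded if it is not on localhost)
python -m tmrl --server

# machine 2: trainer (e.g., a remote computer with a powerful GPU)
python -m tmrl --trainer

# machine 3: the computer running TrackMania (rollout worker)
python -m tmrl --worker
```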
diff --git a/setup.py b/setup.py
index ed74aa2..7d28c6b 100644
--- a/setup.py
+++ b/setup.py
@@ -78,7 +78,7 @@ def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
         'imageio-ffmpeg',
         'pandas',
         'gym',
-        'rtgym',
+        'rtgym>=0.5',
         'pyyaml',
         'wandb',
         'requests',
diff --git a/tmrl/config/config_constants.py b/tmrl/config/config_constants.py
index 9a518a9..4b7ce45 100644
--- a/tmrl/config/config_constants.py
+++ b/tmrl/config/config_constants.py
@@ -20,8 +20,6 @@
 BUFFERS_MAXLEN = TMRL_CONFIG["BUFFERS_MAXLEN"]  # Maximum length of the local buffers for RolloutWorkers, Server and TrainerInterface
 RW_MAX_SAMPLES_PER_EPISODE = TMRL_CONFIG["RW_MAX_SAMPLES_PER_EPISODE"]  # If this number of timesteps is reached, the RolloutWorker will reset the episode
 
-PRAGMA_TM2020_TMNF = TMRL_CONFIG["TM2020"]  # True if TM2020, False if TMNF
-PRAGMA_LIDAR = True  # True if Lidar, False if images
 PRAGMA_RNN = False  # True to use an RNN, False to use an MLP
 
 PRAGMA_CUDA_TRAINING = TMRL_CONFIG["CUDA_TRAINING"]  # True if CUDA, False if CPU (trainer)
@@ -40,29 +38,30 @@
 # ENVIRONMENT: =======================================================
 
+ENV_CONFIG = TMRL_CONFIG["ENV"]
+RTGYM_INTERFACE = ENV_CONFIG["RTGYM_INTERFACE"]
+PRAGMA_TM2020_TMNF = RTGYM_INTERFACE.startswith("TM20")  # True if TM2020, False if TMNF
+PRAGMA_LIDAR = RTGYM_INTERFACE.endswith("LIDAR")  # True if Lidar, False if images
 LIDAR_BLACK_THRESHOLD = [55, 55, 55]  # [88, 88, 88] for tiny road, [55, 55, 55] FOR BASIC ROAD
 REWARD_END_OF_TRACK = 0  # bonus reward at the end of the track
 CONSTANT_PENALTY = 0  # should be <= 0 : added to the reward at each time step
-SLEEP_TIME_AT_RESET = 1.5  # 1.5 to start in a Markov state with the lidar, 0.0 for saving replays
-ACT_BUF_LEN = 2
-IMG_HIST_LEN = 4  # 4 without RNN, 1 with RNN
+SLEEP_TIME_AT_RESET = ENV_CONFIG["SLEEP_TIME_AT_RESET"]  # 1.5 to start in a Markov state with the lidar
+IMG_HIST_LEN = ENV_CONFIG["IMG_HIST_LEN"]  # 4 without RNN, 1 with RNN
+ACT_BUF_LEN = ENV_CONFIG["RTGYM_CONFIG"]["act_buf_len"]
 
 # DEBUGGING AND BENCHMARKING: ===================================
 
 CRC_DEBUG = False  # Only for checking the consistency of the custom networking methods, set it to False otherwise. Caution: difficult to handle if reset transitions are collected.
 CRC_DEBUG_SAMPLES = 100  # Number of samples collected in CRC_DEBUG mode
 PROFILE_TRAINER = False  # Will profile each epoch in the Trainer when True
-BENCHMARK = False  # The environment will be benchmarked when this is True
 SYNCHRONIZE_CUDA = False  # Set to True for profiling, False otherwise
 
 # FILE SYSTEM: =================================================
 
-# PATH_FILE = Path(__file__)  # TODO: this won't work with PyPI or normal install
-# logging.debug(f" PATH_FILE:{PATH_FILE}")
 PATH_DATA = TMRL_FOLDER
 logging.debug(f" PATH_DATA:{PATH_DATA}")
 
-MODEL_HISTORY = 10  # 0 for not saving history, x for saving model history every x new model received by RolloutWorker
+MODEL_HISTORY = TMRL_CONFIG["SAVE_MODEL_EVERY"]  # 0 for not saving history, x for saving model history every x new model received by RolloutWorker
 MODEL_PATH_WORKER = str(WEIGHTS_FOLDER / (RUN_NAME + ".pth"))
 MODEL_PATH_SAVE_HISTORY = str(WEIGHTS_FOLDER / (RUN_NAME + "_"))
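Note: with this change, `config_constants.py` expects an `ENV` section and a `SAVE_MODEL_EVERY` entry in `config.json`. A minimal sketch, assuming illustrative values taken from the old hard-coded defaults and an interface name chosen so that the `startswith("TM20")` / `endswith("LIDAR")` checks select TM2020 with LIDAR:

```json
{
  "SAVE_MODEL_EVERY": 10,
  "ENV": {
    "RTGYM_INTERFACE": "TM20LIDAR",
    "SLEEP_TIME_AT_RESET": 1.5,
    "IMG_HIST_LEN": 4,
    "RTGYM_CONFIG": {
      "act_buf_len": 2
    }
  }
}
```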
diff --git a/tmrl/config/config_objects.py b/tmrl/config/config_objects.py
index b1e5fe1..f6817a6 100644
--- a/tmrl/config/config_objects.py
+++ b/tmrl/config/config_objects.py
@@ -43,16 +43,9 @@
 CONFIG_DICT = rtgym.DEFAULT_CONFIG_DICT
 CONFIG_DICT["interface"] = INT
-CONFIG_DICT["time_step_duration"] = 0.05
-CONFIG_DICT["start_obs_capture"] = 0.04  # /!\ lidar capture takes 0.03s
-CONFIG_DICT["time_step_timeout_factor"] = 1.0
-CONFIG_DICT["ep_max_length"] = np.inf
-CONFIG_DICT["real_time"] = True
-CONFIG_DICT["async_threading"] = True
-CONFIG_DICT["act_in_obs"] = True  # ACT_IN_OBS
-CONFIG_DICT["act_buf_len"] = cfg.ACT_BUF_LEN
-CONFIG_DICT["benchmark"] = cfg.BENCHMARK
-CONFIG_DICT["wait_on_done"] = True
+CONFIG_DICT_MODIFIERS = cfg.ENV_CONFIG["RTGYM_CONFIG"]
+for k, v in CONFIG_DICT_MODIFIERS.items():
+    CONFIG_DICT[k] = v
 
 # to compress a sample before sending it over the local network/Internet:
 SAMPLE_COMPRESSOR = get_local_buffer_sample if cfg.PRAGMA_LIDAR else get_local_buffer_sample_tm20_imgs
 
@@ -84,6 +77,8 @@
 
 # ALGORITHM: ===================================================
 
+ALG_CONFIG = cfg.TMRL_CONFIG["ALG"]
+
 if cfg.PRAGMA_DCAC:  # DCAC
     AGENT = partial(
         DCAC_Agent,
@@ -98,18 +93,19 @@
         reward_scale=2.0,  # 2.0, # default: 5.0, best tmnf so far: 0.1, best tm20 so far: 2.0
         entropy_scale=1.0)  # default: 1.0), # default: 1.0
 else:  # SAC
+    assert ALG_CONFIG["ALGORITHM"] == "SAC"
     AGENT = partial(
         SAC_Agent,
         device='cuda' if cfg.PRAGMA_CUDA_TRAINING else 'cpu',
         Model=partial(TRAIN_MODEL, act_buf_len=cfg.ACT_BUF_LEN),
-        lr_actor=cfg.TMRL_CONFIG["LR_ACTOR"],
-        lr_critic=cfg.TMRL_CONFIG["LR_CRITIC"],
-        lr_entropy=cfg.TMRL_CONFIG["LR_ENTROPY"],
-        gamma=cfg.TMRL_CONFIG["GAMMA"],
-        polyak=cfg.TMRL_CONFIG["POLYAK"],
-        learn_entropy_coef=cfg.TMRL_CONFIG["LEARN_ENTROPY_COEF"],  # False for SAC v2 with no temperature autotuning
-        target_entropy=cfg.TMRL_CONFIG["TARGET_ENTROPY"],  # None for automatic
-        alpha=cfg.TMRL_CONFIG["ALPHA"])  # inverse of reward scale
+        lr_actor=ALG_CONFIG["LR_ACTOR"],
+        lr_critic=ALG_CONFIG["LR_CRITIC"],
+        lr_entropy=ALG_CONFIG["LR_ENTROPY"],
+        gamma=ALG_CONFIG["GAMMA"],
+        polyak=ALG_CONFIG["POLYAK"],
+        learn_entropy_coef=ALG_CONFIG["LEARN_ENTROPY_COEF"],  # False for SAC v2 with no temperature autotuning
+        target_entropy=ALG_CONFIG["TARGET_ENTROPY"],  # None for automatic
+        alpha=ALG_CONFIG["ALPHA"])  # inverse of reward scale
 
 # TRAINER: =====================================================
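Note: `config_objects.py` now copies every key of `ENV["RTGYM_CONFIG"]` into rtgym's `DEFAULT_CONFIG_DICT` and reads the SAC hyperparameters from an `ALG` section. A sketch of the corresponding `config.json` entries is given below; the rtgym values mirror the hard-coded lines removed above, while the `ALG` values are illustrative placeholders (JSON has no literal for `np.inf`, so `ep_max_length` has to be encoded as a finite number or however the default file in `resources` chooses to represent it):

```json
{
  "ENV": {
    "RTGYM_CONFIG": {
      "time_step_duration": 0.05,
      "start_obs_capture": 0.04,
      "time_step_timeout_factor": 1.0,
      "ep_max_length": 1000,
      "real_time": true,
      "async_threading": true,
      "act_in_obs": true,
      "act_buf_len": 2,
      "benchmark": false,
      "wait_on_done": true
    }
  },
  "ALG": {
    "ALGORITHM": "SAC",
    "LR_ACTOR": 0.0003,
    "LR_CRITIC": 0.0003,
    "LR_ENTROPY": 0.0003,
    "GAMMA": 0.99,
    "POLYAK": 0.995,
    "LEARN_ENTROPY_COEF": false,
    "TARGET_ENTROPY": null,
    "ALPHA": 0.5
  }
}
```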