added SAC hyperparams to config.json
yannbouteiller committed Dec 6, 2021
1 parent 2260537 commit c959672
Showing 3 changed files with 23 additions and 23 deletions.
README.md: 4 changes (2 additions & 2 deletions)
@@ -1,7 +1,7 @@
# TMRL

-TrackMania through Reinforcement Learning (`tmrl`) consists of a Real-Time Gym environment and a distributed framework for training your AIs in real time.
-It is demonstrated on the TrackMania 2020 and Trackmania Nations Forever video games.
+TrackMania through Reinforcement Learning (`tmrl`) consists of a Gym environment and a distributed framework for training AIs in real time.
+It is demonstrated on the TrackMania 2020 video game.

![example](readme/img/video_lidar.gif)

setup.py: 6 changes (3 additions & 3 deletions)
@@ -13,7 +13,7 @@
sys.exit('Sorry, Python < 3.7 is not supported. We use dataclasses that have been introduced in 3.7.')


-RESOURCES_URL = "https://github.com/trackmania-rl/tmrl/releases/download/v0.0.1/resources.zip"
+RESOURCES_URL = "https://github.com/trackmania-rl/tmrl/releases/download/v0.0.2/resources.zip"


def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
@@ -111,13 +111,13 @@ def url_retrieve(url: str, outfile: Path, overwrite: bool = False):

setup(
name='tmrl',
-version='0.0.1',
+version='0.0.2',
description='Autonomous racing in Trackmania',
long_description=README,
long_description_content_type='text/markdown',
keywords='reinforcement learning, trackmania, self driving, roborace',
url='https://github.com/trackmania-rl/tmrl',
-download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.0.1.tar.gz',
+download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.0.2.tar.gz',
author='Yann Bouteiller, Edouard Geze',
author_email='[email protected], [email protected]',
license='MIT',
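The body of `url_retrieve` is collapsed in this view. For context, here is a minimal sketch of what such a helper typically does, assuming it downloads the file at `url` to `outfile` and skips files that already exist unless `overwrite` is set (the actual implementation in setup.py may differ):

```python
from pathlib import Path
from urllib.request import urlopen

def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
    # Do nothing if the file is already on disk and overwriting is disabled.
    if outfile.exists() and not overwrite:
        return
    # Ensure the parent directory exists, then write the downloaded bytes.
    outfile.parent.mkdir(parents=True, exist_ok=True)
    with urlopen(url) as response:
        outfile.write_bytes(response.read())
```

With the version bump above, this helper would fetch the v0.0.2 `resources.zip` on a fresh install.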
tmrl/config/config_objects.py: 36 changes (18 additions & 18 deletions)
@@ -102,14 +102,14 @@
SAC_Agent,
device='cuda' if cfg.PRAGMA_CUDA_TRAINING else 'cpu',
Model=partial(TRAIN_MODEL, act_buf_len=cfg.ACT_BUF_LEN),
-lr_actor=0.0003,
-lr_critic=0.00005, # 0.0001 # default 0.0003
-lr_entropy=0.0003,
-gamma=0.995, # default and best tmnf so far: 0.99
-polyak=0.995, # 0.999 # default 0.995
-learn_entropy_coef=False, # False for SAC v2 with no temperature autotuning
-target_entropy=-7.0, # None for automatic
-alpha=1.0 / 2.7) # best: 1 / 2.5 # inverse of reward scale
+lr_actor=cfg.TMRL_CONFIG["LR_ACTOR"],
+lr_critic=cfg.TMRL_CONFIG["LR_CRITIC"],
+lr_entropy=cfg.TMRL_CONFIG["LR_ENTROPY"],
+gamma=cfg.TMRL_CONFIG["GAMMA"],
+polyak=cfg.TMRL_CONFIG["POLYAK"],
+learn_entropy_coef=cfg.TMRL_CONFIG["LEARN_ENTROPY_COEF"], # False for SAC v2 with no temperature autotuning
+target_entropy=cfg.TMRL_CONFIG["TARGET_ENTROPY"], # None for automatic
+alpha=cfg.TMRL_CONFIG["ALPHA"]) # inverse of reward scale

# TRAINER: =====================================================

@@ -127,20 +127,20 @@ def sac_v2_entropy_scheduler(agent, epoch):
TrainingOffline,
Env=partial(UntouchedGymEnv, id="rtgym:real-time-gym-v0", gym_kwargs={"config": CONFIG_DICT}),
Memory=MEMORY,
-memory_size=1000000,
-batchsize=256, # RTX3080: 256 up to 1024
-epochs=10000, # 400
-rounds=10, # 10
-steps=1000, # 1000
-update_model_interval=1000,
-update_buffer_interval=1000,
-max_training_steps_per_env_step=4.0, # 1.0
+memory_size=cfg.TMRL_CONFIG["MEMORY_SIZE"],
+batchsize=cfg.TMRL_CONFIG["BATCH_SIZE"], # RTX3080: 256 up to 1024
+epochs=cfg.TMRL_CONFIG["MAX_EPOCHS"], # 400
+rounds=cfg.TMRL_CONFIG["ROUNDS_PER_EPOCH"], # 10
+steps=cfg.TMRL_CONFIG["TRAINING_STEPS_PER_ROUND"], # 1000
+update_model_interval=cfg.TMRL_CONFIG["UPDATE_MODEL_INTERVAL"],
+update_buffer_interval=cfg.TMRL_CONFIG["UPDATE_BUFFER_INTERVAL"],
+max_training_steps_per_env_step=cfg.TMRL_CONFIG["MAX_TRAINING_STEPS_PER_ENVIRONMENT_STEP"], # 1.0
profiling=cfg.PROFILE_TRAINER,
Agent=AGENT,
agent_scheduler=None, # sac_v2_entropy_scheduler
-start_training=1000) # set this > 0 to start from an existing policy (fills the buffer up to this number of samples before starting training)
+start_training=cfg.TMRL_CONFIG["ENVIRONMENT_STEPS_BEFORE_TRAINING"]) # set this > 0 to start from an existing policy (fills the buffer up to this number of samples before starting training)
else: # images
TRAINER = partial(
TrainingOffline,
Env=partial(UntouchedGymEnv, id="rtgym:real-time-gym-v0", gym_kwargs={"config": CONFIG_DICT}),
Memory=MEMORY,
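Since this commit reads every SAC and trainer hyperparameter from `cfg.TMRL_CONFIG`, the accompanying `config.json` presumably gains one key per lookup above. A hypothetical fragment, using the previously hardcoded values as defaults (`ALPHA` approximates the old inline `1.0 / 2.7`; the actual file shipped in `resources.zip` may differ):

```json
{
  "LR_ACTOR": 0.0003,
  "LR_CRITIC": 0.00005,
  "LR_ENTROPY": 0.0003,
  "GAMMA": 0.995,
  "POLYAK": 0.995,
  "LEARN_ENTROPY_COEF": false,
  "TARGET_ENTROPY": -7.0,
  "ALPHA": 0.37,
  "MEMORY_SIZE": 1000000,
  "BATCH_SIZE": 256,
  "MAX_EPOCHS": 10000,
  "ROUNDS_PER_EPOCH": 10,
  "TRAINING_STEPS_PER_ROUND": 1000,
  "UPDATE_MODEL_INTERVAL": 1000,
  "UPDATE_BUFFER_INTERVAL": 1000,
  "MAX_TRAINING_STEPS_PER_ENVIRONMENT_STEP": 4.0,
  "ENVIRONMENT_STEPS_BEFORE_TRAINING": 1000
}
```

Centralizing these values in config.json lets a run be retuned without editing tmrl/config/config_objects.py.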
