From 6a3808a3809364ba17203d9b99f3b71e906d71c8 Mon Sep 17 00:00:00 2001
From: Yann Bouteiller
Date: Sun, 5 May 2024 15:12:02 -0400
Subject: [PATCH] Release 0.6.5

---
 setup.py             |  4 ++--
 tmrl/__main__.py     |  3 ++-
 tmrl/tools/record.py | 21 +++++++++++++++++----
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/setup.py b/setup.py
index 7badc75..692abd6 100644
--- a/setup.py
+++ b/setup.py
@@ -137,13 +137,13 @@ def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
 
 setup(
     name='tmrl',
-    version='0.6.4',
+    version='0.6.5',
     description='Network-based framework for real-time robot learning',
     long_description=README,
     long_description_content_type='text/markdown',
     keywords='reinforcement learning, robot learning, trackmania, self driving, roborace',
     url='https://github.com/trackmania-rl/tmrl',
-    download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.6.4.tar.gz',
+    download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.6.5.tar.gz',
     author='Yann Bouteiller, Edouard Geze',
     author_email='yann.bouteiller@polymtl.ca, edouard.geze@hotmail.fr',
     license='MIT',
diff --git a/tmrl/__main__.py b/tmrl/__main__.py
index 953bfa6..cd0c310 100644
--- a/tmrl/__main__.py
+++ b/tmrl/__main__.py
@@ -57,7 +57,7 @@ def main(args):
         else:
             trainer.run()
     elif args.record_reward:
-        record_reward_dist(path_reward=cfg.REWARD_PATH)
+        record_reward_dist(path_reward=cfg.REWARD_PATH, use_keyboard=args.use_keyboard)
     elif args.check_env:
         if cfg.PRAGMA_LIDAR:
             check_env_tm20lidar()
@@ -79,6 +79,7 @@
     parser.add_argument('--test', action='store_true', help='runs inference without training')
     parser.add_argument('--benchmark', action='store_true', help='runs a benchmark of the environment')
     parser.add_argument('--record-reward', dest='record_reward', action='store_true', help='utility to record a reward function in TM20')
+    parser.add_argument('--use-keyboard', dest='use_keyboard', action='store_true', help='modifier for --record-reward')
     parser.add_argument('--check-environment', dest='check_env', action='store_true', help='utility to check the environment')
     parser.add_argument('--wandb', dest='wandb', action='store_true', help='(use with --trainer) if you want to log results on Weights and Biases, use this option')
     parser.add_argument('-d', '--config', type=json.loads, default={}, help='dictionary containing configuration options (modifiers) for the rtgym environment')
diff --git a/tmrl/tools/record.py b/tmrl/tools/record.py
index f7615e9..ed93cf0 100644
--- a/tmrl/tools/record.py
+++ b/tmrl/tools/record.py
@@ -15,7 +15,10 @@
 DATASET_PATH = cfg.DATASET_PATH
 
 
-def record_reward_dist(path_reward=PATH_REWARD):
+def record_reward_dist(path_reward=PATH_REWARD, use_keyboard=False):
+    if use_keyboard:
+        import keyboard
+
     positions = []
     client = TM2020OpenPlanetClient()
     path = path_reward
@@ -23,13 +26,23 @@
     is_recording = False
     while True:
         if not is_recording:
-            logging.info(f"start recording")
-            is_recording = True
+            if not use_keyboard:
+                logging.info(f"start recording")
+                is_recording = True
+            else:
+                if keyboard.is_pressed('e'):
+                    logging.info(f"start recording")
+                    is_recording = True
         if is_recording:
             data = client.retrieve_data(sleep_if_empty=0.01)  # we need many points to build a smooth curve
             terminated = bool(data[8])
-            early_stop = False
+
+            if not use_keyboard:
+                early_stop = False
+            else:
+                early_stop = keyboard.is_pressed('q')
+
             if early_stop or terminated:
                 logging.info(f"Computing reward function checkpoints from captured positions...")
                 logging.info(f"Initial number of captured positions: {len(positions)}")