Release 0.6.5

trackmania-rl · May 5, 2024 · 6a3808a
1 parent 923091e
commit 6a3808a
Show file tree

Hide file tree

Showing 3 changed files with 21 additions and 7 deletions.
diff --git a/setup.py b/setup.py
@@ -137,13 +137,13 @@ def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
 
 setup(
     name='tmrl',
-    version='0.6.4',
+    version='0.6.5',
     description='Network-based framework for real-time robot learning',
     long_description=README,
     long_description_content_type='text/markdown',
     keywords='reinforcement learning, robot learning, trackmania, self driving, roborace',
     url='https://github.com/trackmania-rl/tmrl',
-    download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.6.4.tar.gz',
+    download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.6.5.tar.gz',
     author='Yann Bouteiller, Edouard Geze',
     author_email='[email protected], [email protected]',
     license='MIT',

diff --git a/tmrl/__main__.py b/tmrl/__main__.py
@@ -57,7 +57,7 @@ def main(args):
         else:
             trainer.run()
     elif args.record_reward:
-        record_reward_dist(path_reward=cfg.REWARD_PATH)
+        record_reward_dist(path_reward=cfg.REWARD_PATH, use_keyboard=args.use_keyboard)
     elif args.check_env:
         if cfg.PRAGMA_LIDAR:
             check_env_tm20lidar()
@@ -79,6 +79,7 @@ def main(args):
     parser.add_argument('--test', action='store_true', help='runs inference without training')
     parser.add_argument('--benchmark', action='store_true', help='runs a benchmark of the environment')
     parser.add_argument('--record-reward', dest='record_reward', action='store_true', help='utility to record a reward function in TM20')
+    parser.add_argument('--use-keyboard', dest='use_keyboard', action='store_true', help='modifier for --record-reward')
     parser.add_argument('--check-environment', dest='check_env', action='store_true', help='utility to check the environment')
     parser.add_argument('--wandb', dest='wandb', action='store_true', help='(use with --trainer) if you want to log results on Weights and Biases, use this option')
     parser.add_argument('-d', '--config', type=json.loads, default={}, help='dictionary containing configuration options (modifiers) for the rtgym environment')

diff --git a/tmrl/tools/record.py b/tmrl/tools/record.py
@@ -15,21 +15,34 @@
 DATASET_PATH = cfg.DATASET_PATH
 
 
-def record_reward_dist(path_reward=PATH_REWARD):
+def record_reward_dist(path_reward=PATH_REWARD, use_keyboard=False):
+    if use_keyboard:
+        import keyboard
+
     positions = []
     client = TM2020OpenPlanetClient()
     path = path_reward
 
     is_recording = False
     while True:
         if not is_recording:
-            logging.info(f"start recording")
-            is_recording = True
+            if not use_keyboard:
+                logging.info(f"start recording")
+                is_recording = True
+            else:
+                if keyboard.is_pressed('e'):
+                    logging.info(f"start recording")
+                    is_recording = True
 
         if is_recording:
             data = client.retrieve_data(sleep_if_empty=0.01)  # we need many points to build a smooth curve
             terminated = bool(data[8])
-            early_stop = False
+
+            if not use_keyboard:
+                early_stop = False
+            else:
+                early_stop = keyboard.is_pressed('q')
+
             if early_stop or terminated:
                 logging.info(f"Computing reward function checkpoints from captured positions...")
                 logging.info(f"Initial number of captured positions: {len(positions)}")