Release 0.3.1
yannbouteiller committed Sep 16, 2022
1 parent 64c9525 commit c61fc1e
Showing 9 changed files with 13 additions and 231 deletions.
readme/competition.md: 4 changes (2 additions & 2 deletions)
@@ -32,11 +32,11 @@ Competitors solve real-time Gym environments featuring snapshots from the real `
 ### Current iteration (Alpha)
 The `tmrl` competition is an open-source research initiative, currently in its very first iteration :hatching_chick:
 
-In this iteration, competitors race on the default `tmrl-test` track (plain road) by solving the default [TrackMania 2020 Gym environment](https://github.com/trackmania-rl/tmrl#gym-environment) (either with raw snapshots or with the readily available LIDAR reduction).
+In this iteration, competitors race on the default `tmrl-test` track (plain road) by solving the `Full` version of the [TrackMania 2020 Gym environment](https://github.com/trackmania-rl/tmrl#gym-environment) (the `LIDAR` version is also accepted).
 
 The setting is the following:
 - The `action space` is the default TrackMania 2020 continuous action space (3 floats between -1.0 and 1.0).
-- The `observation space` is a history of 4 raw snapshots along with the speed and 2 previous actions. Using the readily available default 19-beam LIDAR reduction instead of raw snapshots is allowed.
+- The `observation space` is a history of 4 raw snapshots along with the speed, gear, rpm and 2 previous actions.
 - The `control frequency` is 20 Hz.
 
 The only thing that is required in your entry to the competition is an implementation of the [ActorModule](https://github.com/trackmania-rl/tmrl/blob/master/tmrl/actor.py) python interface.
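As a rough illustration of this requirement, here is a minimal, hedged sketch of an entry. Only the `ActorModule` name and its location come from the text above; the `act(obs, test=False)` signature and the (gas, brake, steer) ordering are assumptions to verify against tmrl/actor.py.

```python
# Hedged sketch of a competition entry; signatures are assumptions, check tmrl/actor.py.
import numpy as np

from tmrl.actor import ActorModule  # the interface the competition requires


class ConstantActor(ActorModule):
    """Toy policy: ignores the observation and always returns the same action."""

    def act(self, obs, test=False):
        # obs: history of 4 snapshots (or LIDAR scans), speed, gear, rpm and the 2 previous actions
        # action: 3 floats in [-1.0, 1.0]; the (gas, brake, steer) ordering is an assumption
        return np.array([1.0, 0.0, 0.0], dtype=np.float32)  # assumed: full gas, no brake, straight
```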
setup.py: 4 changes (2 additions & 2 deletions)
@@ -125,13 +125,13 @@ def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
 
 setup(
     name='tmrl',
-    version='0.3.0',
+    version='0.3.1',
     description='Autonomous racing in Trackmania',
     long_description=README,
     long_description_content_type='text/markdown',
     keywords='reinforcement learning, trackmania, self driving, roborace',
     url='https://github.com/trackmania-rl/tmrl',
-    download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.3.0.tar.gz',
+    download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.3.1.tar.gz',
     author='Yann Bouteiller, Edouard Geze',
     author_email='[email protected], [email protected]',
     license='MIT',
tmrl/__main__.py: 3 changes (1 addition & 2 deletions)
@@ -56,10 +56,9 @@ def main(args):
         else:
             trainer.run()
     elif args.record_reward:
-        assert cfg.PRAGMA_TM2020_TMNF, "Not supported for this environment."
         record_reward_dist(path_reward=cfg.REWARD_PATH)
     elif args.check_env:
-        assert cfg.PRAGMA_LIDAR and cfg.PRAGMA_TM2020_TMNF, "Not supported for this environment."
+        assert cfg.PRAGMA_LIDAR, "Not supported for this environment."
         check_env_tm20lidar()
     else:
         raise ArgumentTypeError('Enter a valid argument')
tmrl/config/config_constants.py: 1 change (0 additions & 1 deletion)
@@ -38,7 +38,6 @@
 
 ENV_CONFIG = TMRL_CONFIG["ENV"]
 RTGYM_INTERFACE = ENV_CONFIG["RTGYM_INTERFACE"]
-PRAGMA_TM2020_TMNF = RTGYM_INTERFACE.startswith("TM20")  # True if TM2020, False if TMNF
 PRAGMA_LIDAR = RTGYM_INTERFACE.endswith("LIDAR")  # True if Lidar, False if images
 PRAGMA_PROGRESS = RTGYM_INTERFACE.endswith("LIDARPROGRESS")
 if PRAGMA_PROGRESS:
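For readers of this hunk, a small hedged illustration of the remaining suffix checks; the example `RTGYM_INTERFACE` values are assumptions, and the body of the collapsed `if PRAGMA_PROGRESS:` branch is not shown here.

```python
# Hedged illustration of the suffix logic above, with assumed example interface names.
for RTGYM_INTERFACE in ("TM20FULL", "TM20LIDAR", "TM20LIDARPROGRESS"):
    PRAGMA_LIDAR = RTGYM_INTERFACE.endswith("LIDAR")              # True only for "TM20LIDAR"
    PRAGMA_PROGRESS = RTGYM_INTERFACE.endswith("LIDARPROGRESS")   # True only for "TM20LIDARPROGRESS"
    print(RTGYM_INTERFACE, PRAGMA_LIDAR, PRAGMA_PROGRESS)
```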
tmrl/config/config_objects.py: 14 changes (5 additions & 9 deletions)
@@ -7,7 +7,7 @@
 # local imports
 import tmrl.config.config_constants as cfg
 from tmrl.training_offline import TrainingOffline
-from tmrl.custom.custom_gym_interfaces import TM2020Interface, TM2020InterfaceLidar, TM2020InterfaceLidarProgress, TMInterface, TMInterfaceLidar
+from tmrl.custom.custom_gym_interfaces import TM2020Interface, TM2020InterfaceLidar, TM2020InterfaceLidarProgress
 from tmrl.custom.custom_memories import MemoryTMFull, MemoryTMLidar, MemoryTMLidarProgress, get_local_buffer_sample_lidar, get_local_buffer_sample_lidar_progress, get_local_buffer_sample_tm20_imgs
 from tmrl.custom.custom_preprocessors import obs_preprocessor_tm_act_in_obs, obs_preprocessor_tm_lidar_act_in_obs, obs_preprocessor_tm_lidar_progress_act_in_obs
 from tmrl.envs import GenericGymEnv
@@ -40,15 +40,12 @@
 POLICY = SquashedGaussianVanillaCNNActor
 
 if cfg.PRAGMA_LIDAR:
-    if cfg.PRAGMA_TM2020_TMNF:
-        if cfg.PRAGMA_PROGRESS:
-            INT = partial(TM2020InterfaceLidarProgress, img_hist_len=cfg.IMG_HIST_LEN, gamepad=cfg.PRAGMA_GAMEPAD)
-        else:
-            INT = partial(TM2020InterfaceLidar, img_hist_len=cfg.IMG_HIST_LEN, gamepad=cfg.PRAGMA_GAMEPAD)
+    if cfg.PRAGMA_PROGRESS:
+        INT = partial(TM2020InterfaceLidarProgress, img_hist_len=cfg.IMG_HIST_LEN, gamepad=cfg.PRAGMA_GAMEPAD)
     else:
-        INT = partial(TMInterfaceLidar, img_hist_len=cfg.IMG_HIST_LEN)
+        INT = partial(TM2020InterfaceLidar, img_hist_len=cfg.IMG_HIST_LEN, gamepad=cfg.PRAGMA_GAMEPAD)
 else:
-    INT = partial(TM2020Interface, img_hist_len=cfg.IMG_HIST_LEN, gamepad=cfg.PRAGMA_GAMEPAD) if cfg.PRAGMA_TM2020_TMNF else partial(TMInterface, img_hist_len=cfg.IMG_HIST_LEN)
+    INT = partial(TM2020Interface, img_hist_len=cfg.IMG_HIST_LEN, gamepad=cfg.PRAGMA_GAMEPAD)
 
 CONFIG_DICT = rtgym.DEFAULT_CONFIG_DICT.copy()
 CONFIG_DICT["interface"] = INT
@@ -86,7 +83,6 @@
     else:
         MEM = MemoryTMLidar
 else:
-    assert cfg.PRAGMA_TM2020_TMNF, "TMNF is not officially supported."
     MEM = MemoryTMFull
 
 MEMORY = partial(MEM,
tmrl/custom/custom_gym_interfaces.py: 162 changes (1 addition & 161 deletions)
@@ -22,7 +22,7 @@
 from tmrl.custom.utils.control_keyboard import apply_control, keyres
 from tmrl.custom.utils.control_mouse import mouse_close_finish_pop_up_tm20, mouse_save_replay_tm20
 from tmrl.custom.utils.window import WindowInterface
-from tmrl.custom.utils.tools import Lidar, TM2020OpenPlanetClient, get_speed, load_digits
+from tmrl.custom.utils.tools import Lidar, TM2020OpenPlanetClient
 
 # Globals ==============================================================================================================
 
@@ -48,7 +48,6 @@ def __init__(self,
         Args:
         """
         self.last_time = None
-        self.digits = None
         self.img_hist_len = img_hist_len
         self.img_hist = None
         self.img = None
@@ -76,7 +75,6 @@ def initialize_common(self):
         self.window_interface = WindowInterface("Trackmania")
         self.window_interface.move_and_resize()
         self.last_time = time.time()
-        self.digits = load_digits()
         self.img_hist = deque(maxlen=self.img_hist_len)
         self.img = None
         self.reward_function = RewardFunction(reward_data_path=cfg.REWARD_PATH,
@@ -451,163 +449,5 @@ def get_observation_space(self):
         return spaces.Tuple((speed, progress, imgs))
 
 
-# UNSUPPORTED: =========================================================================================================
-
-# Interface for Trackmania Nations Forever: ============================================================================
-
-
-class TMInterface(RealTimeGymInterface):
-    """
-    This is the API needed for the algorithm to control Trackmania Nations Forever
-    """
-    def __init__(self, img_hist_len=4):
-        """
-        Args:
-        """
-        self.window_interface = WindowInterface("Trackmania")
-        self.last_time = time.time()
-        self.digits = load_digits()
-        self.img_hist_len = img_hist_len
-        self.img_hist = deque(maxlen=self.img_hist_len)
-
-    def send_control(self, control):
-        """
-        Non-blocking function
-        Applies the action given by the RL policy
-        If control is None, does nothing
-        Args:
-            control: np.array: [forward,backward,right,left]
-        """
-        if control is not None:
-            actions = []
-            if control[0] > 0:
-                actions.append('f')
-            if control[1] > 0:
-                actions.append('b')
-            if control[2] > 0.5:
-                actions.append('r')
-            elif control[2] < -0.5:
-                actions.append('l')
-            apply_control(actions)
-
-    def grab_img_and_speed(self):
-        img = self.window_interface.screenshot()[:, :, :3]
-        speed = np.array([
-            get_speed(img, self.digits),
-        ], dtype='float32')
-        img = img[100:-150, :]
-        img = cv2.resize(img, (190, 50))
-        # img = np.moveaxis(img, -1, 0)
-        return img, speed
-
-    def reset(self):
-        """
-        obs must be a list of numpy arrays
-        """
-        self.send_control(self.get_default_action())
-        keyres()
-        # time.sleep(0.05) # must be long enough for image to be refreshed
-        img, speed = self.grab_img_and_speed()
-        for _ in range(self.img_hist_len):
-            self.img_hist.append(img)
-        imgs = np.array(list(self.img_hist), dtype='float32')
-        obs = [speed, imgs]
-        return obs, {}
-
-    def wait(self):
-        """
-        Non-blocking function
-        The agent stays 'paused', waiting in position
-        """
-        self.send_control(self.get_default_action())
-
-    def get_obs_rew_terminated_info(self):
-        """
-        returns the observation, the reward, and a terminated signal for end of episode
-        obs must be a list of numpy arrays
-        """
-        img, speed = self.grab_img_and_speed()
-        rew = speed[0]
-        self.img_hist.append(img)
-        imgs = np.array(list(self.img_hist), dtype='float32')
-        obs = [speed, imgs]
-        terminated = False  # TODO: True if race complete
-        # logging.debug(f" len(obs):{len(obs)}, obs[0]:{obs[0]}, obs[1].shape:{obs[1].shape}")
-        return obs, rew, terminated, {}
-
-    def get_observation_space(self):
-        """
-        must be a Tuple
-        """
-        speed = spaces.Box(low=0.0, high=1000.0, shape=(1, ))
-        imgs = spaces.Box(low=0.0, high=255.0, shape=(self.img_hist_len, 50, 190, 3))
-        return spaces.Tuple((speed, imgs))
-
-    def get_action_space(self):
-        """
-        must be a Box
-        """
-        return spaces.Box(low=-1.0, high=1.0, shape=(3, ))  # 1=f; 1=b; -1=l,+1=r
-
-    def get_default_action(self):
-        """
-        initial action at episode start
-        """
-        return np.array([0.0, 0.0, 0.0], dtype='float32')
-
-
-class TMInterfaceLidar(TMInterface):
-    def __init__(self, img_hist_len=4):
-        super().__init__(img_hist_len)
-        self.lidar = Lidar(self.window_interface.screenshot())
-
-    def grab_lidar_and_speed(self):
-        img = self.window_interface.screenshot()[:, :, :3]
-        speed = np.array([
-            get_speed(img, self.digits),
-        ], dtype='float32')
-        lidar = self.lidar.lidar_20(img=img, show=False)
-        return lidar, speed
-
-    def reset(self):
-        """
-        obs must be a list of numpy arrays
-        """
-        self.send_control(self.get_default_action())
-        keyres()
-        # time.sleep(0.05) # must be long enough for image to be refreshed
-        img, speed = self.grab_lidar_and_speed()
-        for _ in range(self.img_hist_len):
-            self.img_hist.append(img)
-        imgs = np.array(list(self.img_hist), dtype='float32')
-        obs = [speed, imgs]
-        return obs, {}
-
-    def get_obs_rew_terminated_info(self):
-        """
-        returns the observation, the reward, and a terminated signal for end of episode
-        obs must be a list of numpy arrays
-        """
-        img, speed = self.grab_lidar_and_speed()
-        rew = speed[0]
-        self.img_hist.append(img)
-        imgs = np.array(list(self.img_hist), dtype='float32')
-        obs = [speed, imgs]
-        terminated = False  # TODO: True if race complete
-        # logging.debug(f" len(obs):{len(obs)}, obs[0]:{obs[0]}, obs[1].shape:{obs[1].shape}")
-        return obs, rew, terminated, {}
-
-    def get_observation_space(self):
-        """
-        must be a Tuple
-        """
-        speed = spaces.Box(low=0.0, high=1000.0, shape=(1, ))
-        imgs = spaces.Box(low=0.0, high=np.inf, shape=(
-            self.img_hist_len,
-            19,
-        ))  # lidars
-        return spaces.Tuple((speed, imgs))
-
-
 if __name__ == "__main__":
     pass
tmrl/custom/utils/tools.py: 51 changes (0 additions & 51 deletions)
@@ -67,57 +67,6 @@ def retrieve_data(self, sleep_if_empty=0.1):
         return data
 
 
-def load_digits():
-    p = Path(os.path.dirname(os.path.realpath(__file__))) / 'digits'
-    zero = cv2.imread(str(p / '0.png'), 0)
-    One = cv2.imread(str(p / '1.png'), 0)
-    Two = cv2.imread(str(p / '2.png'), 0)
-    Three = cv2.imread(str(p / '3.png'), 0)
-    four = cv2.imread(str(p / '4.png'), 0)
-    five = cv2.imread(str(p / '5.png'), 0)
-    six = cv2.imread(str(p / '6.png'), 0)
-    seven = cv2.imread(str(p / '7.png'), 0)
-    eight = cv2.imread(str(p / '8.png'), 0)
-    nine = cv2.imread(str(p / '9.png'), 0)
-    digits = np.array([zero, One, Two, Three, four, five, six, seven, eight, nine])
-    return digits
-
-
-def get_speed(img, digits):
-    img1 = np.array(img[464:, 887:908])
-    img2 = np.array(img[464:, 909:930])
-    img3 = np.array(img[464:, 930:951])
-
-    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
-    img1[img1 > 250] = 255
-    img1[img1 <= 250] = 0
-    img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
-    img2[img2 > 250] = 255
-    img2[img2 <= 250] = 0
-    img3 = cv2.cvtColor(img3, cv2.COLOR_BGR2GRAY)
-    img3[img3 > 250] = 255
-    img3[img3 <= 250] = 0
-
-    best1, best2, best3 = 100000, 100000, 100000
-    for idx, num in enumerate(digits):
-        if np.sum(np.bitwise_xor(img1, num)) < best1:
-            best1 = np.sum(np.bitwise_xor(img1, num))
-            num1 = idx
-        if np.sum(np.bitwise_xor(img2, num)) < best2:
-            best2 = np.sum(np.bitwise_xor(img2, num))
-            num2 = idx
-        if np.sum(np.bitwise_xor(img3, num)) < best3:
-            best3 = np.sum(np.bitwise_xor(img3, num))
-            num3 = idx
-    if np.max(img1) == 0:
-        best1, num1 = 0, 0
-    if np.max(img2) == 0:
-        best2, num2 = 0, 0
-    if np.max(img3) == 0:
-        best3, num3 = 0, 0
-    return float(100 * num1 + 10 * num2 + num3)
-
-
 def armin(tab):
     nz = np.nonzero(tab)[0]
     if len(nz) != 0:
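The deleted `load_digits` / `get_speed` helpers implemented screenshot-based digit matching for the speedometer; telemetry is instead read through the OpenPlanet client kept in this file. A hedged usage sketch follows; the layout of the returned sample is an assumption to check against `TM2020OpenPlanetClient`.

```python
# Hedged sketch: reading telemetry from the OpenPlanet plugin instead of screenshot OCR.
from tmrl.custom.utils.tools import TM2020OpenPlanetClient

client = TM2020OpenPlanetClient()
data = client.retrieve_data()  # waits until the OpenPlanet plugin has sent a sample
speed = data[0]                # assumed: the first field of the sample is the car speed
```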
tmrl/tools/benchmark_environment.py: 3 changes (1 addition & 2 deletions)
@@ -8,8 +8,7 @@
 from rtgym.envs.real_time_env import DEFAULT_CONFIG_DICT
 
 # local imports
-from tmrl.custom.custom_gym_interfaces import (TM2020Interface, TM2020InterfaceLidar,
-                                               TMInterface, TMInterfaceLidar)
+from tmrl.custom.custom_gym_interfaces import (TM2020Interface, TM2020InterfaceLidar)
 import logging
 
 NB_STEPS = 1000
tmrl/tools/check_environment.py: 2 changes (1 addition & 1 deletion)
@@ -3,7 +3,7 @@
 from rtgym.envs.real_time_env import DEFAULT_CONFIG_DICT
 
 # local imports
-from tmrl.custom.custom_gym_interfaces import TM2020Interface, TM2020InterfaceLidar, TMInterface, TMInterfaceLidar
+from tmrl.custom.custom_gym_interfaces import TM2020Interface, TM2020InterfaceLidar
 from tmrl.custom.utils.window import WindowInterface
 from tmrl.custom.utils.tools import Lidar
 import logging
