Support for custom image dimensions

yannbouteiller committed Jan 9, 2023
1 parent 3c5119b commit 990cf31
Showing 7 changed files with 73 additions and 26 deletions.
2 changes: 1 addition & 1 deletion readme/competition.md
@@ -36,7 +36,7 @@ The `tmrl` competition is an open research initiative, currently in its first iteration
In this iteration, competitors race on the default `tmrl-test` track (plain road) by solving the `Full` version of the [TrackMania 2020 Gym environment](https://github.com/trackmania-rl/tmrl#gym-environment) (the `LIDAR` version is also accepted).

- The `action space` is the default TrackMania 2020 continuous action space (3 floats between -1.0 and 1.0).
- The `observation space` is a history of 4 raw snapshots along with the speed, gear, rpm and 2 previous actions. The choice of camera is up to you as long as you use one of the default cameras. You are also allowed to use colors if you wish (set the `"IMG_GRAYSCALE"` entry to `false` in `config.json`).
- The `observation space` is a history of 4 raw snapshots along with the speed, gear, rpm and 2 previous actions. The choice of camera is up to you as long as you use one of the default cameras. You are allowed to use colors if you wish (set the `"IMG_GRAYSCALE"` entry to `false` in `config.json`). You may also customize the actual image dimensions (`"IMG_WIDTH"` and `"IMG_HEIGHT"`) and the game window dimensions (`"WINDOW_WIDTH"` and `"WINDOW_HEIGHT"`) if you need to (see the sketch below this list). However, the window dimensions must remain between `(256, 128)` and `(958, 488)` (dimensions greater than `(958, 488)` are **not** allowed).
- The `control frequency` is 20 Hz.
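
A minimal sketch of these `"ENV"` entries, written here as a Python dict for illustration (the exact layout of your `config.json` may differ; the values shown are examples, not requirements):

```python
# Illustrative example only: these keys mirror the entries named above.
env_entry = {
    "IMG_GRAYSCALE": True,   # set to False for color images
    "IMG_WIDTH": 64,         # width of the resized observation images
    "IMG_HEIGHT": 64,        # height of the resized observation images
    "WINDOW_WIDTH": 958,     # game window width, must stay within [256, 958]
    "WINDOW_HEIGHT": 488,    # game window height, must stay within [128, 488]
}
```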

An entry to the competition simply needs to be a working implementation of the [ActorModule](https://github.com/trackmania-rl/tmrl/blob/master/tmrl/actor.py) interface.
2 changes: 2 additions & 0 deletions tmrl/config/config_constants.py
@@ -51,6 +51,8 @@
WINDOW_WIDTH = ENV_CONFIG["WINDOW_WIDTH"]
WINDOW_HEIGHT = ENV_CONFIG["WINDOW_HEIGHT"]
GRAYSCALE = ENV_CONFIG["IMG_GRAYSCALE"] if "IMG_GRAYSCALE" in ENV_CONFIG else False
IMG_WIDTH = ENV_CONFIG["IMG_WIDTH"] if "IMG_WIDTH" in ENV_CONFIG else 64
IMG_HEIGHT = ENV_CONFIG["IMG_HEIGHT"] if "IMG_HEIGHT" in ENV_CONFIG else 64
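
The two new constants fall back to 64 when the corresponding entries are missing from `config.json`. A minimal equivalent sketch using `dict.get` (the file path below is a placeholder, not the actual TMRL location):

```python
import json
from pathlib import Path

# Placeholder path: point this at your own config.json.
env_config = json.loads(Path("config.json").read_text()).get("ENV", {})

# Same fallback behavior as the constants above:
img_width = env_config.get("IMG_WIDTH", 64)         # defaults to 64 if absent
img_height = env_config.get("IMG_HEIGHT", 64)       # defaults to 64 if absent
grayscale = env_config.get("IMG_GRAYSCALE", False)  # defaults to False if absent
```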

# DEBUGGING AND BENCHMARKING: ===================================

6 changes: 5 additions & 1 deletion tmrl/config/config_objects.py
@@ -45,7 +45,11 @@
else:
INT = partial(TM2020InterfaceLidar, img_hist_len=cfg.IMG_HIST_LEN, gamepad=cfg.PRAGMA_GAMEPAD)
else:
INT = partial(TM2020Interface, img_hist_len=cfg.IMG_HIST_LEN, gamepad=cfg.PRAGMA_GAMEPAD, grayscale=cfg.GRAYSCALE)
INT = partial(TM2020Interface,
img_hist_len=cfg.IMG_HIST_LEN,
gamepad=cfg.PRAGMA_GAMEPAD,
grayscale=cfg.GRAYSCALE,
resize_to=(cfg.IMG_WIDTH, cfg.IMG_HEIGHT))

CONFIG_DICT = rtgym.DEFAULT_CONFIG_DICT.copy()
CONFIG_DICT["interface"] = INT
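
For reference, a minimal sketch of building the same kind of `partial` with explicit values in place of the config constants (the `(width, height)` ordering follows the `cv2.resize` convention):

```python
from functools import partial

from tmrl.custom.custom_gym_interfaces import TM2020Interface

# Sketch only: explicit values instead of cfg.IMG_WIDTH / cfg.IMG_HEIGHT, etc.
interface_cls = partial(TM2020Interface,
                        img_hist_len=4,      # 4 consecutive screenshots per observation
                        gamepad=False,       # keyboard control instead of a virtual gamepad
                        grayscale=True,      # grayscale observations
                        resize_to=(64, 64))  # (width, height), cv2 convention
```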
22 changes: 16 additions & 6 deletions tmrl/custom/custom_gym_interfaces.py
@@ -33,7 +33,7 @@

class TM2020Interface(RealTimeGymInterface):
"""
This is the API needed for the algorithm to control Trackmania2020
This is the API needed for the algorithm to control TrackMania 2020
"""
def __init__(self,
img_hist_len: int = 4,
@@ -45,7 +45,17 @@ def __init__(self,
finish_reward=cfg.REWARD_END_OF_TRACK,
constant_penalty=cfg.CONSTANT_PENALTY):
"""
Base rtgym interface for TrackMania 2020 (Full environment)
Args:
img_hist_len: int: number of consecutive images in each observation
gamepad: bool: whether to use a virtual gamepad for control
min_nb_steps_before_failure: int: the episode ends if no reward is received for this number of time steps
save_replay: bool: whether to save TrackMania replays
grayscale: bool: whether to output grayscale images or color images
resize_to: Tuple[int, int]: resize output images to this (width, height)
finish_reward: float: reward received when crossing the finish line
constant_penalty: float: constant added to the reward at each time step
"""
self.last_time = None
self.img_hist_len = img_hist_len
@@ -115,7 +125,7 @@ def send_control(self, control):

def grab_data_and_img(self):
img = self.window_interface.screenshot()[:, :, :3] # BGR ordering
if self.resize_to is not None:
if self.resize_to is not None: # cv2.resize takes dim as (width, height)
img = cv2.resize(img, self.resize_to)
if self.grayscale:
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
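
A quick standalone check of the cv2 conventions noted in the comment above: `cv2.resize` takes its target size as `(width, height)`, while the resulting NumPy arrays are indexed `(height, width[, channels])`.

```python
import cv2
import numpy as np

frame = np.zeros((488, 958, 3), dtype=np.uint8)  # fake 958x488 BGR screenshot, stored as (height, width, channels)
resized = cv2.resize(frame, (64, 64))            # dsize is (width, height)
gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
print(resized.shape)  # (64, 64, 3) -> (height, width, channels)
print(gray.shape)     # (64, 64)    -> (height, width)
```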
@@ -215,13 +225,13 @@ def get_observation_space(self):
gear = spaces.Box(low=0.0, high=6, shape=(1, ))
rpm = spaces.Box(low=0.0, high=np.inf, shape=(1, ))
if self.resize_to is not None:
h, w = self.resize_to
w, h = self.resize_to
else:
h, w = cfg.WINDOW_HEIGHT, cfg.WINDOW_WIDTH
w, h = cfg.WINDOW_WIDTH, cfg.WINDOW_HEIGHT  # (width, height), consistent with resize_to
if self.grayscale:
img = spaces.Box(low=0.0, high=255.0, shape=(self.img_hist_len, h, w))
img = spaces.Box(low=0.0, high=255.0, shape=(self.img_hist_len, h, w)) # cv2 grayscale images are (h, w)
else:
img = spaces.Box(low=0.0, high=255.0, shape=(self.img_hist_len, h, w, 3))
img = spaces.Box(low=0.0, high=255.0, shape=(self.img_hist_len, h, w, 3)) # cv2 images are (h, w, c)
return spaces.Tuple((speed, gear, rpm, img))

def get_action_space(self):
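
To make the `(history, height, width)` convention concrete, here is a small hedged example with non-square images (e.g. `IMG_WIDTH = 128`, `IMG_HEIGHT = 64`); depending on your install, `spaces` may come from `gym` rather than `gymnasium`:

```python
from gymnasium import spaces  # or: from gym import spaces

# With IMG_WIDTH=128, IMG_HEIGHT=64, grayscale images and a history of 4,
# the image part of the observation space has shape (history, height, width):
img_space = spaces.Box(low=0.0, high=255.0, shape=(4, 64, 128))
print(img_space.shape)  # (4, 64, 128)
```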
6 changes: 4 additions & 2 deletions tmrl/custom/custom_models.py
@@ -16,6 +16,7 @@
# local imports
from tmrl.util import prod
from tmrl.actor import TorchActorModule
import tmrl.config.config_constants as cfg


# SUPPORTED ============================================================================================================
@@ -493,9 +494,10 @@ class VanillaCNN(Module):
def __init__(self, q_net):
super(VanillaCNN, self).__init__()
self.q_net = q_net
self.h_out, self.w_out = 64, 64
self.h_out, self.w_out = cfg.IMG_HEIGHT, cfg.IMG_WIDTH
hist = cfg.IMG_HIST_LEN

self.conv1 = Conv2d(4, 64, 8, stride=2)
self.conv1 = Conv2d(hist, 64, 8, stride=2)
self.h_out, self.w_out = conv2d_out_dims(self.conv1, self.h_out, self.w_out)
self.conv2 = Conv2d(64, 64, 4, stride=2)
self.h_out, self.w_out = conv2d_out_dims(self.conv2, self.h_out, self.w_out)
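
`conv2d_out_dims` is the helper that propagates the (now configurable) input height and width through each convolution. A minimal sketch of what such a helper typically computes, using the standard PyTorch `Conv2d` output-size formula (the actual TMRL helper may differ in details):

```python
import math
from torch.nn import Conv2d

def conv2d_out_dims_sketch(conv: Conv2d, h_in: int, w_in: int):
    """Standard Conv2d output-size formula (sketch; not the exact TMRL implementation)."""
    h_out = math.floor((h_in + 2 * conv.padding[0] - conv.dilation[0] * (conv.kernel_size[0] - 1) - 1) / conv.stride[0] + 1)
    w_out = math.floor((w_in + 2 * conv.padding[1] - conv.dilation[1] * (conv.kernel_size[1] - 1) - 1) / conv.stride[1] + 1)
    return h_out, w_out

# With the default 64 x 64 inputs: conv1 (kernel 8, stride 2) -> 29 x 29, conv2 (kernel 4, stride 2) -> 13 x 13.
```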
11 changes: 10 additions & 1 deletion tmrl/tools/check_environment.py
@@ -7,6 +7,7 @@
from tmrl.custom.custom_gym_interfaces import TM2020Interface, TM2020InterfaceLidar
from tmrl.custom.utils.window import WindowInterface
from tmrl.custom.utils.tools import Lidar
import tmrl.config.config_constants as cfg
import logging


@@ -35,13 +36,21 @@ def show_imgs(imgs):
concat = imgs.reshape((nb*h, w))
cv2.imshow("Environment", concat)
cv2.waitKey(1)
elif len(imshape) == 4: # color
nb, h, w, c = imshape
concat = imgs.reshape((nb*h, w, c))
cv2.imshow("Environment", concat)
cv2.waitKey(1)


def check_env_tm20full():
env_config = DEFAULT_CONFIG_DICT.copy()
env_config["interface"] = TM2020Interface
env_config["wait_on_done"] = True
env_config["interface_kwargs"] = {"gamepad": False, "min_nb_steps_before_failure": int(20 * 60)}
env_config["interface_kwargs"] = {"gamepad": False,
"min_nb_steps_before_failure": int(20 * 60),
"grayscale": cfg.GRAYSCALE,
"resize_to": (cfg.IMG_WIDTH, cfg.IMG_HEIGHT)}
env = gym.make("real-time-gym-v0", config=env_config)
o, i = env.reset()
show_imgs(o[3])
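
A hedged usage sketch, assuming TrackMania 2020 is already running in a visible window sized according to your `config.json`:

```python
if __name__ == "__main__":
    # Drives the Full environment and displays the captured observations with show_imgs.
    check_env_tm20full()
```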
50 changes: 35 additions & 15 deletions tmrl/tuto/competition/custom_actor_module.py
@@ -69,7 +69,7 @@
# =====================================================================
# USEFUL PARAMETERS
# =====================================================================
# You can change these parameters here directly,
# You can change these parameters here directly (not recommended),
# or you can change them in the TMRL config.json file (recommended).

# Maximum number of training 'epochs':
@@ -130,12 +130,15 @@
security = cfg.SECURITY # when training over the Internet, it is safer to change this to "TLS"
# (please read the security instructions on GitHub)


# =====================================================================
# ADVANCED PARAMETERS
# =====================================================================
# You may want to change the following in advanced applications;
# however, most competitors will not need to change this.
# If interested, read the full TMRL tutorial on GitHub.
# These parameters can be changed directly here if you wish.
# (Note: The tutorial may stop working if you change these)

# Base class of the replay memory used by the trainer:
memory_base_cls = cfg_obj.MEM
@@ -158,25 +161,41 @@
# COMPETITION FIXED PARAMETERS
# =====================================================================
# Competitors CANNOT change the following parameters.
# (Note: For models such as RNNs, you don't need to use imgs_buf_len
# and act_buf_len, but your ActorModule implementation needs to work
# with the observations corresponding to their default values. The rule
# about these history lengths is only here for simplicity. You are
# allowed to work around this within your ActorModule implementation,
# e.g., by storing your own histories if you want.)

# rtgym environment class (full TrackMania Gym environment):
env_cls = cfg_obj.ENV_CLS

# Number of consecutive screenshots (this is part of observations):
# Device used for inference on workers (change if you like but keep in mind that the competition evaluation is on CPU)
device_worker = 'cpu'


# =====================================================================
# ENVIRONMENT PARAMETERS
# =====================================================================
# You are allowed to customize these environment parameters.
# Do not change them here, though; customize them in config.json instead.
# Your environment configuration must be part of your submission,
# e.g., the "ENV" entry of your config.json file.

# Dimensions of the TrackMania window:
window_width = cfg.WINDOW_WIDTH # must be between 256 and 958
window_height = cfg.WINDOW_HEIGHT # must be between 128 and 488

# Dimensions of the actual images in observations:
img_width = cfg.IMG_WIDTH
img_height = cfg.IMG_HEIGHT

# Whether you are using grayscale (default) or color images:
# (Note: The tutorial will stop working if you use colors)
img_grayscale = cfg.GRAYSCALE
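
If you customize these entries in `config.json`, a quick sanity check against the bounds stated above could be added right here (sketch only; the competition scripts may or may not validate this for you):

```python
# Window dimension bounds from the comments above:
assert 256 <= window_width <= 958, "WINDOW_WIDTH must be within [256, 958]"
assert 128 <= window_height <= 488, "WINDOW_HEIGHT must be within [128, 488]"
# Observation images are resized from the window capture to (img_width, img_height).
```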

# Number of consecutive screenshots in each observation:
imgs_buf_len = cfg.IMG_HIST_LEN

# Number of actions in the action buffer (this is part of observations):
# (Note: The tutorial will stop working if you change this)
act_buf_len = cfg.ACT_BUF_LEN

# Device used for inference on workers (change if you like but keep in mind that the competition evaluation is on CPU)
device_worker = 'cpu'


# =====================================================================
# MEMORY CLASS
@@ -287,8 +306,9 @@ def __init__(self, q_net):
self.q_net = q_net

# Convolutional layers processing screenshots:
self.h_out, self.w_out = 64, 64 # We will feed grayscale images of 64 x 64 pixels to our model
self.conv1 = nn.Conv2d(4, 64, 8, stride=2)
# The default config.json gives 4 grayscale images of 64 x 64 pixels
self.h_out, self.w_out = img_height, img_width
self.conv1 = nn.Conv2d(imgs_buf_len, 64, 8, stride=2)
self.h_out, self.w_out = conv2d_out_dims(self.conv1, self.h_out, self.w_out)
self.conv2 = nn.Conv2d(64, 64, 4, stride=2)
self.h_out, self.w_out = conv2d_out_dims(self.conv2, self.h_out, self.w_out)
@@ -335,8 +355,8 @@ def forward(self, x):
speed, gear, rpm, images, act1, act2 = x

# Forward pass of our images in the CNN:
# (note that the competition environment outputs histories of 4 images,
# plus, the default observation preprocessor transforms these into 64 x 64 greyscale,
# (note that the competition environment outputs histories of 4 images
# and the default config outputs these as 64 x 64 grayscale;
# we will stack these grayscale images along the channel dimension of our input tensor)
x = F.relu(self.conv1(images))
x = F.relu(self.conv2(x))
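
A small standalone check of the stacking convention described in the comment above: with the default config (4 grayscale images of 64 x 64), the image history plays the role of the channel dimension of the tensor fed to `conv1`.

```python
import torch

dummy_images = torch.zeros(8, 4, 64, 64)     # (batch, imgs_buf_len, img_height, img_width)
conv1 = torch.nn.Conv2d(4, 64, 8, stride=2)  # in_channels == imgs_buf_len
print(conv1(dummy_images).shape)             # torch.Size([8, 64, 29, 29])
```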
