Release 0.3.1
yannbouteiller committed Sep 16, 2022
1 parent 64c9525 commit c61fc1e
Showing 9 changed files with 13 additions and 231 deletions.
readme/competition.md: 4 changes (2 additions & 2 deletions)
@@ -32,11 +32,11 @@ Competitors solve real-time Gym environments featuring snapshots from the real `
 ### Current iteration (Alpha)
 The `tmrl` competition is an open-source research initiative, currently in its very first iteration :hatching_chick:
 
-In this iteration, competitors race on the default `tmrl-test` track (plain road) by solving the default [TrackMania 2020 Gym environment](https://github.com/trackmania-rl/tmrl#gym-environment) (either with raw snapshots or with the readily available LIDAR reduction).
+In this iteration, competitors race on the default `tmrl-test` track (plain road) by solving the `Full` version of the [TrackMania 2020 Gym environment](https://github.com/trackmania-rl/tmrl#gym-environment) (the `LIDAR` version is also accepted).
 
 The setting is the following:
 - The `action space` is the default TrackMania 2020 continuous action space (3 floats between -1.0 and 1.0).
-- The `observation space` is a history of 4 raw snapshots along with the speed and 2 previous actions. Using the readily available default 19-beam LIDAR reduction instead of raw snapshots is allowed.
+- The `observation space` is a history of 4 raw snapshots along with the speed, gear, rpm and 2 previous actions.
 - The `control frequency` is 20 Hz.
 
 The only thing that is required in your entry to the competition is an implementation of the [ActorModule](https://github.com/trackmania-rl/tmrl/blob/master/tmrl/actor.py) python interface.
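As a rough illustration of this requirement, here is a minimal, hedged sketch of an entry. Only the `ActorModule` name and its location come from the text above; the `act(obs, test=False)` signature and the (gas, brake, steer) ordering are assumptions to verify against tmrl/actor.py.

```python
# Hedged sketch of a competition entry; signatures are assumptions, check tmrl/actor.py.
import numpy as np

from tmrl.actor import ActorModule  # the interface the competition requires


class ConstantActor(ActorModule):
    """Toy policy: ignores the observation and always returns the same action."""

    def act(self, obs, test=False):
        # obs: history of 4 snapshots (or LIDAR scans), speed, gear, rpm and the 2 previous actions
        # action: 3 floats in [-1.0, 1.0]; the (gas, brake, steer) ordering is an assumption
        return np.array([1.0, 0.0, 0.0], dtype=np.float32)  # assumed: full gas, no brake, straight
```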
setup.py: 4 changes (2 additions & 2 deletions)
@@ -125,13 +125,13 @@ def url_retrieve(url: str, outfile: Path, overwrite: bool = False):
 
 setup(
     name='tmrl',
-    version='0.3.0',
+    version='0.3.1',
     description='Autonomous racing in Trackmania',
     long_description=README,
     long_description_content_type='text/markdown',
     keywords='reinforcement learning, trackmania, self driving, roborace',
     url='https://github.com/trackmania-rl/tmrl',
-    download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.3.0.tar.gz',
+    download_url='https://github.com/trackmania-rl/tmrl/archive/refs/tags/v0.3.1.tar.gz',
     author='Yann Bouteiller, Edouard Geze',
     author_email='[email protected], [email protected]',
     license='MIT',
tmrl/__main__.py: 3 changes (1 addition & 2 deletions)
@@ -56,10 +56,9 @@ def main(args):
         else:
             trainer.run()
     elif args.record_reward:
-        assert cfg.PRAGMA_TM2020_TMNF, "Not supported for this environment."
         record_reward_dist(path_reward=cfg.REWARD_PATH)
     elif args.check_env:
-        assert cfg.PRAGMA_LIDAR and cfg.PRAGMA_TM2020_TMNF, "Not supported for this environment."
+        assert cfg.PRAGMA_LIDAR, "Not supported for this environment."
         check_env_tm20lidar()
     else:
         raise ArgumentTypeError('Enter a valid argument')
tmrl/config/config_constants.py: 1 change (0 additions & 1 deletion)
@@ -38,7 +38,6 @@
 
 ENV_CONFIG = TMRL_CONFIG["ENV"]
 RTGYM_INTERFACE = ENV_CONFIG["RTGYM_INTERFACE"]
-PRAGMA_TM2020_TMNF = RTGYM_INTERFACE.startswith("TM20")  # True if TM2020, False if TMNF
 PRAGMA_LIDAR = RTGYM_INTERFACE.endswith("LIDAR")  # True if Lidar, False if images
 PRAGMA_PROGRESS = RTGYM_INTERFACE.endswith("LIDARPROGRESS")
 if PRAGMA_PROGRESS:
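For readers of this hunk, a small hedged illustration of the remaining suffix checks; the example `RTGYM_INTERFACE` values are assumptions, and the body of the collapsed `if PRAGMA_PROGRESS:` branch is not shown here.

```python
# Hedged illustration of the suffix logic above, with assumed example interface names.
for RTGYM_INTERFACE in ("TM20FULL", "TM20LIDAR", "TM20LIDARPROGRESS"):
    PRAGMA_LIDAR = RTGYM_INTERFACE.endswith("LIDAR")              # True only for "TM20LIDAR"
    PRAGMA_PROGRESS = RTGYM_INTERFACE.endswith("LIDARPROGRESS")   # True only for "TM20LIDARPROGRESS"
    print(RTGYM_INTERFACE, PRAGMA_LIDAR, PRAGMA_PROGRESS)
```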
tmrl/config/config_objects.py: 14 changes (5 additions & 9 deletions)
@@ -7,7 +7,7 @@
 # local imports
 import tmrl.config.config_constants as cfg
 from tmrl.training_offline import TrainingOffline
-from tmrl.custom.custom_gym_interfaces import TM2020Interface, TM2020InterfaceLidar, TM2020InterfaceLidarProgress, TMInterface, TMInterfaceLidar
+from tmrl.custom.custom_gym_interfaces import TM2020Interface, TM2020InterfaceLidar, TM2020InterfaceLidarProgress
 from tmrl.custom.custom_memories import MemoryTMFull, MemoryTMLidar, MemoryTMLidarProgress, get_local_buffer_sample_lidar, get_local_buffer_sample_lidar_progress, get_local_buffer_sample_tm20_imgs
 from tmrl.custom.custom_preprocessors import obs_preprocessor_tm_act_in_obs, obs_preprocessor_tm_lidar_act_in_obs, obs_preprocessor_tm_lidar_progress_act_in_obs
 from tmrl.envs import GenericGymEnv
@@ -40,15 +40,12 @@
 POLICY = SquashedGaussianVanillaCNNActor
 
 if cfg.PRAGMA_LIDAR:
-    if cfg.PRAGMA_TM2020_TMNF:
-        if cfg.PRAGMA_PROGRESS:
-            INT = partial(TM2020InterfaceLidarProgress, img_hist_len=cfg.IMG_HIST_LEN, gamepad=cfg.PRAGMA_GAMEPAD)
-        else:
-            INT = partial(TM2020InterfaceLidar, img_hist_len=cfg.IMG_HIST_LEN, gamepad=cfg.PRAGMA_GAMEPAD)
+    if cfg.PRAGMA_PROGRESS:
+        INT = partial(TM2020InterfaceLidarProgress, img_hist_len=cfg.IMG_HIST_LEN, gamepad=cfg.PRAGMA_GAMEPAD)
     else:
-        INT = partial(TMInterfaceLidar, img_hist_len=cfg.IMG_HIST_LEN)
+        INT = partial(TM2020InterfaceLidar, img_hist_len=cfg.IMG_HIST_LEN, gamepad=cfg.PRAGMA_GAMEPAD)
 else:
-    INT = partial(TM2020Interface, img_hist_len=cfg.IMG_HIST_LEN, gamepad=cfg.PRAGMA_GAMEPAD) if cfg.PRAGMA_TM2020_TMNF else partial(TMInterface, img_hist_len=cfg.IMG_HIST_LEN)
+    INT = partial(TM2020Interface, img_hist_len=cfg.IMG_HIST_LEN, gamepad=cfg.PRAGMA_GAMEPAD)
 
 CONFIG_DICT = rtgym.DEFAULT_CONFIG_DICT.copy()
 CONFIG_DICT["interface"] = INT
@@ -86,7 +83,6 @@
     else:
         MEM = MemoryTMLidar
 else:
-    assert cfg.PRAGMA_TM2020_TMNF, "TMNF is not officially supported."
     MEM = MemoryTMFull
 
 MEMORY = partial(MEM,
tmrl/custom/custom_gym_interfaces.py: 162 changes (1 addition & 161 deletions)
@@ -22,7 +22,7 @@
 from tmrl.custom.utils.control_keyboard import apply_control, keyres
 from tmrl.custom.utils.control_mouse import mouse_close_finish_pop_up_tm20, mouse_save_replay_tm20
 from tmrl.custom.utils.window import WindowInterface
-from tmrl.custom.utils.tools import Lidar, TM2020OpenPlanetClient, get_speed, load_digits
+from tmrl.custom.utils.tools import Lidar, TM2020OpenPlanetClient
 
 # Globals ==============================================================================================================
 
@@ -48,7 +48,6 @@ def __init__(self,
         Args:
         """
         self.last_time = None
-        self.digits = None
         self.img_hist_len = img_hist_len
         self.img_hist = None
         self.img = None
@@ -76,7 +75,6 @@ def initialize_common(self):
         self.window_interface = WindowInterface("Trackmania")
         self.window_interface.move_and_resize()
         self.last_time = time.time()
-        self.digits = load_digits()
         self.img_hist = deque(maxlen=self.img_hist_len)
         self.img = None
         self.reward_function = RewardFunction(reward_data_path=cfg.REWARD_PATH,
@@ -451,163 +449,5 @@ def get_observation_space(self):
         return spaces.Tuple((speed, progress, imgs))
 
 
-# UNSUPPORTED: =========================================================================================================
-
-# Interface for Trackmania Nations Forever: ============================================================================
-
-
-class TMInterface(RealTimeGymInterface):
-    """
-    This is the API needed for the algorithm to control Trackmania Nations Forever
-    """
-    def __init__(self, img_hist_len=4):
-        """
-        Args:
-        """
-        self.window_interface = WindowInterface("Trackmania")
-        self.last_time = time.time()
-        self.digits = load_digits()
-        self.img_hist_len = img_hist_len
-        self.img_hist = deque(maxlen=self.img_hist_len)
-
-    def send_control(self, control):
-        """
-        Non-blocking function
-        Applies the action given by the RL policy
-        If control is None, does nothing
-        Args:
-            control: np.array: [forward,backward,right,left]
-        """
-        if control is not None:
-            actions = []
-            if control[0] > 0:
-                actions.append('f')
-            if control[1] > 0:
-                actions.append('b')
-            if control[2] > 0.5:
-                actions.append('r')
-            elif control[2] < -0.5:
-                actions.append('l')
-            apply_control(actions)
-
-    def grab_img_and_speed(self):
-        img = self.window_interface.screenshot()[:, :, :3]
-        speed = np.array([
-            get_speed(img, self.digits),
-        ], dtype='float32')
-        img = img[100:-150, :]
-        img = cv2.resize(img, (190, 50))
-        # img = np.moveaxis(img, -1, 0)
-        return img, speed
-
-    def reset(self):
-        """
-        obs must be a list of numpy arrays
-        """
-        self.send_control(self.get_default_action())
-        keyres()
-        # time.sleep(0.05) # must be long enough for image to be refreshed
-        img, speed = self.grab_img_and_speed()
-        for _ in range(self.img_hist_len):
-            self.img_hist.append(img)
-        imgs = np.array(list(self.img_hist), dtype='float32')
-        obs = [speed, imgs]
-        return obs, {}
-
-    def wait(self):
-        """
-        Non-blocking function
-        The agent stays 'paused', waiting in position
-        """
-        self.send_control(self.get_default_action())
-
-    def get_obs_rew_terminated_info(self):
-        """
-        returns the observation, the reward, and a terminated signal for end of episode
-        obs must be a list of numpy arrays
-        """
-        img, speed = self.grab_img_and_speed()
-        rew = speed[0]
-        self.img_hist.append(img)
-        imgs = np.array(list(self.img_hist), dtype='float32')
-        obs = [speed, imgs]
-        terminated = False  # TODO: True if race complete
-        # logging.debug(f" len(obs):{len(obs)}, obs[0]:{obs[0]}, obs[1].shape:{obs[1].shape}")
-        return obs, rew, terminated, {}
-
-    def get_observation_space(self):
-        """
-        must be a Tuple
-        """
-        speed = spaces.Box(low=0.0, high=1000.0, shape=(1, ))
-        imgs = spaces.Box(low=0.0, high=255.0, shape=(self.img_hist_len, 50, 190, 3))
-        return spaces.Tuple((speed, imgs))
-
-    def get_action_space(self):
-        """
-        must be a Box
-        """
-        return spaces.Box(low=-1.0, high=1.0, shape=(3, ))  # 1=f; 1=b; -1=l,+1=r
-
-    def get_default_action(self):
-        """
-        initial action at episode start
-        """
-        return np.array([0.0, 0.0, 0.0], dtype='float32')
-
-
-class TMInterfaceLidar(TMInterface):
-    def __init__(self, img_hist_len=4):
-        super().__init__(img_hist_len)
-        self.lidar = Lidar(self.window_interface.screenshot())
-
-    def grab_lidar_and_speed(self):
-        img = self.window_interface.screenshot()[:, :, :3]
-        speed = np.array([
-            get_speed(img, self.digits),
-        ], dtype='float32')
-        lidar = self.lidar.lidar_20(img=img, show=False)
-        return lidar, speed
-
-    def reset(self):
-        """
-        obs must be a list of numpy arrays
-        """
-        self.send_control(self.get_default_action())
-        keyres()
-        # time.sleep(0.05) # must be long enough for image to be refreshed
-        img, speed = self.grab_lidar_and_speed()
-        for _ in range(self.img_hist_len):
-            self.img_hist.append(img)
-        imgs = np.array(list(self.img_hist), dtype='float32')
-        obs = [speed, imgs]
-        return obs, {}
-
-    def get_obs_rew_terminated_info(self):
-        """
-        returns the observation, the reward, and a terminated signal for end of episode
-        obs must be a list of numpy arrays
-        """
-        img, speed = self.grab_lidar_and_speed()
-        rew = speed[0]
-        self.img_hist.append(img)
-        imgs = np.array(list(self.img_hist), dtype='float32')
-        obs = [speed, imgs]
-        terminated = False  # TODO: True if race complete
-        # logging.debug(f" len(obs):{len(obs)}, obs[0]:{obs[0]}, obs[1].shape:{obs[1].shape}")
-        return obs, rew, terminated, {}
-
-    def get_observation_space(self):
-        """
-        must be a Tuple
-        """
-        speed = spaces.Box(low=0.0, high=1000.0, shape=(1, ))
-        imgs = spaces.Box(low=0.0, high=np.inf, shape=(
-            self.img_hist_len,
-            19,
-        ))  # lidars
-        return spaces.Tuple((speed, imgs))
-
-
 if __name__ == "__main__":
     pass
tmrl/custom/utils/tools.py: 51 changes (0 additions & 51 deletions)
@@ -67,57 +67,6 @@ def retrieve_data(self, sleep_if_empty=0.1):
         return data
 
 
-def load_digits():
-    p = Path(os.path.dirname(os.path.realpath(__file__))) / 'digits'
-    zero = cv2.imread(str(p / '0.png'), 0)
-    One = cv2.imread(str(p / '1.png'), 0)
-    Two = cv2.imread(str(p / '2.png'), 0)
-    Three = cv2.imread(str(p / '3.png'), 0)
-    four = cv2.imread(str(p / '4.png'), 0)
-    five = cv2.imread(str(p / '5.png'), 0)
-    six = cv2.imread(str(p / '6.png'), 0)
-    seven = cv2.imread(str(p / '7.png'), 0)
-    eight = cv2.imread(str(p / '8.png'), 0)
-    nine = cv2.imread(str(p / '9.png'), 0)
-    digits = np.array([zero, One, Two, Three, four, five, six, seven, eight, nine])
-    return digits
-
-
-def get_speed(img, digits):
-    img1 = np.array(img[464:, 887:908])
-    img2 = np.array(img[464:, 909:930])
-    img3 = np.array(img[464:, 930:951])
-
-    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
-    img1[img1 > 250] = 255
-    img1[img1 <= 250] = 0
-    img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
-    img2[img2 > 250] = 255
-    img2[img2 <= 250] = 0
-    img3 = cv2.cvtColor(img3, cv2.COLOR_BGR2GRAY)
-    img3[img3 > 250] = 255
-    img3[img3 <= 250] = 0
-
-    best1, best2, best3 = 100000, 100000, 100000
-    for idx, num in enumerate(digits):
-        if np.sum(np.bitwise_xor(img1, num)) < best1:
-            best1 = np.sum(np.bitwise_xor(img1, num))
-            num1 = idx
-        if np.sum(np.bitwise_xor(img2, num)) < best2:
-            best2 = np.sum(np.bitwise_xor(img2, num))
-            num2 = idx
-        if np.sum(np.bitwise_xor(img3, num)) < best3:
-            best3 = np.sum(np.bitwise_xor(img3, num))
-            num3 = idx
-    if np.max(img1) == 0:
-        best1, num1 = 0, 0
-    if np.max(img2) == 0:
-        best2, num2 = 0, 0
-    if np.max(img3) == 0:
-        best3, num3 = 0, 0
-    return float(100 * num1 + 10 * num2 + num3)
-
-
 def armin(tab):
     nz = np.nonzero(tab)[0]
     if len(nz) != 0:
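The deleted `load_digits` / `get_speed` helpers implemented screenshot-based digit matching for the speedometer; telemetry is instead read through the OpenPlanet client kept in this file. A hedged usage sketch follows; the layout of the returned sample is an assumption to check against `TM2020OpenPlanetClient`.

```python
# Hedged sketch: reading telemetry from the OpenPlanet plugin instead of screenshot OCR.
from tmrl.custom.utils.tools import TM2020OpenPlanetClient

client = TM2020OpenPlanetClient()
data = client.retrieve_data()  # waits until the OpenPlanet plugin has sent a sample
speed = data[0]                # assumed: the first field of the sample is the car speed
```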
tmrl/tools/benchmark_environment.py: 3 changes (1 addition & 2 deletions)
@@ -8,8 +8,7 @@
 from rtgym.envs.real_time_env import DEFAULT_CONFIG_DICT
 
 # local imports
-from tmrl.custom.custom_gym_interfaces import (TM2020Interface, TM2020InterfaceLidar,
-                                               TMInterface, TMInterfaceLidar)
+from tmrl.custom.custom_gym_interfaces import (TM2020Interface, TM2020InterfaceLidar)
 import logging
 
 NB_STEPS = 1000
tmrl/tools/check_environment.py: 2 changes (1 addition & 1 deletion)
@@ -3,7 +3,7 @@
 from rtgym.envs.real_time_env import DEFAULT_CONFIG_DICT
 
 # local imports
-from tmrl.custom.custom_gym_interfaces import TM2020Interface, TM2020InterfaceLidar, TMInterface, TMInterfaceLidar
+from tmrl.custom.custom_gym_interfaces import TM2020Interface, TM2020InterfaceLidar
 from tmrl.custom.utils.window import WindowInterface
 from tmrl.custom.utils.tools import Lidar
 import logging
