diff --git a/README.md b/README.md index a3a0fbb..4e53833 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # Real-Time Gym -Easily implement your custom [OpenAI Gym](https://github.com/openai/gym#openai-gym) environments for real-time applications. +Easily implement your custom [Gymnasium](https://gymnasium.farama.org) environments for real-time applications. Real-Time Gym (```rtgym```) is typically needed when trying to use Reinforcement Learning algorithms in robotics or real-time video games. Its purpose is to clock your Gym environments in a way that is transparent to the user. @@ -25,7 +25,7 @@ pip install rtgym ```` ## Real-time Gym framework -Real-Time Gym (```rtgym```) is a simple and efficient real-time threaded framework built on top of [OpenAI Gym](https://github.com/openai/gym#openai-gym). +Real-Time Gym (```rtgym```) is a simple and efficient real-time threaded framework built on top of [Gymnasium](https://gymnasium.farama.org). It is coded in python. ```rtgym``` enables real-time implementations of Delayed Markov Decision Processes in real-world applications. 
@@ -45,7 +45,7 @@ from rtgym.envs.real_time_env import DEFAULT_CONFIG_DICT my_config = DEFAULT_CONFIG_DICT my_config['interface'] = MyCustomInterface -env = gym.make("real-time-gym-v0", my_config, disable_env_checker=True) +env = gymnasium.make("real-time-gym-v1", my_config, disable_env_checker=True) obs, info = env.reset() while True: # when this loop is broken, the current time-step will timeout @@ -195,8 +195,8 @@ Note that this will be taken care of automatically, so you don't need to worry a Create a custom class that inherits the RealTimeGymInterface class: ```python from rtgym import RealTimeGymInterface, DummyRCDrone -import gym.spaces as spaces -import gym +import gymnasium.spaces as spaces +import gymnasium import numpy as np @@ -217,7 +217,7 @@ class MyRealTimeInterface(RealTimeGymInterface): def send_control(self, control): pass - def reset(self): + def reset(self, seed=None, options=None): pass def get_obs_rew_terminated_info(self): @@ -235,7 +235,7 @@ def __init__(self): ``` --- -The ```get_action_space``` method returns a ```gym.spaces.Box``` object. +The ```get_action_space``` method returns a ```gymnasium.spaces.Box``` object. This object defines the shape and bounds of the ```control``` argument that will be passed to the ```send_control``` method. In our case, we have two actions: ```vel_x``` and ```vel_y```. @@ -306,7 +306,7 @@ def wait(self): Ok, in this case this is actually equivalent, but you get the idea. You may want your drone to land when this function is called for example. --- -The ```get_observation_space``` method outputs a ```gym.spaces.Tuple``` object. +The ```get_observation_space``` method outputs a ```gymnasium.spaces.Tuple``` object. This object describes the structure of the observations returned from the ```reset``` and ```get_obs_rew_terminated_info``` methods of our interface. 
In our case, the observation will contain ```pos_x``` and ```pos_y```, which are both constrained between ```-1.0``` and ```1.0``` in our simple 2D world. @@ -366,10 +366,10 @@ def get_obs_rew_terminated_info(self): ``` We did not implement the 100 time-steps limit here because this will be done later in the configuration dictionary. -_Note: `obs` is a list although the observation space defined in `get_observation_space` must be a `gym.spaces.Tuple`. +_Note: `obs` is a list although the observation space defined in `get_observation_space` must be a `gymnasium.spaces.Tuple`. This is expected in `rtgym`. However, the inner components of this list must agree with the inner observation spaces of the tuple. -Thus, our inner components are numpy arrays here, because we have defined inner observation spaces as corresponding `gym.spaces.Box` in `get_observation_space`._ +Thus, our inner components are numpy arrays here, because we have defined inner observation spaces as corresponding `gymnasium.spaces.Box` in `get_observation_space`._ --- Finally, the last mandatory method that we need to implement is ```reset```, which will be called at the beginning of each new episode. @@ -389,7 +389,7 @@ def __init__(self): ``` And implement the ```reset``` method as follows: ```python -def reset(self): +def reset(self, seed=None, options=None): if not self.initialized: self.rc_drone = DummyRCDrone() self.initialized = True @@ -403,7 +403,7 @@ def reset(self): ``` We have now fully implemented our custom ```RealTimeGymInterface``` and can use it to instantiate a Gym environment for our real-time application. -To do this, we simply pass our custom interface as a parameter to ```gym.make``` in a configuration dictionary, as illustrated in the next section. +To do this, we simply pass our custom interface as a parameter to ```gymnasium.make``` in a configuration dictionary, as illustrated in the next section. 
--- #### Create a configuration dictionary @@ -464,7 +464,7 @@ We are all done! Instantiating our Gym environment is now as simple as: ```python -env = gym.make("real-time-gym-v0", config=my_config) +env = gymnasium.make("real-time-gym-v1", config=my_config) ``` We can use it as any usual Gym environment: diff --git a/rtgym/__init__.py b/rtgym/__init__.py index 4401cb2..1d4decc 100644 --- a/rtgym/__init__.py +++ b/rtgym/__init__.py @@ -1,11 +1,11 @@ from rtgym.envs.real_time_env import RealTimeGymInterface, DEFAULT_CONFIG_DICT from rtgym.tuto.dummy_drone import DummyRCDrone import logging -from gym.envs.registration import register +from gymnasium.envs.registration import register logger = logging.getLogger(__name__) register( - id='real-time-gym-v0', + id='real-time-gym-v1', entry_point='rtgym.envs:RealTimeEnv', ) diff --git a/rtgym/envs/real_time_env.py b/rtgym/envs/real_time_env.py index 04db267..c498d76 100644 --- a/rtgym/envs/real_time_env.py +++ b/rtgym/envs/real_time_env.py @@ -1,6 +1,6 @@ """Real-Time Gym environment core. -The final environment instantiated by gym.make is RealTimeEnv. +The final environment instantiated by gymnasium.make is RealTimeEnv. The developer defines a custom implementation of the RealTimeGymInterface abstract class. Then, they create a config dictionary by copying DEFAULT_CONFIG_DICT. In this config, they replace the 'interface' entry with their custom RealTimeGymInterface. @@ -10,8 +10,8 @@ """ -from gym import Env -import gym.spaces as spaces +from gymnasium import Env +import gymnasium.spaces as spaces import time from collections import deque from threading import Thread, Lock @@ -46,9 +46,13 @@ def send_control(self, control): raise NotImplementedError - def reset(self): + def reset(self, seed=None, options=None): """Resets the episode. 
+ Args: + seed (optional): Gymnasium seed + options (optional): Gymnasium options + Returns: obs: must be a list (corresponding to the tuple from get_observation_space) info: must be a dictionary @@ -87,7 +91,7 @@ def get_observation_space(self): """Returns the observation space. Returns: - observation_space: gym.spaces.Tuple + observation_space: gymnasium.spaces.Tuple Note: Do NOT put the action buffer here (automated). """ @@ -99,7 +103,7 @@ def get_action_space(self): """Returns the action space. Returns: - action_space: gym.spaces.Box + action_space: gymnasium.spaces.Box """ # return spaces.Box(...) @@ -118,7 +122,7 @@ def get_default_action(self): def render(self): """Renders the environment (optional). - Implement this if you want to use the render() method of the gym environment. + Implement this if you want to use the render() method of the gymnasium environment. """ pass @@ -148,13 +152,13 @@ def render(self): } """Default configuration dictionary of Real-Time Gym. -Copy this dictionary and pass it as argument to gym.make. +Copy this dictionary and pass it as argument to gymnasium.make. Typical usage example: my_config = DEFAULT_CONFIG_DICT my_config["interface"] = MyRealTimeInterface - env = gym.make("real-time-gym-v0", config=my_config) + env = gymnasium.make("real-time-gym-v1", config=my_config) """ @@ -284,7 +288,7 @@ def end_send_control_time(self): class RealTimeEnv(Env): def __init__(self, config: dict=DEFAULT_CONFIG_DICT): - """Final class instantiated by gym.make. + """Final class instantiated by gymnasium.make. Args: config: a custom implementation of DEFAULT_CONFIG_DICT @@ -343,7 +347,7 @@ def __init__(self, config: dict=DEFAULT_CONFIG_DICT): self.default_action = self.interface.get_default_action() self.last_action = self.default_action - # gym variables: + # gymnasium variables: self.seed = None self.options = None @@ -475,8 +479,8 @@ def reset(self, seed=None, options=None): """Resets the environment. 
Args: - seed: placeholder - options: placeholder + seed (optional): seed passed to the reset() method of the rtgym interface + options (optional): options passed to the reset() method of the rtgym interface Returns: obs diff --git a/rtgym/tuto/tuto.py b/rtgym/tuto/tuto.py index cbe152e..cb9c391 100644 --- a/rtgym/tuto/tuto.py +++ b/rtgym/tuto/tuto.py @@ -1,6 +1,6 @@ from rtgym import RealTimeGymInterface, DEFAULT_CONFIG_DICT, DummyRCDrone -import gym.spaces as spaces -import gym +import gymnasium.spaces as spaces +import gymnasium import numpy as np import cv2 @@ -48,7 +48,7 @@ def send_control(self, control): vel_y = control[1] self.rc_drone.send_control(vel_x, vel_y) - def reset(self): + def reset(self, seed=None, options=None): if not self.initialized: self.rc_drone = DummyRCDrone() self.initialized = True @@ -105,7 +105,7 @@ def render(self): my_config["benchmark"] = True my_config["benchmark_polyak"] = 0.2 -env = gym.make("real-time-gym-v0", config=my_config) +env = gymnasium.make("real-time-gym-v1", config=my_config) obs_space = env.observation_space act_space = env.action_space diff --git a/setup.py b/setup.py index 975fb2f..bc28af7 100644 --- a/setup.py +++ b/setup.py @@ -7,16 +7,16 @@ setup(name='rtgym', packages=[package for package in find_packages()], - version='0.7', + version='0.8', license='MIT', - description='Easily implement custom OpenAI Gym environments for real-time applications', + description='Easily implement custom Gymnasium environments for real-time applications', long_description=long_description, long_description_content_type="text/markdown", author='Yann Bouteiller', url='https://github.com/yannbouteiller/rtgym', - download_url='https://github.com/yannbouteiller/rtgym/archive/refs/tags/v0.7.tar.gz', - keywords=['gym', 'real', 'time', 'custom', 'environment', 'reinforcement', 'learning', 'random', 'delays'], - install_requires=['gym>=0.26', 'numpy'], + download_url='https://github.com/yannbouteiller/rtgym/archive/refs/tags/v0.8.tar.gz', + 
keywords=['gymnasium', 'real', 'time', 'custom', 'environment', 'reinforcement', 'learning', 'random', 'delays'], + install_requires=['gymnasium', 'numpy'], classifiers=[ 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', diff --git a/tests/test_all.py b/tests/test_all.py index b0ec5f6..a4e2a9e 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -1,36 +1,42 @@ import unittest from rtgym import RealTimeGymInterface, DEFAULT_CONFIG_DICT import time -import gym +import gymnasium import numpy as np class DummyInterface(RealTimeGymInterface): def __init__(self): - self.control_time = None - self.control = None + self.control_time = 0 + self.control = [0.0] def send_control(self, control): self.control_time = time.time() self.control = control - def reset(self): - return [time.time(), self.control, self.control_time], {} + def reset(self, seed=None, options=None): + obs = [np.array([time.time()], dtype=np.float64), + np.array(self.control, dtype=np.float64), + np.array([self.control_time], dtype=np.float64)] + return obs, {} def get_obs_rew_terminated_info(self): - return [time.time(), self.control, self.control_time], 0.0, False, {} + obs = [np.array([time.time()], dtype=np.float64), + np.array(self.control, dtype=np.float64), + np.array([self.control_time], dtype=np.float64)] + return obs, 0.0, False, {} def get_observation_space(self): - ob = gym.spaces.Box(low=np.array([0.0]), high=np.array([np.inf]), dtype=np.float32) - co = gym.spaces.Box(low=np.array([-1.0]), high=np.array([np.inf]), dtype=np.float32) - ct = gym.spaces.Box(low=np.array([0.0]), high=np.array([np.inf]), dtype=np.float32) - return gym.spaces.Tuple((ob, co, ct)) + ob = gymnasium.spaces.Box(low=-np.inf, high=np.inf, shape=(1, ), dtype=np.float64) + co = gymnasium.spaces.Box(low=-np.inf, high=np.inf, shape=(1, ), dtype=np.float64) + ct = gymnasium.spaces.Box(low=-np.inf, high=np.inf, shape=(1, ), dtype=np.float64) + return gymnasium.spaces.Tuple((ob, co, ct)) def 
get_action_space(self): - return gym.spaces.Box(low=np.array([-1.0]), high=np.array([np.inf]), dtype=np.float32) + return gymnasium.spaces.Box(low=np.array([-np.inf]), high=np.array([np.inf]), dtype=np.float64) def get_default_action(self): - return -1.0 + return np.array([-1.0], dtype=np.float64) config = DEFAULT_CONFIG_DICT @@ -43,15 +49,15 @@ def get_default_action(self): class TestEnv(unittest.TestCase): def test_timing(self): epsilon = 0.01 - env = gym.make("real-time-gym-v0", config=config) + env = gymnasium.make("real-time-gym-v1", config=config) obs1, info = env.reset() elapsed_since_obs1_capture = time.time() - obs1[0] self.assertGreater(epsilon, elapsed_since_obs1_capture) self.assertGreater(elapsed_since_obs1_capture, - epsilon) self.assertEqual(obs1[3], -1) - self.assertIs(obs1[1], None) - self.assertIs(obs1[2], None) - act = 0.0 + self.assertEqual(obs1[1], np.array([0.])) + self.assertEqual(obs1[2], np.array([0.])) + act = np.array([0.0], dtype=np.float64) obs2, _, _, _, _ = env.step(act) self.assertEqual(obs2[3], act) self.assertEqual(obs2[1], -1.0) @@ -61,7 +67,7 @@ def test_timing(self): self.assertGreater(0.08 + epsilon, obs2[0] - obs1[0]) for i in range(3): obs1 = obs2 - act = float(i + 1) + act = np.array([float(i + 1)]) obs2, _, _, _, _ = env.step(act) self.assertEqual(obs2[3], act) self.assertEqual(obs2[1], act - 1.0)