
Add new Fetch-v3 and HandReacher-v2 environments (Fix reproducibility issues) #208

Merged · 14 commits · May 29, 2024
Changes from 7 commits
2 changes: 1 addition & 1 deletion README.md
@@ -54,7 +54,7 @@ goal, e.g. state derived from the simulation.
```python
import gymnasium as gym

env = gym.make("FetchReach-v2")
env = gym.make("FetchReach-v3")
env.reset()
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())

```
2 changes: 1 addition & 1 deletion docs/content/multi-goal_api.md
@@ -25,7 +25,7 @@ import gymnasium_robotics

gym.register_envs(gymnasium_robotics)

env = gym.make("FetchReach-v2")
env = gym.make("FetchReach-v3")
env.reset()
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())

8 changes: 4 additions & 4 deletions docs/envs/fetch/index.md
@@ -7,10 +7,10 @@ lastpage:

The Fetch environments are based on the 7-DoF [Fetch Mobile Manipulator](https://fetchrobotics.com/) arm, with a two-fingered parallel gripper attached to it. The main environment tasks are the following:

-* `FetchReach-v2`: Fetch has to move its end-effector to the desired goal position.
-* `FetchPush-v2`: Fetch has to move a box by pushing it until it reaches a desired goal position.
-* `FetchSlide-v2`: Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal.
-* `FetchPickAndPlace-v2`: Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table.
+* `FetchReach-v3`: Fetch has to move its end-effector to the desired goal position.
+* `FetchPush-v3`: Fetch has to move a box by pushing it until it reaches a desired goal position.
+* `FetchSlide-v3`: Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal.
+* `FetchPickAndPlace-v3`: Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table.

```{raw} html
:file: list.html
2 changes: 1 addition & 1 deletion docs/envs/shadow_dexterous_hand/index.md
@@ -7,7 +7,7 @@ lastpage:

These environments are based on the [Shadow Dexterous Hand](https://www.shadowrobot.com/), which is an anthropomorphic robotic hand with 24 degrees of freedom. Of those 24 joints, 20 can be controlled independently, whereas the remaining ones are coupled joints.

-* `HandReach-v1`: ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm.
+* `HandReach-v2`: ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm.
* `HandManipulateBlock-v1`: ShadowHand has to manipulate a block until it achieves a desired goal position and rotation.
* `HandManipulateEgg-v1`: ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation.
* `HandManipulatePen-v1`: ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation.
2 changes: 1 addition & 1 deletion docs/index.md
@@ -56,7 +56,7 @@ import gymnasium_robotics

gym.register_envs(gymnasium_robotics)

env = gym.make("FetchPickAndPlace-v2", render_mode="human")
env = gym.make("FetchPickAndPlace-v3", render_mode="human")
observation, info = env.reset(seed=42)
for _ in range(1000):
action = policy(observation) # User-defined policy function
10 changes: 5 additions & 5 deletions gymnasium_robotics/__init__.py
@@ -30,7 +30,7 @@ def _merge(a, b):
        )

        register(
-            id=f"FetchSlide{suffix}-v2",
+            id=f"FetchSlide{suffix}-v3",
            entry_point="gymnasium_robotics.envs.fetch.slide:MujocoFetchSlideEnv",
            kwargs=kwargs,
            max_episode_steps=50,
@@ -44,7 +44,7 @@
        )

        register(
-            id=f"FetchPickAndPlace{suffix}-v2",
+            id=f"FetchPickAndPlace{suffix}-v3",
            entry_point="gymnasium_robotics.envs.fetch.pick_and_place:MujocoFetchPickAndPlaceEnv",
            kwargs=kwargs,
            max_episode_steps=50,
@@ -58,7 +58,7 @@
        )

        register(
-            id=f"FetchReach{suffix}-v2",
+            id=f"FetchReach{suffix}-v3",
            entry_point="gymnasium_robotics.envs.fetch.reach:MujocoFetchReachEnv",
            kwargs=kwargs,
            max_episode_steps=50,
@@ -72,7 +72,7 @@
        )

        register(
-            id=f"FetchPush{suffix}-v2",
+            id=f"FetchPush{suffix}-v3",
            entry_point="gymnasium_robotics.envs.fetch.push:MujocoFetchPushEnv",
            kwargs=kwargs,
            max_episode_steps=50,
@@ -87,7 +87,7 @@
        )

        register(
-            id=f"HandReach{suffix}-v1",
+            id=f"HandReach{suffix}-v2",
            entry_point="gymnasium_robotics.envs.shadow_dexterous_hand.reach:MujocoHandReachEnv",
            kwargs=kwargs,
            max_episode_steps=50,
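For context, these `register` calls sit inside a loop over reward types in which `suffix` is empty for sparse rewards and `"Dense"` for dense rewards; the following is a minimal sketch, assuming that loop structure, of how the bumped ids resolve for downstream code:

```python
# Sketch: after registration, both reward variants of each task resolve
# through gym.make. The "Dense" ids are assumed to come from the same loop
# with reward_type="dense" merged into kwargs.
import gymnasium as gym
import gymnasium_robotics

gym.register_envs(gymnasium_robotics)

sparse_env = gym.make("FetchReach-v3")      # sparse reward (default)
dense_env = gym.make("FetchReachDense-v3")  # dense reward variant
```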
7 changes: 2 additions & 5 deletions gymnasium_robotics/envs/fetch/fetch_env.py
@@ -373,11 +373,8 @@ def _render_callback(self):
        self._mujoco.mj_forward(self.model, self.data)

    def _reset_sim(self):
-        self.data.time = self.initial_time
-        self.data.qpos[:] = np.copy(self.initial_qpos)
-        self.data.qvel[:] = np.copy(self.initial_qvel)
-        if self.model.na != 0:
-            self.data.act[:] = None
+        # Reset buffers for joint states, actuators, warm-start, control buffers etc.
+        self._mujoco.mj_resetData(self.model, self.data)

        # Randomize start position of object.
        if self.has_object:
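Why this change matters: restoring only `qpos`, `qvel`, and `act` leaves other `MjData` buffers (warm-start accelerations, controls, solver state, simulation time) carrying values from the previous episode, which is the source of the reproducibility issue this PR fixes. A minimal, self-contained sketch of the difference using the public `mujoco` bindings:

```python
import mujoco
import numpy as np

# Minimal model: one free-falling body.
model = mujoco.MjModel.from_xml_string(
    "<mujoco><worldbody><body><joint type='free'/>"
    "<geom size='0.1'/></body></worldbody></mujoco>"
)
data = mujoco.MjData(model)

mujoco.mj_step(model, data)       # leaves residue in data.time, data.qacc_warmstart, ...
mujoco.mj_resetData(model, data)  # restores qpos0 and zeroes time, qvel, act, ctrl, warm-start

assert data.time == 0.0
assert np.all(data.qacc_warmstart == 0.0)
```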
7 changes: 4 additions & 3 deletions gymnasium_robotics/envs/fetch/pick_and_place.py
@@ -88,15 +88,15 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle):
- *sparse*: the returned reward can have two values: `-1` if the block hasn't reached its final target position, and `0` if the block is in the final target position (the block is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m).
- *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.

-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPickAndPlace-v2`. However, for `dense` reward the id must be modified to `FetchPickAndPlaceDense-v2` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPickAndPlace-v3`. However, for `dense` reward the id must be modified to `FetchPickAndPlaceDense-v3` and initialized as follows:

```python
import gymnasium as gym
import gymnasium_robotics

gym.register_envs(gymnasium_robotics)

-env = gym.make('FetchPickAndPlaceDense-v2')
+env = gym.make('FetchPickAndPlaceDense-v3')
```
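Concretely, the sparse/dense distinction described above amounts to the following sketch; the standalone function and its `distance_threshold` parameter are illustrative stand-ins for the environments' `compute_reward` logic:

```python
import numpy as np

def compute_reward(achieved_goal, desired_goal, reward_type="sparse", distance_threshold=0.05):
    # Euclidean distance between the achieved and desired goal positions.
    d = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
    if reward_type == "sparse":
        # -1.0 until the goal is within the threshold, then 0.0.
        return -(d > distance_threshold).astype(np.float32)
    # Dense: negative distance to the goal.
    return -d
```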

## Starting State
@@ -125,11 +125,12 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle):

gym.register_envs(gymnasium_robotics)

-env = gym.make('FetchPickAndPlace-v2', max_episode_steps=100)
+env = gym.make('FetchPickAndPlace-v3', max_episode_steps=100)
```

## Version History

+* v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
* v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
* v1: the environment depends on `mujoco_py` which is no longer maintained.
"""
7 changes: 4 additions & 3 deletions gymnasium_robotics/envs/fetch/push.py
@@ -116,15 +116,15 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle):
- *sparse*: the returned reward can have two values: `-1` if the block hasn't reached its final target position, and `0` if the block is in the final target position (the block is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m).
- *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.

-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPush-v2`. However, for `dense` reward the id must be modified to `FetchPush-v2` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPush-v3`. However, for `dense` reward the id must be modified to `FetchPushDense-v3` and initialized as follows:

```python
import gymnasium as gym
import gymnasium_robotics

gym.register_envs(gymnasium_robotics)

-env = gym.make('FetchPushDense-v2')
+env = gym.make('FetchPushDense-v3')
```

## Starting State
@@ -153,11 +153,12 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle):

gym.register_envs(gymnasium_robotics)

-env = gym.make('FetchPush-v2', max_episode_steps=100)
+env = gym.make('FetchPush-v3', max_episode_steps=100)
```

## Version History

+* v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
* v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
* v1: the environment depends on `mujoco_py` which is no longer maintained.
"""
9 changes: 5 additions & 4 deletions gymnasium_robotics/envs/fetch/reach.py
@@ -77,16 +77,16 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle):
the end effector and the goal is lower than 0.05 m).
- *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.

-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchReach-v2`. However, for `dense`
-reward the id must be modified to `FetchReachDense-v2` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchReach-v3`. However, for `dense`
+reward the id must be modified to `FetchReachDense-v3` and initialized as follows:

```python
import gymnasium as gym
import gymnasium_robotics

gym.register_envs(gymnasium_robotics)

-env = gym.make('FetchReachDense-v2')
+env = gym.make('FetchReachDense-v3')
```

## Starting State
@@ -111,11 +111,12 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle):

gym.register_envs(gymnasium_robotics)

-env = gym.make('FetchReach-v2', max_episode_steps=100)
+env = gym.make('FetchReach-v3', max_episode_steps=100)
```

## Version History

+* v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
* v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
* v1: the environment depends on `mujoco_py` which is no longer maintained.
"""
7 changes: 4 additions & 3 deletions gymnasium_robotics/envs/fetch/slide.py
@@ -116,15 +116,15 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle):
- *sparse*: the returned reward can have two values: `-1` if the puck hasn't reached its final target position, and `0` if the puck is in the final target position (the puck is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m).
- *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.

-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchSlide-v2`. However, for `dense` reward the id must be modified to `FetchSlideDense-v2` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchSlide-v3`. However, for `dense` reward the id must be modified to `FetchSlideDense-v3` and initialized as follows:

```python
import gymnasium as gym
import gymnasium_robotics

gym.register_envs(gymnasium_robotics)

-env = gym.make('FetchSlideDense-v2')
+env = gym.make('FetchSlideDense-v3')
```

## Starting State
@@ -152,11 +152,12 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle):

gym.register_envs(gymnasium_robotics)

-env = gym.make('FetchSlide-v2', max_episode_steps=100)
+env = gym.make('FetchSlide-v3', max_episode_steps=100)
```

## Version History

+* v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
* v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
* v1: the environment depends on `mujoco_py` which is no longer maintained.
"""
9 changes: 2 additions & 7 deletions gymnasium_robotics/envs/robot_env.py
@@ -299,13 +299,8 @@ def _initialize_simulation(self):
        self.initial_qvel = np.copy(self.data.qvel)

    def _reset_sim(self):
-        self.data.time = self.initial_time
-        self.data.qpos[:] = np.copy(self.initial_qpos)
-        self.data.qvel[:] = np.copy(self.initial_qvel)
-        if self.model.na != 0:
-            self.data.act[:] = None
-
-        mujoco.mj_forward(self.model, self.data)
+        # Reset buffers for joint states, warm-start, control buffers etc.
+        mujoco.mj_resetData(self.model, self.data)
        return super()._reset_sim()

    def render(self):
9 changes: 5 additions & 4 deletions gymnasium_robotics/envs/shadow_dexterous_hand/reach.py
@@ -306,13 +306,13 @@ class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)):
the achieved goal vector and the desired goal vector is lower than 0.01).
- *dense*: the returned reward is the negative 2-norm distance between the achieved goal vector and the desired goal vector.

-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `HandReach-v1`.
-However, for `dense` reward the id must be modified to `HandReachDense-v1` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `HandReach-v2`.
+However, for `dense` reward the id must be modified to `HandReachDense-v2` and initialized as follows:

```
import gymnasium as gym

-env = gym.make('HandReachDense-v1')
+env = gym.make('HandReachDense-v2')
```

## Starting State
@@ -383,11 +383,12 @@ class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)):
```
import gymnasium as gym

-env = gym.make('HandReach-v1', max_episode_steps=100)
+env = gym.make('HandReach-v2', max_episode_steps=100)
```

## Version History

+* v2: Fix slight differences between rollouts of the same environment when reset with the same seed.
* v1: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
* v0: the environment depends on `mujoco_py` which is no longer maintained.

22 changes: 22 additions & 0 deletions tests/test_envs.py
@@ -3,6 +3,7 @@

import gymnasium as gym
import pytest
+from gymnasium.envs.mujoco.utils import check_mujoco_reset_state
from gymnasium.envs.registration import EnvSpec
from gymnasium.error import Error
from gymnasium.utils.env_checker import check_env, data_equivalence
@@ -106,6 +107,27 @@ def test_env_determinism_rollout(env_spec: EnvSpec):
env_2.close()


+@pytest.mark.parametrize(
+    "env_spec", non_mujoco_py_env_specs, ids=[env.id for env in non_mujoco_py_env_specs]
+)
+def test_mujoco_reset_state_seeding(env_spec: EnvSpec):
+    """Check if the reset method of mujoco environments is deterministic for the same seed.
+
+    Note:
+        We exclude mujoco_py environments because they are deprecated and their implementation is
+        frozen at this point. They are affected by a subtle bug in their reset method producing
+        slightly different results for the same seed on subsequent resets of the same environment.
+        This will not be fixed and tests are expected to fail.
+    """
+    # Don't check rollout equality if it's a nondeterministic environment.
+    if env_spec.nondeterministic is True:
+        return
+
+    env = env_spec.make(disable_env_checker=True)
+
+    check_mujoco_reset_state(env)

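The property exercised by `check_mujoco_reset_state` can be sketched at the environment level as follows (an illustrative check, assuming the utility compares MuJoCo state buffers across two resets with the same seed, and that the unwrapped env exposes its `MjData` as `.data`):

```python
import gymnasium as gym
import numpy as np
import gymnasium_robotics

gym.register_envs(gymnasium_robotics)

env = gym.make("FetchReach-v3")
env.reset(seed=42)
qpos_a = env.unwrapped.data.qpos.copy()
qvel_a = env.unwrapped.data.qvel.copy()

env.reset(seed=42)  # same seed, same env instance
assert np.array_equal(qpos_a, env.unwrapped.data.qpos)
assert np.array_equal(qvel_a, env.unwrapped.data.qvel)
```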

@pytest.mark.parametrize(
"spec", non_mujoco_py_env_specs, ids=[spec.id for spec in non_mujoco_py_env_specs]
)