From 54043fcd195ed4a7ddcaed33d637d5c3c2bb54b6 Mon Sep 17 00:00:00 2001
From: Martin Schuck <martin.schuck@tum.de>
Date: Sat, 10 Feb 2024 10:17:42 +0100
Subject: [PATCH 01/17] Fix reproducibility of FetchPickAndPlace-v2

---
 gymnasium_robotics/envs/fetch/fetch_env.py | 4 ++++
 gymnasium_robotics/envs/robot_env.py       | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/gymnasium_robotics/envs/fetch/fetch_env.py b/gymnasium_robotics/envs/fetch/fetch_env.py
index 78c4841f..add8f270 100644
--- a/gymnasium_robotics/envs/fetch/fetch_env.py
+++ b/gymnasium_robotics/envs/fetch/fetch_env.py
@@ -376,6 +376,10 @@ def _reset_sim(self):
         self.data.time = self.initial_time
         self.data.qpos[:] = np.copy(self.initial_qpos)
         self.data.qvel[:] = np.copy(self.initial_qvel)
+        self.data.qacc_warmstart[:] = np.copy(self.initial_qacc_warmstart)
+        self.data.ctrl[:] = np.copy(self.initial_ctrl)
+        self.data.mocap_pos[:] = np.copy(self.initial_mocap_pos)
+        self.data.mocap_quat[:] = np.copy(self.initial_mocap_quat)
         if self.model.na != 0:
             self.data.act[:] = None
 
diff --git a/gymnasium_robotics/envs/robot_env.py b/gymnasium_robotics/envs/robot_env.py
index 1a51a47d..09a10e50 100644
--- a/gymnasium_robotics/envs/robot_env.py
+++ b/gymnasium_robotics/envs/robot_env.py
@@ -297,6 +297,10 @@ def _initialize_simulation(self):
         self.initial_time = self.data.time
         self.initial_qpos = np.copy(self.data.qpos)
         self.initial_qvel = np.copy(self.data.qvel)
+        self.initial_ctrl = np.copy(self.data.ctrl)
+        self.initial_qacc_warmstart = np.copy(self.data.qacc_warmstart)
+        self.initial_mocap_pos = np.copy(self.data.mocap_pos)
+        self.initial_mocap_quat = np.copy(self.data.mocap_quat)
 
     def _reset_sim(self):
         self.data.time = self.initial_time

From e89b53ecbfec29ee2145f600e9ca06de4fcbb5b0 Mon Sep 17 00:00:00 2001
From: Martin Schuck <martin.schuck@tum.de>
Date: Sat, 10 Feb 2024 11:02:57 +0100
Subject: [PATCH 02/17] Add tests for same environment rollout determinism

---
 tests/test_envs.py | 93 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)

diff --git a/tests/test_envs.py b/tests/test_envs.py
index 6fc05928..9c006581 100644
--- a/tests/test_envs.py
+++ b/tests/test_envs.py
@@ -106,6 +106,99 @@ def test_env_determinism_rollout(env_spec: EnvSpec):
     env_2.close()
 
 
+@pytest.mark.parametrize(
+    "env_spec", all_testing_env_specs, ids=[env.id for env in all_testing_env_specs]
+)
+def test_same_env_determinism_rollout(env_spec: EnvSpec):
+    """Run two rollouts with a single environment and assert equality.
+
+    This test runs two rollouts of NUM_STEPS steps with one environment
+    reset with the same seed and asserts that:
+
+    - observations after the reset are the same
+    - same actions are sampled by the environment
+    - observations are contained in the observation space
+    - obs, rew, terminated, truncated and info are equals between the two rollouts
+    """
+    # Don't check rollout equality if it's a nondeterministic environment.
+    if env_spec.nondeterministic is True:
+        return
+
+    env = env_spec.make(disable_env_checker=True)
+
+    rollout_1 = {
+        "observations": [],
+        "actions": [],
+        "rewards": [],
+        "terminated": [],
+        "truncated": [],
+        "infos": [],
+    }
+    rollout_2 = {
+        "observations": [],
+        "actions": [],
+        "rewards": [],
+        "terminated": [],
+        "truncated": [],
+        "infos": [],
+    }
+
+    # Run two rollouts of the same environment instance
+    for rollout in [rollout_1, rollout_2]:
+        # Reset the environment with the same seed for both rollouts
+        obs, info = env.reset(seed=SEED)
+        env.action_space.seed(SEED)
+        rollout["observations"].append(obs)
+        rollout["infos"].append(info)
+
+        for time_step in range(NUM_STEPS):
+            action = env.action_space.sample()
+
+            obs, rew, terminated, truncated, info = env.step(action)
+            rollout["observations"].append(obs)
+            rollout["actions"].append(action)
+            rollout["rewards"].append(rew)
+            rollout["terminated"].append(terminated)
+            rollout["truncated"].append(truncated)
+            rollout["infos"].append(info)
+            if terminated or truncated:
+                env.reset(seed=SEED)
+
+    for time_step, (obs_1, obs_2) in enumerate(
+        zip(rollout_1["observations"], rollout_2["observations"])
+    ):
+        # -1 because of the initial observation stored on reset
+        time_step = "initial" if time_step == 0 else time_step - 1
+        assert_equals(obs_1, obs_2, f"[{time_step}] ")
+        assert env.observation_space.contains(
+            obs_1
+        )  # obs_2 verified by previous assertion
+    for time_step, (rew_1, rew_2) in enumerate(
+        zip(rollout_1["rewards"], rollout_2["rewards"])
+    ):
+        assert rew_1 == rew_2, f"[{time_step}] reward 1={rew_1}, reward 2={rew_2}"
+    for time_step, (terminated_1, terminated_2) in enumerate(
+        zip(rollout_1["terminated"], rollout_2["terminated"])
+    ):
+        assert (
+            terminated_1 == terminated_2
+        ), f"[{time_step}] terminated 1={terminated_1}, terminated 2={terminated_2}"
+    for time_step, (truncated_1, truncated_2) in enumerate(
+        zip(rollout_1["truncated"], rollout_2["truncated"])
+    ):
+        assert (
+            truncated_1 == truncated_2
+        ), f"[{time_step}] truncated 1={truncated_1}, truncated 2={truncated_2}"
+    for time_step, (info_1, info_2) in enumerate(
+        zip(rollout_1["infos"], rollout_2["infos"])
+    ):
+        # -1 because of the initial info stored on reset
+        time_step = "initial" if time_step == 0 else time_step - 1
+        assert_equals(info_1, info_2, f"[{time_step}] ")
+
+    env.close()
+
+
 @pytest.mark.parametrize(
     "spec", non_mujoco_py_env_specs, ids=[spec.id for spec in non_mujoco_py_env_specs]
 )

From 5e14ea14d1ac46c60aaed7a00edc4f997510e705 Mon Sep 17 00:00:00 2001
From: Martin Schuck <martin.schuck@tum.de>
Date: Sun, 11 Feb 2024 10:46:53 +0100
Subject: [PATCH 03/17] Fix reproducibility for all robotics environments - Fix
 remaining mujoco envs - Fix mujoco_py envs - Simplify reset

---
 gymnasium_robotics/envs/fetch/fetch_env.py | 7 +++----
 gymnasium_robotics/envs/robot_env.py       | 7 +++----
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/gymnasium_robotics/envs/fetch/fetch_env.py b/gymnasium_robotics/envs/fetch/fetch_env.py
index add8f270..53b307f2 100644
--- a/gymnasium_robotics/envs/fetch/fetch_env.py
+++ b/gymnasium_robotics/envs/fetch/fetch_env.py
@@ -249,6 +249,7 @@ def _viewer_setup(self):
                 setattr(self.viewer.cam, key, value)
 
     def _reset_sim(self):
+        self.sim.reset()  # Reset warm-start buffers, control buffers etc.
         self.sim.set_state(self.initial_state)
 
         # Randomize start position of object.
@@ -376,10 +377,8 @@ def _reset_sim(self):
         self.data.time = self.initial_time
         self.data.qpos[:] = np.copy(self.initial_qpos)
         self.data.qvel[:] = np.copy(self.initial_qvel)
-        self.data.qacc_warmstart[:] = np.copy(self.initial_qacc_warmstart)
-        self.data.ctrl[:] = np.copy(self.initial_ctrl)
-        self.data.mocap_pos[:] = np.copy(self.initial_mocap_pos)
-        self.data.mocap_quat[:] = np.copy(self.initial_mocap_quat)
+        # Reset buffers for warm-start, control buffers etc.
+        self._mujoco.mj_resetData(self.model, self.data)
         if self.model.na != 0:
             self.data.act[:] = None
 
diff --git a/gymnasium_robotics/envs/robot_env.py b/gymnasium_robotics/envs/robot_env.py
index 09a10e50..674eea03 100644
--- a/gymnasium_robotics/envs/robot_env.py
+++ b/gymnasium_robotics/envs/robot_env.py
@@ -297,12 +297,10 @@ def _initialize_simulation(self):
         self.initial_time = self.data.time
         self.initial_qpos = np.copy(self.data.qpos)
         self.initial_qvel = np.copy(self.data.qvel)
-        self.initial_ctrl = np.copy(self.data.ctrl)
-        self.initial_qacc_warmstart = np.copy(self.data.qacc_warmstart)
-        self.initial_mocap_pos = np.copy(self.data.mocap_pos)
-        self.initial_mocap_quat = np.copy(self.data.mocap_quat)
 
     def _reset_sim(self):
+        # Reset warm-start buffers, control buffers etc.
+        mujoco.mj_resetData(self.model, self.data)
         self.data.time = self.initial_time
         self.data.qpos[:] = np.copy(self.initial_qpos)
         self.data.qvel[:] = np.copy(self.initial_qvel)
@@ -381,6 +379,7 @@ def _initialize_simulation(self):
         self.initial_state = copy.deepcopy(self.sim.get_state())
 
     def _reset_sim(self):
+        self.sim.reset()  # Reset warm-start buffers, control buffers etc.
         self.sim.set_state(self.initial_state)
         self.sim.forward()
         return super()._reset_sim()

From 02ffe14ab321b9f4046dbfe9651b94026986efbc Mon Sep 17 00:00:00 2001
From: Martin Schuck <martin.schuck@tum.de>
Date: Mon, 12 Feb 2024 21:20:21 +0100
Subject: [PATCH 04/17] Revert changes to mujoco-py. Simplify _reset_sim.
 Remove mujoco-py envs from tests

---
 gymnasium_robotics/envs/fetch/fetch_env.py | 8 +-------
 gymnasium_robotics/envs/robot_env.py       | 9 +--------
 tests/test_envs.py                         | 8 +++++++-
 3 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/gymnasium_robotics/envs/fetch/fetch_env.py b/gymnasium_robotics/envs/fetch/fetch_env.py
index 53b307f2..25b5b98b 100644
--- a/gymnasium_robotics/envs/fetch/fetch_env.py
+++ b/gymnasium_robotics/envs/fetch/fetch_env.py
@@ -249,7 +249,6 @@ def _viewer_setup(self):
                 setattr(self.viewer.cam, key, value)
 
     def _reset_sim(self):
-        self.sim.reset()  # Reset warm-start buffers, control buffers etc.
         self.sim.set_state(self.initial_state)
 
         # Randomize start position of object.
@@ -374,13 +373,8 @@ def _render_callback(self):
         self._mujoco.mj_forward(self.model, self.data)
 
     def _reset_sim(self):
-        self.data.time = self.initial_time
-        self.data.qpos[:] = np.copy(self.initial_qpos)
-        self.data.qvel[:] = np.copy(self.initial_qvel)
-        # Reset buffers for warm-start, control buffers etc.
+        # Reset buffers for joint states, actuators, warm-start, control buffers etc.
         self._mujoco.mj_resetData(self.model, self.data)
-        if self.model.na != 0:
-            self.data.act[:] = None
 
         # Randomize start position of object.
         if self.has_object:
diff --git a/gymnasium_robotics/envs/robot_env.py b/gymnasium_robotics/envs/robot_env.py
index 674eea03..e23dfc69 100644
--- a/gymnasium_robotics/envs/robot_env.py
+++ b/gymnasium_robotics/envs/robot_env.py
@@ -299,14 +299,8 @@ def _initialize_simulation(self):
         self.initial_qvel = np.copy(self.data.qvel)
 
     def _reset_sim(self):
-        # Reset warm-start buffers, control buffers etc.
+        # Reset buffers for joint states, warm-start, control buffers etc.
         mujoco.mj_resetData(self.model, self.data)
-        self.data.time = self.initial_time
-        self.data.qpos[:] = np.copy(self.initial_qpos)
-        self.data.qvel[:] = np.copy(self.initial_qvel)
-        if self.model.na != 0:
-            self.data.act[:] = None
-
         mujoco.mj_forward(self.model, self.data)
         return super()._reset_sim()
 
@@ -379,7 +373,6 @@ def _initialize_simulation(self):
         self.initial_state = copy.deepcopy(self.sim.get_state())
 
     def _reset_sim(self):
-        self.sim.reset()  # Reset warm-start buffers, control buffers etc.
         self.sim.set_state(self.initial_state)
         self.sim.forward()
         return super()._reset_sim()
diff --git a/tests/test_envs.py b/tests/test_envs.py
index 9c006581..d1418e24 100644
--- a/tests/test_envs.py
+++ b/tests/test_envs.py
@@ -107,7 +107,7 @@ def test_env_determinism_rollout(env_spec: EnvSpec):
 
 
 @pytest.mark.parametrize(
-    "env_spec", all_testing_env_specs, ids=[env.id for env in all_testing_env_specs]
+    "env_spec", non_mujoco_py_env_specs, ids=[env.id for env in non_mujoco_py_env_specs]
 )
 def test_same_env_determinism_rollout(env_spec: EnvSpec):
     """Run two rollouts with a single environment and assert equality.
@@ -119,6 +119,12 @@ def test_same_env_determinism_rollout(env_spec: EnvSpec):
     - same actions are sampled by the environment
     - observations are contained in the observation space
     - obs, rew, terminated, truncated and info are equals between the two rollouts
+
+    Note:
+        We exclude mujoco_py environments because they are deprecated and their implementation is
+        frozen at this point. They are affected by a subtle bug in their reset method producing
+        slightly different results for the same seed on subsequent resets of the same environment.
+        This will not be fixed and tests are expected to fail.
     """
     # Don't check rollout equality if it's a nondeterministic environment.
     if env_spec.nondeterministic is True:

From 11db01aa0c2032e81880fb9cdc658d087ee62a20 Mon Sep 17 00:00:00 2001
From: Martin Schuck <martin.schuck@tum.de>
Date: Thu, 15 Feb 2024 08:45:26 +0100
Subject: [PATCH 05/17] Remove unnecessary forward call

---
 gymnasium_robotics/envs/robot_env.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gymnasium_robotics/envs/robot_env.py b/gymnasium_robotics/envs/robot_env.py
index e23dfc69..640e0f5f 100644
--- a/gymnasium_robotics/envs/robot_env.py
+++ b/gymnasium_robotics/envs/robot_env.py
@@ -301,7 +301,6 @@ def _initialize_simulation(self):
     def _reset_sim(self):
         # Reset buffers for joint states, warm-start, control buffers etc.
         mujoco.mj_resetData(self.model, self.data)
-        mujoco.mj_forward(self.model, self.data)
         return super()._reset_sim()
 
     def render(self):

From 9dd88f7078e3a40af3b4dba4902690c6636302f4 Mon Sep 17 00:00:00 2001
From: Martin Schuck <martin.schuck@tum.de>
Date: Thu, 29 Feb 2024 11:08:34 +0100
Subject: [PATCH 06/17] Bump versions of affected environments. Change
 documentation to reflect new version number. Simplify reset test.

---
 README.md                                     |  2 +-
 docs/content/multi-goal_api.md                |  2 +-
 docs/envs/fetch/index.md                      |  8 +-
 docs/envs/shadow_dexterous_hand/index.md      |  2 +-
 docs/index.md                                 |  2 +-
 gymnasium_robotics/__init__.py                | 10 +--
 .../envs/fetch/pick_and_place.py              |  7 +-
 gymnasium_robotics/envs/fetch/push.py         |  7 +-
 gymnasium_robotics/envs/fetch/reach.py        |  9 +-
 gymnasium_robotics/envs/fetch/slide.py        |  7 +-
 .../envs/shadow_dexterous_hand/reach.py       |  9 +-
 tests/test_envs.py                            | 85 +------------------
 12 files changed, 39 insertions(+), 111 deletions(-)

diff --git a/README.md b/README.md
index c8d0031d..60d9ec7b 100644
--- a/README.md
+++ b/README.md
@@ -54,7 +54,7 @@ goal, e.g. state derived from the simulation.
 ```python
 import gymnasium as gym
 
-env = gym.make("FetchReach-v2")
+env = gym.make("FetchReach-v3")
 env.reset()
 obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
 
diff --git a/docs/content/multi-goal_api.md b/docs/content/multi-goal_api.md
index d4458dfa..c9b65ad2 100644
--- a/docs/content/multi-goal_api.md
+++ b/docs/content/multi-goal_api.md
@@ -25,7 +25,7 @@ import gymnasium_robotics
 
 gym.register_envs(gymnasium_robotics)
 
-env = gym.make("FetchReach-v2")
+env = gym.make("FetchReach-v3")
 env.reset()
 obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
 
diff --git a/docs/envs/fetch/index.md b/docs/envs/fetch/index.md
index 380ad51d..f89d1c9e 100644
--- a/docs/envs/fetch/index.md
+++ b/docs/envs/fetch/index.md
@@ -7,10 +7,10 @@ lastpage:
 
 The Fetch environments are based on the 7-DoF [Fetch Mobile Manipulator](https://fetchrobotics.com/) arm, with a two-fingered parallel gripper attached to it. The main environment tasks are the following:
 
-* `FetchReach-v2`: Fetch has to move its end-effector to the desired goal position.
-* `FetchPush-v2`: Fetch has to move a box by pushing it until it reaches a desired goal position.
-* `FetchSlide-v2`: Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal.
-* `FetchPickAndPlace-v2`: Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table.
+* `FetchReach-v3`: Fetch has to move its end-effector to the desired goal position.
+* `FetchPush-v3`: Fetch has to move a box by pushing it until it reaches a desired goal position.
+* `FetchSlide-v3`: Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal.
+* `FetchPickAndPlace-v3`: Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table.
 
 ```{raw} html
     :file: list.html
diff --git a/docs/envs/shadow_dexterous_hand/index.md b/docs/envs/shadow_dexterous_hand/index.md
index ae93485e..aa3f296a 100644
--- a/docs/envs/shadow_dexterous_hand/index.md
+++ b/docs/envs/shadow_dexterous_hand/index.md
@@ -7,7 +7,7 @@ lastpage:
 
 These environments are based on the [Shadow Dexterous Hand](https://www.shadowrobot.com/), 5 which is an anthropomorphic robotic hand with 24 degrees of freedom. Of those 24 joints, 20 can be controlled independently whereas the remaining ones are coupled joints.
 
-* `HandReach-v1`: ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm.
+* `HandReach-v2`: ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm.
 * `HandManipulateBlock-v1`: ShadowHand has to manipulate a block until it achieves a desired goal position and rotation.
 * `HandManipulateEgg-v1`: ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation.
 * `HandManipulatePen-v1`: ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation.
diff --git a/docs/index.md b/docs/index.md
index f40b7a63..50c667b8 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -56,7 +56,7 @@ import gymnasium_robotics
 
 gym.register_envs(gymnasium_robotics)
 
-env = gym.make("FetchPickAndPlace-v2", render_mode="human")
+env = gym.make("FetchPickAndPlace-v3", render_mode="human")
 observation, info = env.reset(seed=42)
 for _ in range(1000):
    action = policy(observation)  # User-defined policy function
diff --git a/gymnasium_robotics/__init__.py b/gymnasium_robotics/__init__.py
index 49c86374..5bd5dca1 100644
--- a/gymnasium_robotics/__init__.py
+++ b/gymnasium_robotics/__init__.py
@@ -30,7 +30,7 @@ def _merge(a, b):
         )
 
         register(
-            id=f"FetchSlide{suffix}-v2",
+            id=f"FetchSlide{suffix}-v3",
             entry_point="gymnasium_robotics.envs.fetch.slide:MujocoFetchSlideEnv",
             kwargs=kwargs,
             max_episode_steps=50,
@@ -44,7 +44,7 @@ def _merge(a, b):
         )
 
         register(
-            id=f"FetchPickAndPlace{suffix}-v2",
+            id=f"FetchPickAndPlace{suffix}-v3",
             entry_point="gymnasium_robotics.envs.fetch.pick_and_place:MujocoFetchPickAndPlaceEnv",
             kwargs=kwargs,
             max_episode_steps=50,
@@ -58,7 +58,7 @@ def _merge(a, b):
         )
 
         register(
-            id=f"FetchReach{suffix}-v2",
+            id=f"FetchReach{suffix}-v3",
             entry_point="gymnasium_robotics.envs.fetch.reach:MujocoFetchReachEnv",
             kwargs=kwargs,
             max_episode_steps=50,
@@ -72,7 +72,7 @@ def _merge(a, b):
         )
 
         register(
-            id=f"FetchPush{suffix}-v2",
+            id=f"FetchPush{suffix}-v3",
             entry_point="gymnasium_robotics.envs.fetch.push:MujocoFetchPushEnv",
             kwargs=kwargs,
             max_episode_steps=50,
@@ -87,7 +87,7 @@ def _merge(a, b):
         )
 
         register(
-            id=f"HandReach{suffix}-v1",
+            id=f"HandReach{suffix}-v2",
             entry_point="gymnasium_robotics.envs.shadow_dexterous_hand.reach:MujocoHandReachEnv",
             kwargs=kwargs,
             max_episode_steps=50,
diff --git a/gymnasium_robotics/envs/fetch/pick_and_place.py b/gymnasium_robotics/envs/fetch/pick_and_place.py
index 74b5ed21..1ceff302 100644
--- a/gymnasium_robotics/envs/fetch/pick_and_place.py
+++ b/gymnasium_robotics/envs/fetch/pick_and_place.py
@@ -88,7 +88,7 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle):
     - *sparse*: the returned reward can have two values: `-1` if the block hasn't reached its final target position, and `0` if the block is in the final target position (the block is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m).
     - *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
 
-    To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPickAndPlace-v2`. However, for `dense` reward the id must be modified to `FetchPickAndPlaceDense-v2` and initialized as follows:
+    To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPickAndPlace-v3`. However, for `dense` reward the id must be modified to `FetchPickAndPlaceDense-v3` and initialized as follows:
 
     ```python
     import gymnasium as gym
@@ -96,7 +96,7 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle):
 
     gym.register_envs(gymnasium_robotics)
 
-    env = gym.make('FetchPickAndPlaceDense-v2')
+    env = gym.make('FetchPickAndPlaceDense-v3')
     ```
 
     ## Starting State
@@ -125,11 +125,12 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle):
 
     gym.register_envs(gymnasium_robotics)
 
-    env = gym.make('FetchPickAndPlace-v2', max_episode_steps=100)
+    env = gym.make('FetchPickAndPlace-v3', max_episode_steps=100)
     ```
 
     ## Version History
 
+    * v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.
     """
diff --git a/gymnasium_robotics/envs/fetch/push.py b/gymnasium_robotics/envs/fetch/push.py
index 10a87282..feb9ef52 100644
--- a/gymnasium_robotics/envs/fetch/push.py
+++ b/gymnasium_robotics/envs/fetch/push.py
@@ -116,7 +116,7 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle):
     - *sparse*: the returned reward can have two values: `-1` if the block hasn't reached its final target position, and `0` if the block is in the final target position (the block is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m).
     - *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
 
-    To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPush-v2`. However, for `dense` reward the id must be modified to `FetchPush-v2` and initialized as follows:
+    To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPush-v3`. However, for `dense` reward the id must be modified to `FetchPushDense-v3` and initialized as follows:
 
     ```python
     import gymnasium as gym
@@ -124,7 +124,7 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle):
 
     gym.register_envs(gymnasium_robotics)
 
-    env = gym.make('FetchPushDense-v2')
+    env = gym.make('FetchPushDense-v3')
     ```
 
     ## Starting State
@@ -153,11 +153,12 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle):
 
     gym.register_envs(gymnasium_robotics)
 
-    env = gym.make('FetchPush-v2', max_episode_steps=100)
+    env = gym.make('FetchPush-v3', max_episode_steps=100)
     ```
 
     ## Version History
 
+    * v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.
     """
diff --git a/gymnasium_robotics/envs/fetch/reach.py b/gymnasium_robotics/envs/fetch/reach.py
index 962fd74d..bebe82dd 100644
--- a/gymnasium_robotics/envs/fetch/reach.py
+++ b/gymnasium_robotics/envs/fetch/reach.py
@@ -77,8 +77,8 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle):
     the end effector and the goal is lower than 0.05 m).
     - *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
 
-    To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchReach-v2`. However, for `dense`
-    reward the id must be modified to `FetchReachDense-v2` and initialized as follows:
+    To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchReach-v3`. However, for `dense`
+    reward the id must be modified to `FetchReachDense-v3` and initialized as follows:
 
     ```python
     import gymnasium as gym
@@ -86,7 +86,7 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle):
 
     gym.register_envs(gymnasium_robotics)
 
-    env = gym.make('FetchReachDense-v2')
+    env = gym.make('FetchReachDense-v3')
     ```
 
     ## Starting State
@@ -111,11 +111,12 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle):
 
     gym.register_envs(gymnasium_robotics)
 
-    env = gym.make('FetchReach-v2', max_episode_steps=100)
+    env = gym.make('FetchReach-v3', max_episode_steps=100)
     ```
 
     ## Version History
 
+    * v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.
     """
diff --git a/gymnasium_robotics/envs/fetch/slide.py b/gymnasium_robotics/envs/fetch/slide.py
index 1381b4a5..47197823 100644
--- a/gymnasium_robotics/envs/fetch/slide.py
+++ b/gymnasium_robotics/envs/fetch/slide.py
@@ -116,7 +116,7 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle):
     - *sparse*: the returned reward can have two values: `-1` if the puck hasn't reached its final target position, and `0` if the puck is in the final target position (the puck is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m).
     - *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
 
-    To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchSlide-v2`. However, for `dense` reward the id must be modified to `FetchSlideDense-v2` and initialized as follows:
+    To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchSlide-v3`. However, for `dense` reward the id must be modified to `FetchSlideDense-v3` and initialized as follows:
 
     ```python
     import gymnasium as gym
@@ -124,7 +124,7 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle):
 
     gym.register_envs(gymnasium_robotics)
 
-    env = gym.make('FetchSlideDense-v2')
+    env = gym.make('FetchSlideDense-v3')
     ```
 
     ## Starting State
@@ -152,11 +152,12 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle):
 
     gym.register_envs(gymnasium_robotics)
 
-    env = gym.make('FetchSlide-v2', max_episode_steps=100)
+    env = gym.make('FetchSlide-v3', max_episode_steps=100)
     ```
 
     ## Version History
 
+    * v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.
     """
diff --git a/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py b/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py
index 984bd4d6..67a1682a 100644
--- a/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py
+++ b/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py
@@ -306,13 +306,13 @@ class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)):
     the achieved goal vector and the desired goal vector is lower than 0.01).
     - *dense*: the returned reward is the negative 2-norm distance between the achieved goal vector and the desired goal vector.
 
-    To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `HandReach-v1`.
-    However, for `dense` reward the id must be modified to `HandReachDense-v1` and initialized as follows:
+    To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `HandReach-v2`.
+    However, for `dense` reward the id must be modified to `HandReachDense-v2` and initialized as follows:
 
     ```
     import gymnasium as gym
 
-    env = gym.make('HandReachDense-v1')
+    env = gym.make('HandReachDense-v2')
     ```
 
     ## Starting State
@@ -383,11 +383,12 @@ class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)):
     ```
     import gymnasium as gym
 
-    env = gym.make('HandReach-v1', max_episode_steps=100)
+    env = gym.make('HandReach-v2', max_episode_steps=100)
     ```
 
     ## Version History
 
+    * v2: Fix slight differences between rollouts of the same environment when reset with the same seed.
     * v1: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v0: the environment depends on `mujoco_py` which is no longer maintained.
 
diff --git a/tests/test_envs.py b/tests/test_envs.py
index 907a1a33..b018f896 100644
--- a/tests/test_envs.py
+++ b/tests/test_envs.py
@@ -3,6 +3,7 @@
 
 import gymnasium as gym
 import pytest
+from gymnasium.envs.mujoco.utils import check_mujoco_reset_state
 from gymnasium.envs.registration import EnvSpec
 from gymnasium.error import Error
 from gymnasium.utils.env_checker import check_env, data_equivalence
@@ -109,16 +110,8 @@ def test_env_determinism_rollout(env_spec: EnvSpec):
 @pytest.mark.parametrize(
     "env_spec", non_mujoco_py_env_specs, ids=[env.id for env in non_mujoco_py_env_specs]
 )
-def test_same_env_determinism_rollout(env_spec: EnvSpec):
-    """Run two rollouts with a single environment and assert equality.
-
-    This test runs two rollouts of NUM_STEPS steps with one environment
-    reset with the same seed and asserts that:
-
-    - observations after the reset are the same
-    - same actions are sampled by the environment
-    - observations are contained in the observation space
-    - obs, rew, terminated, truncated and info are equals between the two rollouts
+def test_mujoco_reset_state_seeding(env_spec: EnvSpec):
+    """Check if the reset method of mujoco environments is deterministic for the same seed.
 
     Note:
         We exclude mujoco_py environments because they are deprecated and their implementation is
@@ -132,77 +125,7 @@ def test_same_env_determinism_rollout(env_spec: EnvSpec):
 
     env = env_spec.make(disable_env_checker=True)
 
-    rollout_1 = {
-        "observations": [],
-        "actions": [],
-        "rewards": [],
-        "terminated": [],
-        "truncated": [],
-        "infos": [],
-    }
-    rollout_2 = {
-        "observations": [],
-        "actions": [],
-        "rewards": [],
-        "terminated": [],
-        "truncated": [],
-        "infos": [],
-    }
-
-    # Run two rollouts of the same environment instance
-    for rollout in [rollout_1, rollout_2]:
-        # Reset the environment with the same seed for both rollouts
-        obs, info = env.reset(seed=SEED)
-        env.action_space.seed(SEED)
-        rollout["observations"].append(obs)
-        rollout["infos"].append(info)
-
-        for time_step in range(NUM_STEPS):
-            action = env.action_space.sample()
-
-            obs, rew, terminated, truncated, info = env.step(action)
-            rollout["observations"].append(obs)
-            rollout["actions"].append(action)
-            rollout["rewards"].append(rew)
-            rollout["terminated"].append(terminated)
-            rollout["truncated"].append(truncated)
-            rollout["infos"].append(info)
-            if terminated or truncated:
-                env.reset(seed=SEED)
-
-    for time_step, (obs_1, obs_2) in enumerate(
-        zip(rollout_1["observations"], rollout_2["observations"])
-    ):
-        # -1 because of the initial observation stored on reset
-        time_step = "initial" if time_step == 0 else time_step - 1
-        assert_equals(obs_1, obs_2, f"[{time_step}] ")
-        assert env.observation_space.contains(
-            obs_1
-        )  # obs_2 verified by previous assertion
-    for time_step, (rew_1, rew_2) in enumerate(
-        zip(rollout_1["rewards"], rollout_2["rewards"])
-    ):
-        assert rew_1 == rew_2, f"[{time_step}] reward 1={rew_1}, reward 2={rew_2}"
-    for time_step, (terminated_1, terminated_2) in enumerate(
-        zip(rollout_1["terminated"], rollout_2["terminated"])
-    ):
-        assert (
-            terminated_1 == terminated_2
-        ), f"[{time_step}] terminated 1={terminated_1}, terminated 2={terminated_2}"
-    for time_step, (truncated_1, truncated_2) in enumerate(
-        zip(rollout_1["truncated"], rollout_2["truncated"])
-    ):
-        assert (
-            truncated_1 == truncated_2
-        ), f"[{time_step}] truncated 1={truncated_1}, truncated 2={truncated_2}"
-    for time_step, (info_1, info_2) in enumerate(
-        zip(rollout_1["infos"], rollout_2["infos"])
-    ):
-        # -1 because of the initial info stored on reset
-        time_step = "initial" if time_step == 0 else time_step - 1
-        assert_equals(info_1, info_2, f"[{time_step}] ")
-
-    env.close()
+    check_mujoco_reset_state(env)
 
 
 @pytest.mark.parametrize(

From c6b54d6905f9c44086601c1c4f8be0c84b57541d Mon Sep 17 00:00:00 2001
From: Martin Schuck <martin.schuck@tum.de>
Date: Fri, 1 Mar 2024 10:16:02 +0100
Subject: [PATCH 07/17] Fix typo, clarify version change with issue

---
 gymnasium_robotics/envs/fetch/pick_and_place.py        | 4 ++--
 gymnasium_robotics/envs/fetch/push.py                  | 2 +-
 gymnasium_robotics/envs/fetch/reach.py                 | 2 +-
 gymnasium_robotics/envs/fetch/slide.py                 | 2 +-
 gymnasium_robotics/envs/shadow_dexterous_hand/reach.py | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gymnasium_robotics/envs/fetch/pick_and_place.py b/gymnasium_robotics/envs/fetch/pick_and_place.py
index 1ceff302..17369d90 100644
--- a/gymnasium_robotics/envs/fetch/pick_and_place.py
+++ b/gymnasium_robotics/envs/fetch/pick_and_place.py
@@ -88,7 +88,7 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle):
     - *sparse*: the returned reward can have two values: `-1` if the block hasn't reached its final target position, and `0` if the block is in the final target position (the block is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m).
     - *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
 
-    To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPickAndPlace-3`. However, for `dense` reward the id must be modified to `FetchPickAndPlaceDense-v3` and initialized as follows:
+    To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPickAndPlace-v3`. However, for `dense` reward the id must be modified to `FetchPickAndPlaceDense-v3` and initialized as follows:
 
     ```python
     import gymnasium as gym
@@ -130,7 +130,7 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle):
 
     ## Version History
 
-    * v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
+    * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.
     """
diff --git a/gymnasium_robotics/envs/fetch/push.py b/gymnasium_robotics/envs/fetch/push.py
index feb9ef52..e9698f54 100644
--- a/gymnasium_robotics/envs/fetch/push.py
+++ b/gymnasium_robotics/envs/fetch/push.py
@@ -158,7 +158,7 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle):
 
     ## Version History
 
-    * v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
+    * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.
     """
diff --git a/gymnasium_robotics/envs/fetch/reach.py b/gymnasium_robotics/envs/fetch/reach.py
index bebe82dd..68eb74ce 100644
--- a/gymnasium_robotics/envs/fetch/reach.py
+++ b/gymnasium_robotics/envs/fetch/reach.py
@@ -116,7 +116,7 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle):
 
     ## Version History
 
-    * v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
+    * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.
     """
diff --git a/gymnasium_robotics/envs/fetch/slide.py b/gymnasium_robotics/envs/fetch/slide.py
index 47197823..93842e15 100644
--- a/gymnasium_robotics/envs/fetch/slide.py
+++ b/gymnasium_robotics/envs/fetch/slide.py
@@ -157,7 +157,7 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle):
 
     ## Version History
 
-    * v3: Fix slight differences between rollouts of the same environment when reset with the same seed.
+    * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.
     """
diff --git a/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py b/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py
index 67a1682a..e353f9e0 100644
--- a/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py
+++ b/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py
@@ -388,7 +388,7 @@ class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)):
 
     ## Version History
 
-    * v2: Fix slight differences between rollouts of the same environment when reset with the same seed.
+    * v2: Fixed bug: Environments did not yield reproducible results if the same instance was reset with the same seed after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
     * v1: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v0: the environment depends on `mujoco_py` which is no longer maintained.
 

From b451ba79858ee39f6f70b916e7ccb2402969b0a4 Mon Sep 17 00:00:00 2001
From: Kallinteris Andreas
 <30759571+Kallinteris-Andreas@users.noreply.github.com>
Date: Fri, 24 May 2024 07:38:12 +0300
Subject: [PATCH 08/17] update to gymnasium==1.0a2

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4c4961d5..77784ab1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,7 @@ classifiers = [
 dependencies = [
     "mujoco>=2.2.0",
     "numpy>=1.21.0",
-    "gymnasium>=1.0.0a1",
+    "gymnasium>=1.0.0a2",
     "PettingZoo>=1.23.0",
     "Jinja2>=3.0.3",
     "imageio"

From e0f3fb8f20c0bf67bcab9aa95520589cd16fcb1d Mon Sep 17 00:00:00 2001
From: David Leins <dleins@techfak.uni-bielefeld.de>
Date: Wed, 29 May 2024 10:51:14 +0200
Subject: [PATCH 09/17] Add asserts for mj_name2id calls in mujoco_utils

---
 gymnasium_robotics/utils/mujoco_utils.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/gymnasium_robotics/utils/mujoco_utils.py b/gymnasium_robotics/utils/mujoco_utils.py
index 07e7b485..e26407ad 100644
--- a/gymnasium_robotics/utils/mujoco_utils.py
+++ b/gymnasium_robotics/utils/mujoco_utils.py
@@ -130,6 +130,7 @@ def get_site_jacr(model, data, site_id):
 def set_joint_qpos(model, data, name, value):
     """Set the joint positions (qpos) of the model."""
     joint_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_JOINT, name)
+    assert joint_id != -1, f"Joint with name '{name}' is not part of the model!"
     joint_type = model.jnt_type[joint_id]
     joint_addr = model.jnt_qposadr[joint_id]
 
@@ -154,6 +155,7 @@ def set_joint_qpos(model, data, name, value):
 def set_joint_qvel(model, data, name, value):
     """Set the joints linear and angular (qvel) of the model."""
     joint_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_JOINT, name)
+    assert joint_id != -1, f"Joint with name '{name}' is not part of the model!"
     joint_type = model.jnt_type[joint_id]
     joint_addr = model.jnt_dofadr[joint_id]
 
@@ -178,6 +180,7 @@ def set_joint_qvel(model, data, name, value):
 def get_joint_qpos(model, data, name):
     """Return the joints position and orientation (qpos) of the model."""
     joint_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_JOINT, name)
+    assert joint_id != -1, f"Joint with name '{name}' is not part of the model!"
     joint_type = model.jnt_type[joint_id]
     joint_addr = model.jnt_qposadr[joint_id]
 
@@ -198,6 +201,7 @@ def get_joint_qpos(model, data, name):
 def get_joint_qvel(model, data, name):
     """Return the joints linear and angular velocities (qvel) of the model."""
     joint_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_JOINT, name)
+    assert joint_id != -1, f"Joint with name '{name}' is not part of the model!"
     joint_type = model.jnt_type[joint_id]
     joint_addr = model.jnt_dofadr[joint_id]
 
@@ -217,11 +221,13 @@ def get_joint_qvel(model, data, name):
 
 def get_site_xpos(model, data, name):
     site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, name)
+    assert site_id != -1, f"Site with name '{name}' is not part of the model!"
     return data.site_xpos[site_id]
 
 
 def get_site_xvelp(model, data, name):
     site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, name)
+    assert site_id != -1, f"Site with name '{name}' is not part of the model!"
     jacp = get_site_jacp(model, data, site_id)
     xvelp = jacp @ data.qvel
     return xvelp
@@ -229,6 +235,7 @@ def get_site_xvelp(model, data, name):
 
 def get_site_xvelr(model, data, name):
     site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, name)
+    assert site_id != -1, f"Site with name '{name}' is not part of the model!"
     jacp = get_site_jacr(model, data, site_id)
     xvelp = jacp @ data.qvel
     return xvelp
@@ -236,18 +243,21 @@ def get_site_xvelr(model, data, name):
 
 def set_mocap_pos(model, data, name, value):
     body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, name)
+    assert body_id != -1, f"Body with name '{name}' is not part of the model!"
     mocap_id = model.body_mocapid[body_id]
     data.mocap_pos[mocap_id] = value
 
 
 def set_mocap_quat(model: MjModel, data: MjData, name: str, value):
     body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, name)
+    assert body_id != -1, f"Body with name '{name}' is not part of the model!"
     mocap_id = model.body_mocapid[body_id]
     data.mocap_quat[mocap_id] = value
 
 
 def get_site_xmat(model: MjModel, data: MjData, name: str):
     site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, name)
+    assert site_id != -1, f"Site with name '{name}' is not part of the model!"
     return data.site_xmat[site_id].reshape(3, 3)
 
 

From dace06bca3976312df71c5ce5bfc7e23329a54c3 Mon Sep 17 00:00:00 2001
From: Kallinteris Andreas
 <30759571+Kallinteris-Andreas@users.noreply.github.com>
Date: Wed, 29 May 2024 17:31:28 +0300
Subject: [PATCH 10/17] update changelog

---
 gymnasium_robotics/envs/fetch/pick_and_place.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gymnasium_robotics/envs/fetch/pick_and_place.py b/gymnasium_robotics/envs/fetch/pick_and_place.py
index 17369d90..37f9a75b 100644
--- a/gymnasium_robotics/envs/fetch/pick_and_place.py
+++ b/gymnasium_robotics/envs/fetch/pick_and_place.py
@@ -130,7 +130,7 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle):
 
     ## Version History
 
-    * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
+    * v3: Fixed bug: `env.reset()` not properly resetting the internal state. Fetch environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.
     """

From f0c1a32177f3b90ad0aa9ece71e42dc9e45947d0 Mon Sep 17 00:00:00 2001
From: Kallinteris Andreas
 <30759571+Kallinteris-Andreas@users.noreply.github.com>
Date: Wed, 29 May 2024 17:32:07 +0300
Subject: [PATCH 11/17] update changelog

---
 gymnasium_robotics/envs/fetch/push.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gymnasium_robotics/envs/fetch/push.py b/gymnasium_robotics/envs/fetch/push.py
index e9698f54..5420be0b 100644
--- a/gymnasium_robotics/envs/fetch/push.py
+++ b/gymnasium_robotics/envs/fetch/push.py
@@ -157,8 +157,7 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle):
     ```
 
     ## Version History
-
-    * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
+    * v3: Fixed bug: `env.reset()` not properly resetting the internal state. Fetch environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.
     """

From 5f6c5e2500560c63348eb64b2cbb3830c19f27a9 Mon Sep 17 00:00:00 2001
From: Kallinteris Andreas
 <30759571+Kallinteris-Andreas@users.noreply.github.com>
Date: Wed, 29 May 2024 17:32:33 +0300
Subject: [PATCH 12/17] Update reach.py

---
 gymnasium_robotics/envs/fetch/reach.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gymnasium_robotics/envs/fetch/reach.py b/gymnasium_robotics/envs/fetch/reach.py
index 68eb74ce..cb227f9f 100644
--- a/gymnasium_robotics/envs/fetch/reach.py
+++ b/gymnasium_robotics/envs/fetch/reach.py
@@ -115,8 +115,7 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle):
     ```
 
     ## Version History
-
-    * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
+    * v3: Fixed bug: `env.reset()` not properly resetting the internal state. Fetch environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.
     """

From 7ab8f631560274635cedd59d880c9b2793f635c5 Mon Sep 17 00:00:00 2001
From: Kallinteris Andreas
 <30759571+Kallinteris-Andreas@users.noreply.github.com>
Date: Wed, 29 May 2024 17:33:18 +0300
Subject: [PATCH 13/17] Update slide.py

---
 gymnasium_robotics/envs/fetch/slide.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gymnasium_robotics/envs/fetch/slide.py b/gymnasium_robotics/envs/fetch/slide.py
index 93842e15..d4898aed 100644
--- a/gymnasium_robotics/envs/fetch/slide.py
+++ b/gymnasium_robotics/envs/fetch/slide.py
@@ -156,8 +156,7 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle):
     ```
 
     ## Version History
-
-    * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
+    * v3: Fixed bug: `env.reset()` not properly resetting the internal state. Fetch environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.
     """

From 5da9ab63814e2ae82aa5aaa62ff98ac7ddd37747 Mon Sep 17 00:00:00 2001
From: Kallinteris Andreas
 <30759571+Kallinteris-Andreas@users.noreply.github.com>
Date: Wed, 29 May 2024 17:37:12 +0300
Subject: [PATCH 14/17] Update reach.py

---
 gymnasium_robotics/envs/shadow_dexterous_hand/reach.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py b/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py
index e353f9e0..377b7e41 100644
--- a/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py
+++ b/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py
@@ -387,8 +387,7 @@ class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)):
     ```
 
     ## Version History
-
-    * v2: Fixed bug: Environments did not yield reproducible results if the same instance was reset with the same seed after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
+    * v2: Fixed bug: `env.reset()` not properly resetting the internal state. Shadow Dexterous Hand environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
     * v1: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v0: the environment depends on `mujoco_py` which is no longer maintained.
 

From 58175c3a90928c43eee8b630415bf85bb5f4796e Mon Sep 17 00:00:00 2001
From: Timo Friedl <mail@timofriedl.com>
Date: Fri, 12 Jul 2024 14:38:32 +0200
Subject: [PATCH 15/17] Fixed typo

---
 gymnasium_robotics/envs/robot_env.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gymnasium_robotics/envs/robot_env.py b/gymnasium_robotics/envs/robot_env.py
index 640e0f5f..a4d1a01d 100644
--- a/gymnasium_robotics/envs/robot_env.py
+++ b/gymnasium_robotics/envs/robot_env.py
@@ -190,7 +190,7 @@ def reset(
     def _mujoco_step(self, action):
         """Advance the mujoco simulation.
 
-        Override depending on the python binginds, either mujoco or mujoco_py
+        Override depending on the python bindings, either mujoco or mujoco_py
         """
         raise NotImplementedError
 

From bfa3cd40d9bc8437c5446d582baad31098f017c4 Mon Sep 17 00:00:00 2001
From: Kallinteris Andreas
 <30759571+Kallinteris-Andreas@users.noreply.github.com>
Date: Thu, 25 Jul 2024 12:52:37 +0300
Subject: [PATCH 16/17] Update README.md

---
 README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 60d9ec7b..d1a2f9cc 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,7 @@ We support and test for Python 3.8, 3.9, 3.10 and 3.11 on Linux and macOS. We wi
 
 * [Fetch](https://robotics.farama.org/envs/fetch/) - A collection of environments with a 7-DoF robot arm that has to perform manipulation tasks such as Reach, Push, Slide or Pick and Place.
 * [Shadow Dexterous Hand](https://robotics.farama.org/envs/shadow_dexterous_hand/) - A collection of environments with a 24-DoF anthropomorphic robotic hand that has to perform object manipulation tasks with a cube, egg-object, or pen. There are variations of these environments that also include data from 92 touch sensors in the observation space.
+* [MaMuJoCo](https://robotics.farama.org/envs/MaMuJoCo/) - A collection of multi-agent factorizations of the [Gymnasium/MuJoCo](https://gymnasium.farama.org/environments/mujoco/) environments and a framework for factorizing robotic environments; it uses the [pettingzoo.ParallelEnv](https://pettingzoo.farama.org/api/parallel/) API.
 
 The [D4RL](https://github.com/Farama-Foundation/D4RL) environments are now available. These environments have been refactored and may not have the same action/observation spaces as the original, please read their documentation:
 
@@ -32,8 +33,6 @@ The [D4RL](https://github.com/Farama-Foundation/D4RL) environments are now avail
 The different tasks involve hammering a nail, opening a door, twirling a pen, or picking up and moving a ball.
 * [Franka Kitchen](https://robotics.farama.org/envs/franka_kitchen/) - Multitask environment in which a 9-DoF Franka robot is placed in a kitchen containing several common household items. The goal of each task is to interact with the items in order to reach a desired goal configuration.
 
-* [MaMuJoCo](https://robotics.farama.org/envs/MaMuJoCo/) - A collection of multi agent factorizations of the [Gymnasium/MuJoCo](https://gymnasium.farama.org/environments/mujoco/) environments and a framework for factorizing robotic environments, uses the [pettingzoo.ParallelEnv](https://pettingzoo.farama.org/api/parallel/) API. 
-
 **WIP**: generate new `D4RL` environment datasets with [Minari](https://github.com/Farama-Foundation/Minari).
 
 ## Multi-goal API

From 629c589541af3686e2bdb5f25502607da238ef72 Mon Sep 17 00:00:00 2001
From: Kallinteris Andreas
 <30759571+Kallinteris-Andreas@users.noreply.github.com>
Date: Tue, 30 Jul 2024 00:40:02 +0300
Subject: [PATCH 17/17] Update pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 77784ab1..12fb458e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,7 @@ classifiers = [
     'Topic :: Scientific/Engineering :: Artificial Intelligence',
 ]
 dependencies = [
-    "mujoco>=2.2.0",
+    "mujoco>=2.2.0, <3.2.0",
     "numpy>=1.21.0",
     "gymnasium>=1.0.0a2",
     "PettingZoo>=1.23.0",