Farama-Foundation · Kallinteris-Andreas · Dec 19, 2024 · Nov 26, 2024 · Nov 30, 2024 · Nov 30, 2024
diff --git a/gymnasium_robotics/__init__.py b/gymnasium_robotics/__init__.py
@@ -30,7 +30,7 @@ def _merge(a, b):
         )
 
         register(
-            id=f"FetchSlide{suffix}-v3",
+            id=f"FetchSlide{suffix}-v4",
             entry_point="gymnasium_robotics.envs.fetch.slide:MujocoFetchSlideEnv",
             kwargs=kwargs,
             max_episode_steps=50,
@@ -44,7 +44,7 @@ def _merge(a, b):
         )
 
         register(
-            id=f"FetchPickAndPlace{suffix}-v3",
+            id=f"FetchPickAndPlace{suffix}-v4",
             entry_point="gymnasium_robotics.envs.fetch.pick_and_place:MujocoFetchPickAndPlaceEnv",
             kwargs=kwargs,
             max_episode_steps=50,
@@ -58,7 +58,7 @@ def _merge(a, b):
         )
 
         register(
-            id=f"FetchReach{suffix}-v3",
+            id=f"FetchReach{suffix}-v4",
             entry_point="gymnasium_robotics.envs.fetch.reach:MujocoFetchReachEnv",
             kwargs=kwargs,
             max_episode_steps=50,
@@ -72,7 +72,7 @@ def _merge(a, b):
         )
 
         register(
-            id=f"FetchPush{suffix}-v3",
+            id=f"FetchPush{suffix}-v4",
             entry_point="gymnasium_robotics.envs.fetch.push:MujocoFetchPushEnv",
             kwargs=kwargs,
             max_episode_steps=50,
@@ -87,7 +87,7 @@ def _merge(a, b):
         )
 
         register(
-            id=f"HandReach{suffix}-v2",
+            id=f"HandReach{suffix}-v3",
             entry_point="gymnasium_robotics.envs.shadow_dexterous_hand.reach:MujocoHandReachEnv",
             kwargs=kwargs,
             max_episode_steps=50,

diff --git a/gymnasium_robotics/envs/fetch/fetch_env.py b/gymnasium_robotics/envs/fetch/fetch_env.py
@@ -376,6 +376,17 @@ def _reset_sim(self):
         # Reset buffers for joint states, actuators, warm-start, control buffers etc.
         self._mujoco.mj_resetData(self.model, self.data)
 
+        # Restore the stored initial time, joint positions and joint velocities.
+        # Slice assignment copies the values in place, so the stored initial
+        # arrays are never aliased.
+        self.data.time = self.initial_time
+        self.data.qpos[:] = self.initial_qpos
+        self.data.qvel[:] = self.initial_qvel
+        if self.model.na != 0:
+            # Zero the actuator activations. Assigning `None` here would be cast
+            # to NaN by NumPy and poison the simulation state.
+            self.data.act[:] = 0.0
+
         # Randomize start position of object.
         if self.has_object:
             object_xpos = self.initial_gripper_xpos[:2]

diff --git a/gymnasium_robotics/envs/fetch/pick_and_place.py b/gymnasium_robotics/envs/fetch/pick_and_place.py
@@ -130,6 +130,7 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle):
 
     ## Version History
 
+    * v4: Fixed bug: the state after `env.reset()` did not match the initial state described in the documentation. Fetch environments now reset to the documented initial state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/251)).
     * v3: Fixed bug: `env.reset()` not properly resetting the internal state. Fetch environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.

diff --git a/gymnasium_robotics/envs/fetch/push.py b/gymnasium_robotics/envs/fetch/push.py
@@ -157,6 +157,8 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle):
     ```
 
     ## Version History
+
+    * v4: Fixed bug: the state after `env.reset()` did not match the initial state described in the documentation. Fetch environments now reset to the documented initial state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/251)).
     * v3: Fixed bug: `env.reset()` not properly resetting the internal state. Fetch environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.

diff --git a/gymnasium_robotics/envs/fetch/reach.py b/gymnasium_robotics/envs/fetch/reach.py
@@ -115,6 +115,8 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle):
     ```
 
     ## Version History
+
+    * v4: Fixed bug: the state after `env.reset()` did not match the initial state described in the documentation. Fetch environments now reset to the documented initial state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/251)).
     * v3: Fixed bug: `env.reset()` not properly resetting the internal state. Fetch environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.

diff --git a/gymnasium_robotics/envs/fetch/slide.py b/gymnasium_robotics/envs/fetch/slide.py
@@ -156,6 +156,8 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle):
     ```
 
     ## Version History
+
+    * v4: Fixed bug: the state after `env.reset()` did not match the initial state described in the documentation. Fetch environments now reset to the documented initial state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/251)).
     * v3: Fixed bug: `env.reset()` not properly resetting the internal state. Fetch environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
     * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v1: the environment depends on `mujoco_py` which is no longer maintained.

diff --git a/gymnasium_robotics/envs/robot_env.py b/gymnasium_robotics/envs/robot_env.py
@@ -305,6 +305,20 @@ def _initialize_simulation(self):
     def _reset_sim(self):
         # Reset buffers for joint states, warm-start, control buffers etc.
         mujoco.mj_resetData(self.model, self.data)
+
+        # Restore the stored initial time, joint positions and joint velocities.
+        # Slice assignment copies the values in place, so the stored initial
+        # arrays are never aliased.
+        self.data.time = self.initial_time
+        self.data.qpos[:] = self.initial_qpos
+        self.data.qvel[:] = self.initial_qvel
+        if self.model.na != 0:
+            # Zero the actuator activations. Assigning `None` here would be cast
+            # to NaN by NumPy and poison the simulation state.
+            self.data.act[:] = 0.0
+
+        # Recompute the quantities derived from the restored state.
+        mujoco.mj_forward(self.model, self.data)
         return super()._reset_sim()
 
     def render(self):

diff --git a/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py b/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py
@@ -387,6 +387,8 @@ class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)):
     ```
 
     ## Version History
+
+    * v3: Fixed bug: the state after `env.reset()` did not match the initial state described in the documentation. Hand Reach environments now reset to the documented initial state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/251)).
     * v2: Fixed bug: `env.reset()` not properly resetting the internal state. Fetch environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)).
     * v1: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind.
     * v0: the environment depends on `mujoco_py` which is no longer maintained.

diff --git a/tests/envs/hand/test_reach.py b/tests/envs/hand/test_reach.py
@@ -8,7 +8,7 @@
 
 
 def test_serialize_deserialize():
-    env1 = gym.make("HandReach-v2", distance_threshold=1e-6)
+    env1 = gym.make("HandReach-v3", distance_threshold=1e-6)
     env1.reset()
     env2 = pickle.loads(pickle.dumps(env1))
 

diff --git a/tests/test_envs.py b/tests/test_envs.py
@@ -2,6 +2,7 @@
 import warnings
 
 import gymnasium as gym
+import numpy as np
 import pytest
 from gymnasium.envs.mujoco.utils import check_mujoco_reset_state
 from gymnasium.envs.registration import EnvSpec
@@ -163,3 +164,64 @@ def test_pickle_env(env_spec):
     data_equivalence(env.step(action), pickled_env.step(action))
     env.close()
     pickled_env.close()
+
+
+_test_robot_env_reset_list = ["Fetch", "HandReach"]
+
+# Environments for which the object's qpos entries are excluded from the check.
+_robot_env_reset_object_envs = ("FetchPush", "FetchPickAndPlace", "FetchSlide")
+
+# Build the filtered spec list once so that the parameters and their ids cannot drift apart.
+_robot_env_reset_specs = [
+    spec
+    for spec in non_mujoco_py_env_specs
+    if any(tar in spec.id for tar in _test_robot_env_reset_list)
+]
+
+
+@pytest.mark.parametrize(
+    "spec",
+    _robot_env_reset_specs,
+    ids=[spec.id for spec in _robot_env_reset_specs],
+)
+def test_robot_env_reset(spec):
+    """Check that robotic environments reset to their stored initial state.
+
+    Covers the Fetch and Shadow Dexterous Hand Reach environments: after every
+    ``reset`` the simulation's ``qpos``/``qvel`` must equal the environment's
+    ``initial_qpos``/``initial_qvel``, as described in the documentation.
+    """
+
+    def _test_initial_states(env, seed=None):
+        """Reset ``env`` with ``seed`` and assert the state matches the initial one."""
+        env.reset(seed=seed)
+        base_env = env.unwrapped
+
+        diag_dict = {
+            "qpos": base_env.data.qpos,
+            "qvel": base_env.data.qvel,
+            "init_qpos": base_env.initial_qpos,
+            "init_qvel": base_env.initial_qvel,
+        }
+
+        # Exclude the object location from environments that contain an object.
+        # NOTE(review): qpos[-7:-5] is presumably the object's x/y position,
+        # which is re-sampled on every reset -- confirm against the model.
+        if any(tar in spec.id for tar in _robot_env_reset_object_envs):
+            diag_dict["qpos"] = np.delete(diag_dict["qpos"], np.s_[-7:-5])
+            diag_dict["init_qpos"] = np.delete(diag_dict["init_qpos"], np.s_[-7:-5])
+
+        # `array_equal` also fails cleanly when the shapes differ.
+        assert np.array_equal(diag_dict["qpos"], diag_dict["init_qpos"])
+        assert np.array_equal(diag_dict["qvel"], diag_dict["init_qvel"])
+        return diag_dict
+
+    cur_env: gym.Env = spec.make()
+    try:
+        # Reset twice with different seeds: the second reset must restore the
+        # same initial state regardless of the seed.
+        _test_initial_states(cur_env, seed=24)
+        _test_initial_states(cur_env, seed=10)
+    finally:
+        # Release the environment's resources even when an assertion fails.
+        cur_env.close()