Farama-Foundation · Kallinteris-Andreas · May 29, 2024 · Feb 10, 2024 · Feb 10, 2024 · Feb 11, 2024
diff --git a/gymnasium_robotics/envs/fetch/fetch_env.py b/gymnasium_robotics/envs/fetch/fetch_env.py
@@ -373,11 +373,8 @@ def _render_callback(self):
         self._mujoco.mj_forward(self.model, self.data)
 
     def _reset_sim(self):
-        self.data.time = self.initial_time
-        self.data.qpos[:] = np.copy(self.initial_qpos)
-        self.data.qvel[:] = np.copy(self.initial_qvel)
-        if self.model.na != 0:
-            self.data.act[:] = None
+        # Reset buffers for joint states, actuators, warm-start, control buffers etc.
+        self._mujoco.mj_resetData(self.model, self.data)
 
         # Randomize start position of object.
         if self.has_object:

diff --git a/gymnasium_robotics/envs/robot_env.py b/gymnasium_robotics/envs/robot_env.py
@@ -299,12 +299,8 @@ def _initialize_simulation(self):
         self.initial_qvel = np.copy(self.data.qvel)
 
     def _reset_sim(self):
-        self.data.time = self.initial_time
-        self.data.qpos[:] = np.copy(self.initial_qpos)
-        self.data.qvel[:] = np.copy(self.initial_qvel)
-        if self.model.na != 0:
-            self.data.act[:] = None
-
+        # Reset buffers for joint states, warm-start, control buffers etc.
+        mujoco.mj_resetData(self.model, self.data)
         mujoco.mj_forward(self.model, self.data)
         return super()._reset_sim()
 

diff --git a/tests/test_envs.py b/tests/test_envs.py
@@ -106,6 +106,105 @@ def test_env_determinism_rollout(env_spec: EnvSpec):
     env_2.close()
 
 
+@pytest.mark.parametrize(
+    "env_spec", non_mujoco_py_env_specs, ids=[env.id for env in non_mujoco_py_env_specs]
+)
+def test_same_env_determinism_rollout(env_spec: EnvSpec):
+    """Run two rollouts with a single environment and assert equality.
+
+    This test runs two rollouts of NUM_STEPS steps with one environment
+    reset with the same seed and asserts that:
+
+    - observations after the reset are the same
+    - same actions are sampled by the environment
+    - observations are contained in the observation space
+    - obs, rew, terminated, truncated and info are equals between the two rollouts
+
+    Note:
+        We exclude mujoco_py environments because they are deprecated and their implementation is
+        frozen at this point. They are affected by a subtle bug in their reset method producing
+        slightly different results for the same seed on subsequent resets of the same environment.
+        This will not be fixed and tests are expected to fail.
+    """
+    # Don't check rollout equality if it's a nondeterministic environment.
+    if env_spec.nondeterministic is True:
+        return
+
+    env = env_spec.make(disable_env_checker=True)
+
+    rollout_1 = {
+        "observations": [],
+        "actions": [],
+        "rewards": [],
+        "terminated": [],
+        "truncated": [],
+        "infos": [],
+    }
+    rollout_2 = {
+        "observations": [],
+        "actions": [],
+        "rewards": [],
+        "terminated": [],
+        "truncated": [],
+        "infos": [],
+    }
+
+    # Run two rollouts of the same environment instance
+    for rollout in [rollout_1, rollout_2]:
+        # Reset the environment with the same seed for both rollouts
+        obs, info = env.reset(seed=SEED)
+        env.action_space.seed(SEED)
+        rollout["observations"].append(obs)
+        rollout["infos"].append(info)
+
+        for time_step in range(NUM_STEPS):
+            action = env.action_space.sample()
+
+            obs, rew, terminated, truncated, info = env.step(action)
+            rollout["observations"].append(obs)
+            rollout["actions"].append(action)
+            rollout["rewards"].append(rew)
+            rollout["terminated"].append(terminated)
+            rollout["truncated"].append(truncated)
+            rollout["infos"].append(info)
+            if terminated or truncated:
+                env.reset(seed=SEED)
+
+    for time_step, (obs_1, obs_2) in enumerate(
+        zip(rollout_1["observations"], rollout_2["observations"])
+    ):
+        # -1 because of the initial observation stored on reset
+        time_step = "initial" if time_step == 0 else time_step - 1
+        assert_equals(obs_1, obs_2, f"[{time_step}] ")
+        assert env.observation_space.contains(
+            obs_1
+        )  # obs_2 verified by previous assertion
+    for time_step, (rew_1, rew_2) in enumerate(
+        zip(rollout_1["rewards"], rollout_2["rewards"])
+    ):
+        assert rew_1 == rew_2, f"[{time_step}] reward 1={rew_1}, reward 2={rew_2}"
+    for time_step, (terminated_1, terminated_2) in enumerate(
+        zip(rollout_1["terminated"], rollout_2["terminated"])
+    ):
+        assert (
+            terminated_1 == terminated_2
+        ), f"[{time_step}] terminated 1={terminated_1}, terminated 2={terminated_2}"
+    for time_step, (truncated_1, truncated_2) in enumerate(
+        zip(rollout_1["truncated"], rollout_2["truncated"])
+    ):
+        assert (
+            truncated_1 == truncated_2
+        ), f"[{time_step}] truncated 1={truncated_1}, truncated 2={truncated_2}"
+    for time_step, (info_1, info_2) in enumerate(
+        zip(rollout_1["infos"], rollout_2["infos"])
+    ):
+        # -1 because of the initial info stored on reset
+        time_step = "initial" if time_step == 0 else time_step - 1
+        assert_equals(info_1, info_2, f"[{time_step}] ")
+
+    env.close()
+
+
 @pytest.mark.parametrize(
     "spec", non_mujoco_py_env_specs, ids=[spec.id for spec in non_mujoco_py_env_specs]
 )