From 54043fcd195ed4a7ddcaed33d637d5c3c2bb54b6 Mon Sep 17 00:00:00 2001 From: Martin Schuck Date: Sat, 10 Feb 2024 10:17:42 +0100 Subject: [PATCH 01/17] Fix reproducibility of FetchPickAndPlace-v2 --- gymnasium_robotics/envs/fetch/fetch_env.py | 4 ++++ gymnasium_robotics/envs/robot_env.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/gymnasium_robotics/envs/fetch/fetch_env.py b/gymnasium_robotics/envs/fetch/fetch_env.py index 78c4841f..add8f270 100644 --- a/gymnasium_robotics/envs/fetch/fetch_env.py +++ b/gymnasium_robotics/envs/fetch/fetch_env.py @@ -376,6 +376,10 @@ def _reset_sim(self): self.data.time = self.initial_time self.data.qpos[:] = np.copy(self.initial_qpos) self.data.qvel[:] = np.copy(self.initial_qvel) + self.data.qacc_warmstart[:] = np.copy(self.initial_qacc_warmstart) + self.data.ctrl[:] = np.copy(self.initial_ctrl) + self.data.mocap_pos[:] = np.copy(self.initial_mocap_pos) + self.data.mocap_quat[:] = np.copy(self.initial_mocap_quat) if self.model.na != 0: self.data.act[:] = None diff --git a/gymnasium_robotics/envs/robot_env.py b/gymnasium_robotics/envs/robot_env.py index 1a51a47d..09a10e50 100644 --- a/gymnasium_robotics/envs/robot_env.py +++ b/gymnasium_robotics/envs/robot_env.py @@ -297,6 +297,10 @@ def _initialize_simulation(self): self.initial_time = self.data.time self.initial_qpos = np.copy(self.data.qpos) self.initial_qvel = np.copy(self.data.qvel) + self.initial_ctrl = np.copy(self.data.ctrl) + self.initial_qacc_warmstart = np.copy(self.data.qacc_warmstart) + self.initial_mocap_pos = np.copy(self.data.mocap_pos) + self.initial_mocap_quat = np.copy(self.data.mocap_quat) def _reset_sim(self): self.data.time = self.initial_time From e89b53ecbfec29ee2145f600e9ca06de4fcbb5b0 Mon Sep 17 00:00:00 2001 From: Martin Schuck Date: Sat, 10 Feb 2024 11:02:57 +0100 Subject: [PATCH 02/17] Add tests for same environment rollout determinism --- tests/test_envs.py | 93 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/tests/test_envs.py b/tests/test_envs.py index 6fc05928..9c006581 100644 --- a/tests/test_envs.py +++ b/tests/test_envs.py @@ -106,6 +106,99 @@ def test_env_determinism_rollout(env_spec: EnvSpec): env_2.close() +@pytest.mark.parametrize( + "env_spec", all_testing_env_specs, ids=[env.id for env in all_testing_env_specs] +) +def test_same_env_determinism_rollout(env_spec: EnvSpec): + """Run two rollouts with a single environment and assert equality. + + This test runs two rollouts of NUM_STEPS steps with one environment + reset with the same seed and asserts that: + + - observations after the reset are the same + - same actions are sampled by the environment + - observations are contained in the observation space + - obs, rew, terminated, truncated and info are equals between the two rollouts + """ + # Don't check rollout equality if it's a nondeterministic environment. + if env_spec.nondeterministic is True: + return + + env = env_spec.make(disable_env_checker=True) + + rollout_1 = { + "observations": [], + "actions": [], + "rewards": [], + "terminated": [], + "truncated": [], + "infos": [], + } + rollout_2 = { + "observations": [], + "actions": [], + "rewards": [], + "terminated": [], + "truncated": [], + "infos": [], + } + + # Run two rollouts of the same environment instance + for rollout in [rollout_1, rollout_2]: + # Reset the environment with the same seed for both rollouts + obs, info = env.reset(seed=SEED) + env.action_space.seed(SEED) + rollout["observations"].append(obs) + rollout["infos"].append(info) + + for time_step in range(NUM_STEPS): + action = env.action_space.sample() + + obs, rew, terminated, truncated, info = env.step(action) + rollout["observations"].append(obs) + rollout["actions"].append(action) + rollout["rewards"].append(rew) + rollout["terminated"].append(terminated) + rollout["truncated"].append(truncated) + rollout["infos"].append(info) + if terminated or truncated: + env.reset(seed=SEED) + + for time_step, (obs_1, obs_2) in enumerate( + zip(rollout_1["observations"], rollout_2["observations"]) + ): + # -1 because of the initial observation stored on reset + time_step = "initial" if time_step == 0 else time_step - 1 + assert_equals(obs_1, obs_2, f"[{time_step}] ") + assert env.observation_space.contains( + obs_1 + ) # obs_2 verified by previous assertion + for time_step, (rew_1, rew_2) in enumerate( + zip(rollout_1["rewards"], rollout_2["rewards"]) + ): + assert rew_1 == rew_2, f"[{time_step}] reward 1={rew_1}, reward 2={rew_2}" + for time_step, (terminated_1, terminated_2) in enumerate( + zip(rollout_1["terminated"], rollout_2["terminated"]) + ): + assert ( + terminated_1 == terminated_2 + ), f"[{time_step}] terminated 1={terminated_1}, terminated 2={terminated_2}" + for time_step, (truncated_1, truncated_2) in enumerate( + zip(rollout_1["truncated"], rollout_2["truncated"]) + ): + assert ( + truncated_1 == truncated_2 + ), f"[{time_step}] truncated 1={truncated_1}, truncated 2={truncated_2}" + for time_step, (info_1, info_2) in enumerate( + zip(rollout_1["infos"], rollout_2["infos"]) + ): + # -1 because of the initial info stored on reset + time_step = "initial" if time_step == 0 else time_step - 1 + assert_equals(info_1, info_2, f"[{time_step}] ") + + env.close() + + @pytest.mark.parametrize( "spec", non_mujoco_py_env_specs, ids=[spec.id for spec in non_mujoco_py_env_specs] ) From 5e14ea14d1ac46c60aaed7a00edc4f997510e705 Mon Sep 17 00:00:00 2001 From: Martin Schuck Date: Sun, 11 Feb 2024 10:46:53 +0100 Subject: [PATCH 03/17] Fix reproducibility for all robotics environments - Fix remaining mujoco envs - Fix mujoco_py envs - Simplify reset --- gymnasium_robotics/envs/fetch/fetch_env.py | 7 +++---- gymnasium_robotics/envs/robot_env.py | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/gymnasium_robotics/envs/fetch/fetch_env.py b/gymnasium_robotics/envs/fetch/fetch_env.py index add8f270..53b307f2 100644 --- a/gymnasium_robotics/envs/fetch/fetch_env.py +++ b/gymnasium_robotics/envs/fetch/fetch_env.py @@ -249,6 +249,7 @@ def _viewer_setup(self): setattr(self.viewer.cam, key, value) def _reset_sim(self): + self.sim.reset() # Reset warm-start buffers, control buffers etc. self.sim.set_state(self.initial_state) # Randomize start position of object. @@ -376,10 +377,8 @@ def _reset_sim(self): self.data.time = self.initial_time self.data.qpos[:] = np.copy(self.initial_qpos) self.data.qvel[:] = np.copy(self.initial_qvel) - self.data.qacc_warmstart[:] = np.copy(self.initial_qacc_warmstart) - self.data.ctrl[:] = np.copy(self.initial_ctrl) - self.data.mocap_pos[:] = np.copy(self.initial_mocap_pos) - self.data.mocap_quat[:] = np.copy(self.initial_mocap_quat) + # Reset buffers for warm-start, control buffers etc. + self._mujoco.mj_resetData(self.model, self.data) if self.model.na != 0: self.data.act[:] = None diff --git a/gymnasium_robotics/envs/robot_env.py b/gymnasium_robotics/envs/robot_env.py index 09a10e50..674eea03 100644 --- a/gymnasium_robotics/envs/robot_env.py +++ b/gymnasium_robotics/envs/robot_env.py @@ -297,12 +297,10 @@ def _initialize_simulation(self): self.initial_time = self.data.time self.initial_qpos = np.copy(self.data.qpos) self.initial_qvel = np.copy(self.data.qvel) - self.initial_ctrl = np.copy(self.data.ctrl) - self.initial_qacc_warmstart = np.copy(self.data.qacc_warmstart) - self.initial_mocap_pos = np.copy(self.data.mocap_pos) - self.initial_mocap_quat = np.copy(self.data.mocap_quat) def _reset_sim(self): + # Reset warm-start buffers, control buffers etc. + mujoco.mj_resetData(self.model, self.data) self.data.time = self.initial_time self.data.qpos[:] = np.copy(self.initial_qpos) self.data.qvel[:] = np.copy(self.initial_qvel) @@ -381,6 +379,7 @@ def _initialize_simulation(self): self.initial_state = copy.deepcopy(self.sim.get_state()) def _reset_sim(self): + self.sim.reset() # Reset warm-start buffers, control buffers etc. self.sim.set_state(self.initial_state) self.sim.forward() return super()._reset_sim() From 02ffe14ab321b9f4046dbfe9651b94026986efbc Mon Sep 17 00:00:00 2001 From: Martin Schuck Date: Mon, 12 Feb 2024 21:20:21 +0100 Subject: [PATCH 04/17] Revert changes to mujoco-py. Simplify _reset_sim. Remove mujoco-py envs from tests --- gymnasium_robotics/envs/fetch/fetch_env.py | 8 +------- gymnasium_robotics/envs/robot_env.py | 9 +-------- tests/test_envs.py | 8 +++++++- 3 files changed, 9 insertions(+), 16 deletions(-) diff --git a/gymnasium_robotics/envs/fetch/fetch_env.py b/gymnasium_robotics/envs/fetch/fetch_env.py index 53b307f2..25b5b98b 100644 --- a/gymnasium_robotics/envs/fetch/fetch_env.py +++ b/gymnasium_robotics/envs/fetch/fetch_env.py @@ -249,7 +249,6 @@ def _viewer_setup(self): setattr(self.viewer.cam, key, value) def _reset_sim(self): - self.sim.reset() # Reset warm-start buffers, control buffers etc. self.sim.set_state(self.initial_state) # Randomize start position of object. @@ -374,13 +373,8 @@ def _render_callback(self): self._mujoco.mj_forward(self.model, self.data) def _reset_sim(self): - self.data.time = self.initial_time - self.data.qpos[:] = np.copy(self.initial_qpos) - self.data.qvel[:] = np.copy(self.initial_qvel) - # Reset buffers for warm-start, control buffers etc. + # Reset buffers for joint states, actuators, warm-start, control buffers etc. self._mujoco.mj_resetData(self.model, self.data) - if self.model.na != 0: - self.data.act[:] = None # Randomize start position of object. if self.has_object: diff --git a/gymnasium_robotics/envs/robot_env.py b/gymnasium_robotics/envs/robot_env.py index 674eea03..e23dfc69 100644 --- a/gymnasium_robotics/envs/robot_env.py +++ b/gymnasium_robotics/envs/robot_env.py @@ -299,14 +299,8 @@ def _initialize_simulation(self): self.initial_qvel = np.copy(self.data.qvel) def _reset_sim(self): - # Reset warm-start buffers, control buffers etc. + # Reset buffers for joint states, warm-start, control buffers etc. mujoco.mj_resetData(self.model, self.data) - self.data.time = self.initial_time - self.data.qpos[:] = np.copy(self.initial_qpos) - self.data.qvel[:] = np.copy(self.initial_qvel) - if self.model.na != 0: - self.data.act[:] = None - mujoco.mj_forward(self.model, self.data) return super()._reset_sim() @@ -379,7 +373,6 @@ def _initialize_simulation(self): self.initial_state = copy.deepcopy(self.sim.get_state()) def _reset_sim(self): - self.sim.reset() # Reset warm-start buffers, control buffers etc. self.sim.set_state(self.initial_state) self.sim.forward() return super()._reset_sim() diff --git a/tests/test_envs.py b/tests/test_envs.py index 9c006581..d1418e24 100644 --- a/tests/test_envs.py +++ b/tests/test_envs.py @@ -107,7 +107,7 @@ def test_env_determinism_rollout(env_spec: EnvSpec): @pytest.mark.parametrize( - "env_spec", all_testing_env_specs, ids=[env.id for env in all_testing_env_specs] + "env_spec", non_mujoco_py_env_specs, ids=[env.id for env in non_mujoco_py_env_specs] ) def test_same_env_determinism_rollout(env_spec: EnvSpec): """Run two rollouts with a single environment and assert equality. @@ -119,6 +119,12 @@ def test_same_env_determinism_rollout(env_spec: EnvSpec): - same actions are sampled by the environment - observations are contained in the observation space - obs, rew, terminated, truncated and info are equals between the two rollouts + + Note: + We exclude mujoco_py environments because they are deprecated and their implementation is + frozen at this point. They are affected by a subtle bug in their reset method producing + slightly different results for the same seed on subsequent resets of the same environment. + This will not be fixed and tests are expected to fail. """ # Don't check rollout equality if it's a nondeterministic environment. if env_spec.nondeterministic is True: From 11db01aa0c2032e81880fb9cdc658d087ee62a20 Mon Sep 17 00:00:00 2001 From: Martin Schuck Date: Thu, 15 Feb 2024 08:45:26 +0100 Subject: [PATCH 05/17] Remove unnecessary forward call --- gymnasium_robotics/envs/robot_env.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gymnasium_robotics/envs/robot_env.py b/gymnasium_robotics/envs/robot_env.py index e23dfc69..640e0f5f 100644 --- a/gymnasium_robotics/envs/robot_env.py +++ b/gymnasium_robotics/envs/robot_env.py @@ -301,7 +301,6 @@ def _initialize_simulation(self): def _reset_sim(self): # Reset buffers for joint states, warm-start, control buffers etc. mujoco.mj_resetData(self.model, self.data) - mujoco.mj_forward(self.model, self.data) return super()._reset_sim() def render(self): From 9dd88f7078e3a40af3b4dba4902690c6636302f4 Mon Sep 17 00:00:00 2001 From: Martin Schuck Date: Thu, 29 Feb 2024 11:08:34 +0100 Subject: [PATCH 06/17] Bump versions of affected environments. Change documentation to reflect new version number. Simplify reset test. --- README.md | 2 +- docs/content/multi-goal_api.md | 2 +- docs/envs/fetch/index.md | 8 +- docs/envs/shadow_dexterous_hand/index.md | 2 +- docs/index.md | 2 +- gymnasium_robotics/__init__.py | 10 +-- .../envs/fetch/pick_and_place.py | 7 +- gymnasium_robotics/envs/fetch/push.py | 7 +- gymnasium_robotics/envs/fetch/reach.py | 9 +- gymnasium_robotics/envs/fetch/slide.py | 7 +- .../envs/shadow_dexterous_hand/reach.py | 9 +- tests/test_envs.py | 85 +------------------ 12 files changed, 39 insertions(+), 111 deletions(-) diff --git a/README.md b/README.md index c8d0031d..60d9ec7b 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ goal, e.g. state derived from the simulation. ```python import gymnasium as gym -env = gym.make("FetchReach-v2") +env = gym.make("FetchReach-v3") env.reset() obs, reward, terminated, truncated, info = env.step(env.action_space.sample()) diff --git a/docs/content/multi-goal_api.md b/docs/content/multi-goal_api.md index d4458dfa..c9b65ad2 100644 --- a/docs/content/multi-goal_api.md +++ b/docs/content/multi-goal_api.md @@ -25,7 +25,7 @@ import gymnasium_robotics gym.register_envs(gymnasium_robotics) -env = gym.make("FetchReach-v2") +env = gym.make("FetchReach-v3") env.reset() obs, reward, terminated, truncated, info = env.step(env.action_space.sample()) diff --git a/docs/envs/fetch/index.md b/docs/envs/fetch/index.md index 380ad51d..f89d1c9e 100644 --- a/docs/envs/fetch/index.md +++ b/docs/envs/fetch/index.md @@ -7,10 +7,10 @@ lastpage: The Fetch environments are based on the 7-DoF [Fetch Mobile Manipulator](https://fetchrobotics.com/) arm, with a two-fingered parallel gripper attached to it. The main environment tasks are the following: -* `FetchReach-v2`: Fetch has to move its end-effector to the desired goal position. -* `FetchPush-v2`: Fetch has to move a box by pushing it until it reaches a desired goal position. -* `FetchSlide-v2`: Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal. -* `FetchPickAndPlace-v2`: Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table. +* `FetchReach-v3`: Fetch has to move its end-effector to the desired goal position. +* `FetchPush-v3`: Fetch has to move a box by pushing it until it reaches a desired goal position. +* `FetchSlide-v3`: Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal. +* `FetchPickAndPlace-v3`: Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table. ```{raw} html :file: list.html diff --git a/docs/envs/shadow_dexterous_hand/index.md b/docs/envs/shadow_dexterous_hand/index.md index ae93485e..aa3f296a 100644 --- a/docs/envs/shadow_dexterous_hand/index.md +++ b/docs/envs/shadow_dexterous_hand/index.md @@ -7,7 +7,7 @@ lastpage: These environments are based on the [Shadow Dexterous Hand](https://www.shadowrobot.com/), 5 which is an anthropomorphic robotic hand with 24 degrees of freedom. Of those 24 joints, 20 can be controlled independently whereas the remaining ones are coupled joints. -* `HandReach-v1`: ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm. +* `HandReach-v2`: ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm. * `HandManipulateBlock-v1`: ShadowHand has to manipulate a block until it achieves a desired goal position and rotation. * `HandManipulateEgg-v1`: ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation. * `HandManipulatePen-v1`: ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation. diff --git a/docs/index.md b/docs/index.md index f40b7a63..50c667b8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -56,7 +56,7 @@ import gymnasium_robotics gym.register_envs(gymnasium_robotics) -env = gym.make("FetchPickAndPlace-v2", render_mode="human") +env = gym.make("FetchPickAndPlace-v3", render_mode="human") observation, info = env.reset(seed=42) for _ in range(1000): action = policy(observation) # User-defined policy function diff --git a/gymnasium_robotics/__init__.py b/gymnasium_robotics/__init__.py index 49c86374..5bd5dca1 100644 --- a/gymnasium_robotics/__init__.py +++ b/gymnasium_robotics/__init__.py @@ -30,7 +30,7 @@ def _merge(a, b): ) register( - id=f"FetchSlide{suffix}-v2", + id=f"FetchSlide{suffix}-v3", entry_point="gymnasium_robotics.envs.fetch.slide:MujocoFetchSlideEnv", kwargs=kwargs, max_episode_steps=50, @@ -44,7 +44,7 @@ def _merge(a, b): ) register( - id=f"FetchPickAndPlace{suffix}-v2", + id=f"FetchPickAndPlace{suffix}-v3", entry_point="gymnasium_robotics.envs.fetch.pick_and_place:MujocoFetchPickAndPlaceEnv", kwargs=kwargs, max_episode_steps=50, @@ -58,7 +58,7 @@ def _merge(a, b): ) register( - id=f"FetchReach{suffix}-v2", + id=f"FetchReach{suffix}-v3", entry_point="gymnasium_robotics.envs.fetch.reach:MujocoFetchReachEnv", kwargs=kwargs, max_episode_steps=50, @@ -72,7 +72,7 @@ def _merge(a, b): ) register( - id=f"FetchPush{suffix}-v2", + id=f"FetchPush{suffix}-v3", entry_point="gymnasium_robotics.envs.fetch.push:MujocoFetchPushEnv", kwargs=kwargs, max_episode_steps=50, @@ -87,7 +87,7 @@ def _merge(a, b): ) register( - id=f"HandReach{suffix}-v1", + id=f"HandReach{suffix}-v2", entry_point="gymnasium_robotics.envs.shadow_dexterous_hand.reach:MujocoHandReachEnv", kwargs=kwargs, max_episode_steps=50, diff --git a/gymnasium_robotics/envs/fetch/pick_and_place.py b/gymnasium_robotics/envs/fetch/pick_and_place.py index 74b5ed21..1ceff302 100644 --- a/gymnasium_robotics/envs/fetch/pick_and_place.py +++ b/gymnasium_robotics/envs/fetch/pick_and_place.py @@ -88,7 +88,7 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle): - *sparse*: the returned reward can have two values: `-1` if the block hasn't reached its final target position, and `0` if the block is in the final target position (the block is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m). - *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal. - To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPickAndPlace-v2`. However, for `dense` reward the id must be modified to `FetchPickAndPlaceDense-v2` and initialized as follows: + To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPickAndPlace-3`. However, for `dense` reward the id must be modified to `FetchPickAndPlaceDense-v3` and initialized as follows: ```python import gymnasium as gym @@ -96,7 +96,7 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle): gym.register_envs(gymnasium_robotics) - env = gym.make('FetchPickAndPlaceDense-v2') + env = gym.make('FetchPickAndPlaceDense-v3') ``` ## Starting State @@ -125,11 +125,12 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle): gym.register_envs(gymnasium_robotics) - env = gym.make('FetchPickAndPlace-v2', max_episode_steps=100) + env = gym.make('FetchPickAndPlace-v3', max_episode_steps=100) ``` ## Version History + * v3: Fix slight differences between rollouts of the same environment when reset with the same seed. * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. * v1: the environment depends on `mujoco_py` which is no longer maintained. """ diff --git a/gymnasium_robotics/envs/fetch/push.py b/gymnasium_robotics/envs/fetch/push.py index 10a87282..feb9ef52 100644 --- a/gymnasium_robotics/envs/fetch/push.py +++ b/gymnasium_robotics/envs/fetch/push.py @@ -116,7 +116,7 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle): - *sparse*: the returned reward can have two values: `-1` if the block hasn't reached its final target position, and `0` if the block is in the final target position (the block is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m). - *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal. - To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPush-v2`. However, for `dense` reward the id must be modified to `FetchPush-v2` and initialized as follows: + To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPush-v3`. However, for `dense` reward the id must be modified to `FetchPushDense-v3` and initialized as follows: ```python import gymnasium as gym @@ -124,7 +124,7 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle): gym.register_envs(gymnasium_robotics) - env = gym.make('FetchPushDense-v2') + env = gym.make('FetchPushDense-v3') ``` ## Starting State @@ -153,11 +153,12 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle): gym.register_envs(gymnasium_robotics) - env = gym.make('FetchPush-v2', max_episode_steps=100) + env = gym.make('FetchPush-v3', max_episode_steps=100) ``` ## Version History + * v3: Fix slight differences between rollouts of the same environment when reset with the same seed. * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. * v1: the environment depends on `mujoco_py` which is no longer maintained. """ diff --git a/gymnasium_robotics/envs/fetch/reach.py b/gymnasium_robotics/envs/fetch/reach.py index 962fd74d..bebe82dd 100644 --- a/gymnasium_robotics/envs/fetch/reach.py +++ b/gymnasium_robotics/envs/fetch/reach.py @@ -77,8 +77,8 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle): the end effector and the goal is lower than 0.05 m). - *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal. - To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchReach-v2`. However, for `dense` - reward the id must be modified to `FetchReachDense-v2` and initialized as follows: + To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchReach-v3`. However, for `dense` + reward the id must be modified to `FetchReachDense-v3` and initialized as follows: ```python import gymnasium as gym @@ -86,7 +86,7 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle): gym.register_envs(gymnasium_robotics) - env = gym.make('FetchReachDense-v2') + env = gym.make('FetchReachDense-v3') ``` ## Starting State @@ -111,11 +111,12 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle): gym.register_envs(gymnasium_robotics) - env = gym.make('FetchReach-v2', max_episode_steps=100) + env = gym.make('FetchReach-v3', max_episode_steps=100) ``` ## Version History + * v3: Fix slight differences between rollouts of the same environment when reset with the same seed. * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. * v1: the environment depends on `mujoco_py` which is no longer maintained. """ diff --git a/gymnasium_robotics/envs/fetch/slide.py b/gymnasium_robotics/envs/fetch/slide.py index 1381b4a5..47197823 100644 --- a/gymnasium_robotics/envs/fetch/slide.py +++ b/gymnasium_robotics/envs/fetch/slide.py @@ -116,7 +116,7 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle): - *sparse*: the returned reward can have two values: `-1` if the puck hasn't reached its final target position, and `0` if the puck is in the final target position (the puck is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m). - *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal. - To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchSlide-v2`. However, for `dense` reward the id must be modified to `FetchSlideDense-v2` and initialized as follows: + To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchSlide-v3`. However, for `dense` reward the id must be modified to `FetchSlideDense-v3` and initialized as follows: ```python import gymnasium as gym @@ -124,7 +124,7 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle): gym.register_envs(gymnasium_robotics) - env = gym.make('FetchSlideDense-v2') + env = gym.make('FetchSlideDense-v3') ``` ## Starting State @@ -152,11 +152,12 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle): gym.register_envs(gymnasium_robotics) - env = gym.make('FetchSlide-v2', max_episode_steps=100) + env = gym.make('FetchSlide-v3', max_episode_steps=100) ``` ## Version History + * v3: Fix slight differences between rollouts of the same environment when reset with the same seed. * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. * v1: the environment depends on `mujoco_py` which is no longer maintained. """ diff --git a/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py b/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py index 984bd4d6..67a1682a 100644 --- a/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py +++ b/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py @@ -306,13 +306,13 @@ class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)): the achieved goal vector and the desired goal vector is lower than 0.01). - *dense*: the returned reward is the negative 2-norm distance between the achieved goal vector and the desired goal vector. - To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `HandReach-v1`. - However, for `dense` reward the id must be modified to `HandReachDense-v1` and initialized as follows: + To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `HandReach-v2`. + However, for `dense` reward the id must be modified to `HandReachDense-v2` and initialized as follows: ``` import gymnasium as gym - env = gym.make('HandReachDense-v1') + env = gym.make('HandReachDense-v2') ``` ## Starting State @@ -383,11 +383,12 @@ class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)): ``` import gymnasium as gym - env = gym.make('HandReach-v1', max_episode_steps=100) + env = gym.make('HandReach-v2', max_episode_steps=100) ``` ## Version History + * v2: Fix slight differences between rollouts of the same environment when reset with the same seed. * v1: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. * v0: the environment depends on `mujoco_py` which is no longer maintained. diff --git a/tests/test_envs.py b/tests/test_envs.py index 907a1a33..b018f896 100644 --- a/tests/test_envs.py +++ b/tests/test_envs.py @@ -3,6 +3,7 @@ import gymnasium as gym import pytest +from gymnasium.envs.mujoco.utils import check_mujoco_reset_state from gymnasium.envs.registration import EnvSpec from gymnasium.error import Error from gymnasium.utils.env_checker import check_env, data_equivalence @@ -109,16 +110,8 @@ def test_env_determinism_rollout(env_spec: EnvSpec): @pytest.mark.parametrize( "env_spec", non_mujoco_py_env_specs, ids=[env.id for env in non_mujoco_py_env_specs] ) -def test_same_env_determinism_rollout(env_spec: EnvSpec): - """Run two rollouts with a single environment and assert equality. - - This test runs two rollouts of NUM_STEPS steps with one environment - reset with the same seed and asserts that: - - - observations after the reset are the same - - same actions are sampled by the environment - - observations are contained in the observation space - - obs, rew, terminated, truncated and info are equals between the two rollouts +def test_mujoco_reset_state_seeding(env_spec: EnvSpec): + """Check if the reset method of mujoco environments is deterministic for the same seed. Note: We exclude mujoco_py environments because they are deprecated and their implementation is @@ -132,77 +125,7 @@ def test_same_env_determinism_rollout(env_spec: EnvSpec): env = env_spec.make(disable_env_checker=True) - rollout_1 = { - "observations": [], - "actions": [], - "rewards": [], - "terminated": [], - "truncated": [], - "infos": [], - } - rollout_2 = { - "observations": [], - "actions": [], - "rewards": [], - "terminated": [], - "truncated": [], - "infos": [], - } - - # Run two rollouts of the same environment instance - for rollout in [rollout_1, rollout_2]: - # Reset the environment with the same seed for both rollouts - obs, info = env.reset(seed=SEED) - env.action_space.seed(SEED) - rollout["observations"].append(obs) - rollout["infos"].append(info) - - for time_step in range(NUM_STEPS): - action = env.action_space.sample() - - obs, rew, terminated, truncated, info = env.step(action) - rollout["observations"].append(obs) - rollout["actions"].append(action) - rollout["rewards"].append(rew) - rollout["terminated"].append(terminated) - rollout["truncated"].append(truncated) - rollout["infos"].append(info) - if terminated or truncated: - env.reset(seed=SEED) - - for time_step, (obs_1, obs_2) in enumerate( - zip(rollout_1["observations"], rollout_2["observations"]) - ): - # -1 because of the initial observation stored on reset - time_step = "initial" if time_step == 0 else time_step - 1 - assert_equals(obs_1, obs_2, f"[{time_step}] ") - assert env.observation_space.contains( - obs_1 - ) # obs_2 verified by previous assertion - for time_step, (rew_1, rew_2) in enumerate( - zip(rollout_1["rewards"], rollout_2["rewards"]) - ): - assert rew_1 == rew_2, f"[{time_step}] reward 1={rew_1}, reward 2={rew_2}" - for time_step, (terminated_1, terminated_2) in enumerate( - zip(rollout_1["terminated"], rollout_2["terminated"]) - ): - assert ( - terminated_1 == terminated_2 - ), f"[{time_step}] terminated 1={terminated_1}, terminated 2={terminated_2}" - for time_step, (truncated_1, truncated_2) in enumerate( - zip(rollout_1["truncated"], rollout_2["truncated"]) - ): - assert ( - truncated_1 == truncated_2 - ), f"[{time_step}] truncated 1={truncated_1}, truncated 2={truncated_2}" - for time_step, (info_1, info_2) in enumerate( - zip(rollout_1["infos"], rollout_2["infos"]) - ): - # -1 because of the initial info stored on reset - time_step = "initial" if time_step == 0 else time_step - 1 - assert_equals(info_1, info_2, f"[{time_step}] ") - - env.close() + check_mujoco_reset_state(env) @pytest.mark.parametrize( From c6b54d6905f9c44086601c1c4f8be0c84b57541d Mon Sep 17 00:00:00 2001 From: Martin Schuck Date: Fri, 1 Mar 2024 10:16:02 +0100 Subject: [PATCH 07/17] Fix typo, clarify version change with issue --- gymnasium_robotics/envs/fetch/pick_and_place.py | 4 ++-- gymnasium_robotics/envs/fetch/push.py | 2 +- gymnasium_robotics/envs/fetch/reach.py | 2 +- gymnasium_robotics/envs/fetch/slide.py | 2 +- gymnasium_robotics/envs/shadow_dexterous_hand/reach.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gymnasium_robotics/envs/fetch/pick_and_place.py b/gymnasium_robotics/envs/fetch/pick_and_place.py index 1ceff302..17369d90 100644 --- a/gymnasium_robotics/envs/fetch/pick_and_place.py +++ b/gymnasium_robotics/envs/fetch/pick_and_place.py @@ -88,7 +88,7 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle): - *sparse*: the returned reward can have two values: `-1` if the block hasn't reached its final target position, and `0` if the block is in the final target position (the block is considered to have reached the goal if the Euclidean distance between both is lower than 0.05 m). - *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal. - To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPickAndPlace-3`. However, for `dense` reward the id must be modified to `FetchPickAndPlaceDense-v3` and initialized as follows: + To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `FetchPickAndPlace-v3`. However, for `dense` reward the id must be modified to `FetchPickAndPlaceDense-v3` and initialized as follows: ```python import gymnasium as gym @@ -130,7 +130,7 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle): ## Version History - * v3: Fix slight differences between rollouts of the same environment when reset with the same seed. + * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)). * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. * v1: the environment depends on `mujoco_py` which is no longer maintained. """ diff --git a/gymnasium_robotics/envs/fetch/push.py b/gymnasium_robotics/envs/fetch/push.py index feb9ef52..e9698f54 100644 --- a/gymnasium_robotics/envs/fetch/push.py +++ b/gymnasium_robotics/envs/fetch/push.py @@ -158,7 +158,7 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle): ## Version History - * v3: Fix slight differences between rollouts of the same environment when reset with the same seed. + * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)). * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. * v1: the environment depends on `mujoco_py` which is no longer maintained. """ diff --git a/gymnasium_robotics/envs/fetch/reach.py b/gymnasium_robotics/envs/fetch/reach.py index bebe82dd..68eb74ce 100644 --- a/gymnasium_robotics/envs/fetch/reach.py +++ b/gymnasium_robotics/envs/fetch/reach.py @@ -116,7 +116,7 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle): ## Version History - * v3: Fix slight differences between rollouts of the same environment when reset with the same seed. + * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)). * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. * v1: the environment depends on `mujoco_py` which is no longer maintained. """ diff --git a/gymnasium_robotics/envs/fetch/slide.py b/gymnasium_robotics/envs/fetch/slide.py index 47197823..93842e15 100644 --- a/gymnasium_robotics/envs/fetch/slide.py +++ b/gymnasium_robotics/envs/fetch/slide.py @@ -157,7 +157,7 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle): ## Version History - * v3: Fix slight differences between rollouts of the same environment when reset with the same seed. + * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)). * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. * v1: the environment depends on `mujoco_py` which is no longer maintained. """ diff --git a/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py b/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py index 67a1682a..e353f9e0 100644 --- a/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py +++ b/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py @@ -388,7 +388,7 @@ class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)): ## Version History - * v2: Fix slight differences between rollouts of the same environment when reset with the same seed. + * v2: Fixed bug: Environments did not yield reproducible results if the same instance was reset with the same seed after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)). * v1: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. * v0: the environment depends on `mujoco_py` which is no longer maintained. From b451ba79858ee39f6f70b916e7ccb2402969b0a4 Mon Sep 17 00:00:00 2001 From: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Date: Fri, 24 May 2024 07:38:12 +0300 Subject: [PATCH 08/17] update to gymnasium==1.0a2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4c4961d5..77784ab1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ classifiers = [ dependencies = [ "mujoco>=2.2.0", "numpy>=1.21.0", - "gymnasium>=1.0.0a1", + "gymnasium>=1.0.0a2", "PettingZoo>=1.23.0", "Jinja2>=3.0.3", "imageio" From e0f3fb8f20c0bf67bcab9aa95520589cd16fcb1d Mon Sep 17 00:00:00 2001 From: David Leins Date: Wed, 29 May 2024 10:51:14 +0200 Subject: [PATCH 09/17] Add asserts for mj_id2name calls in mujoco_utils --- gymnasium_robotics/utils/mujoco_utils.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/gymnasium_robotics/utils/mujoco_utils.py b/gymnasium_robotics/utils/mujoco_utils.py index 07e7b485..e26407ad 100644 --- a/gymnasium_robotics/utils/mujoco_utils.py +++ b/gymnasium_robotics/utils/mujoco_utils.py @@ -130,6 +130,7 @@ def get_site_jacr(model, data, site_id): def set_joint_qpos(model, data, name, value): """Set the joint positions (qpos) of the model.""" joint_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_JOINT, name) + assert joint_id != -1, f"Joint with name '{name}' is not part of the model!" joint_type = model.jnt_type[joint_id] joint_addr = model.jnt_qposadr[joint_id] @@ -154,6 +155,7 @@ def set_joint_qpos(model, data, name, value): def set_joint_qvel(model, data, name, value): """Set the joints linear and angular (qvel) of the model.""" joint_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_JOINT, name) + assert joint_id != -1, f"Joint with name '{name}' is not part of the model!" joint_type = model.jnt_type[joint_id] joint_addr = model.jnt_dofadr[joint_id] @@ -178,6 +180,7 @@ def set_joint_qvel(model, data, name, value): def get_joint_qpos(model, data, name): """Return the joints position and orientation (qpos) of the model.""" joint_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_JOINT, name) + assert joint_id != -1, f"Joint with name '{name}' is not part of the model!" joint_type = model.jnt_type[joint_id] joint_addr = model.jnt_qposadr[joint_id] @@ -198,6 +201,7 @@ def get_joint_qpos(model, data, name): def get_joint_qvel(model, data, name): """Return the joints linear and angular velocities (qvel) of the model.""" joint_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_JOINT, name) + assert joint_id != -1, f"Joint with name '{name}' is not part of the model!" joint_type = model.jnt_type[joint_id] joint_addr = model.jnt_dofadr[joint_id] @@ -217,11 +221,13 @@ def get_joint_qvel(model, data, name): def get_site_xpos(model, data, name): site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, name) + assert site_id != -1, f"Site with name '{name}' is not part of the model!" return data.site_xpos[site_id] def get_site_xvelp(model, data, name): site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, name) + assert site_id != -1, f"Site with name '{name}' is not part of the model!" jacp = get_site_jacp(model, data, site_id) xvelp = jacp @ data.qvel return xvelp @@ -229,6 +235,7 @@ def get_site_xvelp(model, data, name): def get_site_xvelr(model, data, name): site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, name) + assert site_id != -1, f"Site with name '{name}' is not part of the model!" jacp = get_site_jacr(model, data, site_id) xvelp = jacp @ data.qvel return xvelp @@ -236,18 +243,21 @@ def get_site_xvelr(model, data, name): def set_mocap_pos(model, data, name, value): body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, name) + assert body_id != -1, f"Body with name '{name}' is not part of the model!" mocap_id = model.body_mocapid[body_id] data.mocap_pos[mocap_id] = value def set_mocap_quat(model: MjModel, data: MjData, name: str, value): body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, name) + assert body_id != -1, f"Body with name '{name}' is not part of the model!" mocap_id = model.body_mocapid[body_id] data.mocap_quat[mocap_id] = value def get_site_xmat(model: MjModel, data: MjData, name: str): site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, name) + assert site_id != -1, f"Site with name '{name}' is not part of the model!" return data.site_xmat[site_id].reshape(3, 3) From dace06bca3976312df71c5ce5bfc7e23329a54c3 Mon Sep 17 00:00:00 2001 From: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Date: Wed, 29 May 2024 17:31:28 +0300 Subject: [PATCH 10/17] update changelog --- gymnasium_robotics/envs/fetch/pick_and_place.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gymnasium_robotics/envs/fetch/pick_and_place.py b/gymnasium_robotics/envs/fetch/pick_and_place.py index 17369d90..37f9a75b 100644 --- a/gymnasium_robotics/envs/fetch/pick_and_place.py +++ b/gymnasium_robotics/envs/fetch/pick_and_place.py @@ -130,7 +130,7 @@ class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle): ## Version History - * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)). + * v3: Fixed bug: `env.reset()` not properly resetting the internal state. Fetch environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)). * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. * v1: the environment depends on `mujoco_py` which is no longer maintained. """ From f0c1a32177f3b90ad0aa9ece71e42dc9e45947d0 Mon Sep 17 00:00:00 2001 From: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Date: Wed, 29 May 2024 17:32:07 +0300 Subject: [PATCH 11/17] update changelog --- gymnasium_robotics/envs/fetch/push.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gymnasium_robotics/envs/fetch/push.py b/gymnasium_robotics/envs/fetch/push.py index e9698f54..5420be0b 100644 --- a/gymnasium_robotics/envs/fetch/push.py +++ b/gymnasium_robotics/envs/fetch/push.py @@ -157,8 +157,7 @@ class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle): ``` ## Version History - - * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)). + * v3: Fixed bug: `env.reset()` not properly resetting the internal state. Fetch environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)). * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. * v1: the environment depends on `mujoco_py` which is no longer maintained. """ From 5f6c5e2500560c63348eb64b2cbb3830c19f27a9 Mon Sep 17 00:00:00 2001 From: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Date: Wed, 29 May 2024 17:32:33 +0300 Subject: [PATCH 12/17] Update reach.py --- gymnasium_robotics/envs/fetch/reach.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gymnasium_robotics/envs/fetch/reach.py b/gymnasium_robotics/envs/fetch/reach.py index 68eb74ce..cb227f9f 100644 --- a/gymnasium_robotics/envs/fetch/reach.py +++ b/gymnasium_robotics/envs/fetch/reach.py @@ -115,8 +115,7 @@ class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle): ``` ## Version History - - * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)). + * v3: Fixed bug: `env.reset()` not properly resetting the internal state. Fetch environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)). * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. * v1: the environment depends on `mujoco_py` which is no longer maintained. """ From 7ab8f631560274635cedd59d880c9b2793f635c5 Mon Sep 17 00:00:00 2001 From: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Date: Wed, 29 May 2024 17:33:18 +0300 Subject: [PATCH 13/17] Update slide.py --- gymnasium_robotics/envs/fetch/slide.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gymnasium_robotics/envs/fetch/slide.py b/gymnasium_robotics/envs/fetch/slide.py index 93842e15..d4898aed 100644 --- a/gymnasium_robotics/envs/fetch/slide.py +++ b/gymnasium_robotics/envs/fetch/slide.py @@ -156,8 +156,7 @@ class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle): ``` ## Version History - - * v3: Fixed bug: Environments did not yield reproducible results if the same instance was reset with seeding after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)). + * v3: Fixed bug: `env.reset()` not properly resetting the internal state. Fetch environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)). * v2: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. * v1: the environment depends on `mujoco_py` which is no longer maintained. """ From 5da9ab63814e2ae82aa5aaa62ff98ac7ddd37747 Mon Sep 17 00:00:00 2001 From: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Date: Wed, 29 May 2024 17:37:12 +0300 Subject: [PATCH 14/17] Update reach.py --- gymnasium_robotics/envs/shadow_dexterous_hand/reach.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py b/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py index e353f9e0..377b7e41 100644 --- a/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py +++ b/gymnasium_robotics/envs/shadow_dexterous_hand/reach.py @@ -387,8 +387,7 @@ class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)): ``` ## Version History - - * v2: Fixed bug: Environments did not yield reproducible results if the same instance was reset with the same seed after several steps. MuJoCo environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)). + * v2: Fixed bug: `env.reset()` not properly resetting the internal state. Fetch environments now properly reset their state (related [GitHub issue](https://github.com/Farama-Foundation/Gymnasium-Robotics/issues/207)). * v1: the environment depends on the newest [mujoco python bindings](https://mujoco.readthedocs.io/en/latest/python.html) maintained by the MuJoCo team in Deepmind. * v0: the environment depends on `mujoco_py` which is no longer maintained. From 58175c3a90928c43eee8b630415bf85bb5f4796e Mon Sep 17 00:00:00 2001 From: Timo Friedl Date: Fri, 12 Jul 2024 14:38:32 +0200 Subject: [PATCH 15/17] Fixed typo --- gymnasium_robotics/envs/robot_env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gymnasium_robotics/envs/robot_env.py b/gymnasium_robotics/envs/robot_env.py index 640e0f5f..a4d1a01d 100644 --- a/gymnasium_robotics/envs/robot_env.py +++ b/gymnasium_robotics/envs/robot_env.py @@ -190,7 +190,7 @@ def reset( def _mujoco_step(self, action): """Advance the mujoco simulation. - Override depending on the python binginds, either mujoco or mujoco_py + Override depending on the python bindings, either mujoco or mujoco_py """ raise NotImplementedError From bfa3cd40d9bc8437c5446d582baad31098f017c4 Mon Sep 17 00:00:00 2001 From: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Date: Thu, 25 Jul 2024 12:52:37 +0300 Subject: [PATCH 16/17] Update README.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 60d9ec7b..d1a2f9cc 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ We support and test for Python 3.8, 3.9, 3.10 and 3.11 on Linux and macOS. We wi * [Fetch](https://robotics.farama.org/envs/fetch/) - A collection of environments with a 7-DoF robot arm that has to perform manipulation tasks such as Reach, Push, Slide or Pick and Place. * [Shadow Dexterous Hand](https://robotics.farama.org/envs/shadow_dexterous_hand/) - A collection of environments with a 24-DoF anthropomorphic robotic hand that has to perform object manipulation tasks with a cube, egg-object, or pen. There are variations of these environments that also include data from 92 touch sensors in the observation space. +* [MaMuJoCo](https://robotics.farama.org/envs/MaMuJoCo/) - A collection of multi agent factorizations of the [Gymnasium/MuJoCo](https://gymnasium.farama.org/environments/mujoco/) environments and a framework for factorizing robotic environments, uses the [pettingzoo.ParallelEnv](https://pettingzoo.farama.org/api/parallel/) API. The [D4RL](https://github.com/Farama-Foundation/D4RL) environments are now available. These environments have been refactored and may not have the same action/observation spaces as the original, please read their documentation: @@ -32,8 +33,6 @@ The [D4RL](https://github.com/Farama-Foundation/D4RL) environments are now avail The different tasks involve hammering a nail, opening a door, twirling a pen, or picking up and moving a ball. * [Franka Kitchen](https://robotics.farama.org/envs/franka_kitchen/) - Multitask environment in which a 9-DoF Franka robot is placed in a kitchen containing several common household items. The goal of each task is to interact with the items in order to reach a desired goal configuration. -* [MaMuJoCo](https://robotics.farama.org/envs/MaMuJoCo/) - A collection of multi agent factorizations of the [Gymnasium/MuJoCo](https://gymnasium.farama.org/environments/mujoco/) environments and a framework for factorizing robotic environments, uses the [pettingzoo.ParallelEnv](https://pettingzoo.farama.org/api/parallel/) API. - **WIP**: generate new `D4RL` environment datasets with [Minari](https://github.com/Farama-Foundation/Minari). ## Multi-goal API From 629c589541af3686e2bdb5f25502607da238ef72 Mon Sep 17 00:00:00 2001 From: Kallinteris Andreas <30759571+Kallinteris-Andreas@users.noreply.github.com> Date: Tue, 30 Jul 2024 00:40:02 +0300 Subject: [PATCH 17/17] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 77784ab1..12fb458e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ 'Topic :: Scientific/Engineering :: Artificial Intelligence', ] dependencies = [ - "mujoco>=2.2.0", + "mujoco>=2.2.0, <3.2.0", "numpy>=1.21.0", "gymnasium>=1.0.0a2", "PettingZoo>=1.23.0",