Add reset_target argument to antmaze environments #167

Merged
5 changes: 4 additions & 1 deletion gymnasium_robotics/envs/maze/ant_maze_v4.py
@@ -183,7 +183,8 @@ class AntMazeEnv(MazeEnv, EzPickle):
### Arguments

* `maze_map` - Optional argument to initialize the environment with a custom maze map.
* `continuing_task` - If set to `True` the episode won't be terminated when reaching the goal, instead a new goal location will be generated. If `False` the environment is terminated when the ant reaches the final goal.
* `continuing_task` - If set to `True` the episode won't be terminated when reaching the goal; instead a new goal location will be generated (unless the `reset_target` argument is set to `False`). If `False` the environment is terminated when the ant reaches the final goal.
* `reset_target` - If set to `True` and the argument `continuing_task` is also `True`, a new goal location will be generated each time the ant reaches the current goal. If `False` the goal location is kept fixed and no new goal is generated when it is reached.
* `use_contact_forces` - If `True` contact forces of the ant are included in the `observation`.

Note that the maximum number of timesteps before the episode is `truncated` can be increased or decreased by specifying the `max_episode_steps` argument at initialization. For example, to increase the total number of timesteps to 100 make the environment as follows:
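A hedged sketch of that call (the `AntMaze_UMaze-v4` environment ID is an assumption; any registered AntMaze map works the same way, and extra keyword arguments such as `reset_target` are forwarded to the environment constructor):

```python
import gymnasium as gym
import gymnasium_robotics  # noqa: F401 -- importing the package registers the maze environments

env = gym.make(
    "AntMaze_UMaze-v4",
    max_episode_steps=100,   # truncate episodes after 100 steps
    continuing_task=True,    # keep the episode running after the goal is reached
    reset_target=True,       # sample a new goal each time the current one is reached
)
```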
@@ -216,6 +217,7 @@ def __init__(
maze_map: List[List[Union[str, int]]] = U_MAZE,
reward_type: str = "sparse",
continuing_task: bool = True,
reset_target: bool = True,
**kwargs,
):
# Get the ant.xml path from the Gymnasium package
@@ -229,6 +231,7 @@ def __init__(
maze_height=0.5,
reward_type=reward_type,
continuing_task=continuing_task,
reset_target=reset_target,
**kwargs,
)
# Create the MuJoCo environment, include position observation of the Ant for GoalEnv
3 changes: 3 additions & 0 deletions gymnasium_robotics/envs/maze/maze_v4.py
@@ -238,6 +238,7 @@ def __init__(
agent_xml_path: str,
reward_type: str = "dense",
continuing_task: bool = True,
reset_target: bool = True,
maze_map: List[List[Union[int, str]]] = U_MAZE,
maze_size_scaling: float = 1.0,
maze_height: float = 0.5,
@@ -247,6 +248,7 @@

self.reward_type = reward_type
self.continuing_task = continuing_task
self.reset_target = reset_target
self.maze, self.tmp_xml_file_path = Maze.make_maze(
agent_xml_path, maze_map, maze_size_scaling, maze_height
)
@@ -375,6 +377,7 @@ def update_goal(self, achieved_goal: np.ndarray) -> None:
"""Update goal position if continuing task and within goal radius."""
if (
self.continuing_task
and self.reset_target
and bool(np.linalg.norm(achieved_goal - self.goal) <= 0.45)
and len(self.maze.unique_goal_locations) > 1
):
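To make the effect of the new flag concrete, here is a small self-contained sketch of the kind of goal-update rule this condition gates. It is an illustration, not the library's exact method body; `maybe_update_goal` and `goal_candidates` are hypothetical names standing in for the environment's internal goal-sampling machinery:

```python
import numpy as np

GOAL_RADIUS = 0.45  # same threshold as the condition above


def maybe_update_goal(achieved_goal, goal, goal_candidates,
                      continuing_task=True, reset_target=True, rng=None):
    """Return a fresh goal once the agent is within the goal radius and
    reset_target is enabled; otherwise keep the current goal unchanged."""
    rng = rng if rng is not None else np.random.default_rng()
    if (
        continuing_task
        and reset_target
        and np.linalg.norm(achieved_goal - goal) <= GOAL_RADIUS
        and len(goal_candidates) > 1
    ):
        # Resample until the goal lies outside the radius of the achieved position
        # (assumes at least one candidate is far enough away).
        while np.linalg.norm(achieved_goal - goal) <= GOAL_RADIUS:
            goal = goal_candidates[rng.integers(len(goal_candidates))]
    return goal
```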
3 changes: 3 additions & 0 deletions gymnasium_robotics/envs/maze/point_maze.py
@@ -279,6 +279,7 @@ class PointMazeEnv(MazeEnv, EzPickle):

* `maze_map` - Optional argument to initialize the environment with a custom maze map.
* `continuing_task` - If set to `True` the episode won't be terminated when reaching the goal; instead a new goal location will be generated (unless the `reset_target` argument is set to `False`). If `False` the environment is terminated when the ball reaches the final goal.
* `reset_target` - If set to `True` and the argument `continuing_task` is also `True`, a new goal location will be generated each time the ball reaches the current goal. If `False` the goal location is kept fixed and no new goal is generated when it is reached.

Note that the maximum number of timesteps before the episode is `truncated` can be increased or decreased by specifying the `max_episode_steps` argument at initialization. For example,
to increase the total number of timesteps to 100 make the environment as follows:
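A hedged sketch of that call (the `PointMaze_UMaze-v3` ID is an assumption; note that in this diff the point environment defaults to `reset_target=False`, so goal regeneration has to be requested explicitly):

```python
import gymnasium as gym
import gymnasium_robotics  # noqa: F401 -- importing the package registers the maze environments

env = gym.make(
    "PointMaze_UMaze-v3",
    max_episode_steps=100,
    continuing_task=True,
    reset_target=True,  # default here is False: keep the same goal after it is reached
)
```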
@@ -309,6 +310,7 @@ def __init__(
render_mode: Optional[str] = None,
reward_type: str = "sparse",
continuing_task: bool = True,
reset_target: bool = False,
**kwargs,
):
point_xml_file_path = path.join(
@@ -321,6 +323,7 @@
maze_height=0.4,
reward_type=reward_type,
continuing_task=continuing_task,
reset_target=reset_target,
**kwargs,
)
