diff --git a/maze_world/envs/maze.py b/maze_world/envs/maze.py index c0bb952..01cba41 100644 --- a/maze_world/envs/maze.py +++ b/maze_world/envs/maze.py @@ -7,25 +7,70 @@ class MazeEnv(gym.Env): + r"""The main Maze Environment class for implementing different maze environments + + The class encapsulates a maze environment with arbitrary behind-the-scenes dynamics + through the :meth:`step` and :meth:`reset` functions. + + The main API methods that users of this class need to know are: + + - :meth:`step` - Updates an environment with actions returning the next agent observation, the reward for taking those actions, whether the environment has terminated or truncated due to the latest action, and information from the environment about the step, i.e. metrics, debug info. + - :meth:`reset` - Resets the environment to an initial state, required before calling step. + Returns the first agent observation for an episode and information, i.e. metrics, debug info. + - :meth:`render` - Renders the environment to help visualise what the agent sees; example modes are "human", "rgb_array", "ansi" for text. + - :meth:`close` - Closes the environment, important when external software is used, i.e. pygame for rendering, databases + + Environments have additional attributes for users to understand the implementation + + - :attr:`action_space` - The Space object corresponding to valid actions, all valid actions should be contained within the space. + - :attr:`observation_space` - The Space object corresponding to valid observations, all valid observations should be contained within the space. + - :attr:`reward_range` - A tuple corresponding to the minimum and maximum possible rewards for an agent over an episode. The default reward range is set to :math:`(-\infty,+\infty)`. + - :attr:`spec` - An environment spec that contains the information used to initialize the environment from :meth:`gymnasium.make` + - :attr:`metadata` - The metadata of the environment, i.e. 
render modes, render fps + - :attr:`np_random` - The random number generator for the environment. This is automatically assigned during ``super().reset(seed=seed)`` and when accessing ``self.np_random``. + + Note: + To get reproducible sampling of actions, a seed can be set with ``env.action_space.seed(123)``. + """ + metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4} def __init__( self, - generate_maze_fn, + generate_maze_fn: callable, render_mode: str = None, maze_width: int = None, # columns maze_height: int = None, # rows ): + """ + + :param generate_maze_fn: This function is called during every reset of the environment and is expected to return three items in the following order: + + - maze-map: numpy array of map where "1" represents wall and "0" represents floor. + - agent location: tuple (x,y) where x and y represent location of agent + - target location: tuple (x,y) where x and y represent target location of the agent + + :param render_mode: specifies one of the following: + + - None (default): no render is computed. + - “human”: The environment is continuously rendered in the current display or terminal, usually for human consumption. This rendering should occur during step() and render() doesn’t need to be called. Returns None. + - “rgb_array”: Return a single frame representing the current state of the environment. A frame is a np.ndarray with shape (x, y, 3) representing RGB values for an x-by-y pixel image. + - “ansi”: Return a string (str) or StringIO.StringIO containing a terminal-style text representation for each time step. The text can include newlines and ANSI escape sequences (e.g. for colors). + - “rgb_array_list” and “ansi_list”: List-based versions of render modes are possible (except Human) through the wrapper, gymnasium.wrappers.RenderCollection that is automatically applied during gymnasium.make(...,render_mode="rgb_array_list"). The frames collected are popped after render() is called or reset(). 
+ + :param maze_width: The width of the maze + :param maze_height: The height of the maze + """ self.generate_maze_fn = generate_maze_fn self.maze_width = maze_width self.maze_height = maze_height # The size of the PyGame window - self.window_pixel_size = 25 - self.window_size = ( - maze_width * self.window_pixel_size, - maze_height * self.window_pixel_size, + self._window_pixel_size = 25 + self._window_size = ( + maze_width * self._window_pixel_size, + maze_height * self._window_pixel_size, ) # Id of the elements in 2d maze: @@ -75,17 +120,17 @@ def __init__( assert render_mode is None or render_mode in self.metadata["render_modes"] self.render_mode = render_mode - self.canvas = None + self._canvas = None """ - If human-rendering is used, `self.window` will be a reference - to the window that we draw to. `self.clock` will be a clock that is used + If human-rendering is used, `self._window` will be a reference + to the window that we draw to. `self._clock` will be a clock that is used to ensure that the environment is rendered at the correct framerate in human-mode. They will remain `None` until human-mode is used for the first time. 
""" - self.window = None - self.clock = None + self._window = None + self._clock = None def _get_obs(self): agent_maze = copy(self.maze_map) @@ -136,7 +181,7 @@ def reset(self, seed: int = None, options=None): if self.render_mode == "human": self._render_frame() - self.canvas = None + self._canvas = None return observation, info @@ -183,76 +228,76 @@ def render(self): return self._render_frame() def _render_frame(self): - if self.window is None and self.render_mode == "human": + if self._window is None and self.render_mode == "human": pygame.init() pygame.display.init() pygame.display.set_caption(f"Maze - {self.maze_width} x {self.maze_height}") - self.window = pygame.display.set_mode(self.window_size) - if self.clock is None and self.render_mode == "human": - self.clock = pygame.time.Clock() + self._window = pygame.display.set_mode(self._window_size) + if self._clock is None and self.render_mode == "human": + self._clock = pygame.time.Clock() - if self.canvas is None: - self.canvas = pygame.Surface(self.window_size) - self.canvas.fill((255, 255, 255)) + if self._canvas is None: + self._canvas = pygame.Surface(self._window_size) + self._canvas.fill((255, 255, 255)) # draw walls for x in range(self.maze_height): for y in range(self.maze_width): if self.maze_map[x, y] == 1: pygame.draw.rect( - self.canvas, + self._canvas, (169, 169, 169), pygame.Rect( - np.array([y, x]) * self.window_pixel_size, - (self.window_pixel_size, self.window_pixel_size), + np.array([y, x]) * self._window_pixel_size, + (self._window_pixel_size, self._window_pixel_size), ), ) # draw the target pygame.draw.rect( - self.canvas, + self._canvas, (255, 0, 0), pygame.Rect( - self._target_location[::-1] * self.window_pixel_size, - (self.window_pixel_size, self.window_pixel_size), + self._target_location[::-1] * self._window_pixel_size, + (self._window_pixel_size, self._window_pixel_size), ), ) # Clean previous agent location if self._prev_agent_location is not None: pygame.draw.circle( - self.canvas, 
+ self._canvas, (255, 255, 255), - (self._prev_agent_location[::-1] + 0.5) * self.window_pixel_size, - self.window_pixel_size / 4, + (self._prev_agent_location[::-1] + 0.5) * self._window_pixel_size, + self._window_pixel_size / 4, ) # Draw new agent location pygame.draw.circle( - self.canvas, + self._canvas, (0, 0, 255), - (self._agent_location[::-1] + 0.5) * self.window_pixel_size, - self.window_pixel_size / 4, + (self._agent_location[::-1] + 0.5) * self._window_pixel_size, + self._window_pixel_size / 4, ) if self.render_mode == "human": # The following line copies our drawings from `canvas` to the visible window - self.window.blit(self.canvas, self.canvas.get_rect()) + self._window.blit(self._canvas, self._canvas.get_rect()) pygame.event.pump() pygame.display.update() # We need to ensure that human-rendering occurs at the predefined framerate. # The following line will automatically add a delay to keep the framerate stable. - self.clock.tick(self.metadata["render_fps"]) + self._clock.tick(self.metadata["render_fps"]) else: # rgb_array return np.transpose( - np.array(pygame.surfarray.pixels3d(self.canvas)), axes=(1, 0, 2) + np.array(pygame.surfarray.pixels3d(self._canvas)), axes=(1, 0, 2) ) def close(self): """ Retrieves queries for the environment. """ - if self.window is not None: + if self._window is not None: pygame.display.quit() pygame.quit() diff --git a/maze_world/envs/random_maze.py b/maze_world/envs/random_maze.py index 660b375..d9b578d 100644 --- a/maze_world/envs/random_maze.py +++ b/maze_world/envs/random_maze.py @@ -5,12 +5,26 @@ class RandomMazeEnv(MazeEnv): + r"""It extends Maze Class to create random mazes of specified sizes at each reset""" + def __init__( self, render_mode: str = None, maze_width: int = 11, maze_height: int = 11, ): + r""" + :param render_mode: specify one of the following: + + - None (default): no render is computed. 
+ - “human”: The environment is continuously rendered in the current display or terminal, usually for human consumption. This rendering should occur during step() and render() doesn’t need to be called. Returns None. + - “rgb_array”: Return a single frame representing the current state of the environment. A frame is a np.ndarray with shape (x, y, 3) representing RGB values for an x-by-y pixel image. + - “ansi”: Return a string (str) or StringIO.StringIO containing a terminal-style text representation for each time step. The text can include newlines and ANSI escape sequences (e.g. for colors). + - “rgb_array_list” and “ansi_list”: List-based versions of render modes are possible (except Human) through the wrapper, gymnasium.wrappers.RenderCollection that is automatically applied during gymnasium.make(...,render_mode="rgb_array_list"). The frames collected are popped after render() is called or reset(). + + :param maze_width: The width of the maze (must be odd, otherwise ValueError is raised) + :param maze_height: The height of the maze (must be odd, otherwise ValueError is raised) + """ if maze_width % 2 == 0 or maze_height % 2 == 0: raise ValueError("width/height of maze should be odd") diff --git a/maze_world/utils/solver.py b/maze_world/utils/solver.py index d4a9f2c..3b6deaf 100644 --- a/maze_world/utils/solver.py +++ b/maze_world/utils/solver.py @@ -59,6 +59,17 @@ def get_actions(impassable_array, motions, predecessors, start_idx, goal_idx): def maze_dijkstra_solver(impassable_array, motions, start_idx, goal_idx): + """ + Uses Dijkstra's algorithm to solve a maze + + :param impassable_array: array-like of dtype bool (converted with np.asarray; the dtype is asserted) + :param motions: + :param start_idx: (x,y) initial position of the agent + :param goal_idx: (x,y) position of the goal cell. + :return: + + + """ impassable_array = np.asarray(impassable_array) assert impassable_array.dtype == bool