Version of Frozen Lake based on the implementation #186

Open · wants to merge 4 commits into master

Changes from 1 commit
8 changes: 8 additions & 0 deletions README.md
@@ -33,6 +33,7 @@ This package is inspired by [gym-minigrid](https://github.com/maximecb/gym-minig
1. [Catcher](#catcher)
1. [TransportUndirected](#transportundirected)
1. [TransportDirected](#transportdirected)
1. [FrozenLakeUndirected](#frozenlakeundirected)

## Getting Started

@@ -355,3 +356,10 @@ In `ReinforcementLearning.jl`, you can create a [hook](https://juliareinforcemen

<img src="https://user-images.githubusercontent.com/32610387/126910050-723e100c-c5c7-4703-8eab-5ab86a15e41f.png">
<img src="https://user-images.githubusercontent.com/32610387/126909921-fdb3c853-4cac-4e6a-b20c-604caf5632e0.gif">

1. ### FrozenLakeUndirected

The objective of the agent is to navigate its way to the goal while avoiding falling into the holes in the lake. When the agent reaches the goal, it receives a reward of 1 and the environment terminates. If the agent falls into a hole, it receives a reward of -1 and the environment terminates. The probability of moving in the direction chosen by the agent is 1/3, with a 1/3 chance of moving in either perpendicular direction (for example, if the agent chooses up: a 1/3 chance of moving up, a 1/3 chance of moving left, and a 1/3 chance of moving right). The scenario is based on the [Frozen Lake environment](https://gymnasium.farama.org/environments/toy_text/frozen_lake/) in Python's gymnasium. The Python version has two preset maps, "4x4" and "8x8". The GridWorlds implementation includes the walls as part of the dimensions, so the equivalent maps in GridWorlds are "6x6" and "10x10" respectively. The start, goal, and holes are located at the same positions in the lake as in the Python version. If you specify a custom height and width, keep in mind that walls are added all around the map, so the actual surface of the lake is (height - 2, width - 2).
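
A minimal usage sketch, assuming the constructor is reached through the package's usual `GW.<EnvModule>.<Env>` pattern; the `perpendicular` and `slip` helpers and their direction encoding are purely illustrative and not part of the GridWorlds API:

```julia
import GridWorlds as GW
import Random

# Preset map equivalent to gymnasium's "4x4": the walls count toward the
# dimensions, so the lake surface itself is 4x4 inside the 6x6 map.
env = GW.FrozenLakeUndirectedModule.FrozenLakeUndirected(map_name = "6x6")

# The slippery transition rule described above: the chosen direction and the
# two perpendicular directions each occur with probability 1/3.
# Direction encoding (1 = up, 2 = down, 3 = left, 4 = right) is illustrative.
perpendicular(direction) = direction <= 2 ? (3, 4) : (1, 2)

function slip(rng::Random.AbstractRNG, chosen::Int)
    left, right = perpendicular(chosen)
    return rand(rng, [chosen, left, right])  # uniform over the three outcomes
end

slip(Random.GLOBAL_RNG, 1)  # "up" may come out as up (1), left (3), or right (4)
```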

<img src="https://user-images.githubusercontent.com/32610387/126910030-d93a714d-10b7-4117-887c-773afe78c625.png">
> **Member:** The screenshot and gif are from DynamicObstaclesUndirected. We need to create new ones for FrozenLakeUndirected. We can do that in the end.
>
> **Reply:** Yes, you are correct.

<img src="https://user-images.githubusercontent.com/32610387/126909888-8fa8473f-deb6-4562-9004-419fa8080693.gif">
87 changes: 60 additions & 27 deletions src/envs/frozen_lake_undirected.jl
@@ -3,6 +3,7 @@ module FrozenLakeUndirectedModule
import ..GridWorlds as GW
import Random
import ReinforcementLearningBase as RLBase
using AStarSearch

#####
##### game logic
Expand All @@ -12,7 +13,7 @@ const NUM_OBJECTS = 4
const AGENT = 1
const WALL = 2
const GOAL = 3
const OBSTACLE = 4
const HOLE = 4
const NUM_ACTIONS = 4

mutable struct FrozenLakeUndirected{R, RNG} <: GW.AbstractGridWorld
@@ -28,43 +29,77 @@ mutable struct FrozenLakeUndirected{R, RNG} <: GW.AbstractGridWorld
num_obstacles::Int
obstacle_positions::Vector{CartesianIndex{2}}
is_slippery::Bool
randomize_start_end::Bool
end

function FrozenLakeUndirected(; map_name = String, R = Float32, height = 8, width = 8, num_obstacles = floor(Int, sqrt(height * width) / 2), rng = Random.GLOBAL_RNG, is_slippery = true)
function FrozenLakeUndirected(; map_name::String = "", R::Type = Float32, height::Int = 8, width::Int = 8, num_obstacles::Int = floor(Int, sqrt(height * width) / 2), rng = Random.GLOBAL_RNG, is_slippery::Bool = true, randomize_start_end::Bool = false)
obstacle_positions = Array{CartesianIndex{2}}(undef, num_obstacles)
if map_name == "4x4"
height = 4
width = 4
if map_name == "6x6"
height = 6
width = 6
num_obstacles = 4
obstacle_positions = Array{CartesianIndex{2}}(undef, num_obstacles)
obstacle_positions = [CartesianIndex(3, 3), CartesianIndex(3, 5), CartesianIndex(4, 5), CartesianIndex(5, 2)]
elseif map_name == "8x8"
height = 8
width = 8
elseif map_name == "10x10"
height = 10
width = 10
num_obstacles = 10
obstacle_positions = Array{CartesianIndex{2}}(undef, num_obstacles)
obstacle_positions = [CartesianIndex(4, 5), CartesianIndex(5, 7), CartesianIndex(6, 5), CartesianIndex(7, 3), CartesianIndex(7, 4), CartesianIndex(7, 8), CartesianIndex(8, 3), CartesianIndex(8, 6), CartesianIndex(8, 8), CartesianIndex(9, 5)]
elseif map_name != ""
throw(ArgumentError("Unsupported map_name value: '$(map_name)'. Please use '6x6', '10x10', or leave it unspecified."))
end

print("Obstacle Positions: ", obstacle_positions, " Height: ", height, " Width: ", width, "\n")
tile_map = falses(NUM_OBJECTS, height + 2, width + 2)
tile_map = falses(NUM_OBJECTS, height, width)

tile_map[WALL, 1, :] .= true
tile_map[WALL, height + 2, :] .= true
tile_map[WALL, height, :] .= true
tile_map[WALL, :, 1] .= true
tile_map[WALL, :, width + 2] .= true
tile_map[WALL, :, width] .= true

agent_position = CartesianIndex(2, 2)
tile_map[AGENT, agent_position] = true
if randomize_start_end
> **Member:** This randomization should also be done when resetting the environment.
>
> **Reply:** That's a significant change from the gymnasium model.
>
> **Member:** I haven't used the gymnasium one. Do you mean that for every episode, the map remains the same? I think it is valuable to have an option to randomize the map on every episode to facilitate generalization. Most other environments in this package are like that. We could add a boolean to toggle it off if a user wants to keep the same map and adhere to the gym specification. Does that sound reasonable?
>
> **Reply:** Yes, the map remains the same. From what I have seen, it is used for Q-learning and dynamic programming examples. If the map changed after every episode, the agent wouldn't learn. I will add a boolean so the user has the option to turn on the randomization.
>
> **Reply:** Could you provide some guidance on unit testing with all these different flags? I don't understand how the settings are passed in runtests.jl.
>
> **Member:** Adding a boolean works. Unfortunately, automatically unit testing all flag combinations for these games is a bit hard, and it isn't implemented in runtests.jl yet. So for now, you can just test the default settings for an environment in runtests.jl with `env = GW.RLBaseEnv(Env(R = R))`, and test the rest of the combinations manually by playing these games directly in the terminal with `GW.play!(env)`. (A sketch of such a test appears at the end of this diff.)

agent_position = CartesianIndex(rand(rng, [i for i in 2:height - 1]), rand(rng, [i for i in 2:width - 1]))

# Find a goal that is not at the same position as the start point
goal_position = agent_position
while agent_position == goal_position
goal_position = CartesianIndex(rand(rng, [i for i in 2:height - 1]), rand(rng, [i for i in 2:width - 1]))
end
else
agent_position = CartesianIndex(2, 2)
goal_position = CartesianIndex(height - 1, width - 1)
end

goal_position = CartesianIndex(height + 1, width + 1)
tile_map[AGENT, agent_position] = true
tile_map[GOAL, goal_position] = true

if map_name === nothing
obstacle_positions = Array{CartesianIndex{2}}(undef, num_obstacles)
for i in 1:num_obstacles
obstacle_position = GW.sample_empty_position(rng, tile_map)
obstacle_positions[i] = obstacle_position
if map_name == ""
function get_neighbors(state::CartesianIndex)
> **Member:** Is there a reason for having these functions defined inside the constructor? If not, we can move them outside. Also, we will need to run A* when resetting the environment too, so we will need them there.
>
> **Reply:** Yes, because they are never used elsewhere. They won't be used when resetting the environment.
>
> **Member:** Reopening this as per the above.
>
> **Member:** Also, just saying, I haven't seen a lot of code where functions are put inside other functions, even if they are not used elsewhere. In closures it can be useful when you want to capture another variable and such, but other than that, I haven't seen it happen much unless there is some specific reason. I may be wrong, but I think it is clearer to have them outside, unless there is a specific need, in my opinion.
>
> **Reply:** I actually see this quite a bit in Python code written by academics. I don't see it from people in industry. I don't know why. I can change it, though.
>
> **Member:** I see. Please go ahead with the change. Thanks. (A sketch of the refactor appears after the path-check loop below.)

return_list = []
for pos in [GW.move_up(state), GW.move_down(state), GW.move_left(state), GW.move_right(state)]
if !tile_map[WALL, pos] && !tile_map[HOLE, pos]
push!(return_list, pos)
end
end
return return_list
end

manhattan(a::CartesianIndex, b::CartesianIndex) = sum(abs.((b-a).I))
is_goal(state::CartesianIndex, end_state::CartesianIndex) = state == end_state

distance_heuristic(state::CartesianIndex, end_state::CartesianIndex) = manhattan(state, end_state)

path_exists = false
while !path_exists
obstacle_positions = Array{CartesianIndex{2}}(undef, num_obstacles)
for i in 1:num_obstacles
obstacle_position = GW.sample_empty_position(rng, tile_map)
obstacle_positions[i] = obstacle_position
end
tile_map = update_obstacles_on_map(tile_map, obstacle_positions)

path_exists = astar(get_neighbors, agent_position, goal_position; heuristic = distance_heuristic, isgoal = is_goal).status == :success
@debug "path_exists: $(path_exists)"
end
end
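
Per the refactor discussed in the thread above, a hypothetical top-level version of the A* helpers might look like the sketch below. The signatures are a guess at the suggested change, not code from this commit; it assumes the module's `WALL`/`HOLE` constants and the `GW` import are in scope.

```julia
# Top-level replacements for the closures defined inside the constructor:
# tile_map is passed explicitly instead of being captured.
function get_neighbors(tile_map::BitArray{3}, state::CartesianIndex{2})
    neighbors = CartesianIndex{2}[]
    for pos in (GW.move_up(state), GW.move_down(state), GW.move_left(state), GW.move_right(state))
        if !tile_map[WALL, pos] && !tile_map[HOLE, pos]
            push!(neighbors, pos)
        end
    end
    return neighbors
end

manhattan(a::CartesianIndex{2}, b::CartesianIndex{2}) = sum(abs.((b - a).I))
is_goal(state::CartesianIndex{2}, end_state::CartesianIndex{2}) = state == end_state

# At the call site, bind tile_map with an anonymous function:
# path_exists = astar(s -> get_neighbors(tile_map, s), agent_position, goal_position;
#                     heuristic = manhattan, isgoal = is_goal).status == :success
```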

@@ -75,16 +110,14 @@ function FrozenLakeUndirected(; map_name = String, R = Float32, height = 8, widt
terminal_reward = one(R)
terminal_penalty = -one(R)

env = FrozenLakeUndirected(tile_map, map_name, agent_position, reward, rng, done, terminal_reward, terminal_penalty, goal_position, num_obstacles, obstacle_positions, is_slippery)

# GW.reset!(env)
env = FrozenLakeUndirected(tile_map, map_name, agent_position, reward, rng, done, terminal_reward, terminal_penalty, goal_position, num_obstacles, obstacle_positions, is_slippery, randomize_start_end)

return env
end

function update_obstacles_on_map(tile_map, obstacle_positions)
for position in obstacle_positions
tile_map[OBSTACLE, position] = true
tile_map[HOLE, position] = true
end
return tile_map
end
@@ -132,7 +165,7 @@ function GW.act!(env::FrozenLakeUndirected, action)
if tile_map[GOAL, env.agent_position]
env.reward = env.terminal_reward
env.done = true
elseif tile_map[OBSTACLE, env.agent_position]
elseif tile_map[HOLE, env.agent_position]
env.reward = env.terminal_penalty
env.done = true
else
@@ -154,7 +187,7 @@ GW.get_action_names(env::FrozenLakeUndirected) = (:MOVE_UP, :MOVE_DOWN, :MOVE_LE
GW.get_object_names(env::FrozenLakeUndirected) = (:AGENT, :WALL, :GOAL, :OBSTACLE)

function GW.get_pretty_tile_map(env::FrozenLakeUndirected, position::CartesianIndex{2})
characters = ('☻', '█', '♥', '', '⋅')
characters = ('☻', '█', '♥', '', '⋅')

object = findfirst(@view env.tile_map[:, position])
if isnothing(object)
@@ -168,7 +201,7 @@ function GW.get_pretty_sub_tile_map(env::FrozenLakeUndirected, window_size, posi
tile_map = env.tile_map
agent_position = env.agent_position

characters = ('☻', '█', '♥', '', '⋅')
characters = ('☻', '█', '♥', '', '⋅')

sub_tile_map = GW.get_sub_tile_map(tile_map, agent_position, window_size)

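
Following the testing guidance in the `randomize_start_end` thread above, a minimal default-settings entry for runtests.jl might look like this sketch; the fully qualified constructor path is an assumption, and the assertion is only a placeholder:

```julia
import GridWorlds as GW
import Test

Test.@testset "FrozenLakeUndirected (default settings)" begin
    for R in (Float32, Float64)
        env = GW.RLBaseEnv(GW.FrozenLakeUndirectedModule.FrozenLakeUndirected(R = R))
        Test.@test !isnothing(env)
    end
end

# Per the reviewer's note, the other flag combinations (map_name, is_slippery,
# randomize_start_end) can be exercised manually in the terminal:
# GW.play!(env)
```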