Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmarks/acc.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ def step(self, action: np.ndarray) -> \
self.observation_space.low,
self.observation_space.high)
reward = 2.0 + x if x < 0 else -10
- done = bool(x >= 0) or self.steps > self._max_episode_steps
+ done = self.steps >= self._max_episode_steps
self.steps += 1
return self.state, reward, done, {}

def predict_done(self, state: np.ndarray) -> bool:
- return state[0] >= 0
+ return False

def seed(self, seed: int):
self.action_space.seed(seed)
Expand Down
7 changes: 3 additions & 4 deletions benchmarks/car_racing.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@ def step(self, action: np.ndarray) -> \
reward = -(6.0 + abs(x - 3.0) + abs(y - 3.0))

done = self.corner and x <= 0.0 and y <= 0.0
- done = done or self.steps >= self._max_episode_steps or \
- self.unsafe(self.state)
+ done = done or self.steps >= self._max_episode_steps
self.steps += 1

return self.state, reward, done, {}
Expand All @@ -72,5 +71,5 @@ def seed(self, seed: int):
self.init_space.seed(seed)

def unsafe(self, state: np.ndarray) -> bool:
- return self.state[0] >= 1.0 and self.state[0] <= 2.0 and \
- self.state[1] >= 1.0 and self.state[1] <= 2.0
+ return state[0] >= 1.0 and state[0] <= 2.0 and \
+ state[1] >= 1.0 and state[1] <= 2.0
6 changes: 3 additions & 3 deletions benchmarks/mid_obstacle.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def step(self, action: np.ndarray) -> \
reward = -(abs(x - 3.0) + abs(y - 3.0))

done = x >= 3.0 and y >= 3.0
- done = done or self.steps > self._max_episode_steps
+ done = done or self.steps >= self._max_episode_steps
self.steps += 1

return self.state, reward, done, {}
Expand All @@ -63,5 +63,5 @@ def seed(self, seed: int):
self.init_space.seed(seed)

def unsafe(self, state: np.ndarray) -> bool:
- return self.state[0] >= 1.0 and self.state[0] <= 2.0 and \
- self.state[1] >= 1.0 and self.state[1] <= 2.0
+ return state[0] >= 1.0 and state[0] <= 2.0 and \
+ state[1] >= 1.0 and state[1] <= 2.0
4 changes: 2 additions & 2 deletions benchmarks/mountain_car.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ def step(self, action: np.ndarray) -> \
return self.state, reward, done, {}

def predict_done(self, state: np.ndarray) -> bool:
- return state[0] >= 0.6 or state[0] < -np.pi / 3
+ return state[0] >= 0.6

def seed(self, seed: int):
self.action_space.seed(seed)
self.observation_space.seed(seed)
self.init_space.seed(seed)

def unsafe(self, state: np.ndarray) -> bool:
- return self.state[0] < self.safe_limit
+ return state[0] < self.safe_limit
3 changes: 1 addition & 2 deletions benchmarks/noisy_road.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ def step(self, action: np.ndarray) -> \
self.observation_space.high)

reward = -abs(x - 3.0)
- done = x >= 3.0 or self.steps >= self._max_episode_steps or \
- self.unsafe(self.state)
+ done = x >= 3.0 or self.steps >= self._max_episode_steps
self.steps += 1

return self.state, reward, done, {}
Expand Down
5 changes: 2 additions & 3 deletions benchmarks/noisy_road_2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,7 @@ def step(self, action: np.ndarray) -> \

reward = -(abs(x - 3.0) + abs(y - 3.0))
done = x >= 3.0 and y >= 3.0
- done = done or self.steps >= self._max_episode_steps or \
- self.unsafe(self.state)
+ done = done or self.steps >= self._max_episode_steps
self.steps += 1

return self.state, reward, done, {}
Expand All @@ -102,4 +101,4 @@ def seed(self, seed: int):
self.rng = np.random.default_rng(np.random.PCG64(seed))

def unsafe(self, state: np.ndarray) -> bool:
- return state[0]**2 + state[1]**2 >= self.max_speed**2
+ return state[2]**2 + state[3]**2 >= self.max_speed**2
7 changes: 3 additions & 4 deletions benchmarks/obstacle.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ def step(self, action: np.ndarray) -> \
reward = -(abs(x - 3.0) + abs(y - 3.0))

done = x >= 3.0 and y >= 3.0
- done = done or self.steps >= self._max_episode_steps or \
- self.unsafe(self.state)
+ done = done or self.steps >= self._max_episode_steps
self.steps += 1

return self.state, reward, done, {}
Expand All @@ -64,5 +63,5 @@ def seed(self, seed: int):
self.init_space.seed(seed)

def unsafe(self, state: np.ndarray) -> bool:
- return self.state[0] >= 0.0 and self.state[0] <= 1.0 and \
- self.state[1] >= 2.0 and self.state[1] <= 3.0
+ return state[0] >= 0.0 and state[0] <= 1.0 and \
+ state[1] >= 2.0 and state[1] <= 3.0
7 changes: 3 additions & 4 deletions benchmarks/pendulum.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,17 @@ def step(self, action: np.ndarray) -> \
self.observation_space.low,
self.observation_space.high)
reward = -abs(theta)
- done = bool(abs(theta) >= 0.4) or \
- self.steps >= self._max_episode_steps or self.unsafe(self.state)
+ done = self.steps >= self._max_episode_steps
self.steps += 1
return self.state, reward, done, {}

def predict_done(self, state: np.ndarray) -> bool:
- return abs(state[0]) >= 0.4
+ return False

def seed(self, seed: int):
self.action_space.seed(seed)
self.observation_space.seed(seed)
self.init_space.seed(seed)

def unsafe(self, state: np.ndarray) -> bool:
- return abs(self.state[0]) >= 0.4
+ return abs(state[0]) >= 0.4
3 changes: 1 addition & 2 deletions benchmarks/road.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ def step(self, action: np.ndarray) -> \
self.observation_space.high)

reward = -abs(x - 3.0)
- done = x >= 3.0 or self.steps >= self._max_episode_steps or \
- self.unsafe(self.state)
+ done = x >= 3.0 or self.steps >= self._max_episode_steps
self.steps += 1

return self.state, reward, done, {}
Expand Down
5 changes: 2 additions & 3 deletions benchmarks/road_2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,7 @@ def step(self, action: np.ndarray) -> \

reward = -(abs(x - 3.0) + abs(y - 3.0))
done = x >= 3.0 and y >= 3.0
- done = done or self.steps >= self._max_episode_steps or \
- self.unsafe(self.state)
+ done = done or self.steps >= self._max_episode_steps
self.steps += 1

return self.state, reward, done, {}
Expand All @@ -98,4 +97,4 @@ def seed(self, seed: int):
self.init_space.seed(seed)

def unsafe(self, state: np.ndarray) -> bool:
- return state[0]**2 + state[1]**2 >= self.max_speed**2
+ return state[2]**2 + state[3]**2 >= self.max_speed**2