diff --git a/benchmarks/acc.py b/benchmarks/acc.py index f1118a3f..bf47244f 100644 --- a/benchmarks/acc.py +++ b/benchmarks/acc.py @@ -50,12 +50,12 @@ def step(self, action: np.ndarray) -> \ self.observation_space.low, self.observation_space.high) reward = 2.0 + x if x < 0 else -10 - done = bool(x >= 0) or self.steps > self._max_episode_steps + done = self.steps >= self._max_episode_steps self.steps += 1 return self.state, reward, done, {} def predict_done(self, state: np.ndarray) -> bool: - return state[0] >= 0 + return False def seed(self, seed: int): self.action_space.seed(seed) diff --git a/benchmarks/car_racing.py b/benchmarks/car_racing.py index ddd29a17..4a359744 100644 --- a/benchmarks/car_racing.py +++ b/benchmarks/car_racing.py @@ -57,8 +57,7 @@ def step(self, action: np.ndarray) -> \ reward = -(6.0 + abs(x - 3.0) + abs(y - 3.0)) done = self.corner and x <= 0.0 and y <= 0.0 - done = done or self.steps >= self._max_episode_steps or \ - self.unsafe(self.state) + done = done or self.steps >= self._max_episode_steps self.steps += 1 return self.state, reward, done, {} @@ -72,5 +71,5 @@ def seed(self, seed: int): self.init_space.seed(seed) def unsafe(self, state: np.ndarray) -> bool: - return self.state[0] >= 1.0 and self.state[0] <= 2.0 and \ - self.state[1] >= 1.0 and self.state[1] <= 2.0 + return state[0] >= 1.0 and state[0] <= 2.0 and \ + state[1] >= 1.0 and state[1] <= 2.0 diff --git a/benchmarks/mid_obstacle.py b/benchmarks/mid_obstacle.py index a6888615..0bd6ce0a 100644 --- a/benchmarks/mid_obstacle.py +++ b/benchmarks/mid_obstacle.py @@ -49,7 +49,7 @@ def step(self, action: np.ndarray) -> \ reward = -(abs(x - 3.0) + abs(y - 3.0)) done = x >= 3.0 and y >= 3.0 - done = done or self.steps > self._max_episode_steps + done = done or self.steps >= self._max_episode_steps self.steps += 1 return self.state, reward, done, {} @@ -63,5 +63,5 @@ def seed(self, seed: int): self.init_space.seed(seed) def unsafe(self, state: np.ndarray) -> bool: - return self.state[0] >= 1.0 and self.state[0] <= 2.0 and \ - self.state[1] >= 1.0 and self.state[1] <= 2.0 + return state[0] >= 1.0 and state[0] <= 2.0 and \ + state[1] >= 1.0 and state[1] <= 2.0 diff --git a/benchmarks/mountain_car.py b/benchmarks/mountain_car.py index 5641bcc1..55cb3272 100644 --- a/benchmarks/mountain_car.py +++ b/benchmarks/mountain_car.py @@ -54,7 +54,7 @@ def step(self, action: np.ndarray) -> \ return self.state, reward, done, {} def predict_done(self, state: np.ndarray) -> bool: - return state[0] >= 0.6 or state[0] < -np.pi / 3 + return state[0] >= 0.6 def seed(self, seed: int): self.action_space.seed(seed) @@ -62,4 +62,4 @@ def seed(self, seed: int): self.init_space.seed(seed) def unsafe(self, state: np.ndarray) -> bool: - return self.state[0] < self.safe_limit + return state[0] < self.safe_limit diff --git a/benchmarks/noisy_road.py b/benchmarks/noisy_road.py index 650dff2f..586aa503 100644 --- a/benchmarks/noisy_road.py +++ b/benchmarks/noisy_road.py @@ -46,8 +46,7 @@ def step(self, action: np.ndarray) -> \ self.observation_space.high) reward = -abs(x - 3.0) - done = x >= 3.0 or self.steps >= self._max_episode_steps or \ - self.unsafe(self.state) + done = x >= 3.0 or self.steps >= self._max_episode_steps self.steps += 1 return self.state, reward, done, {} diff --git a/benchmarks/noisy_road_2d.py b/benchmarks/noisy_road_2d.py index ba03b18f..8a2d8b92 100644 --- a/benchmarks/noisy_road_2d.py +++ b/benchmarks/noisy_road_2d.py @@ -86,8 +86,7 @@ def step(self, action: np.ndarray) -> \ reward = -(abs(x - 3.0) + abs(y - 3.0)) done = x >= 3.0 and y >= 3.0 - done = done or self.steps >= self._max_episode_steps or \ - self.unsafe(self.state) + done = done or self.steps >= self._max_episode_steps self.steps += 1 return self.state, reward, done, {} @@ -102,4 +101,4 @@ def seed(self, seed: int): self.rng = np.random.default_rng(np.random.PCG64(seed)) def unsafe(self, state: np.ndarray) -> bool: - return state[0]**2 + state[1]**2 >= self.max_speed**2 + return state[2]**2 + state[3]**2 >= self.max_speed**2 diff --git a/benchmarks/obstacle.py b/benchmarks/obstacle.py index 7be9481e..bb648486 100644 --- a/benchmarks/obstacle.py +++ b/benchmarks/obstacle.py @@ -49,8 +49,7 @@ def step(self, action: np.ndarray) -> \ reward = -(abs(x - 3.0) + abs(y - 3.0)) done = x >= 3.0 and y >= 3.0 - done = done or self.steps >= self._max_episode_steps or \ - self.unsafe(self.state) + done = done or self.steps >= self._max_episode_steps self.steps += 1 return self.state, reward, done, {} @@ -64,5 +63,5 @@ def seed(self, seed: int): self.init_space.seed(seed) def unsafe(self, state: np.ndarray) -> bool: - return self.state[0] >= 0.0 and self.state[0] <= 1.0 and \ - self.state[1] >= 2.0 and self.state[1] <= 3.0 + return state[0] >= 0.0 and state[0] <= 1.0 and \ + state[1] >= 2.0 and state[1] <= 3.0 diff --git a/benchmarks/pendulum.py b/benchmarks/pendulum.py index f0bba866..1a8f7f05 100644 --- a/benchmarks/pendulum.py +++ b/benchmarks/pendulum.py @@ -52,13 +52,12 @@ def step(self, action: np.ndarray) -> \ self.observation_space.low, self.observation_space.high) reward = -abs(theta) - done = bool(abs(theta) >= 0.4) or \ - self.steps >= self._max_episode_steps or self.unsafe(self.state) + done = self.steps >= self._max_episode_steps self.steps += 1 return self.state, reward, done, {} def predict_done(self, state: np.ndarray) -> bool: - return abs(state[0]) >= 0.4 + return False def seed(self, seed: int): self.action_space.seed(seed) @@ -66,4 +65,4 @@ def seed(self, seed: int): self.init_space.seed(seed) def unsafe(self, state: np.ndarray) -> bool: - return abs(self.state[0]) >= 0.4 + return abs(state[0]) >= 0.4 diff --git a/benchmarks/road.py b/benchmarks/road.py index 7fb3b8dc..89536f5c 100644 --- a/benchmarks/road.py +++ b/benchmarks/road.py @@ -43,8 +43,7 @@ def step(self, action: np.ndarray) -> \ self.observation_space.high) reward = -abs(x - 3.0) - done = x >= 3.0 or self.steps >= self._max_episode_steps or \ - self.unsafe(self.state) + done = x >= 3.0 or self.steps >= self._max_episode_steps self.steps += 1 return self.state, reward, done, {} diff --git a/benchmarks/road_2d.py b/benchmarks/road_2d.py index 14924033..d1200132 100644 --- a/benchmarks/road_2d.py +++ b/benchmarks/road_2d.py @@ -83,8 +83,7 @@ def step(self, action: np.ndarray) -> \ reward = -(abs(x - 3.0) + abs(y - 3.0)) done = x >= 3.0 and y >= 3.0 - done = done or self.steps >= self._max_episode_steps or \ - self.unsafe(self.state) + done = done or self.steps >= self._max_episode_steps self.steps += 1 return self.state, reward, done, {} @@ -98,4 +97,4 @@ def seed(self, seed: int): self.init_space.seed(seed) def unsafe(self, state: np.ndarray) -> bool: - return state[0]**2 + state[1]**2 >= self.max_speed**2 + return state[2]**2 + state[3]**2 >= self.max_speed**2