Rccar constraint (#10)

yardenas · web-flow · commit b6b886726974 · 2024-10-27T08:41:24.000+01:00
* Cleanup

* Add simple cost function

* Add safety constraints

* Add obstacle as parameter
diff --git a/ss2r/benchmark_suites/rccar/rccar.py b/ss2r/benchmark_suites/rccar/rccar.py
@@ -1,5 +1,4 @@
 import functools
-from typing import Tuple
 
 import jax
 import jax.flatten_util
@@ -144,13 +143,13 @@ def state_reward(self, obs: jax.Array, next_obs: jax.Array) -> jax.Array:
         return reward
 
 
-class RCCar(Env):
-    dim_action: Tuple[int] = (2,)
-    _goal: jax.Array = jnp.array([0.0, 0.0, 0.0])
-    _init_pose: jax.Array = jnp.array([1.42, -1.04, jnp.pi])
-    _angle_idx: int = 2
-    _obs_noise_stds: jax.Array = OBS_NOISE_STD_SIM_CAR
+def cost_fn(state: jax.Array, obstacle_position, obstacle_radius) -> jax.Array:
+    xy = state[..., :2]
+    distance = jnp.linalg.norm(xy - obstacle_position)
+    return jnp.where(distance >= obstacle_radius, 0.0, 1.0)
+
 
+class RCCar(Env):
     def __init__(
         self,
         car_model_params: dict,
@@ -160,6 +159,7 @@ def __init__(
         margin_factor: float = 10.0,
         max_throttle: float = 1.0,
         dt: float = 1 / 30.0,
+        obstacle: tuple[float, float, float] = (-0.75, -0.75, 0.2),
     ):
         """
         Race car simulator environment
@@ -173,6 +173,12 @@ def __init__(
             car_model_params: dictionary of car model parameters that overwrite the default values
             seed: random number generator seed
         """
+        self._goal = jnp.array([0.0, 0.0, 0.0])
+        self.obstacle = tuple(obstacle)
+        self._init_pose = jnp.array([1.42, -1.04, jnp.pi])
+        self._angle_idx = 2
+        self._obs_noise_stds = OBS_NOISE_STD_SIM_CAR
+        self.dim_action = (2,)
         self._dt = dt
         self.dim_state = (7,) if encode_angle else (6,)
         self.encode_angle = encode_angle
@@ -226,6 +232,7 @@ def reset(self, rng: jax.Array) -> State:
             obs=init_state,
             reward=jnp.array(0.0),
             done=jnp.array(0.0),
+            info={"cost": jnp.array(0.0)},
         )
 
     def step(self, state: State, action: jax.Array) -> State:
@@ -239,14 +246,16 @@ def step(self, state: State, action: jax.Array) -> State:
         # FIXME (yarden): hard-coded key is bad here.
         next_obs = self._obs(next_dynamics_state, rng=jax.random.PRNGKey(0))
         reward = self.reward_model.forward(obs=None, action=action, next_obs=next_obs)
+        cost = cost_fn(obs, jnp.asarray(self.obstacle[:2]), self.obstacle[2])
         done = jnp.asarray(0.0)
+        info = {**state.info, "cost": cost}
         next_state = State(
             pipeline_state=state.pipeline_state,
             obs=next_obs,
             reward=reward,
             done=done,
             metrics=state.metrics,
-            info=state.info,
+            info=info,
         )
         return next_state
 
@@ -281,6 +290,8 @@ def render(env, policy, steps, rng):
     if env.encode_angle:
         trajectory = decode_angles(trajectory, 2)
 
+    obstacle_position, obstacle_radius = env.obstacle[:2], env.obstacle[2]
+
     def draw_scene(timestep):
         # Create a figure and axis
         fig = Figure(figsize=(2.5, 2.5), dpi=300)
@@ -301,7 +312,7 @@ def draw_scene(timestep):
         # Plot the car's position and velocity at the specified timestep
         x, y = rotated_trajectory[timestep, 0], rotated_trajectory[timestep, 1]
         vx, vy = rotated_trajectory[timestep, 3], rotated_trajectory[timestep, 4]
-        car_width, car_length = 0.3, 0.6
+        car_width, car_length = 0.07, 0.2
         car = Rectangle(
             (x - car_length / 2, y - car_width / 2),
             car_length,
@@ -313,17 +324,15 @@ def draw_scene(timestep):
             rotation_point="center",
         )
         ax.add_patch(car)
-        # Add an arrow to indicate the car's orientation
-        ax.arrow(
-            x,
-            y,
-            vx * 0.5,
-            vy * 0.5,
-            head_width=0.2,
-            head_length=0.2,
-            fc="black",
+        obstacle = Circle(
+            obstacle_position,
+            obstacle_radius,
+            color="gray",
+            alpha=0.5,
             ec="black",
+            lw=1.5,
         )
+        ax.add_patch(obstacle)
         ax.quiver(
             x,
             y,
@@ -336,6 +345,7 @@ def draw_scene(timestep):
             headwidth=3,
             linewidth=0.5,
         )
+        ax.grid(True, linewidth=0.5, c="gainsboro", zorder=0)
         # Render figure to canvas and retrieve RGB array
         canvas.draw()
         image = np.frombuffer(canvas.tostring_rgb(), dtype="uint8").copy()
diff --git a/ss2r/configs/environment/rccar.yaml b/ss2r/configs/environment/rccar.yaml
@@ -11,3 +11,4 @@ ctrl_cost_weight: 0.005
 margin_factor: 20.0
 max_throttle: 1.0
 use_obs_noise: false
+obstacle: [-0.75, -0.75, 0.2]  # x, y, radius