From eca62e3c7bee712d02de43c95a8758bcae2c2cdf Mon Sep 17 00:00:00 2001
From: Kevin Fu
Date: Fri, 24 May 2024 15:59:37 -0400
Subject: [PATCH 1/4] changes for joint passage size

---
 vmas/scenarios/joint_passage_size.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/vmas/scenarios/joint_passage_size.py b/vmas/scenarios/joint_passage_size.py
index 4fa6780e..aea5d926 100644
--- a/vmas/scenarios/joint_passage_size.py
+++ b/vmas/scenarios/joint_passage_size.py
@@ -70,6 +70,9 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
 
         assert self.n_passages == 3 or self.n_passages == 4
 
+        # NOTE: I changed how our VMAS viewer works s.t. it requires a world_semidim, my bad
+        self.world_semidim = 1.0
+
         self.plot_grid = False
 
         # Make world
@@ -510,6 +513,20 @@ def observation(self, agent: Agent):
             angle_to_vector(self.goal.state.rot),
         ] + ([angle_to_vector(joint_angle)] if self.observe_joint_angle else [])
 
+        # NOTE: I add the size capability in "mixed" form manually here:
+        radius = agent.shape.radius
+        mean_radius = (self.agent_radius + self.agent_radius_2) / 2
+        relative_radius = agent.shape.radius - mean_radius
+        capability_repr = [
+            torch.tensor(
+                radius, device=self.world.device
+            ).repeat(self.world.batch_dim, 1),
+            torch.tensor(
+                relative_radius, device=self.world.device
+            ).repeat(self.world.batch_dim, 1),
+        ]
+        observations += capability_repr
+
         if self.obs_noise > 0:
             for i, obs in enumerate(observations):
                 noise = torch.zeros(
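The capability observation added in PATCH 1/4 appends two per-agent scalars (absolute radius and radius relative to the team mean) to every environment's observation. A minimal standalone sketch of that broadcast pattern, outside the scenario (batch size and radii below are made-up illustrative values, not VMAS API):

    import torch

    batch_dim = 4                       # number of vectorized environments (example value)
    radius, mean_radius = 0.05, 0.04    # hypothetical agent radius and team-mean radius

    # Scalar -> (batch_dim, 1) tensor, one copy per environment,
    # so it can be concatenated with the other per-env observation terms.
    abs_radius_obs = torch.tensor(radius).repeat(batch_dim, 1)
    rel_radius_obs = torch.tensor(radius - mean_radius).repeat(batch_dim, 1)

    obs = torch.cat([abs_radius_obs, rel_radius_obs], dim=-1)  # shape (4, 2)
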
From 05b0a4f82080320978128e549e59a027d85cd01b Mon Sep 17 00:00:00 2001
From: Kevin Fu
Date: Tue, 28 May 2024 11:44:14 -0400
Subject: [PATCH 2/4] WIP agent pos shaping

---
 vmas/scenarios/joint_passage_size.py | 53 +++++++++++++++++++++-------
 1 file changed, 40 insertions(+), 13 deletions(-)

diff --git a/vmas/scenarios/joint_passage_size.py b/vmas/scenarios/joint_passage_size.py
index aea5d926..cae4fb10 100644
--- a/vmas/scenarios/joint_passage_size.py
+++ b/vmas/scenarios/joint_passage_size.py
@@ -46,7 +46,6 @@ def get_line_angle_dist_0_180(angle, goal):
         ),
     ).squeeze(-1)
 
-
 class Scenario(BaseScenario):
     def make_world(self, batch_dim: int, device: torch.device, **kwargs):
         self.fixed_passage = kwargs.get("fixed_passage", False)
@@ -59,6 +58,7 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
         self.mass_ratio = kwargs.get("mass_ratio", 1)
         self.mass_position = kwargs.get("mass_position", 0.75)
         self.max_speed_1 = kwargs.get("max_speed_1", None)  # 0.1
+        self.agent_pos_shaping_factor = kwargs.get("agent_pos_shaping_factor", 1)
         self.pos_shaping_factor = kwargs.get("pos_shaping_factor", 1)
         self.rot_shaping_factor = kwargs.get("rot_shaping_factor", 0)
         self.collision_reward = kwargs.get("collision_reward", -0.1)
@@ -200,6 +200,7 @@ def mass_collision_filter(e):
         self.rot_rew = self.pos_rew.clone()
         self.collision_rew = self.pos_rew.clone()
         self.energy_rew = self.pos_rew.clone()
+        self.agent_pos_rew = self.pos_rew.clone()
         self.all_passed = torch.full((batch_dim,), False, device=device)
 
         return world
@@ -216,6 +217,13 @@ def set_n_passages(self, val):
         self.create_passage_map(self.world)
         self.reset_world_at()
 
+    def agent_dist_to_goal(self, agent_index):
+        agent = self.world.agents[agent_index]
+        agent_goal = (self.goal.state.pos - self.joint_length / 2) if agent_index == 0 else (self.goal.state.pos + self.joint_length / 2)
+        return torch.linalg.vector_norm(
+            agent.state.pos - agent_goal, dim=1
+        )
+
     def reset_world_at(self, env_index: int = None):
         start_angle = torch.rand(
             (1, 1) if env_index is not None else (self.world.batch_dim, 1),
@@ -354,6 +362,10 @@ def reset_world_at(self, env_index: int = None):
                 )
             ) * self.rot_shaping_factor
 
+            # these variables allow "previous state" - "curr state" so that
+            # the reward is only relative to previous state
+            self.prev_agent_dist_shaping = (self.agent_dist_to_goal(0) + self.agent_dist_to_goal(1)) * self.agent_pos_shaping_factor
+
         else:
             self.t[env_index] = 0
             self.passed[env_index] = 0
@@ -396,6 +408,7 @@ def reward(self, agent: Agent):
             self.rot_rew[:] = 0
             self.collision_rew[:] = 0
             self.energy_rew[:] = 0
+            self.agent_pos_rew[:] = 0
 
             joint_passed = self.joint.landmark.state.pos[:, Y] > 0
             self.all_passed = (
@@ -416,15 +429,16 @@ def reward(self, agent: Agent):
             ]
             self.joint.pos_shaping_pre = joint_shaping
 
-            joint_dist_to_goal = torch.linalg.vector_norm(
-                self.joint.landmark.state.pos - self.goal.state.pos,
-                dim=1,
-            )
-            joint_shaping = joint_dist_to_goal * self.pos_shaping_factor
-            self.pos_rew[joint_passed] += (self.joint.pos_shaping_post - joint_shaping)[
-                joint_passed
-            ]
-            self.joint.pos_shaping_post = joint_shaping
+            # 0 reward for joint center to goal
+            # joint_dist_to_goal = torch.linalg.vector_norm(
+            #     self.joint.landmark.state.pos - self.goal.state.pos,
+            #     dim=1,
+            # )
+            # joint_shaping = joint_dist_to_goal * self.pos_shaping_factor
+            # self.pos_rew[joint_passed] += (self.joint.pos_shaping_post - joint_shaping)[
+            #     joint_passed
+            # ]
+            # self.joint.pos_shaping_post = joint_shaping
 
             # Rot shaping
             joint_dist_to_90_rot = (
@@ -436,8 +450,10 @@ def reward(self, agent: Agent):
                     self.joint.landmark.state.rot, self.middle_angle
                 )
             )
+            # only apply before passage
             joint_shaping = joint_dist_to_90_rot * self.rot_shaping_factor
-            self.rot_rew += self.joint.rot_shaping_pre - joint_shaping
+            self.rot_rew[~joint_passed] += (self.joint.rot_shaping_pre -
+                joint_shaping)[~joint_passed]
             self.joint.rot_shaping_pre = joint_shaping
 
             # Agent collisions
@@ -469,8 +485,18 @@ def reward(self, agent: Agent):
             ).sum(-1)
             self.energy_rew = -self.energy_expenditure * self.energy_reward_coeff
 
+            # shaping for the agents to land on their spots
+            # only apply this after both agents are past the passage
+            both_agents_past = torch.logical_and(self.world.agents[0].state.pos[:, Y] > 0, self.world.agents[1].state.pos[:, Y] > 0)
+            curr_agent_dist_shaping = (self.agent_dist_to_goal(0) + self.agent_dist_to_goal(1)) * self.agent_pos_shaping_factor
+            # self.agent_pos_rew += self.prev_agent_dist_shaping - curr_agent_dist_shaping
+            self.agent_pos_rew[both_agents_past] += (self.prev_agent_dist_shaping - curr_agent_dist_shaping)[
+                both_agents_past
+            ]
+            self.prev_agent_dist_shaping = curr_agent_dist_shaping
+
             self.rew = (
-                self.pos_rew + self.rot_rew + self.collision_rew + self.energy_rew
+                self.pos_rew + self.rot_rew + self.collision_rew + self.energy_rew + self.agent_pos_rew
             )
 
         return self.rew
@@ -565,6 +591,7 @@ def info(self, agent: Agent) -> Dict[str, Tensor]:
             just_passed = self.all_passed * (self.passed == 0)
             self.passed[just_passed] = 100
             self.info_stored = {
+                "agent_pos_rew": self.agent_pos_rew,
                 "pos_rew": self.pos_rew,
                 "rot_rew": self.rot_rew,
                 "collision_rew": self.collision_rew,
@@ -760,7 +787,7 @@ def extra_render(self, env_index: int = 0):
         goal_agent_1.set_color(*color)
         geoms.append(goal_agent_1)
 
-        goal_agent_2 = rendering.make_circle(self.agent_radius)
+        goal_agent_2 = rendering.make_circle(3 * self.agent_radius)
         xform = rendering.Transform()
         goal_agent_2.add_attr(xform)
         xform.set_translation(
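The agent_pos_rew term in PATCH 2/4 is a difference ("previous shaping" minus "current shaping"), so agents are rewarded only for progress made since the last step rather than for absolute distance. A minimal sketch of that pattern on dummy tensors (names and values below are illustrative, not part of the scenario):

    import torch

    shaping_factor = 1.0

    # distance to goal at the previous and current step, one value per environment
    prev_dist = torch.tensor([0.50, 0.40, 0.30])
    curr_dist = torch.tensor([0.45, 0.42, 0.30])

    prev_shaping = prev_dist * shaping_factor
    curr_shaping = curr_dist * shaping_factor

    # positive when the agent moved closer, negative when it moved away, zero otherwise
    rew = prev_shaping - curr_shaping   # approx [0.05, -0.02, 0.00]
    prev_shaping = curr_shaping         # carry over for the next step
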
From 3b015029ddd7c305f53913fb7e3f64bae792b073 Mon Sep 17 00:00:00 2001
From: Kevin Fu
Date: Wed, 29 May 2024 11:33:49 -0400
Subject: [PATCH 3/4] Revert "WIP agent pos shaping"

This reverts commit 05b0a4f82080320978128e549e59a027d85cd01b.
---
 vmas/scenarios/joint_passage_size.py | 53 +++++++--------------------
 1 file changed, 13 insertions(+), 40 deletions(-)

diff --git a/vmas/scenarios/joint_passage_size.py b/vmas/scenarios/joint_passage_size.py
index cae4fb10..aea5d926 100644
--- a/vmas/scenarios/joint_passage_size.py
+++ b/vmas/scenarios/joint_passage_size.py
@@ -46,6 +46,7 @@ def get_line_angle_dist_0_180(angle, goal):
         ),
     ).squeeze(-1)
 
+
 class Scenario(BaseScenario):
     def make_world(self, batch_dim: int, device: torch.device, **kwargs):
         self.fixed_passage = kwargs.get("fixed_passage", False)
@@ -58,7 +59,6 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
         self.mass_ratio = kwargs.get("mass_ratio", 1)
         self.mass_position = kwargs.get("mass_position", 0.75)
         self.max_speed_1 = kwargs.get("max_speed_1", None)  # 0.1
-        self.agent_pos_shaping_factor = kwargs.get("agent_pos_shaping_factor", 1)
         self.pos_shaping_factor = kwargs.get("pos_shaping_factor", 1)
         self.rot_shaping_factor = kwargs.get("rot_shaping_factor", 0)
         self.collision_reward = kwargs.get("collision_reward", -0.1)
@@ -200,7 +200,6 @@ def mass_collision_filter(e):
         self.rot_rew = self.pos_rew.clone()
         self.collision_rew = self.pos_rew.clone()
         self.energy_rew = self.pos_rew.clone()
-        self.agent_pos_rew = self.pos_rew.clone()
         self.all_passed = torch.full((batch_dim,), False, device=device)
 
         return world
@@ -217,13 +216,6 @@ def set_n_passages(self, val):
         self.create_passage_map(self.world)
         self.reset_world_at()
 
-    def agent_dist_to_goal(self, agent_index):
-        agent = self.world.agents[agent_index]
-        agent_goal = (self.goal.state.pos - self.joint_length / 2) if agent_index == 0 else (self.goal.state.pos + self.joint_length / 2)
-        return torch.linalg.vector_norm(
-            agent.state.pos - agent_goal, dim=1
-        )
-
     def reset_world_at(self, env_index: int = None):
         start_angle = torch.rand(
             (1, 1) if env_index is not None else (self.world.batch_dim, 1),
@@ -362,10 +354,6 @@ def reset_world_at(self, env_index: int = None):
                 )
             ) * self.rot_shaping_factor
 
-            # these variables allow "previous state" - "curr state" so that
-            # the reward is only relative to previous state
-            self.prev_agent_dist_shaping = (self.agent_dist_to_goal(0) + self.agent_dist_to_goal(1)) * self.agent_pos_shaping_factor
-
         else:
             self.t[env_index] = 0
             self.passed[env_index] = 0
@@ -408,7 +396,6 @@ def reward(self, agent: Agent):
             self.rot_rew[:] = 0
             self.collision_rew[:] = 0
             self.energy_rew[:] = 0
-            self.agent_pos_rew[:] = 0
 
             joint_passed = self.joint.landmark.state.pos[:, Y] > 0
             self.all_passed = (
@@ -429,16 +416,15 @@ def reward(self, agent: Agent):
             ]
             self.joint.pos_shaping_pre = joint_shaping
 
-            # 0 reward for joint center to goal
-            # joint_dist_to_goal = torch.linalg.vector_norm(
-            #     self.joint.landmark.state.pos - self.goal.state.pos,
-            #     dim=1,
-            # )
-            # joint_shaping = joint_dist_to_goal * self.pos_shaping_factor
-            # self.pos_rew[joint_passed] += (self.joint.pos_shaping_post - joint_shaping)[
-            #     joint_passed
-            # ]
-            # self.joint.pos_shaping_post = joint_shaping
+            joint_dist_to_goal = torch.linalg.vector_norm(
+                self.joint.landmark.state.pos - self.goal.state.pos,
+                dim=1,
+            )
+            joint_shaping = joint_dist_to_goal * self.pos_shaping_factor
+            self.pos_rew[joint_passed] += (self.joint.pos_shaping_post - joint_shaping)[
+                joint_passed
+            ]
+            self.joint.pos_shaping_post = joint_shaping
 
             # Rot shaping
             joint_dist_to_90_rot = (
@@ -450,10 +436,8 @@ def reward(self, agent: Agent):
                     self.joint.landmark.state.rot, self.middle_angle
                 )
             )
-            # only apply before passage
             joint_shaping = joint_dist_to_90_rot * self.rot_shaping_factor
-            self.rot_rew[~joint_passed] += (self.joint.rot_shaping_pre -
-                joint_shaping)[~joint_passed]
+            self.rot_rew += self.joint.rot_shaping_pre - joint_shaping
             self.joint.rot_shaping_pre = joint_shaping
 
             # Agent collisions
@@ -485,18 +469,8 @@ def reward(self, agent: Agent):
             ).sum(-1)
             self.energy_rew = -self.energy_expenditure * self.energy_reward_coeff
 
-            # shaping for the agents to land on their spots
-            # only apply this after both agents are past the passage
-            both_agents_past = torch.logical_and(self.world.agents[0].state.pos[:, Y] > 0, self.world.agents[1].state.pos[:, Y] > 0)
-            curr_agent_dist_shaping = (self.agent_dist_to_goal(0) + self.agent_dist_to_goal(1)) * self.agent_pos_shaping_factor
-            # self.agent_pos_rew += self.prev_agent_dist_shaping - curr_agent_dist_shaping
-            self.agent_pos_rew[both_agents_past] += (self.prev_agent_dist_shaping - curr_agent_dist_shaping)[
-                both_agents_past
-            ]
-            self.prev_agent_dist_shaping = curr_agent_dist_shaping
-
             self.rew = (
-                self.pos_rew + self.rot_rew + self.collision_rew + self.energy_rew + self.agent_pos_rew
+                self.pos_rew + self.rot_rew + self.collision_rew + self.energy_rew
             )
 
         return self.rew
@@ -591,7 +565,6 @@ def info(self, agent: Agent) -> Dict[str, Tensor]:
            just_passed = self.all_passed * (self.passed == 0)
            self.passed[just_passed] = 100
            self.info_stored = {
-                "agent_pos_rew": self.agent_pos_rew,
                 "pos_rew": self.pos_rew,
                 "rot_rew": self.rot_rew,
                 "collision_rew": self.collision_rew,
@@ -787,7 +760,7 @@ def extra_render(self, env_index: int = 0):
         goal_agent_1.set_color(*color)
         geoms.append(goal_agent_1)
 
-        goal_agent_2 = rendering.make_circle(3 * self.agent_radius)
+        goal_agent_2 = rendering.make_circle(self.agent_radius)
         xform = rendering.Transform()
         goal_agent_2.add_attr(xform)
         xform.set_translation(

From 40284020f48eb53b868fa6631799fabf59271091 Mon Sep 17 00:00:00 2001
From: Kevin Fu
Date: Thu, 30 May 2024 13:46:55 -0400
Subject: [PATCH 4/4] modify done(), modify controller params

---
 vmas/scenarios/joint_passage_size.py | 49 ++++++++++++++++++----------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/vmas/scenarios/joint_passage_size.py b/vmas/scenarios/joint_passage_size.py
index aea5d926..4fc72d35 100644
--- a/vmas/scenarios/joint_passage_size.py
+++ b/vmas/scenarios/joint_passage_size.py
@@ -103,7 +103,7 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
         self.n_boxes = int(self.scenario_length // self.passage_length)
         self.min_collision_distance = 0.005
 
-        cotnroller_params = [2.0, 10, 0.00001]
+        controller_params = [1.0, 0.0, 0.0]
 
         # Add agents
         agent = Agent(
@@ -115,7 +115,7 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
             f_range=10,
         )
         agent.controller = VelocityController(
-            agent, world, cotnroller_params, "standard"
+            agent, world, controller_params, "standard"
         )
         world.add_agent(agent)
         agent = Agent(
@@ -129,7 +129,7 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
             f_range=10,
         )
         agent.controller = VelocityController(
-            agent, world, cotnroller_params, "standard"
+            agent, world, controller_params, "standard"
         )
         world.add_agent(agent)
 
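The controller change in PATCH 4/4 fixes the "cotnroller_params" typo and swaps the gains to [1.0, 0.0, 0.0]. Assuming those three values are PID gains for the velocity controller (an assumption, not confirmed by this patch), zeroing the last two leaves a pure proportional controller. A generic, self-contained sketch of that interpretation, not the VMAS VelocityController itself:

    # One PID step on a velocity error; with ki = kd = 0 the output is just kp * error.
    def pid_step(error, prev_error, integral, kp, ki, kd, dt=0.1):
        integral = integral + error * dt
        derivative = (error - prev_error) / dt
        force = kp * (error + ki * integral + kd * derivative)  # "standard" PID form
        return force, integral

    force, _ = pid_step(error=0.2, prev_error=0.15, integral=0.0, kp=1.0, ki=0.0, kd=0.0)
    print(force)  # 0.2 -> proportional-only control
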
@@ -543,25 +543,35 @@ def observation(self, agent: Agent):
         )
 
     def done(self):
-        return torch.all(
-            (
-                torch.linalg.vector_norm(
-                    self.joint.landmark.state.pos - self.goal.state.pos, dim=1
-                )
-                <= 0.01
-            )
-            * (
-                get_line_angle_dist_0_180(
-                    self.joint.landmark.state.rot, self.goal.state.rot
-                ).unsqueeze(-1)
-                <= 0.01
-            ),
-            dim=1,
-        )
+        # reimplementation
+        pos_done = torch.linalg.vector_norm(self.joint.landmark.state.pos - self.goal.state.pos, dim=1) <= 0.01
+        rot_done = get_line_angle_dist_0_180(self.joint.landmark.state.rot, self.goal.state.rot) <= 0.01
+        done = torch.logical_and(pos_done, rot_done)
+        return done
+
+        # original
+        # return torch.all(
+        #     (
+        #         torch.linalg.vector_norm(
+        #             self.joint.landmark.state.pos - self.goal.state.pos, dim=1
+        #         )
+        #         <= 0.01
+        #     )
+        #     * (
+        #         get_line_angle_dist_0_180(
+        #             self.joint.landmark.state.rot, self.goal.state.rot
+        #         )  # .unsqueeze(-1)
+        #         <= 0.01
+        #     ),
+        #     dim=1,
+        # )
 
     def info(self, agent: Agent) -> Dict[str, Tensor]:
         is_first = self.world.agents[0] == agent
         if is_first:
+            dist_to_goal = torch.linalg.vector_norm(self.joint.landmark.state.pos - self.goal.state.pos, dim=1)
+            rot_to_goal = get_line_angle_dist_0_180(self.joint.landmark.state.rot, self.goal.state.rot)
+
             just_passed = self.all_passed * (self.passed == 0)
             self.passed[just_passed] = 100
             self.info_stored = {
@@ -570,6 +580,9 @@ def info(self, agent: Agent) -> Dict[str, Tensor]:
                 "collision_rew": self.collision_rew,
                 "energy_rew": self.energy_rew,
                 "passed": just_passed.to(torch.int),
+                "success_rate": self.done(),  # wandb averages over n_envs
+                "dist_to_goal": dist_to_goal,
+                "rot_to_goal": rot_to_goal,
             }
 
         return self.info_stored
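The rewritten done() in PATCH 4/4 combines a per-environment position check and rotation check into a single boolean tensor of shape (batch_dim,), instead of broadcasting and reducing with torch.all as before; logged as "success_rate", averaging those booleans over environments gives a success rate. A small shape-check sketch on dummy tensors (thresholds and values below are illustrative):

    import torch

    dist_to_goal = torch.tensor([0.005, 0.02, 0.004])   # per-env position error
    rot_to_goal = torch.tensor([0.003, 0.001, 0.05])    # per-env rotation error

    pos_done = dist_to_goal <= 0.01              # [True, False, True]
    rot_done = rot_to_goal <= 0.01               # [True, True, False]
    done = torch.logical_and(pos_done, rot_done)
    print(done)  # tensor([ True, False, False]), one flag per environment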