From eca62e3c7bee712d02de43c95a8758bcae2c2cdf Mon Sep 17 00:00:00 2001
From: Kevin Fu
Date: Fri, 24 May 2024 15:59:37 -0400
Subject: [PATCH 1/4] changes for joint passage size

---
 vmas/scenarios/joint_passage_size.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/vmas/scenarios/joint_passage_size.py b/vmas/scenarios/joint_passage_size.py
index 4fa6780e..aea5d926 100644
--- a/vmas/scenarios/joint_passage_size.py
+++ b/vmas/scenarios/joint_passage_size.py
@@ -70,6 +70,9 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
 
         assert self.n_passages == 3 or self.n_passages == 4
 
+        # NOTE: I changed how our VMAS viewer works s.t. it requires a world_semidim, my bad
+        self.world_semidim = 1.0
+
         self.plot_grid = False
 
         # Make world
@@ -510,6 +513,20 @@ def observation(self, agent: Agent):
             angle_to_vector(self.goal.state.rot),
         ] + ([angle_to_vector(joint_angle)] if self.observe_joint_angle else [])
 
+        # NOTE: I add the size capability in "mixed" form manually here:
+        radius = agent.shape.radius
+        mean_radius = (self.agent_radius + self.agent_radius_2) / 2
+        relative_radius = agent.shape.radius - mean_radius
+        capability_repr = [
+            torch.tensor(
+                radius, device=self.world.device
+            ).repeat(self.world.batch_dim, 1),
+            torch.tensor(
+                relative_radius, device=self.world.device
+            ).repeat(self.world.batch_dim, 1),
+        ]
+        observations += capability_repr
+
         if self.obs_noise > 0:
             for i, obs in enumerate(observations):
                 noise = torch.zeros(
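The capability observation added in PATCH 1/4 appends two per-agent scalars (absolute radius and radius relative to the team mean) to every environment's observation. A minimal standalone sketch of that broadcast pattern, outside the scenario (batch size and radii below are made-up illustrative values, not VMAS API):

    import torch

    batch_dim = 4                       # number of vectorized environments (example value)
    radius, mean_radius = 0.05, 0.04    # hypothetical agent radius and team-mean radius

    # Scalar -> (batch_dim, 1) tensor, one copy per environment,
    # so it can be concatenated with the other per-env observation terms.
    abs_radius_obs = torch.tensor(radius).repeat(batch_dim, 1)
    rel_radius_obs = torch.tensor(radius - mean_radius).repeat(batch_dim, 1)

    obs = torch.cat([abs_radius_obs, rel_radius_obs], dim=-1)  # shape (4, 2)
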
From 05b0a4f82080320978128e549e59a027d85cd01b Mon Sep 17 00:00:00 2001
From: Kevin Fu
Date: Tue, 28 May 2024 11:44:14 -0400
Subject: [PATCH 2/4] WIP agent pos shaping

---
 vmas/scenarios/joint_passage_size.py | 53 +++++++++++++++++++++-------
 1 file changed, 40 insertions(+), 13 deletions(-)

diff --git a/vmas/scenarios/joint_passage_size.py b/vmas/scenarios/joint_passage_size.py
index aea5d926..cae4fb10 100644
--- a/vmas/scenarios/joint_passage_size.py
+++ b/vmas/scenarios/joint_passage_size.py
@@ -46,7 +46,6 @@ def get_line_angle_dist_0_180(angle, goal):
         ),
     ).squeeze(-1)
 
-
 class Scenario(BaseScenario):
     def make_world(self, batch_dim: int, device: torch.device, **kwargs):
         self.fixed_passage = kwargs.get("fixed_passage", False)
@@ -59,6 +58,7 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
         self.mass_ratio = kwargs.get("mass_ratio", 1)
         self.mass_position = kwargs.get("mass_position", 0.75)
         self.max_speed_1 = kwargs.get("max_speed_1", None)  # 0.1
+        self.agent_pos_shaping_factor = kwargs.get("agent_pos_shaping_factor", 1)
         self.pos_shaping_factor = kwargs.get("pos_shaping_factor", 1)
         self.rot_shaping_factor = kwargs.get("rot_shaping_factor", 0)
         self.collision_reward = kwargs.get("collision_reward", -0.1)
@@ -200,6 +200,7 @@ def mass_collision_filter(e):
         self.rot_rew = self.pos_rew.clone()
         self.collision_rew = self.pos_rew.clone()
         self.energy_rew = self.pos_rew.clone()
+        self.agent_pos_rew = self.pos_rew.clone()
         self.all_passed = torch.full((batch_dim,), False, device=device)
 
         return world
@@ -216,6 +217,13 @@ def set_n_passages(self, val):
         self.create_passage_map(self.world)
         self.reset_world_at()
 
+    def agent_dist_to_goal(self, agent_index):
+        agent = self.world.agents[agent_index]
+        agent_goal = (self.goal.state.pos - self.joint_length / 2) if agent_index == 0 else (self.goal.state.pos + self.joint_length / 2)
+        return torch.linalg.vector_norm(
+            agent.state.pos - agent_goal, dim=1
+        )
+
     def reset_world_at(self, env_index: int = None):
         start_angle = torch.rand(
             (1, 1) if env_index is not None else (self.world.batch_dim, 1),
@@ -354,6 +362,10 @@ def reset_world_at(self, env_index: int = None):
                 )
             ) * self.rot_shaping_factor
 
+            # these variables allow "previous state" - "curr state" so that
+            # the reward is only relative to previous state
+            self.prev_agent_dist_shaping = (self.agent_dist_to_goal(0) + self.agent_dist_to_goal(1)) * self.agent_pos_shaping_factor
+
         else:
             self.t[env_index] = 0
             self.passed[env_index] = 0
@@ -396,6 +408,7 @@ def reward(self, agent: Agent):
             self.rot_rew[:] = 0
             self.collision_rew[:] = 0
             self.energy_rew[:] = 0
+            self.agent_pos_rew[:] = 0
 
             joint_passed = self.joint.landmark.state.pos[:, Y] > 0
             self.all_passed = (
@@ -416,15 +429,16 @@ def reward(self, agent: Agent):
             ]
             self.joint.pos_shaping_pre = joint_shaping
 
-            joint_dist_to_goal = torch.linalg.vector_norm(
-                self.joint.landmark.state.pos - self.goal.state.pos,
-                dim=1,
-            )
-            joint_shaping = joint_dist_to_goal * self.pos_shaping_factor
-            self.pos_rew[joint_passed] += (self.joint.pos_shaping_post - joint_shaping)[
-                joint_passed
-            ]
-            self.joint.pos_shaping_post = joint_shaping
+            # 0 reward for joint center to goal
+            # joint_dist_to_goal = torch.linalg.vector_norm(
+            #     self.joint.landmark.state.pos - self.goal.state.pos,
+            #     dim=1,
+            # )
+            # joint_shaping = joint_dist_to_goal * self.pos_shaping_factor
+            # self.pos_rew[joint_passed] += (self.joint.pos_shaping_post - joint_shaping)[
+            #     joint_passed
+            # ]
+            # self.joint.pos_shaping_post = joint_shaping
 
             # Rot shaping
             joint_dist_to_90_rot = (
@@ -436,8 +450,10 @@ def reward(self, agent: Agent):
                     self.joint.landmark.state.rot, self.middle_angle
                 )
             )
+            # only apply before passage
             joint_shaping = joint_dist_to_90_rot * self.rot_shaping_factor
-            self.rot_rew += self.joint.rot_shaping_pre - joint_shaping
+            self.rot_rew[~joint_passed] += (self.joint.rot_shaping_pre -
+                joint_shaping)[~joint_passed]
             self.joint.rot_shaping_pre = joint_shaping
 
             # Agent collisions
@@ -469,8 +485,18 @@ def reward(self, agent: Agent):
             ).sum(-1)
             self.energy_rew = -self.energy_expenditure * self.energy_reward_coeff
 
+            # shaping for the agents to land on their spots
+            # only apply this after both agents are past the passage
+            both_agents_past = torch.logical_and(self.world.agents[0].state.pos[:, Y] > 0, self.world.agents[1].state.pos[:, Y] > 0)
+            curr_agent_dist_shaping = (self.agent_dist_to_goal(0) + self.agent_dist_to_goal(1)) * self.agent_pos_shaping_factor
+            # self.agent_pos_rew += self.prev_agent_dist_shaping - curr_agent_dist_shaping
+            self.agent_pos_rew[both_agents_past] += (self.prev_agent_dist_shaping - curr_agent_dist_shaping)[
+                both_agents_past
+            ]
+            self.prev_agent_dist_shaping = curr_agent_dist_shaping
+
             self.rew = (
-                self.pos_rew + self.rot_rew + self.collision_rew + self.energy_rew
+                self.pos_rew + self.rot_rew + self.collision_rew + self.energy_rew + self.agent_pos_rew
             )
 
         return self.rew
@@ -565,6 +591,7 @@ def info(self, agent: Agent) -> Dict[str, Tensor]:
             just_passed = self.all_passed * (self.passed == 0)
             self.passed[just_passed] = 100
             self.info_stored = {
+                "agent_pos_rew": self.agent_pos_rew,
                 "pos_rew": self.pos_rew,
                 "rot_rew": self.rot_rew,
                 "collision_rew": self.collision_rew,
@@ -760,7 +787,7 @@ def extra_render(self, env_index: int = 0):
         goal_agent_1.set_color(*color)
         geoms.append(goal_agent_1)
 
-        goal_agent_2 = rendering.make_circle(self.agent_radius)
+        goal_agent_2 = rendering.make_circle(3 * self.agent_radius)
         xform = rendering.Transform()
         goal_agent_2.add_attr(xform)
         xform.set_translation(
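The agent_pos_rew term in PATCH 2/4 is a difference ("previous shaping" minus "current shaping"), so agents are rewarded only for progress made since the last step rather than for absolute distance. A minimal sketch of that pattern on dummy tensors (names and values below are illustrative, not part of the scenario):

    import torch

    shaping_factor = 1.0

    # distance to goal at the previous and current step, one value per environment
    prev_dist = torch.tensor([0.50, 0.40, 0.30])
    curr_dist = torch.tensor([0.45, 0.42, 0.30])

    prev_shaping = prev_dist * shaping_factor
    curr_shaping = curr_dist * shaping_factor

    # positive when the agent moved closer, negative when it moved away, zero otherwise
    rew = prev_shaping - curr_shaping   # approx [0.05, -0.02, 0.00]
    prev_shaping = curr_shaping         # carry over for the next step
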
From 3b015029ddd7c305f53913fb7e3f64bae792b073 Mon Sep 17 00:00:00 2001
From: Kevin Fu
Date: Wed, 29 May 2024 11:33:49 -0400
Subject: [PATCH 3/4] Revert "WIP agent pos shaping"

This reverts commit 05b0a4f82080320978128e549e59a027d85cd01b.
---
 vmas/scenarios/joint_passage_size.py | 53 +++++++--------------------
 1 file changed, 13 insertions(+), 40 deletions(-)

diff --git a/vmas/scenarios/joint_passage_size.py b/vmas/scenarios/joint_passage_size.py
index cae4fb10..aea5d926 100644
--- a/vmas/scenarios/joint_passage_size.py
+++ b/vmas/scenarios/joint_passage_size.py
@@ -46,6 +46,7 @@ def get_line_angle_dist_0_180(angle, goal):
         ),
     ).squeeze(-1)
 
+
 class Scenario(BaseScenario):
     def make_world(self, batch_dim: int, device: torch.device, **kwargs):
         self.fixed_passage = kwargs.get("fixed_passage", False)
@@ -58,7 +59,6 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
         self.mass_ratio = kwargs.get("mass_ratio", 1)
         self.mass_position = kwargs.get("mass_position", 0.75)
         self.max_speed_1 = kwargs.get("max_speed_1", None)  # 0.1
-        self.agent_pos_shaping_factor = kwargs.get("agent_pos_shaping_factor", 1)
         self.pos_shaping_factor = kwargs.get("pos_shaping_factor", 1)
         self.rot_shaping_factor = kwargs.get("rot_shaping_factor", 0)
         self.collision_reward = kwargs.get("collision_reward", -0.1)
@@ -200,7 +200,6 @@ def mass_collision_filter(e):
         self.rot_rew = self.pos_rew.clone()
         self.collision_rew = self.pos_rew.clone()
         self.energy_rew = self.pos_rew.clone()
-        self.agent_pos_rew = self.pos_rew.clone()
         self.all_passed = torch.full((batch_dim,), False, device=device)
 
         return world
@@ -217,13 +216,6 @@ def set_n_passages(self, val):
         self.create_passage_map(self.world)
         self.reset_world_at()
 
-    def agent_dist_to_goal(self, agent_index):
-        agent = self.world.agents[agent_index]
-        agent_goal = (self.goal.state.pos - self.joint_length / 2) if agent_index == 0 else (self.goal.state.pos + self.joint_length / 2)
-        return torch.linalg.vector_norm(
-            agent.state.pos - agent_goal, dim=1
-        )
-
     def reset_world_at(self, env_index: int = None):
         start_angle = torch.rand(
             (1, 1) if env_index is not None else (self.world.batch_dim, 1),
@@ -362,10 +354,6 @@ def reset_world_at(self, env_index: int = None):
                 )
             ) * self.rot_shaping_factor
 
-            # these variables allow "previous state" - "curr state" so that
-            # the reward is only relative to previous state
-            self.prev_agent_dist_shaping = (self.agent_dist_to_goal(0) + self.agent_dist_to_goal(1)) * self.agent_pos_shaping_factor
-
         else:
             self.t[env_index] = 0
             self.passed[env_index] = 0
@@ -408,7 +396,6 @@ def reward(self, agent: Agent):
             self.rot_rew[:] = 0
             self.collision_rew[:] = 0
             self.energy_rew[:] = 0
-            self.agent_pos_rew[:] = 0
 
             joint_passed = self.joint.landmark.state.pos[:, Y] > 0
             self.all_passed = (
@@ -429,16 +416,15 @@ def reward(self, agent: Agent):
             ]
             self.joint.pos_shaping_pre = joint_shaping
 
-            # 0 reward for joint center to goal
-            # joint_dist_to_goal = torch.linalg.vector_norm(
-            #     self.joint.landmark.state.pos - self.goal.state.pos,
-            #     dim=1,
-            # )
-            # joint_shaping = joint_dist_to_goal * self.pos_shaping_factor
-            # self.pos_rew[joint_passed] += (self.joint.pos_shaping_post - joint_shaping)[
-            #     joint_passed
-            # ]
-            # self.joint.pos_shaping_post = joint_shaping
+            joint_dist_to_goal = torch.linalg.vector_norm(
+                self.joint.landmark.state.pos - self.goal.state.pos,
+                dim=1,
+            )
+            joint_shaping = joint_dist_to_goal * self.pos_shaping_factor
+            self.pos_rew[joint_passed] += (self.joint.pos_shaping_post - joint_shaping)[
+                joint_passed
+            ]
+            self.joint.pos_shaping_post = joint_shaping
 
             # Rot shaping
             joint_dist_to_90_rot = (
@@ -450,10 +436,8 @@ def reward(self, agent: Agent):
                     self.joint.landmark.state.rot, self.middle_angle
                 )
             )
-            # only apply before passage
             joint_shaping = joint_dist_to_90_rot * self.rot_shaping_factor
-            self.rot_rew[~joint_passed] += (self.joint.rot_shaping_pre -
-                joint_shaping)[~joint_passed]
+            self.rot_rew += self.joint.rot_shaping_pre - joint_shaping
             self.joint.rot_shaping_pre = joint_shaping
 
             # Agent collisions
@@ -485,18 +469,8 @@ def reward(self, agent: Agent):
             ).sum(-1)
             self.energy_rew = -self.energy_expenditure * self.energy_reward_coeff
 
-            # shaping for the agents to land on their spots
-            # only apply this after both agents are past the passage
-            both_agents_past = torch.logical_and(self.world.agents[0].state.pos[:, Y] > 0, self.world.agents[1].state.pos[:, Y] > 0)
-            curr_agent_dist_shaping = (self.agent_dist_to_goal(0) + self.agent_dist_to_goal(1)) * self.agent_pos_shaping_factor
-            # self.agent_pos_rew += self.prev_agent_dist_shaping - curr_agent_dist_shaping
-            self.agent_pos_rew[both_agents_past] += (self.prev_agent_dist_shaping - curr_agent_dist_shaping)[
-                both_agents_past
-            ]
-            self.prev_agent_dist_shaping = curr_agent_dist_shaping
-
             self.rew = (
-                self.pos_rew + self.rot_rew + self.collision_rew + self.energy_rew + self.agent_pos_rew
+                self.pos_rew + self.rot_rew + self.collision_rew + self.energy_rew
             )
 
         return self.rew
@@ -591,7 +565,6 @@ def info(self, agent: Agent) -> Dict[str, Tensor]:
            just_passed = self.all_passed * (self.passed == 0)
            self.passed[just_passed] = 100
            self.info_stored = {
-                "agent_pos_rew": self.agent_pos_rew,
                 "pos_rew": self.pos_rew,
                 "rot_rew": self.rot_rew,
                 "collision_rew": self.collision_rew,
@@ -787,7 +760,7 @@ def extra_render(self, env_index: int = 0):
         goal_agent_1.set_color(*color)
         geoms.append(goal_agent_1)
 
-        goal_agent_2 = rendering.make_circle(3 * self.agent_radius)
+        goal_agent_2 = rendering.make_circle(self.agent_radius)
         xform = rendering.Transform()
         goal_agent_2.add_attr(xform)
         xform.set_translation(

From 40284020f48eb53b868fa6631799fabf59271091 Mon Sep 17 00:00:00 2001
From: Kevin Fu
Date: Thu, 30 May 2024 13:46:55 -0400
Subject: [PATCH 4/4] modify done(), modify controller params

---
 vmas/scenarios/joint_passage_size.py | 49 ++++++++++++++++++----------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/vmas/scenarios/joint_passage_size.py b/vmas/scenarios/joint_passage_size.py
index aea5d926..4fc72d35 100644
--- a/vmas/scenarios/joint_passage_size.py
+++ b/vmas/scenarios/joint_passage_size.py
@@ -103,7 +103,7 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
         self.n_boxes = int(self.scenario_length // self.passage_length)
         self.min_collision_distance = 0.005
 
-        cotnroller_params = [2.0, 10, 0.00001]
+        controller_params = [1.0, 0.0, 0.0]
 
         # Add agents
         agent = Agent(
@@ -115,7 +115,7 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
             f_range=10,
         )
         agent.controller = VelocityController(
-            agent, world, cotnroller_params, "standard"
+            agent, world, controller_params, "standard"
         )
         world.add_agent(agent)
         agent = Agent(
@@ -129,7 +129,7 @@ def make_world(self, batch_dim: int, device: torch.device, **kwargs):
             f_range=10,
         )
         agent.controller = VelocityController(
-            agent, world, cotnroller_params, "standard"
+            agent, world, controller_params, "standard"
         )
         world.add_agent(agent)
 
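The controller change in PATCH 4/4 fixes the "cotnroller_params" typo and swaps the gains to [1.0, 0.0, 0.0]. Assuming those three values are PID gains for the velocity controller (an assumption, not confirmed by this patch), zeroing the last two leaves a pure proportional controller. A generic, self-contained sketch of that interpretation, not the VMAS VelocityController itself:

    # One PID step on a velocity error; with ki = kd = 0 the output is just kp * error.
    def pid_step(error, prev_error, integral, kp, ki, kd, dt=0.1):
        integral = integral + error * dt
        derivative = (error - prev_error) / dt
        force = kp * (error + ki * integral + kd * derivative)  # "standard" PID form
        return force, integral

    force, _ = pid_step(error=0.2, prev_error=0.15, integral=0.0, kp=1.0, ki=0.0, kd=0.0)
    print(force)  # 0.2 -> proportional-only control
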
@@ -543,25 +543,35 @@ def observation(self, agent: Agent):
         )
 
     def done(self):
-        return torch.all(
-            (
-                torch.linalg.vector_norm(
-                    self.joint.landmark.state.pos - self.goal.state.pos, dim=1
-                )
-                <= 0.01
-            )
-            * (
-                get_line_angle_dist_0_180(
-                    self.joint.landmark.state.rot, self.goal.state.rot
-                ).unsqueeze(-1)
-                <= 0.01
-            ),
-            dim=1,
-        )
+        # reimplementation
+        pos_done = torch.linalg.vector_norm(self.joint.landmark.state.pos - self.goal.state.pos, dim=1) <= 0.01
+        rot_done = get_line_angle_dist_0_180(self.joint.landmark.state.rot, self.goal.state.rot) <= 0.01
+        done = torch.logical_and(pos_done, rot_done)
+        return done
+
+        # original
+        # return torch.all(
+        #     (
+        #         torch.linalg.vector_norm(
+        #             self.joint.landmark.state.pos - self.goal.state.pos, dim=1
+        #         )
+        #         <= 0.01
+        #     )
+        #     * (
+        #         get_line_angle_dist_0_180(
+        #             self.joint.landmark.state.rot, self.goal.state.rot
+        #         )  # .unsqueeze(-1)
+        #         <= 0.01
+        #     ),
+        #     dim=1,
+        # )
 
     def info(self, agent: Agent) -> Dict[str, Tensor]:
         is_first = self.world.agents[0] == agent
         if is_first:
+            dist_to_goal = torch.linalg.vector_norm(self.joint.landmark.state.pos - self.goal.state.pos, dim=1)
+            rot_to_goal = get_line_angle_dist_0_180(self.joint.landmark.state.rot, self.goal.state.rot)
+
             just_passed = self.all_passed * (self.passed == 0)
             self.passed[just_passed] = 100
             self.info_stored = {
@@ -570,6 +580,9 @@ def info(self, agent: Agent) -> Dict[str, Tensor]:
                 "collision_rew": self.collision_rew,
                 "energy_rew": self.energy_rew,
                 "passed": just_passed.to(torch.int),
+                "success_rate": self.done(),  # wandb averages over n_envs
+                "dist_to_goal": dist_to_goal,
+                "rot_to_goal": rot_to_goal,
             }
 
         return self.info_stored
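The rewritten done() in PATCH 4/4 combines a per-environment position check and rotation check into a single boolean tensor of shape (batch_dim,), instead of broadcasting and reducing with torch.all as before; logged as "success_rate", averaging those booleans over environments gives a success rate. A small shape-check sketch on dummy tensors (thresholds and values below are illustrative):

    import torch

    dist_to_goal = torch.tensor([0.005, 0.02, 0.004])   # per-env position error
    rot_to_goal = torch.tensor([0.003, 0.001, 0.05])    # per-env rotation error

    pos_done = dist_to_goal <= 0.01              # [True, False, True]
    rot_done = rot_to_goal <= 0.01               # [True, True, False]
    done = torch.logical_and(pos_done, rot_done)
    print(done)  # tensor([ True, False, False]), one flag per environment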