diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/agents/rsl_rl_ppo_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/agents/rsl_rl_ppo_cfg.py
index 0840515..8e7c203 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/agents/rsl_rl_ppo_cfg.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/agents/rsl_rl_ppo_cfg.py
@@ -5,7 +5,7 @@
 @configclass
 class HumanoidArmReachPPORunnerCfg(RslRlOnPolicyRunnerCfg):
     num_steps_per_env = 24
-    max_iterations = 1000
+    max_iterations = 300
     save_interval = 50
     experiment_name = "reach_humanoid_arm"
     run_name = ""
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/joint_pos_env_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/joint_pos_env_cfg.py
index 74665a8..26b13a9 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/joint_pos_env_cfg.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/joint_pos_env_cfg.py
@@ -1,7 +1,6 @@
 import math
 from isaaclab.utils import configclass
 from HumanoidRLPackage.HumanoidRLSetup.tasks.manipulation.reach_env_cfg import ReachEnvCfg
-from HumanoidRLPackage.HumanoidRLSetup.modelCfg.humanoid import ARM_CFG
 
 
 # Config on top of the base ReachEnvCfg, can override any settings
@@ -9,8 +8,6 @@ class HumanoidArmReachEnvCfg(ReachEnvCfg):
     def __post_init__(self):
         super().__post_init__()
 
-
-        self.scene.robot = ARM_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
 
 
         marker_scale = 0.02
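The removed lines above follow the pattern named by the `# Config on top of the base ReachEnvCfg` comment: the derived env config overrides base settings in `__post_init__`. With the robot articulation now defined directly on the base scene config (see the `reach_env_cfg.py` hunks below), the per-env `self.scene.robot` override is no longer needed, but the same override mechanism still applies to any other field. A minimal hypothetical sketch of that pattern; the subclass name and the particular fields overridden here are illustrative, not part of this change:

```python
from isaaclab.utils import configclass
from HumanoidRLPackage.HumanoidRLSetup.tasks.manipulation.reach_env_cfg import ReachEnvCfg


@configclass
class HumanoidArmReachEnvCfg_Example(ReachEnvCfg):
    """Hypothetical variant config built on top of the base ReachEnvCfg."""

    def __post_init__(self):
        super().__post_init__()

        # Any field of the base config (scene assets, rewards, episode settings, ...)
        # can be overridden here; the values below are placeholders for illustration.
        self.episode_length_s = 8.0
        self.decimation = 2
```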
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/reach_env_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/reach_env_cfg.py
index 3cb2077..525bbc6 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/reach_env_cfg.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/reach_env_cfg.py
@@ -1,7 +1,5 @@
-from dataclasses import MISSING
-
 import isaaclab.sim as sim_utils
-from isaaclab.assets import ArticulationCfg, AssetBaseCfg
+from isaaclab.assets import AssetBaseCfg
 from isaaclab.envs import ManagerBasedRLEnvCfg
 from isaaclab.managers import CurriculumTermCfg as CurrTerm
 from isaaclab.managers import EventTermCfg as EventTerm
@@ -15,6 +13,8 @@
 from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise
 
 import HumanoidRLPackage.HumanoidRLSetup.tasks.manipulation.mdp as mdp
 
+from HumanoidRLPackage.HumanoidRLSetup.modelCfg.humanoid import ARM_CFG
+
 @configclass
 class ReachSceneCfg(InteractiveSceneCfg):
@@ -24,7 +24,7 @@ class ReachSceneCfg(InteractiveSceneCfg):
         init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)),
     )
 
-    robot: ArticulationCfg = MISSING
+    robot = ARM_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
 
     light = AssetBaseCfg(
         prim_path="/World/light",
@@ -34,6 +34,7 @@
 
 @configclass
 class CommandsCfg:
+    # Target end-effector position
     ee_pose = mdp.UniformPoseCommandCfg(
         asset_name="robot",
         body_name="DIP_INDEX_v1_.*",
@@ -112,6 +113,7 @@ class CommandsCfg:
 
 @configclass
 class ActionsCfg:
+    # All joints are allowed to move to reach the target end-effector position
     arm_action = mdp.JointPositionActionCfg(
         asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True
     )
@@ -238,11 +240,11 @@ class RewardsCfg:
     #     params={"asset_cfg": SceneEntityCfg("robot", body_names="TIP_B_5"), "command_name": "ee_pose_5"},
     # )
 
-    # action penalty
-    action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.000002)
+    # Penalty to discourage overly rapid action changes
+    action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.03)
     joint_vel = RewTerm(
         func=mdp.joint_vel_l2,
-        weight=-0.005,
+        weight=-0.01,
         params={"asset_cfg": SceneEntityCfg("robot")},
     )
@@ -254,12 +256,18 @@ class TerminationsCfg:
 
 @configclass
 class CurriculumCfg:
+    # The curriculum modifies reward weights at a chosen point during training.
+    # The current RL training collects roughly 22000 - 25000 environment steps per iteration.
+
+    # We increase the action-rate and joint-velocity penalties in the later half of training,
+    # so that early on the policy is incentivized to get near the target first,
+    # and afterwards slows down and stabilizes around the target point.
     action_rate = CurrTerm(
-        func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -0.00005, "num_steps": 18000}
+        func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -0.05, "num_steps": 15000}
     )
     joint_vel = CurrTerm(
-        func=mdp.modify_reward_weight, params={"term_name": "joint_vel", "weight": -0.05, "num_steps": 18000}
+        func=mdp.modify_reward_weight, params={"term_name": "joint_vel", "weight": -0.15, "num_steps": 15000}
     )
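The comments in `CurriculumCfg` describe a two-phase schedule: the mild penalties from `RewardsCfg` apply early so the policy learns to reach the target, and the stronger weights take over after the `num_steps` threshold so it settles there. As a quick way to sanity-check that schedule outside the simulator, here is a small standalone sketch; it mirrors the weights and thresholds from the config, but the `scheduled_weight` helper and the plain step counter are illustrative assumptions, not Isaac Lab's `mdp.modify_reward_weight` implementation.

```python
# Illustrative sketch only: reproduces the intent of the CurriculumCfg schedule
# (mild penalties early, stronger penalties after a step threshold).

# Initial weights from RewardsCfg and the stronger weights from CurriculumCfg.
SCHEDULE = {
    "action_rate": {"initial": -0.03, "final": -0.05, "num_steps": 15000},
    "joint_vel": {"initial": -0.01, "final": -0.15, "num_steps": 15000},
}


def scheduled_weight(term_name: str, step: int) -> float:
    """Return the reward weight for `term_name` at training step `step`.

    Before `num_steps` the initial (mild) weight applies, so the policy is free
    to move toward the target; afterwards the stronger penalty applies,
    encouraging it to slow down and stabilize around the target.
    """
    cfg = SCHEDULE[term_name]
    return cfg["initial"] if step < cfg["num_steps"] else cfg["final"]


if __name__ == "__main__":
    for step in (0, 10000, 15000, 20000):
        print(step, scheduled_weight("action_rate", step), scheduled_weight("joint_vel", step))
```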
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.onnx b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.onnx
deleted file mode 100644
index 8b39a9d..0000000
Binary files a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.onnx and /dev/null differ
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.pt b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.pt
deleted file mode 100644
index c98e91a..0000000
Binary files a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.pt and /dev/null differ
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/model_200.pt b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/model_200.pt
deleted file mode 100644
index f027254..0000000
Binary files a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/model_200.pt and /dev/null differ
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_03-30-17/exported/policy.onnx b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_03-30-17/exported/policy.onnx
new file mode 100644
index 0000000..350c18f
Binary files /dev/null and b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_03-30-17/exported/policy.onnx differ
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_03-30-17/exported/policy.pt b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_03-30-17/exported/policy.pt
new file mode 100644
index 0000000..1949aaf
Binary files /dev/null and b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_03-30-17/exported/policy.pt differ
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_03-30-17/model_299.pt b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_03-30-17/model_299.pt
new file mode 100644
index 0000000..6804083
Binary files /dev/null and b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_03-30-17/model_299.pt differ