diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/humanoid.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/humanoid.py index fe50dab..47169d8 100644 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/humanoid.py +++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/humanoid.py @@ -5,7 +5,7 @@ from isaaclab.assets.articulation import ArticulationCfg _HUMANOID_WATO_ROOT = os.path.abspath( - os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..") + os.path.join(os.path.dirname(__file__), "..", "..", "..", "..") ) _MODEL_ASSETS = os.path.join(_HUMANOID_WATO_ROOT, "ModelAssets") @@ -40,10 +40,9 @@ actuators={ "arm": ImplicitActuatorCfg( joint_names_expr=[".*"], - # velocity_limit=100.0, - # effort_limit=87.0, stiffness=0.5, damping=0.5, + velocity_limit_sim=3.0, ), }, ) @@ -93,72 +92,84 @@ joint_names_expr=["shoulder_flexion_extension"], stiffness=757.6, damping=60.3, + velocity_limit_sim=3.0, ), # J≈0.95 kg·m², f=4.0 Hz, ζ=1.0 → k=600.0, d=47.7 "shoulder_aa": ImplicitActuatorCfg( joint_names_expr=["shoulder_abduction_adduction"], stiffness=600.0, damping=47.7, + velocity_limit_sim=3.0, ), # J≈0.77 kg·m², f=4.5 Hz, ζ=1.0 → k=615.5, d=43.5 "shoulder_rot": ImplicitActuatorCfg( joint_names_expr=["shoulder_rotation"], stiffness=615.5, damping=43.5, + velocity_limit_sim=3.0, ), # J≈0.77 kg·m², f=4.5 Hz, ζ=1.0 → k=615.5, d=43.5 "elbow": ImplicitActuatorCfg( joint_names_expr=["elbow_flexion_extension"], stiffness=615.5, damping=43.5, + velocity_limit_sim=3.0, ), # J≈0.45 kg·m², f=5.0 Hz, ζ=1.0 → k=444.1, d=28.3 "forearm": ImplicitActuatorCfg( joint_names_expr=["forearm_rotation"], stiffness=444.1, damping=28.3, + velocity_limit_sim=3.0, ), # J≈0.12 kg·m², f=6.0 Hz, ζ=1.0 → k=170.5, d=9.0 "wrist": ImplicitActuatorCfg( joint_names_expr=["wrist_extension"], stiffness=170.5, damping=9.0, + velocity_limit_sim=3.0, ), # J≈0.0012 kg·m², f=8.0 Hz, ζ=1.0 → k=3.0, d=0.12 "finger_mcp": ImplicitActuatorCfg( joint_names_expr=["mcp_index", "mcp_middle", "mcp_ring", "mcp_pinky"], stiffness=3.0, damping=0.12, + velocity_limit_sim=3.0, ), # J≈0.0006 kg·m², f=8.0 Hz, ζ=1.0 → k=1.5, d=0.06 "finger_pip": ImplicitActuatorCfg( joint_names_expr=["pip_index", "pip_middle", "pip_ring", "pip_pinky"], stiffness=1.5, damping=0.06, + velocity_limit_sim=3.0, ), # J≈0.0002 kg·m², f=8.0 Hz, ζ=1.0 → k=0.5, d=0.02 "finger_dip": ImplicitActuatorCfg( joint_names_expr=["dip_index", "dip_middle", "dip_ring", "dip_pinky"], stiffness=0.5, damping=0.02, + velocity_limit_sim=3.0, ), # J≈0.003 kg·m², f=8.0 Hz, ζ=1.0 → k=7.6, d=0.30 "thumb_cmc": ImplicitActuatorCfg( joint_names_expr=["cmc_thumb"], stiffness=7.6, damping=0.30, + velocity_limit_sim=3.0, ), # J≈0.001 kg·m², f=8.0 Hz, ζ=1.0 → k=2.5, d=0.10 "thumb_mcp": ImplicitActuatorCfg( joint_names_expr=["mcp_thumb"], stiffness=2.5, damping=0.10, + velocity_limit_sim=3.0, ), # J≈0.0002 kg·m², f=8.0 Hz, ζ=1.0 → k=0.5, d=0.02 "thumb_ip": ImplicitActuatorCfg( joint_names_expr=["ip_thumb"], stiffness=0.5, damping=0.02, + velocity_limit_sim=3.0, ), }, ) \ No newline at end of file diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/joint_pos_env_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/joint_pos_env_cfg.py index bee47db..74665a8 100644 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/joint_pos_env_cfg.py +++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/joint_pos_env_cfg.py @@ -1,17 +1,15 @@ import math from isaaclab.utils import configclass -import HumanoidRLPackage.HumanoidRLSetup.tasks.manipulation.mdp as mdp from HumanoidRLPackage.HumanoidRLSetup.tasks.manipulation.reach_env_cfg import ReachEnvCfg -# from HumanoidRLPackage.HumanoidRLSetup.modelCfg.universal_robots import UR10_CFG from HumanoidRLPackage.HumanoidRLSetup.modelCfg.humanoid import ARM_CFG +# Config on top of the base ReachEnvCfg, can override any settings @configclass class HumanoidArmReachEnvCfg(ReachEnvCfg): def __post_init__(self): super().__post_init__() - # self.scene.robot = UR10_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") self.scene.robot = ARM_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot") marker_scale = 0.02 @@ -54,10 +52,6 @@ def __post_init__(self): # self.rewards.end_effector_5_position_tracking_fine_grained.params["asset_cfg"].body_names = ["IP_THUMB_v1_.*"] # self.rewards.end_effector_5_orientation_tracking.params["asset_cfg"].body_names = ["TIP_B_5"] - # override actions - self.actions.arm_action = mdp.JointPositionActionCfg( - asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True - ) # override command generator body # end-effector is along x-direction self.commands.ee_pose.body_name = "DIP_INDEX_v1_.*" @@ -82,13 +76,13 @@ def __post_init__(self): @configclass class HumanoidArmReachEnvCfg_PLAY(HumanoidArmReachEnvCfg): def __post_init__(self): - # post init of parent super().__post_init__() + # make a smaller scene for play self.scene.num_envs = 50 self.scene.env_spacing = 2.5 - # disable randomization for play - self.observations.policy.enable_corruption = False + self.observations.policy.enable_corruption = False # disable randomization for play + # (env_isaaclab) hy@hy-LOQ-15IRX9:~/Downloads/Humanoid_Wato/HumanoidRL$ PYTHONPATH=$(pwd) /home/hy/IsaacLab/isaaclab.sh -p HumanoidRLPackage/rsl_rl_scripts/train.py --task=Isaac-Reach-Humanoid-Arm-v0 --headless diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/reach_env_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/reach_env_cfg.py index d74244b..3cb2077 100644 --- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/reach_env_cfg.py +++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/reach_env_cfg.py @@ -3,7 +3,6 @@ import isaaclab.sim as sim_utils from isaaclab.assets import ArticulationCfg, AssetBaseCfg from isaaclab.envs import ManagerBasedRLEnvCfg -from isaaclab.managers import ActionTermCfg as ActionTerm from isaaclab.managers import CurriculumTermCfg as CurrTerm from isaaclab.managers import EventTermCfg as EventTerm from isaaclab.managers import ObservationGroupCfg as ObsGroup @@ -13,54 +12,41 @@ from isaaclab.managers import TerminationTermCfg as DoneTerm from isaaclab.scene import InteractiveSceneCfg from isaaclab.utils import configclass -from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise import HumanoidRLPackage.HumanoidRLSetup.tasks.manipulation.mdp as mdp @configclass class ReachSceneCfg(InteractiveSceneCfg): - """Configuration for the scene with a robotic arm.""" - - # world ground = AssetBaseCfg( prim_path="/World/ground", spawn=sim_utils.GroundPlaneCfg(), init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)), ) - # robots robot: ArticulationCfg = MISSING - # lights light = AssetBaseCfg( prim_path="/World/light", spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=2500.0), ) -## -# MDP settings -## - - @configclass class CommandsCfg: - """Command terms for the MDP.""" - ee_pose = mdp.UniformPoseCommandCfg( asset_name="robot", body_name="DIP_INDEX_v1_.*", - resampling_time_range=(4.0, 4.0), + resampling_time_range=(4.0, 4.0), # target pose changes every sampling time debug_vis=True, ranges=mdp.UniformPoseCommandCfg.Ranges( - pos_x=(-0.5, 0.5), - pos_y=(0.62, 0.66), - pos_z=(0.3, 0.7), + pos_x=(-0.8, -0.7), + pos_y=(-0.4, 0.4), + pos_z=(0.0, 0.3), roll=(0.0, 0.0), pitch=(0.0, 0.0), yaw=(0.0, 0.0), - ), + ), # range of position/rotation that the target pose can appear in ) # ee_pose_2 = mdp.UniformPoseCommandCfg( @@ -126,20 +112,15 @@ class CommandsCfg: @configclass class ActionsCfg: - """Action specifications for the MDP.""" - - arm_action: ActionTerm = MISSING - gripper_action: ActionTerm | None = None + arm_action = mdp.JointPositionActionCfg( + asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True + ) @configclass class ObservationsCfg: - """Observation specifications for the MDP.""" - @configclass class PolicyCfg(ObsGroup): - """Observations for policy group.""" - # observation terms (order preserved) joint_pos = ObsTerm(func=mdp.joint_pos_rel, noise=Unoise(n_min=-0.01, n_max=0.01)) joint_vel = ObsTerm(func=mdp.joint_vel_rel, noise=Unoise(n_min=-0.01, n_max=0.01)) @@ -181,7 +162,7 @@ class RewardsCfg: # task terms end_effector_position_tracking = RewTerm( func=mdp.position_command_error, - weight=-0.2, + weight=-1.0, params={"asset_cfg": SceneEntityCfg("robot", body_names="DIP_INDEX_v1_.*"), "command_name": "ee_pose"}, ) # end_effector_2_position_tracking = RewTerm( @@ -207,7 +188,7 @@ class RewardsCfg: end_effector_position_tracking_fine_grained = RewTerm( func=mdp.position_command_error_tanh, - weight=0.2, + weight=0.5, params={"asset_cfg": SceneEntityCfg("robot", body_names="DIP_INDEX_v1_.*"), "std": 0.1, "command_name": "ee_pose"}, ) # end_effector_2_position_tracking_fine_grained = RewTerm( @@ -258,43 +239,32 @@ class RewardsCfg: # ) # action penalty - action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.00001) + action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.000002) joint_vel = RewTerm( func=mdp.joint_vel_l2, - weight=-0.0000001, + weight=-0.005, params={"asset_cfg": SceneEntityCfg("robot")}, ) @configclass class TerminationsCfg: - """Termination terms for the MDP.""" - time_out = DoneTerm(func=mdp.time_out, time_out=True) @configclass class CurriculumCfg: - """Curriculum terms for the MDP.""" - action_rate = CurrTerm( - func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -0.0001, "num_steps": 4500} + func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -0.00005, "num_steps": 18000} ) joint_vel = CurrTerm( - func=mdp.modify_reward_weight, params={"term_name": "joint_vel", "weight": -0.001, "num_steps": 4500} + func=mdp.modify_reward_weight, params={"term_name": "joint_vel", "weight": -0.05, "num_steps": 18000} ) -## -# Environment configuration -## - - @configclass class ReachEnvCfg(ManagerBasedRLEnvCfg): - """Configuration for the reach end-effector pose tracking environment.""" - # Scene settings scene: ReachSceneCfg = ReachSceneCfg(num_envs=4096, env_spacing=2.5) # Basic settings @@ -309,10 +279,8 @@ class ReachEnvCfg(ManagerBasedRLEnvCfg): def __post_init__(self): """Post initialization.""" - # general settings - self.decimation = 4 # policy every 4 physics steps (was 2) → smoother target updates + self.decimation = 4 self.sim.render_interval = self.decimation self.episode_length_s = 12.0 self.viewer.eye = (3.5, 3.5, 3.5) - # simulation settings self.sim.dt = 1.0 / 60.0 \ No newline at end of file diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.onnx b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.onnx new file mode 100644 index 0000000..8b39a9d Binary files /dev/null and b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.onnx differ diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.pt b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.pt new file mode 100644 index 0000000..c98e91a Binary files /dev/null and b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.pt differ diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/model_200.pt b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/model_200.pt new file mode 100644 index 0000000..f027254 Binary files /dev/null and b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/model_200.pt differ diff --git a/autonomy/simulation/Humanoid_Wato/ModelAssets/.thumbs/256x256/arm.usd.png b/autonomy/simulation/Humanoid_Wato/ModelAssets/.thumbs/256x256/arm.usd.png index 705495d..31e9a97 100644 Binary files a/autonomy/simulation/Humanoid_Wato/ModelAssets/.thumbs/256x256/arm.usd.png and b/autonomy/simulation/Humanoid_Wato/ModelAssets/.thumbs/256x256/arm.usd.png differ diff --git a/autonomy/simulation/Humanoid_Wato/ModelAssets/arm.usd b/autonomy/simulation/Humanoid_Wato/ModelAssets/arm.usd index 41cec0f..9bb4c0c 100644 Binary files a/autonomy/simulation/Humanoid_Wato/ModelAssets/arm.usd and b/autonomy/simulation/Humanoid_Wato/ModelAssets/arm.usd differ