diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/humanoid.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/humanoid.py
index fe50dab..47169d8 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/humanoid.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/modelCfg/humanoid.py
@@ -5,7 +5,7 @@
 from isaaclab.assets.articulation import ArticulationCfg
 
 _HUMANOID_WATO_ROOT = os.path.abspath(
-    os.path.join(os.path.dirname(__file__), "..", "..", "..", "..", "..")
+    os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")
 )
 _MODEL_ASSETS = os.path.join(_HUMANOID_WATO_ROOT, "ModelAssets")
 
@@ -40,10 +40,9 @@
     actuators={
         "arm": ImplicitActuatorCfg(
             joint_names_expr=[".*"],
-            # velocity_limit=100.0,
-            # effort_limit=87.0,
             stiffness=0.5,
             damping=0.5,
+            velocity_limit_sim=3.0,
         ),
     },
 )
@@ -93,72 +92,84 @@
             joint_names_expr=["shoulder_flexion_extension"],
             stiffness=757.6,
             damping=60.3,
+            velocity_limit_sim=3.0,
         ),
         # J≈0.95 kg·m², f=4.0 Hz, ζ=1.0 → k=600.0, d=47.7
         "shoulder_aa": ImplicitActuatorCfg(
             joint_names_expr=["shoulder_abduction_adduction"],
             stiffness=600.0,
             damping=47.7,
+            velocity_limit_sim=3.0,
         ),
         # J≈0.77 kg·m², f=4.5 Hz, ζ=1.0 → k=615.5, d=43.5
         "shoulder_rot": ImplicitActuatorCfg(
             joint_names_expr=["shoulder_rotation"],
             stiffness=615.5,
             damping=43.5,
+            velocity_limit_sim=3.0,
         ),
         # J≈0.77 kg·m², f=4.5 Hz, ζ=1.0 → k=615.5, d=43.5
         "elbow": ImplicitActuatorCfg(
             joint_names_expr=["elbow_flexion_extension"],
             stiffness=615.5,
             damping=43.5,
+            velocity_limit_sim=3.0,
         ),
         # J≈0.45 kg·m², f=5.0 Hz, ζ=1.0 → k=444.1, d=28.3
         "forearm": ImplicitActuatorCfg(
             joint_names_expr=["forearm_rotation"],
             stiffness=444.1,
             damping=28.3,
+            velocity_limit_sim=3.0,
         ),
         # J≈0.12 kg·m², f=6.0 Hz, ζ=1.0 → k=170.5, d=9.0
         "wrist": ImplicitActuatorCfg(
             joint_names_expr=["wrist_extension"],
             stiffness=170.5,
             damping=9.0,
+            velocity_limit_sim=3.0,
         ),
         # J≈0.0012 kg·m², f=8.0 Hz, ζ=1.0 → k=3.0, d=0.12
         "finger_mcp": ImplicitActuatorCfg(
             joint_names_expr=["mcp_index", "mcp_middle", "mcp_ring", "mcp_pinky"],
             stiffness=3.0,
             damping=0.12,
+            velocity_limit_sim=3.0,
         ),
         # J≈0.0006 kg·m², f=8.0 Hz, ζ=1.0 → k=1.5, d=0.06
         "finger_pip": ImplicitActuatorCfg(
             joint_names_expr=["pip_index", "pip_middle", "pip_ring", "pip_pinky"],
             stiffness=1.5,
             damping=0.06,
+            velocity_limit_sim=3.0,
         ),
         # J≈0.0002 kg·m², f=8.0 Hz, ζ=1.0 → k=0.5, d=0.02
         "finger_dip": ImplicitActuatorCfg(
             joint_names_expr=["dip_index", "dip_middle", "dip_ring", "dip_pinky"],
             stiffness=0.5,
             damping=0.02,
+            velocity_limit_sim=3.0,
         ),
         # J≈0.003 kg·m², f=8.0 Hz, ζ=1.0 → k=7.6, d=0.30
         "thumb_cmc": ImplicitActuatorCfg(
             joint_names_expr=["cmc_thumb"],
             stiffness=7.6,
             damping=0.30,
+            velocity_limit_sim=3.0,
         ),
         # J≈0.001 kg·m², f=8.0 Hz, ζ=1.0 → k=2.5, d=0.10
         "thumb_mcp": ImplicitActuatorCfg(
             joint_names_expr=["mcp_thumb"],
             stiffness=2.5,
             damping=0.10,
+            velocity_limit_sim=3.0,
         ),
         # J≈0.0002 kg·m², f=8.0 Hz, ζ=1.0 → k=0.5, d=0.02
         "thumb_ip": ImplicitActuatorCfg(
             joint_names_expr=["ip_thumb"],
             stiffness=0.5,
             damping=0.02,
+            velocity_limit_sim=3.0,
         ),
     },
 )
\ No newline at end of file
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/joint_pos_env_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/joint_pos_env_cfg.py
index bee47db..74665a8 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/joint_pos_env_cfg.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/config/HumanoidRLEnv/joint_pos_env_cfg.py
@@ -1,17 +1,15 @@
 import math
 from isaaclab.utils import configclass
-import HumanoidRLPackage.HumanoidRLSetup.tasks.manipulation.mdp as mdp
 from HumanoidRLPackage.HumanoidRLSetup.tasks.manipulation.reach_env_cfg import ReachEnvCfg
-# from HumanoidRLPackage.HumanoidRLSetup.modelCfg.universal_robots import UR10_CFG
 from HumanoidRLPackage.HumanoidRLSetup.modelCfg.humanoid import ARM_CFG
 
 
+# Humanoid-arm reach config layered on the base ReachEnvCfg; any base setting can be overridden in __post_init__.
 @configclass
 class HumanoidArmReachEnvCfg(ReachEnvCfg):
     def __post_init__(self):
         super().__post_init__()
 
-        # self.scene.robot = UR10_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
         self.scene.robot = ARM_CFG.replace(prim_path="{ENV_REGEX_NS}/Robot")
         
         marker_scale = 0.02
@@ -54,10 +52,6 @@ def __post_init__(self):
         # self.rewards.end_effector_5_position_tracking_fine_grained.params["asset_cfg"].body_names = ["IP_THUMB_v1_.*"]
         # self.rewards.end_effector_5_orientation_tracking.params["asset_cfg"].body_names = ["TIP_B_5"]
 
-        # override actions
-        self.actions.arm_action = mdp.JointPositionActionCfg(
-            asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True
-        )
         # override command generator body
         # end-effector is along x-direction
         self.commands.ee_pose.body_name = "DIP_INDEX_v1_.*"
@@ -82,13 +76,13 @@ def __post_init__(self):
 @configclass
 class HumanoidArmReachEnvCfg_PLAY(HumanoidArmReachEnvCfg):
     def __post_init__(self):
-        # post init of parent
         super().__post_init__()
+
         # make a smaller scene for play
         self.scene.num_envs = 50
         self.scene.env_spacing = 2.5
-        # disable randomization for play
-        self.observations.policy.enable_corruption = False
+        self.observations.policy.enable_corruption = False  # disable observation corruption (noise) for play
+
 
 # (env_isaaclab) hy@hy-LOQ-15IRX9:~/Downloads/Humanoid_Wato/HumanoidRL$ PYTHONPATH=$(pwd) /home/hy/IsaacLab/isaaclab.sh -p HumanoidRLPackage/rsl_rl_scripts/train.py --task=Isaac-Reach-Humanoid-Arm-v0 --headless
 
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/reach_env_cfg.py b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/reach_env_cfg.py
index d74244b..3cb2077 100644
--- a/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/reach_env_cfg.py
+++ b/autonomy/simulation/Humanoid_Wato/HumanoidRL/HumanoidRLPackage/HumanoidRLSetup/tasks/manipulation/reach_env_cfg.py
@@ -3,7 +3,6 @@
 import isaaclab.sim as sim_utils
 from isaaclab.assets import ArticulationCfg, AssetBaseCfg
 from isaaclab.envs import ManagerBasedRLEnvCfg
-from isaaclab.managers import ActionTermCfg as ActionTerm
 from isaaclab.managers import CurriculumTermCfg as CurrTerm
 from isaaclab.managers import EventTermCfg as EventTerm
 from isaaclab.managers import ObservationGroupCfg as ObsGroup
@@ -13,54 +12,41 @@
 from isaaclab.managers import TerminationTermCfg as DoneTerm
 from isaaclab.scene import InteractiveSceneCfg
 from isaaclab.utils import configclass
-from isaaclab.utils.assets import ISAAC_NUCLEUS_DIR
 from isaaclab.utils.noise import AdditiveUniformNoiseCfg as Unoise
 
 import HumanoidRLPackage.HumanoidRLSetup.tasks.manipulation.mdp as mdp
 
 @configclass
 class ReachSceneCfg(InteractiveSceneCfg):
-    """Configuration for the scene with a robotic arm."""
-
-    # world
     ground = AssetBaseCfg(
         prim_path="/World/ground",
         spawn=sim_utils.GroundPlaneCfg(),
         init_state=AssetBaseCfg.InitialStateCfg(pos=(0.0, 0.0, -1.05)),
     )
 
-    # robots
     robot: ArticulationCfg = MISSING
 
-    # lights
     light = AssetBaseCfg(
         prim_path="/World/light",
         spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=2500.0),
     )
 
 
-##
-# MDP settings
-##
-
-
 @configclass
 class CommandsCfg:
-    """Command terms for the MDP."""
-
     ee_pose = mdp.UniformPoseCommandCfg(
         asset_name="robot",
         body_name="DIP_INDEX_v1_.*",
-        resampling_time_range=(4.0, 4.0),
+        resampling_time_range=(4.0, 4.0), # (min, max) time before a new target pose is sampled; fixed at 4.0 here
         debug_vis=True,
         ranges=mdp.UniformPoseCommandCfg.Ranges(
-            pos_x=(-0.5, 0.5),
-            pos_y=(0.62, 0.66),
-            pos_z=(0.3, 0.7),
+            pos_x=(-0.8, -0.7),
+            pos_y=(-0.4, 0.4),
+            pos_z=(0.0, 0.3),
             roll=(0.0, 0.0),
             pitch=(0.0, 0.0),
             yaw=(0.0, 0.0),
-        ),
+        ), # position and roll/pitch/yaw ranges from which the target pose is sampled
     )
 
     # ee_pose_2 = mdp.UniformPoseCommandCfg(
@@ -126,20 +112,15 @@ class CommandsCfg:
 
 @configclass
 class ActionsCfg:
-    """Action specifications for the MDP."""
-
-    arm_action: ActionTerm = MISSING
-    gripper_action: ActionTerm | None = None
+    arm_action = mdp.JointPositionActionCfg(
+        asset_name="robot", joint_names=[".*"], scale=0.5, use_default_offset=True
+    )
 
 
 @configclass
 class ObservationsCfg:
-    """Observation specifications for the MDP."""
-
     @configclass
     class PolicyCfg(ObsGroup):
-        """Observations for policy group."""
-
         # observation terms (order preserved)
         joint_pos = ObsTerm(func=mdp.joint_pos_rel, noise=Unoise(n_min=-0.01, n_max=0.01))
         joint_vel = ObsTerm(func=mdp.joint_vel_rel, noise=Unoise(n_min=-0.01, n_max=0.01))
@@ -181,7 +162,7 @@ class RewardsCfg:
     # task terms
     end_effector_position_tracking = RewTerm(
         func=mdp.position_command_error,
-        weight=-0.2,
+        weight=-1.0,
         params={"asset_cfg": SceneEntityCfg("robot", body_names="DIP_INDEX_v1_.*"), "command_name": "ee_pose"},
     )
     # end_effector_2_position_tracking = RewTerm(
@@ -207,7 +188,7 @@ class RewardsCfg:
 
     end_effector_position_tracking_fine_grained = RewTerm(
         func=mdp.position_command_error_tanh,
-        weight=0.2,
+        weight=0.5,
         params={"asset_cfg": SceneEntityCfg("robot", body_names="DIP_INDEX_v1_.*"), "std": 0.1, "command_name": "ee_pose"},
     )
     # end_effector_2_position_tracking_fine_grained = RewTerm(
@@ -258,43 +239,32 @@ class RewardsCfg:
     # )
 
     # action penalty
-    action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.00001)
+    action_rate = RewTerm(func=mdp.action_rate_l2, weight=-0.000002)
     joint_vel = RewTerm(
         func=mdp.joint_vel_l2,
-        weight=-0.0000001,
+        weight=-0.005,
         params={"asset_cfg": SceneEntityCfg("robot")},
     )
 
 
 @configclass
 class TerminationsCfg:
-    """Termination terms for the MDP."""
-
     time_out = DoneTerm(func=mdp.time_out, time_out=True)
 
 
 @configclass
 class CurriculumCfg:
-    """Curriculum terms for the MDP."""
-
     action_rate = CurrTerm(
-        func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -0.0001, "num_steps": 4500}
+        func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -0.00005, "num_steps": 18000}
     )
 
     joint_vel = CurrTerm(
-        func=mdp.modify_reward_weight, params={"term_name": "joint_vel", "weight": -0.001, "num_steps": 4500}
+        func=mdp.modify_reward_weight, params={"term_name": "joint_vel", "weight": -0.05, "num_steps": 18000}
     )
 
 
-##
-# Environment configuration
-##
-
-
 @configclass
 class ReachEnvCfg(ManagerBasedRLEnvCfg):
-    """Configuration for the reach end-effector pose tracking environment."""
-
     # Scene settings
     scene: ReachSceneCfg = ReachSceneCfg(num_envs=4096, env_spacing=2.5)
     # Basic settings
@@ -309,10 +279,8 @@ class ReachEnvCfg(ManagerBasedRLEnvCfg):
 
     def __post_init__(self):
         """Post initialization."""
-        # general settings
-        self.decimation = 4  # policy every 4 physics steps (was 2) → smoother target updates
+        self.decimation = 4
         self.sim.render_interval = self.decimation
         self.episode_length_s = 12.0
         self.viewer.eye = (3.5, 3.5, 3.5)
-        # simulation settings
         self.sim.dt = 1.0 / 60.0
\ No newline at end of file
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.onnx b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.onnx
new file mode 100644
index 0000000..8b39a9d
Binary files /dev/null and b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.onnx differ
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.pt b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.pt
new file mode 100644
index 0000000..c98e91a
Binary files /dev/null and b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/exported/policy.pt differ
diff --git a/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/model_200.pt b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/model_200.pt
new file mode 100644
index 0000000..f027254
Binary files /dev/null and b/autonomy/simulation/Humanoid_Wato/HumanoidRL/logs/rsl_rl/reach_humanoid_arm/2026-03-13_00-17-23/model_200.pt differ
diff --git a/autonomy/simulation/Humanoid_Wato/ModelAssets/.thumbs/256x256/arm.usd.png b/autonomy/simulation/Humanoid_Wato/ModelAssets/.thumbs/256x256/arm.usd.png
index 705495d..31e9a97 100644
Binary files a/autonomy/simulation/Humanoid_Wato/ModelAssets/.thumbs/256x256/arm.usd.png and b/autonomy/simulation/Humanoid_Wato/ModelAssets/.thumbs/256x256/arm.usd.png differ
diff --git a/autonomy/simulation/Humanoid_Wato/ModelAssets/arm.usd b/autonomy/simulation/Humanoid_Wato/ModelAssets/arm.usd
index 41cec0f..9bb4c0c 100644
Binary files a/autonomy/simulation/Humanoid_Wato/ModelAssets/arm.usd and b/autonomy/simulation/Humanoid_Wato/ModelAssets/arm.usd differ