citbrains · yasuohayashibara · Sep 16, 2020 · Sep 16, 2020 · Sep 16, 2020 · Sep 16, 2020
diff --git a/check_walking.py b/check_walking.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+
+import gym
+import envs
+from time import sleep
+
+env = gym.make('GankenKun-v0')
+state = env.reset()
+
+step = 0
+while True:
+    if 0 <= step <= 10:
+        action = [1.0, 0.0, 0.0]
+    if 10 <= step <= 20:
+        action = [0.0, 1.0, 0.0]
+    if 20 <= step <= 30:
+        action = [-1.0, 0.0, 0.0]
+    if 30 <= step <= 40:
+        action = [0.0, -1.0, 0.0]
+    if 40 <= step <= 50:
+        action = [0.0, 0.0, 1.0]
+    if 50 <= step <= 60:
+        action = [0.0, 0.0, -1.0]
+    if 60 <= step <= 70:
+        action = [0.0, 0.0, 0.0]
+    next_state, reward, done, info = env.step(action)
+    step += 1
+    sleep(1)
+
diff --git a/dqn_train.py b/dqn_train.py
diff --git a/envs/GankenKun_pybullet/GankenKun/foot_step_planner.py b/envs/GankenKun_pybullet/GankenKun/foot_step_planner.py
@@ -27,13 +27,13 @@ def calculate(self, goal_x, goal_y, goal_th, current_x, current_y, current_th, n
     # first step
     foot_step = []
     if status == 'start':
-      foot_step += [[0.0, current_x, current_y, current_th, 'both']]
+      foot_step += [[0.0, current_x, current_y, current_th, 'both', 0]]
       time += self.period * 2.0
     if next_support_leg == 'right':
-      foot_step += [[time, current_x, -self.width+current_y, current_th, next_support_leg]]
+      foot_step += [[time, current_x, -self.width+current_y, current_th, next_support_leg, -self.width]]
       next_support_leg = 'left'
     elif next_support_leg == 'left':
-      foot_step += [[time, current_x,  self.width+current_y, current_th, next_support_leg]]
+      foot_step += [[time, current_x,  self.width+current_y, current_th, next_support_leg,  self.width]]
       next_support_leg = 'right'
 
     # walking
@@ -50,10 +50,10 @@ def calculate(self, goal_x, goal_y, goal_th, current_x, current_y, current_th, n
       next_y  = current_y  + stride_y
       next_th = current_th + stride_th
       if next_support_leg == 'right':
-        foot_step += [[time, next_x, next_y-self.width, next_th, next_support_leg]]
+        foot_step += [[time, next_x, next_y-self.width, next_th, next_support_leg, -self.width]]
         next_support_leg = 'left'
       elif next_support_leg == 'left':
-        foot_step += [[time, next_x, next_y+self.width, next_th, next_support_leg]]
+        foot_step += [[time, next_x, next_y+self.width, next_th, next_support_leg,  self.width]]
         next_support_leg = 'right'
       current_x, current_y, current_th = next_x, next_y, next_th
 
@@ -62,16 +62,16 @@ def calculate(self, goal_x, goal_y, goal_th, current_x, current_y, current_th, n
     if not status == 'stop':
       time += self.period
       if next_support_leg == 'right':
-        foot_step += [[time, next_x, next_y-self.width, next_th, next_support_leg]]
+        foot_step += [[time, next_x, next_y-self.width, next_th, next_support_leg, -self.width]]
       elif next_support_leg == 'left':
-        foot_step += [[time, next_x, next_y+self.width, next_th, next_support_leg]]
+        foot_step += [[time, next_x, next_y+self.width, next_th, next_support_leg,  self.width]]
       time += self.period
       next_support_leg = 'both'
-      foot_step += [[time, next_x, next_y, next_th, next_support_leg]]
+      foot_step += [[time, next_x, next_y, next_th, next_support_leg, 0]]
       time += 2.0
-      foot_step += [[time, next_x, next_y, next_th, next_support_leg]]
+      foot_step += [[time, next_x, next_y, next_th, next_support_leg, 0]]
       time += 100.0
-      foot_step += [[time, next_x, next_y, next_th, next_support_leg]]
+      foot_step += [[time, next_x, next_y, next_th, next_support_leg, 0]]
 
     return foot_step
 

diff --git a/envs/GankenKun_pybullet/GankenKun_env.py b/envs/GankenKun_pybullet/GankenKun_env.py
@@ -27,17 +27,21 @@ def __init__(self):
         self.y_threshold = 3.5
         self.ball_x_threshold = 4.5
         self.ball_y_threshold = 3.0
+        self.ball_not_touch_period = 0
 
-        TIME_STEP = 0.001
- #       physicsClient = p.connect(p.GUI)
-        physicsClient = p.connect(p.DIRECT)
+        TIME_STEP = 0.01
+        physicsClient = p.connect(p.GUI)
+#        physicsClient = p.connect(p.DIRECT)
         p.setGravity(0, 0, -9.8)
         p.setTimeStep(TIME_STEP)
+        p.setPhysicsEngineParameter(numSubSteps=10)
+        p.configureDebugVisualizer(p.COV_ENABLE_GUI,0)
 
 #        planeId = p.loadURDF("envs/GankenKun_pybullet/URDF/plane.urdf", [0, 0, 0])
         self.FieldId = p.loadSDF("envs/GankenKun_pybullet/SDF/field/soccerfield.sdf")
         self.RobotId = p.loadURDF("envs/GankenKun_pybullet/URDF/gankenkun.urdf", [0, 0, 0])
         self.BallId = p.loadSDF("envs/GankenKun_pybullet/SDF/ball/ball.sdf")
+        self.ball_pos = [0.3, 0.0]
 
         self.index = {p.getBodyInfo(self.RobotId)[0].decode('UTF-8'):-1,}
         for id in range(p.getNumJoints(self.RobotId)):
@@ -65,33 +69,25 @@ def __init__(self):
         x_goal, y_goal, th_goal = 0.1, 0.0, 0.0
         self.foot_step = self.walk.setGoalPos([x_goal, y_goal, th_goal])
     #    print(p.getBasePositionAndOrientation(BallId[0]))
+        self.ball_delta_length = 0.0
 
     def step(self, action):
-        j = 0
-        #print()
         while p.isConnected():
-            j += 1
-            if j >= 10:
-                self.joint_angles,lf,rf,xp,n = self.walk.getNextPos()
-                j = 0
-                if n == 0:
-                    if (len(self.foot_step) <= 6):
-                        x_goal = self.foot_step[0][1] + action[0]
-                        y_goal = self.foot_step[0][2] + action[1]
-                        th_goal = self.foot_step[0][3] + action[2]
-
-                        self.foot_step = self.walk.setGoalPos([x_goal, y_goal, th_goal])
-                    else:
-                        self.foot_step = self.walk.setGoalPos()
-                #if you want new goal, please send position
-                    break
+            self.joint_angles,lf,rf,xp,n = self.walk.getNextPos()
+            if n == 0:
+                x_goal = self.foot_step[0][1] + action[0]
+                y_goal = self.foot_step[0][2] - self.foot_step[0][5] + action[1]
+                th_goal = self.foot_step[0][3] + action[2]
+                self.foot_step = self.walk.setGoalPos([x_goal, y_goal, th_goal])
+                break
             for id in range(p.getNumJoints(self.RobotId)):
                 qIndex = p.getJointInfo(self.RobotId, id)[3]
                 if qIndex > -1:
                     p.setJointMotorControl2(self.RobotId, id, p.POSITION_CONTROL, self.joint_angles[qIndex-7])
-                
+
             p.stepSimulation()
             #   sleep(TIME_STEP) # delete -> speed up
+
         x, y, _ = p.getBasePositionAndOrientation(self.RobotId)[0]
         roll, pitch, yaw = p.getEulerFromQuaternion(p.getBasePositionAndOrientation(self.RobotId)[1])
         ball_x, ball_y, _ = p.getBasePositionAndOrientation(self.BallId[0])[0]
@@ -108,19 +104,31 @@ def step(self, action):
         if not done:
             dx, dy = ball_x - x, ball_y - y
             ball_distance = math.sqrt(dx**2 + dy**2)
-            reward += math.floor(-10.0 * ball_distance)/10
-            ball_goal_distance = math.sqrt((goal_x - ball_x)**2 + (goal_y - ball_y)**2)
-            reward += math.floor(-10.0 * ball_goal_distance)/10
+            reward += - ball_distance / 10
+            ball_goal_prev_distance = math.sqrt((goal_x + 1.0 - self.ball_pos[0])**2 + (goal_y - self.ball_pos[1])**2)
+            ball_goal_distance = math.sqrt((goal_x + 1.0 - ball_x)**2 + (goal_y - ball_y)**2)
+            if self.ball_delta_length == 0.0:
+                reward += (- ball_goal_distance + ball_goal_prev_distance)*10
+            self.ball_delta_length = - ball_goal_distance + ball_goal_prev_distance
+            if ball_goal_distance == ball_goal_prev_distance:
+                self.ball_not_touch_period += 1
+                if self.ball_not_touch_period > 60:
+                    self.ball_not_touch_period = 0
+                    done = True
+            else:
+                self.ball_not_touch_period = 0
+            self.ball_pos[0] = ball_x
+            self.ball_pos[1] = ball_y
         elif ball_x > goal_x and self.goal_leftpole < ball_y < self.goal_rightpole:
-            reward = 500
-        else:
-            reward = -500
+            reward = 10
+            done = True
 
+#        print("action: "+str(action)+", reward: "+str(reward))
         return self.state, reward, done, {}
 
     def reset(self):
         p.resetBasePositionAndOrientation(self.RobotId, [0, 0, 0], [0, 0, 0, 1.0])
-        p.resetBasePositionAndOrientation(self.BallId[0], [0.2, 0, 0.1], [0, 0, 0, 1.0])
+        p.resetBasePositionAndOrientation(self.BallId[0], [0.3, 0, 0.1], [0, 0, 0, 1.0])
         x_goal, y_goal, th_goal = 0.1, 0.0, 0.0
         del self.walk, self.pc
         self.pc = preview_control(0.01, 1.0, 0.27)
@@ -130,6 +138,8 @@ def reset(self):
         roll, pitch, yaw = p.getEulerFromQuaternion(p.getBasePositionAndOrientation(self.RobotId)[1])
         ball_x, ball_y, _ = p.getBasePositionAndOrientation(self.BallId[0])[0]
         self.state = [x, y, yaw, ball_x, ball_y]
+        self.ball_pos[0] = 0.3
+        self.ball_pos[1] = 0.0
         return np.array(self.state)