From 36a3532c471953d2489058fc327532e39d5151e7 Mon Sep 17 00:00:00 2001 From: Claudio Delli Bovi Date: Tue, 28 Jan 2014 18:28:04 +0100 Subject: [PATCH 01/17] TOFIX: negative coeffs + rewards --- machineLearning/DiveHandler/DiveHandler.cpp | 23 +++++++++++++-------- machineLearning/DiveHandler/DiveHandler.h | 2 +- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index df6d44d..9f5d5bb 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -1,7 +1,7 @@ /** * @file DiveHandler.cpp * -* This header file contains the implementation of a module working as a dive handler for the goalie. +* This source file contains the implementation of a module working as a dive handler for the goalie. * Such handler is activated when the ball gets in the own field side, and it computes an estimate of its projection toward the goal * with respect to the goalie reference frame. It also provides estimates for the amount of time needed to dive, save the ball and * then get back to the goalie position. This measure is compared against the estimated time the ball needs to reach the goal. @@ -230,7 +230,6 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_p } } - /* TOCOMMENT */ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) { @@ -356,10 +355,14 @@ bool DiveHandler::PGLearner::updateCoeffs() coeffs_avgGradient.at(coeffs.size() - (n +1)) = avg_plus - avg_minus; } #endif + // Avoid 'nan' when the gradient is zeroed + float normalization = 1.0; + if (magnitude(coeffs_avgGradient) != 0) + normalization = magnitude(coeffs_avgGradient); #ifdef DIVEHANDLER_TRAINING - SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/magnitude(coeffs_avgGradient) - << ", " << coeffs_avgGradient.at(1)/magnitude(coeffs_avgGradient) << " ]"); + SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization + << ", " << coeffs_avgGradient.at(1)/normalization << " ]"); #endif // Update coefficients history @@ -370,7 +373,7 @@ bool DiveHandler::PGLearner::updateCoeffs() // Update the coefficients following the gradient direction for( unsigned int i=0; i REWARDS_HISTORY_SIZE) rewardHistory.resize(REWARDS_HISTORY_SIZE); + // Update own score + ownScore = (int)theOwnTeamInfo.score; + #ifdef DIVEHANDLER_TRAINING SPQR_SUCCESS("The goalie has succeeded! Positive reward for the learner. 
"); #endif @@ -616,7 +622,6 @@ void DiveHandler::update(DiveHandle& diveHandle) // Clear the pending reward if(!diveHandle.rewardAck) diveHandle.rewardAck = true; - } } diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 7b4f763..88fc986 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -185,7 +185,7 @@ class DiveHandler : public DiveHandlerBase // Obtained rewards std::list rewardHistory; - // Opponent team current score + // Current scores int opponentScore; int ownScore; From c01ff3c007722b53c077baccec4c97237b40d107 Mon Sep 17 00:00:00 2001 From: Claudio Delli Bovi Date: Tue, 28 Jan 2014 21:01:56 +0100 Subject: [PATCH 02/17] TOFIX: reward values --- machineLearning/DiveHandler/DiveHandler.cpp | 32 +++++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 9f5d5bb..076358d 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -22,6 +22,7 @@ // Uncomment to have debug information //#define DIVEHANDLER_DEBUG +#define DIVEHANDLER_TRAINING_DEBUG #define DIVEHANDLER_TRAINING //#define RAND_PERMUTATIONS @@ -66,9 +67,10 @@ void DiveHandler::CoeffsLearner::setCoeffs(const std::vector& _coeffs) void DiveHandler::CoeffsLearner::setParam(const std::string& _key, float _value) { - // "Smart" insertion procedure using iterators (C++ 11) - std::map::iterator iter = params.begin(); - params.insert( iter, std::pair< std::string, float >(_key, _value) ); + params[_key] = _value; +// // "Smart" insertion procedure using iterators (C++ 11) +// std::map::iterator iter = params.begin(); +// params.insert( std::pair< std::string, float >(_key, _value) ); } @@ -245,23 +247,36 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) { float reward_score = 0.0; int discount_exp = 0; +#ifdef DIVEHANDLER_TRAINING_DEBUG + int positives = 0, negatives = 0; +#endif + std::list::const_iterator i = rewards.begin(); while (i != rewards.end()) { +#ifdef DIVEHANDLER_TRAINING_DEBUG + if (*i == POSITIVE_REWARD) ++positives; + else ++ negatives; +#endif // Computing discounted rewards reward_score += (*i) * pow(GAMMA, discount_exp); - ++i; ++discount_exp; + ++i; ++discount_exp; } +#ifdef DIVEHANDLER_TRAINING_DEBUG + SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); + SPQR_INFO("Negative rewards: " << negatives << " out of " << rewards.size()); + SPQR_INFO("Reward total score: " << reward_score); +#endif //Adjusting PG parameters according to the obtained score - setParam("epsilon", exp( reward_score / rewards.size() ) * getParam("epsilon")); + setParam("epsilon", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("epsilon")); #ifdef DIVEHANDLER_TRAINING SPQR_INFO( "Epsilon value changed to: " << getParam("epsilon") << " according to the obtained rewards. 
"); #endif #ifdef RAND_PERMUTATIONS - setParam("T", exp( reward_score / rewards.size() ) * getParam("T")); + setParam("T", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("T")); #endif } @@ -373,8 +388,13 @@ bool DiveHandler::PGLearner::updateCoeffs() // Update the coefficients following the gradient direction for( unsigned int i=0; i Date: Wed, 29 Jan 2014 20:02:19 +0100 Subject: [PATCH 03/17] TOFIX: Evaluation of hypotheses --- machineLearning/DiveHandler/DiveHandler.cpp | 49 ++++++++++++++------- machineLearning/DiveHandler/DiveHandler.h | 9 +++- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 076358d..4037c45 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -26,8 +26,8 @@ #define DIVEHANDLER_TRAINING //#define RAND_PERMUTATIONS -#define NEGATIVE_REWARD -1.0 -#define POSITIVE_REWARD 1.0 +#define NEGATIVE_REWARD -0.5 +#define POSITIVE_REWARD 1.5 // Debug messages template #define SPQR_ERR(x) std::cerr << "\033[22;31;1m" <<"[DiveHandler] " << x << "\033[0m"<< std::endl; @@ -68,9 +68,6 @@ void DiveHandler::CoeffsLearner::setCoeffs(const std::vector& _coeffs) void DiveHandler::CoeffsLearner::setParam(const std::string& _key, float _value) { params[_key] = _value; -// // "Smart" insertion procedure using iterators (C++ 11) -// std::map::iterator iter = params.begin(); -// params.insert( std::pair< std::string, float >(_key, _value) ); } @@ -88,7 +85,9 @@ void DiveHandler::CoeffsLearner::setParam(const std::string& _key, float _value) */ DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, float _epsilon, int _T, float _initValue, bool randomize ): // Initialize the base class - CoeffsLearner(_nCoeffs, _initValue, _dhPtr) + CoeffsLearner(_nCoeffs, _initValue, _dhPtr), + // Initialize the gradient estimate + coeffsGradient(_nCoeffs, 0.0), positivesWeight(0.0) { // Initializing coefficients if(randomize) @@ -238,7 +237,8 @@ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) // Dimensions check assert(R.size() == coeffs.size()); // Generate perturbated policy and call the DiveHandler object for evaluation - return diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); + float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); + return LAMBDA*fabs(tDiveAndRecover) + (1-LAMBDA)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover); } @@ -247,24 +247,24 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) { float reward_score = 0.0; int discount_exp = 0; -#ifdef DIVEHANDLER_TRAINING_DEBUG - int positives = 0, negatives = 0; -#endif + int positives = 0; std::list::const_iterator i = rewards.begin(); while (i != rewards.end()) { -#ifdef DIVEHANDLER_TRAINING_DEBUG - if (*i == POSITIVE_REWARD) ++positives; - else ++ negatives; -#endif + // Counting positives + if (*i == POSITIVE_REWARD) + ++positives; + // Computing discounted rewards reward_score += (*i) * pow(GAMMA, discount_exp); ++i; ++discount_exp; } + positivesWeight = static_cast(positives)/rewards.size(); + #ifdef DIVEHANDLER_TRAINING_DEBUG SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); - SPQR_INFO("Negative rewards: " << negatives << " out of " << rewards.size()); + SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); SPQR_INFO("Reward total score: " 
<< reward_score); #endif @@ -375,10 +375,22 @@ bool DiveHandler::PGLearner::updateCoeffs() if (magnitude(coeffs_avgGradient) != 0) normalization = magnitude(coeffs_avgGradient); + #ifdef DIVEHANDLER_TRAINING SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization << ", " << coeffs_avgGradient.at(1)/normalization << " ]"); #endif + // Weight new gradient estimate and previous one according to the reward score + std::vector newGradient (coeffsGradient.size()); + for( unsigned int j=0; j coeffsGradient; + // Weight of the current gradient estimate + float positivesWeight; + // Memory buffer for the PG algorithm PGbuffer coeffsBuffer; // Set of perturbations to be performed From 40ffbc53e7c69ebfca8301428fbb3d34f8d2857c Mon Sep 17 00:00:00 2001 From: Claudio Delli Bovi Date: Thu, 30 Jan 2014 18:07:39 +0100 Subject: [PATCH 04/17] TOTUNE: Evaluation of hypotheses --- machineLearning/DiveHandler/DiveHandler.cpp | 7 +++++-- machineLearning/DiveHandler/DiveHandler.h | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 4037c45..ba646ee 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -238,7 +238,10 @@ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) assert(R.size() == coeffs.size()); // Generate perturbated policy and call the DiveHandler object for evaluation float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); - return LAMBDA*fabs(tDiveAndRecover) + (1-LAMBDA)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover); + + return (1.0-LAMBDA1-LAMBDA2)*fabs(tDiveAndRecover) + + LAMBDA1*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + + LAMBDA2*fabs(1.0 - ((coeffs.at(0) + R.at(0))+(coeffs.at(1) + R.at(1)))); } @@ -260,7 +263,7 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) reward_score += (*i) * pow(GAMMA, discount_exp); ++i; ++discount_exp; } - positivesWeight = static_cast(positives)/rewards.size(); + positivesWeight = (POSITIVE_REWARD*static_cast(positives))/(positives*POSITIVE_REWARD + (rewards.size()-positives)*fabs(NEGATIVE_REWARD)); #ifdef DIVEHANDLER_TRAINING_DEBUG SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 15095d1..402a699 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -60,7 +60,8 @@ END_MODULE #define EPSILON 0.15 #define T 15 // Evaluation weight -#define LAMBDA 0.15 +#define LAMBDA1 0.6 +#define LAMBDA2 0.3 // Module class declaration From 2874684d8896a23040ec835758bf7513420813aa Mon Sep 17 00:00:00 2001 From: Claudio Delli Bovi Date: Sat, 8 Feb 2014 20:04:31 +0100 Subject: [PATCH 05/17] TOTRY: Evaluation function with individual best as attractor --- machineLearning/DiveHandler/DiveHandler.cpp | 32 ++++++++++++++------- machineLearning/DiveHandler/DiveHandler.h | 21 ++++++++++---- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index ba646ee..321654c 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -26,7 +26,7 @@ #define DIVEHANDLER_TRAINING //#define RAND_PERMUTATIONS -#define NEGATIVE_REWARD -0.5 +#define NEGATIVE_REWARD -1.0 
#define POSITIVE_REWARD 1.5 // Debug messages template @@ -87,8 +87,11 @@ DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, float _eps // Initialize the base class CoeffsLearner(_nCoeffs, _initValue, _dhPtr), // Initialize the gradient estimate - coeffsGradient(_nCoeffs, 0.0), positivesWeight(0.0) + coeffsGradient(_nCoeffs, 0.0) { + reward_score = 0.0; + reward_norm = 1.0; + // Initializing coefficients if(randomize) { @@ -238,17 +241,26 @@ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) assert(R.size() == coeffs.size()); // Generate perturbated policy and call the DiveHandler object for evaluation float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); + // Perturbated coefficients + std::vector new_coeffs(2); + new_coeffs.at(0) = coeffs.at(0) + R.at(0); + new_coeffs.at(1) = coeffs.at(1) + R.at(1); + +// return (1.0-LAMBDA1-LAMBDA2)*fabs(tDiveAndRecover) + +// LAMBDA1*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + +// LAMBDA2*fabs(1.0 - ((coeffs.at(0) + R.at(0))+(coeffs.at(1) + R.at(1)))); - return (1.0-LAMBDA1-LAMBDA2)*fabs(tDiveAndRecover) + - LAMBDA1*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + - LAMBDA2*fabs(1.0 - ((coeffs.at(0) + R.at(0))+(coeffs.at(1) + R.at(1)))); + return (1.0 - fabs(reward_score/reward_norm))*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + + fabs(reward_score/reward_norm)*fabs(magnitude(coeffs) - magnitude(new_coeffs)); } /* TOTEST&COMMENT */ void DiveHandler::PGLearner::updateParams(const std::list& rewards) { - float reward_score = 0.0; + reward_score = 0.0; + if (!rewards.empty()) reward_norm = 0.0; + int discount_exp = 0; int positives = 0; @@ -261,9 +273,9 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) // Computing discounted rewards reward_score += (*i) * pow(GAMMA, discount_exp); + reward_norm += fabs((*i) * pow(GAMMA, discount_exp)); ++i; ++discount_exp; } - positivesWeight = (POSITIVE_REWARD*static_cast(positives))/(positives*POSITIVE_REWARD + (rewards.size()-positives)*fabs(NEGATIVE_REWARD)); #ifdef DIVEHANDLER_TRAINING_DEBUG SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); @@ -386,11 +398,9 @@ bool DiveHandler::PGLearner::updateCoeffs() // Weight new gradient estimate and previous one according to the reward score std::vector newGradient (coeffsGradient.size()); for( unsigned int j=0; j(SPQR::GOALIE_LEARNING_STATE)), - learner(new PGLearner(this, 2, EPSILON, T, 1.0, true)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y), + learner(new PGLearner(this, 2, EPSILON, T, 1.0, false)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y), tDive(0.0), tBackInPose(0.0), ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) { #ifdef DIVEHANDLER_TRAINING diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 402a699..351dbbf 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -52,15 +52,15 @@ END_MODULE // Termination conditions #define MAX_ITER 300 -#define CONVERGENCE_THRESHOLD 0.05 +#define CONVERGENCE_THRESHOLD 0.01 // PG parameters #define GAMMA 0.5 #define BUFFER_DIM 10 #define REWARDS_HISTORY_SIZE 15 -#define EPSILON 0.15 +#define EPSILON 0.10 #define T 15 // Evaluation weight -#define LAMBDA1 0.6 +#define LAMBDA1 0.7 #define LAMBDA2 0.3 @@ -142,8 +142,11 @@ class DiveHandler : public DiveHandlerBase // Current estimate for the 
coefficients gradient std::vector coeffsGradient; - // Weight of the current gradient estimate - float positivesWeight; + + // Current reward score + float reward_score; + // Current reward normalization factor + float reward_norm; // Memory buffer for the PG algorithm PGbuffer coeffsBuffer; @@ -160,7 +163,7 @@ class DiveHandler : public DiveHandlerBase // Default constructor PGLearner(DiveHandler* _dhPtr, int _nCoeffs, float _epsilon = EPSILON, - int _T = T, float _initValue = 0.0, bool randomize = false); + int _T = T, float _initValue = 1.0, bool randomize = false); // Generate a set of perturbations for the current policy void generatePerturbations(); @@ -222,6 +225,12 @@ class DiveHandler : public DiveHandlerBase // Destructor ~DiveHandler(); + // Setter for the reward list + inline const std::list& getRewardList() const + { + return rewardHistory; + } + // Update the DiveHandle for the goalie behavior void update(DiveHandle& diveHandle); From 5170928866df8fc29f53105b238d8089ccad1a5a Mon Sep 17 00:00:00 2001 From: Claudio Delli Bovi Date: Tue, 11 Feb 2014 17:02:35 +0100 Subject: [PATCH 06/17] TOTRY: Evaluation function with overall best as attractor --- machineLearning/DiveHandler/DiveHandler.cpp | 16 ++++++++++++---- machineLearning/DiveHandler/DiveHandler.h | 12 ++++++++++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 321654c..7a900d4 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -89,8 +89,10 @@ DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, float _eps // Initialize the gradient estimate coeffsGradient(_nCoeffs, 0.0) { + // Initializing reward scores reward_score = 0.0; reward_norm = 1.0; + coeffsBest = coeffs; // Initializing coefficients if(randomize) @@ -241,26 +243,28 @@ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) assert(R.size() == coeffs.size()); // Generate perturbated policy and call the DiveHandler object for evaluation float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); + // Perturbated coefficients std::vector new_coeffs(2); new_coeffs.at(0) = coeffs.at(0) + R.at(0); new_coeffs.at(1) = coeffs.at(1) + R.at(1); + return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + + LAMBDA1*fabs(magnitude(coeffs) - magnitude(coeffsBest)); + // return (1.0-LAMBDA1-LAMBDA2)*fabs(tDiveAndRecover) + // LAMBDA1*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + // LAMBDA2*fabs(1.0 - ((coeffs.at(0) + R.at(0))+(coeffs.at(1) + R.at(1)))); - return (1.0 - fabs(reward_score/reward_norm))*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + - fabs(reward_score/reward_norm)*fabs(magnitude(coeffs) - magnitude(new_coeffs)); } /* TOTEST&COMMENT */ void DiveHandler::PGLearner::updateParams(const std::list& rewards) { + // Re-initialize reward scores reward_score = 0.0; if (!rewards.empty()) reward_norm = 0.0; - int discount_exp = 0; int positives = 0; @@ -286,6 +290,10 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) //Adjusting PG parameters according to the obtained score setParam("epsilon", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("epsilon")); + // Update best performance + if (rewards.front() == POSITIVE_REWARD) + coeffsBest = coeffs; + #ifdef DIVEHANDLER_TRAINING SPQR_INFO( "Epsilon value changed to: " << getParam("epsilon") << " according to the obtained 
rewards. "); #endif @@ -569,7 +577,7 @@ void DiveHandler::estimateDiveTimes() /* TOCOMMENT */ inline float DiveHandler::computeDiveAndRecoverTime(float alpha1, float alpha2) { - return alpha2*( alpha1*tBall2Goal - tDive ) + tBackInPose; + return alpha2*( alpha1*tBall2Goal - tDive ); } /* TOTEST&COMMENT */ diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 351dbbf..5fc43ec 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -60,8 +60,8 @@ END_MODULE #define EPSILON 0.10 #define T 15 // Evaluation weight -#define LAMBDA1 0.7 -#define LAMBDA2 0.3 +#define LAMBDA1 0.9 +//#define LAMBDA2 0.3 // Module class declaration @@ -142,6 +142,8 @@ class DiveHandler : public DiveHandlerBase // Current estimate for the coefficients gradient std::vector coeffsGradient; + // Best individual performance achieved so far + std::vector coeffsBest; // Current reward score float reward_score; @@ -177,6 +179,12 @@ class DiveHandler : public DiveHandlerBase // Update coefficients performing a step of the learning algorithm virtual bool updateCoeffs(); + // Update the best coefficient setting so far + inline void updateCoeffsBest() + { + coeffsBest = coeffs; + } + }; // class GALearner : public CoeffsLearner From ded204dfa03fe1306f66a77f33cc4f163e18c996 Mon Sep 17 00:00:00 2001 From: Claudio Delli Bovi Date: Wed, 5 Mar 2014 22:49:33 +0100 Subject: [PATCH 07/17] last update --- machineLearning/DiveHandler/DiveHandler.cpp | 134 +++++++++++--------- machineLearning/DiveHandler/DiveHandler.h | 58 +++++---- 2 files changed, 106 insertions(+), 86 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 7a900d4..a06bed5 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -29,6 +29,8 @@ #define NEGATIVE_REWARD -1.0 #define POSITIVE_REWARD 1.5 +#define REWARD_WORST 999999.9 + // Debug messages template #define SPQR_ERR(x) std::cerr << "\033[22;31;1m" <<"[DiveHandler] " << x << "\033[0m"<< std::endl; #define SPQR_INFO(x) std::cerr << "\033[22;34;1m" <<"[DiveHandler] " << x << "\033[0m" << std::endl; @@ -44,9 +46,9 @@ MAKE_MODULE(DiveHandler, SPQR-Modules) // Shortcut to compute the magnitude of a vector -float magnitude(std::vector v) +double magnitude(std::vector v) { - float m = 0.0; + double m = 0.0; for (unsigned int i = 0; i < v.size(); ++i) m += v.at(i) * v.at(i); @@ -60,12 +62,12 @@ float magnitude(std::vector v) /* * Simple setters for the learner's parameters and coefficients. */ -void DiveHandler::CoeffsLearner::setCoeffs(const std::vector& _coeffs) +void DiveHandler::CoeffsLearner::setCoeffs(const std::vector& _coeffs) { coeffs = _coeffs; } -void DiveHandler::CoeffsLearner::setParam(const std::string& _key, float _value) +void DiveHandler::CoeffsLearner::setParam(const std::string& _key, double _value) { params[_key] = _value; } @@ -83,16 +85,16 @@ void DiveHandler::CoeffsLearner::setParam(const std::string& _key, float _value) * - An initial value for the learning coefficients (or an upper bound for the random initialization of those); * - A flag indicating whether a fixed or random initialization has to be performed. 
*/ -DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, float _epsilon, int _T, float _initValue, bool randomize ): +DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, double _epsilon, int _T, double _initValue, bool randomize ): // Initialize the base class CoeffsLearner(_nCoeffs, _initValue, _dhPtr), // Initialize the gradient estimate - coeffsGradient(_nCoeffs, 0.0) + coeffsGradient(_nCoeffs, 0.0), coeffsBest(_nCoeffs, 0.0) { // Initializing reward scores reward_score = 0.0; reward_norm = 1.0; - coeffsBest = coeffs; + rewardBest = REWARD_WORST; // Initializing coefficients if(randomize) @@ -100,7 +102,7 @@ DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, float _eps // Random initialization in [0, INIT_VALUE] srand(time(NULL)); for( int i=0; i<_nCoeffs; ++i) - coeffs.at(i) = (static_cast(rand()%101)/100 ) *_initValue; + coeffs.at(i) = (static_cast(rand()%101)/100 ) *_initValue; } // Initializing parameters @@ -124,7 +126,7 @@ bool DiveHandler::PGLearner::converged() { // Compute variations mean // Delta previous to current step - float avg_variation = (magnitude(coeffs) - magnitude(coeffsBuffer.front()))/coeffsBuffer.size() ; + double avg_variation = (magnitude(coeffs) - magnitude(coeffsBuffer.front()))/coeffsBuffer.size() ; // Iterate over the whole buffer and compute deltas from step i-1 to i PGbuffer::const_iterator i = coeffsBuffer.begin(); PGbuffer::const_iterator j = coeffsBuffer.begin(); ++j; @@ -136,7 +138,7 @@ bool DiveHandler::PGLearner::converged() // Compute variations standard deviation // Delta previous to current step - float std_variation = pow(magnitude(coeffs)-magnitude(coeffsBuffer.front()) - avg_variation, 2) / coeffsBuffer.size(); + double std_variation = pow(magnitude(coeffs)-magnitude(coeffsBuffer.front()) - avg_variation, 2) / coeffsBuffer.size(); // Iterate over the whole buffer and compute deltas from step i-1 to i PGbuffer::const_iterator k = coeffsBuffer.begin(); PGbuffer::const_iterator t = coeffsBuffer.begin(); ++t; @@ -175,7 +177,7 @@ void DiveHandler::PGLearner::generatePerturbations() for(int i=0; i perturbation(coeffs); + std::vector perturbation(coeffs); for(unsigned int j=0; j perturbation (coeffs.size(),0.0); + std::vector perturbation (coeffs.size(),0.0); // Generate all possible combinations recursively generatePerturbations(&perturbation, 0); @@ -208,7 +210,7 @@ void DiveHandler::PGLearner::generatePerturbations() } /* TOTEST&COMMENT */ -void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_perturbation, unsigned int index) +void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_perturbation, unsigned int index) { if (index == partial_perturbation->size()-1) { @@ -216,7 +218,7 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_p for (int perturbation_type = -1; perturbation_type <= 1; ++perturbation_type) { // Compute last index and generate the final perturbation - std::vector perturbation (*partial_perturbation); + std::vector perturbation (*partial_perturbation); perturbation.at(index) = coeffs.at(index) + perturbation_type * params["epsilon"]; // Update the perturbations buffer @@ -237,20 +239,29 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_p } /* TOCOMMENT */ -float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) +double DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) { // Dimensions check assert(R.size() == coeffs.size()); + + if (R.at(0) == 0.0 || R.at(1) == 0.0) + 
return REWARD_WORST; + // Generate perturbated policy and call the DiveHandler object for evaluation - float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); + double tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(R.at(0), R.at(1)); - // Perturbated coefficients - std::vector new_coeffs(2); - new_coeffs.at(0) = coeffs.at(0) + R.at(0); - new_coeffs.at(1) = coeffs.at(1) + R.at(1); + // Attractor + std::vector distanceToBest(2); + distanceToBest.at(0) = coeffsBest.at(0) - R.at(0); + distanceToBest.at(1) = coeffsBest.at(1) - R.at(1); + +#ifdef DIVEHANDLER_TRAINING_DEBUG + SPQR_INFO("Perturbated policy: [" << R.at(0) << ", " << R.at(1) + << "], Score: " << ((1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal-tDiveAndRecover)+LAMBDA1*magnitude(distanceToBest))); +#endif return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + - LAMBDA1*fabs(magnitude(coeffs) - magnitude(coeffsBest)); + LAMBDA1*magnitude(distanceToBest); // return (1.0-LAMBDA1-LAMBDA2)*fabs(tDiveAndRecover) + // LAMBDA1*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + @@ -260,7 +271,7 @@ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) /* TOTEST&COMMENT */ -void DiveHandler::PGLearner::updateParams(const std::list& rewards) +void DiveHandler::PGLearner::updateParams(const std::list& rewards) { // Re-initialize reward scores reward_score = 0.0; @@ -268,7 +279,7 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) int discount_exp = 0; int positives = 0; - std::list::const_iterator i = rewards.begin(); + std::list::const_iterator i = rewards.begin(); while (i != rewards.end()) { // Counting positives @@ -281,18 +292,21 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) ++i; ++discount_exp; } -#ifdef DIVEHANDLER_TRAINING_DEBUG - SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); - SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); - SPQR_INFO("Reward total score: " << reward_score); -#endif - //Adjusting PG parameters according to the obtained score setParam("epsilon", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("epsilon")); // Update best performance - if (rewards.front() == POSITIVE_REWARD) + if (rewardGradient < rewardBest) + { + rewardBest = rewardGradient; coeffsBest = coeffs; + } +#ifdef DIVEHANDLER_TRAINING_DEBUG + SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); + SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); + SPQR_INFO("Reward total score: " << reward_score); + SPQR_INFO("Best evaluation so far: [ " << coeffsBest.at(0) << ", " << coeffsBest.at(1) << " ] with score: " << rewardBest); +#endif #ifdef DIVEHANDLER_TRAINING SPQR_INFO( "Epsilon value changed to: " << getParam("epsilon") << " according to the obtained rewards. "); @@ -307,33 +321,31 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) /* TOTEST&COMMENT */ bool DiveHandler::PGLearner::updateCoeffs() { - -#ifdef DIVEHANDLER_TRAINING - SPQR_INFO( "PG algorithm, iteration " << iter_count << "... " ); -#endif - if( iter_count == MAX_ITER || converged() ) return false; else { +#ifdef DIVEHANDLER_TRAINING + SPQR_INFO( "PG algorithm, iteration " << iter_count << "... 
" ); +#endif // First generate the set of random perturbation for the current coefficients generatePerturbations(); // For each perturbation, evaluate with the objective function and store the result in a temporary container - std::vector evaluatedPerturbations (perturbationsBuffer.size()); + std::vector evaluatedPerturbations (perturbationsBuffer.size()); PGbuffer::const_iterator evaluator; for(evaluator = perturbationsBuffer.begin(); evaluator != perturbationsBuffer.end(); ++evaluator) evaluatedPerturbations.push_back( evaluatePerturbation(*evaluator) ); // Compute the average 'gradient' for the current coefficients - std::vector coeffs_avgGradient(coeffs.size()); + std::vector coeffs_avgGradient(coeffs.size()); #ifdef RAND_PERMUTATIONS // For each coefficient, compute the average score to determine the correspondent 'gradient' entry PGbuffer::const_iterator current_perturbation = perturbationsBuffer.begin(); for( unsigned int n = 0; n < coeffs.size(); ++n ) { - std::vector score_plus, score_minus, score_zero; + std::vector score_plus, score_minus, score_zero; // Keep track of the perturbation type and store each score in a container for( unsigned int i = 0; i < evaluatedPerturbations.size(); ++i ) @@ -349,17 +361,17 @@ bool DiveHandler::PGLearner::updateCoeffs() } // Sum up all positive perturbation scores - float avg_plus = 0.0; + double avg_plus = 0.0; for (unsigned int j = 0; j < score_plus.size(); ++j) avg_plus += score_plus.at(j) / score_plus.size(); // Sum up all negative perturbation scores - float avg_minus = 0.0; + double avg_minus = 0.0; for (unsigned int j = 0; j < score_minus.size(); ++j) avg_minus += score_minus.at(j) / score_minus.size(); // Sum up all null perturbation scores - float avg_zero = 0.0; + double avg_zero = 0.0; for (unsigned int j = 0; j < score_zero.size(); ++j) avg_zero += score_zero.at(j) / score_zero.size(); @@ -373,12 +385,12 @@ bool DiveHandler::PGLearner::updateCoeffs() for( unsigned int n = 0; n < coeffs.size(); ++n ) { int avg_selector = 0; - float avg_minus = 0.0 , avg_zero = 0.0, avg_plus = 0.0; + double avg_minus = 0.0 , avg_zero = 0.0, avg_plus = 0.0; for( unsigned int i = 0; i < evaluatedPerturbations.size(); i = i + pow(3,n) ) { for( unsigned int k = i; k < i + pow(3,n); ++k ) { - float evaluation = evaluatedPerturbations.at(k) / (evaluatedPerturbations.size()/3); + double evaluation = evaluatedPerturbations.at(k) / (evaluatedPerturbations.size()/3); if( (avg_selector)%3 == 0 ) avg_minus += evaluation; if( (avg_selector)%3 == 1 ) avg_zero += evaluation; @@ -393,8 +405,11 @@ bool DiveHandler::PGLearner::updateCoeffs() coeffs_avgGradient.at(coeffs.size() - (n +1)) = avg_plus - avg_minus; } #endif + // Evaluate the gradient + rewardGradient = evaluatePerturbation(coeffs_avgGradient); + // Avoid 'nan' when the gradient is zeroed - float normalization = 1.0; + double normalization = 1.0; if (magnitude(coeffs_avgGradient) != 0) normalization = magnitude(coeffs_avgGradient); @@ -402,9 +417,10 @@ bool DiveHandler::PGLearner::updateCoeffs() #ifdef DIVEHANDLER_TRAINING SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization << ", " << coeffs_avgGradient.at(1)/normalization << " ]"); + SPQR_INFO("Gradient score (before normalization): " << rewardGradient); #endif // Weight new gradient estimate and previous one according to the reward score - std::vector newGradient (coeffsGradient.size()); + std::vector newGradient (coeffsGradient.size()); for( unsigned int j=0; j(SPQR::GOALIE_LEARNING_STATE)), - learner(new PGLearner(this, 2, 
EPSILON, T, 1.0, false)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y), + learner(new PGLearner(this, 2, EPSILON, T)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y), tDive(0.0), tBackInPose(0.0), ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) { #ifdef DIVEHANDLER_TRAINING SPQR_INFO("Initializing PGlearner..."); - std::vector coeffs = learner->getCoeffs(); + std::vector coeffs = learner->getCoeffs(); SPQR_INFO("Coefficients: alpha 1 = " << coeffs.at(0) << ", alpha 2 = " << coeffs.at(1)); SPQR_INFO("Parameters: epsilon = " << learner->getParam("epsilon") << ", T = " << learner->getParam("T")); #endif @@ -477,18 +493,18 @@ DiveHandler::~DiveHandler() void DiveHandler::estimateBallProjection() { // Ball path line - float A1 = (theBallModel.estimate.position.y - theBallModel.estimate.velocity.y) - theBallModel.estimate.position.y; - float B1 = theBallModel.estimate.position.x - (theBallModel.estimate.position.x - theBallModel.estimate.velocity.x); - float C1 = A1*theBallModel.estimate.position.x + B1*theBallModel.estimate.position.y; + double A1 = (theBallModel.estimate.position.y - theBallModel.estimate.velocity.y) - theBallModel.estimate.position.y; + double B1 = theBallModel.estimate.position.x - (theBallModel.estimate.position.x - theBallModel.estimate.velocity.x); + double C1 = A1*theBallModel.estimate.position.x + B1*theBallModel.estimate.position.y; // Goal line - float A2 = SPQR::GOALIE_FAR_LIMIT_Y - -SPQR::GOALIE_FAR_LIMIT_Y; + double A2 = SPQR::GOALIE_FAR_LIMIT_Y - -SPQR::GOALIE_FAR_LIMIT_Y; // Cross product/determinant - float det = - A2*B1; + double det = - A2*B1; // Y-intercept initialized with the maximum value possible - float yIntercept = SPQR::FIELD_DIMENSION_Y; + double yIntercept = SPQR::FIELD_DIMENSION_Y; // Non-singular case if( fabs(det) > SPQR::GOALIE_EPSILON_COLLINEAR ) @@ -530,8 +546,8 @@ void DiveHandler::estimateBallProjection() ballProjectionIntercept = yIntercept; // Computing the distance vector from the ball to the goal - float delta_x = -SPQR::FIELD_DIMENSION_X - theGlobalBallEstimation.singleRobotX; - float delta_y = ballProjectionIntercept - theGlobalBallEstimation.singleRobotY; + double delta_x = -SPQR::FIELD_DIMENSION_X - theGlobalBallEstimation.singleRobotX; + double delta_y = ballProjectionIntercept - theGlobalBallEstimation.singleRobotY; // Estimated distance from the ball distanceBall2Goal = sqrt( delta_x*delta_x + delta_y*delta_y); } @@ -553,8 +569,8 @@ void DiveHandler::estimateDiveTimes() tBall2Goal = -1.0; // Using the appropriate estimates for recover and reposition times - float tRecover = 0.0; - float tReposition = 0.0; + double tRecover = 0.0; + double tReposition = 0.0; if( diveType == rcloseDive || diveType == lcloseDive ) // Close dive: no need to back up to the original position tRecover = SPQR::GOALIE_CLOSE_DIVE_RECOVER_TIME; @@ -575,7 +591,7 @@ void DiveHandler::estimateDiveTimes() } /* TOCOMMENT */ -inline float DiveHandler::computeDiveAndRecoverTime(float alpha1, float alpha2) +inline double DiveHandler::computeDiveAndRecoverTime(double alpha1, double alpha2) { return alpha2*( alpha1*tBall2Goal - tDive ); } @@ -686,7 +702,7 @@ void DiveHandler::update(DiveHandle& diveHandle) learner->updateParams(rewardHistory); // Compute the dive time using the current coefficients as T = alpha2 * (alpha1*T_PAPO - T_dive) - float diveTime = (learner->getCoeffs()).at(1) * ( (learner->getCoeffs()).at(0) * tBall2Goal - tDive ); + double diveTime = (learner->getCoeffs()).at(1) * ( 
(learner->getCoeffs()).at(0) * tBall2Goal - tDive ); #ifdef DIVEHANDLER_DEBUG SPQR_INFO( "Estimated overall time to dive and recover position: " << diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 5fc43ec..fa104b7 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -60,7 +60,7 @@ END_MODULE #define EPSILON 0.10 #define T 15 // Evaluation weight -#define LAMBDA1 0.9 +#define LAMBDA1 0.7 //#define LAMBDA2 0.3 @@ -102,9 +102,9 @@ class DiveHandler : public DiveHandlerBase { protected: // Set of coefficients representing the learning objective - std::vector coeffs; + std::vector coeffs; // Set of fixed parameters defining the cost funcion - std::map params; + std::map params; // Iteration counter int iter_count; @@ -114,41 +114,45 @@ class DiveHandler : public DiveHandlerBase public: // Default constructor - CoeffsLearner(int _nCoeffs, float _initValue, DiveHandler* _dhPtr): + CoeffsLearner(int _nCoeffs, double _initValue, DiveHandler* _dhPtr): coeffs(_nCoeffs, _initValue), iter_count(0), diveHandler_ptr(_dhPtr) { } // Setter/getter for the coefficients - void setCoeffs(const std::vector& _coeffs); - inline std::vector getCoeffs(){ return coeffs; } + void setCoeffs(const std::vector& _coeffs); + inline std::vector getCoeffs(){ return coeffs; } // Setter/getter for the parameters - void setParam(const std::string& _key, float _value); - inline float getParam(std::string _key){ return params[_key]; } + void setParam(const std::string& _key, double _value); + inline double getParam(std::string _key){ return params[_key]; } // Update coefficients performing a step of the learning algorithm virtual bool updateCoeffs() = 0; // Use the obtained rewards to adjust the algorithm parameters - virtual void updateParams(const std::list& rewards) = 0; + virtual void updateParams(const std::list& rewards) = 0; }; // Inner class modeling a PolicyGradient-based learning agent class PGLearner : public CoeffsLearner { - typedef std::list< std::vector > PGbuffer; + typedef std::list< std::vector > PGbuffer; private: // Current estimate for the coefficients gradient - std::vector coeffsGradient; + std::vector coeffsGradient; // Best individual performance achieved so far - std::vector coeffsBest; + std::vector coeffsBest; // Current reward score - float reward_score; + double reward_score; // Current reward normalization factor - float reward_norm; + double reward_norm; + // Score of the current gradient estimate + double rewardGradient; + // Best gradient score so far + double rewardBest; // Memory buffer for the PG algorithm PGbuffer coeffsBuffer; @@ -159,22 +163,22 @@ class DiveHandler : public DiveHandlerBase bool converged(); // Recursive perturbation generator - void generatePerturbations(std::vector* partial_perturbation, unsigned int index); + void generatePerturbations(std::vector* partial_perturbation, unsigned int index); public: // Default constructor - PGLearner(DiveHandler* _dhPtr, int _nCoeffs, float _epsilon = EPSILON, - int _T = T, float _initValue = 1.0, bool randomize = false); + PGLearner(DiveHandler* _dhPtr, int _nCoeffs, double _epsilon = EPSILON, + int _T = T, double _initValue = 1.0, bool randomize = false); // Generate a set of perturbations for the current policy void generatePerturbations(); // Evaluate a single policy perturbation with the cost function - float evaluatePerturbation( std::vector R ); + double evaluatePerturbation( std::vector R ); // Update the PG parameters 
according to the obtained rewards - void updateParams(const std::list& rewards); + void updateParams(const std::list& rewards); // Update coefficients performing a step of the learning algorithm virtual bool updateCoeffs(); @@ -200,7 +204,7 @@ class DiveHandler : public DiveHandlerBase // Learning agent CoeffsLearner* learner; // Obtained rewards - std::list rewardHistory; + std::list rewardHistory; // Current scores int opponentScore; @@ -208,23 +212,23 @@ class DiveHandler : public DiveHandlerBase // Estimated time the ball needs to reach the goal // a.k.a. Tpapo (historical reasons) - float tBall2Goal; + double tBall2Goal; // Estimated time needed for the current dive action to be performed - float tDive; + double tDive; // Estimated time the goalie needs to back up to its original position - float tBackInPose; + double tBackInPose; // Estimated intersection between the ball projection and the goal line - float ballProjectionIntercept; + double ballProjectionIntercept; // Estimated distance of the ball from the own goal - float distanceBall2Goal; + double distanceBall2Goal; // Computes parameters using the ball estimated position and velocity void estimateDiveTimes(); void estimateBallProjection(); // Compute the overall time the goalie needs to dive and then recover its position - inline float computeDiveAndRecoverTime(float alpha1, float alpha2); + inline double computeDiveAndRecoverTime(double alpha1, double alpha2); public: @@ -234,7 +238,7 @@ class DiveHandler : public DiveHandlerBase ~DiveHandler(); // Setter for the reward list - inline const std::list& getRewardList() const + inline const std::list& getRewardList() const { return rewardHistory; } From eede2eeccbeb7424c5a089fc50e4bc536c86c7c6 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Tue, 11 Mar 2014 20:35:16 +0100 Subject: [PATCH 08/17] SUPER --- machineLearning/ConfigurationParameters.h | 112 ------- machineLearning/DiveHandle.h | 21 +- machineLearning/DiveHandler/DiveHandler.cpp | 327 ++++++++++++-------- machineLearning/DiveHandler/DiveHandler.h | 134 ++++---- 4 files changed, 284 insertions(+), 310 deletions(-) delete mode 100644 machineLearning/ConfigurationParameters.h diff --git a/machineLearning/ConfigurationParameters.h b/machineLearning/ConfigurationParameters.h deleted file mode 100644 index 9c8bb8a..0000000 --- a/machineLearning/ConfigurationParameters.h +++ /dev/null @@ -1,112 +0,0 @@ -#pragma once - -#include - -namespace SPQR -{ - /************ GAME CONTROLLER ************/ - static const std::string IP_GOALIE = "10.0.19.14"; - static const int CHEST_BUTTON_MANUAL_GAME_CONTROLLER_PORT = 18003; - static const int FIELD_DIMENSION_X = 3000; - static const int FIELD_DIMENSION_Y = 2000; - - static const unsigned int POLICY = 0; ///{STABLE ="0", S_POSITIONIG_X ="1", S_POSITIONIG_XY ="2", WALL ="3", TANK ="4", STATIC_POSITIONG="5"}; - static const unsigned int STRATEGY = 0; ///{DRIBBLING ="0", PASSING ="1"}; - - static const float TURN_VALID_THS = 10; /// degree - static const float TURN_EXCESS = 10; - - static const int COORDINATION_PORT_NUMBER = 11937; - static const int MAXIMUM_DISTANCE_BALL_VIEWED = 6000; - static const int MAXIMUM_DISTANCE_ON_THE_FIELD = 11000; - static const unsigned int TABLE_ROWS = 5; /// TABLE_ROWS also equals to the number of roles. 
- static const unsigned int ACTIVE_ROLES = 5; /// Active roles (including the goalie) => max 5 (goalie, defender, supporter, jolly, striker) - static const unsigned int TABLE_COLUMNS = ACTIVE_ROLES+4; - static const unsigned int DEAD_ROBOT_TIME_THRESHOLD = 5000; - static const unsigned int HYSTERESIS_PERIOD_IN_CYCLES = 100; - static const unsigned int COORDINATION_INFORMATION_NETWORK_FREQUENCY = 10; /// FREQUENCY! - static const unsigned int FALL_DOWN_PENALTY = 200; - static const unsigned int TIME_TO_GET_UP = 10000; - static const unsigned int MOVING_BALL_MIN_VELOCITY = 10; /// [mm/s] - static const unsigned int SUPPORTER_MIN_TIME_WHEN_LAST_SEEN = 500; /// [ms] - static const unsigned int DEFENDER_MIN_TIME_WHEN_LAST_SEEN = 500; /// [ms] - static const unsigned int JOLLY_MIN_TIME_WHEN_LAST_SEEN = 500; /// [ms] - static const int MINIMUM_PASSING_DISTANCE = 1000; /// [mm] - static const int HYSTERESIS_BOUND_DISTANCE = 300; /// [mm] - - static const float DEFENDER_KICKOFF_DEFAULT_POSITION_X = -0.55 * FIELD_DIMENSION_X; - static const float DEFENDER_KICKOFF_DEFAULT_POSITION_Y = 0.13 * FIELD_DIMENSION_Y; - static const float DEFENDER_NO_KICKOFF_DEFAULT_POSITION_X = -0.55 * FIELD_DIMENSION_X; - static const float DEFENDER_NO_KICKOFF_DEFAULT_POSITION_Y = 0.13 * FIELD_DIMENSION_Y; - - static const float SUPPORTER_KICKOFF_DEFAULT_POSITION_X = -0.27 * FIELD_DIMENSION_X; - static const float SUPPORTER_KICKOFF_DEFAULT_POSITION_Y = 0.33 * FIELD_DIMENSION_Y; - static const float SUPPORTER_NO_KICKOFF_DEFAULT_POSITION_X = -0.27 * FIELD_DIMENSION_X; - static const float SUPPORTER_NO_KICKOFF_DEFAULT_POSITION_Y = 0.33 * FIELD_DIMENSION_Y; - - static const float JOLLY_KICKOFF_DEFAULT_POSITION_X = -0.27 * FIELD_DIMENSION_X; - static const float JOLLY_KICKOFF_DEFAULT_POSITION_Y = -0.33 * FIELD_DIMENSION_Y; - static const float JOLLY_NO_KICKOFF_DEFAULT_POSITION_X = -0.27 * FIELD_DIMENSION_X; - static const float JOLLY_NO_KICKOFF_DEFAULT_POSITION_Y = -0.33 * FIELD_DIMENSION_Y; - - static const float STRIKER_KICKOFF_POSITION_X = -220.0; - static const float STRIKER_KICKOFF_POSITION_Y = 0.0; - static const float STRIKER_NO_KICKOFF_POSITION_X = -1200.0; - static const float STRIKER_NO_KICKOFF_POSITION_Y = 0.0; - - static const float SPEED_X = 0.6; - static const float SPEED_Y = 0.6; - static const float HEAD_ROTATION = 8.0; - static const float TIME_BEFORE_STARTING_TO_COORD_SEARCH = 7000.0; - - /************ WALL ************/ - static const float DEFENDER_KICKOFF_WALL_POSITION_X = -0.75 * FIELD_DIMENSION_X; - static const float DEFENDER_KICKOFF_WALL_POSITION_Y = 0.16 * FIELD_DIMENSION_Y; - static const float SUPPORTER_KICKOFF_WALL_POSITION_X = -0.75 * FIELD_DIMENSION_X; - static const float SUPPORTER_KICKOFF_WALL_POSITION_Y = 0.45 * FIELD_DIMENSION_Y; - static const float JOLLY_KICKOFF_WALL_POSITION_X = -0.75 * FIELD_DIMENSION_X; - static const float JOLLY_KICKOFF_WALL_POSITION_Y = -0.30 * FIELD_DIMENSION_Y; - - /************ NO BALL ************/ - static const float DEFENDER_KICKOFF_NO_BALL_POSITION_X = -0.75 * FIELD_DIMENSION_X; - static const float DEFENDER_KICKOFF_NO_BALL_POSITION_Y = 0.75 * FIELD_DIMENSION_Y; - static const float SUPPORTER_KICKOFF_NO_BALL_POSITION_X = -0.75 * FIELD_DIMENSION_X; - static const float SUPPORTER_KICKOFF_NO_BALL_POSITION_Y = -0.75 * FIELD_DIMENSION_Y; - static const float JOLLY_KICKOFF_NO_BALL_POSITION_X = 0.75 * FIELD_DIMENSION_X; - static const float JOLLY_KICKOFF_NO_BALL_POSITION_Y = -0.50 * FIELD_DIMENSION_Y; - - /************ GOALIE ************/ - static const float 
GOALIE_BASE_POSITION_X = -FIELD_DIMENSION_X + 250; /// [mm] //TODO take this from theFieldDimensions - static const float GOALIE_BASE_POSITION_Y = 0; /// [mm] - static const float GOALIE_BASE_POSITION_BEARING = 0; /// [mm] - - static const int GOALIE_LEARNING_STATE = 3; /// 1 = learning disabled, 3 = learning enabled - - static const float GOALIE_DIVE_TIME = 3000; - static const float GOALIE_DIVE_RECOVER_TIME = 3000; - static const float GOALIE_DIVE_REPOSITION_TIME = 3000; - -static const float GOALIE_CLOSE_DIVE_TIME = 1500; -static const float GOALIE_CLOSE_DIVE_RECOVER_TIME = 1500; - -static const float GOALIE_STOP_BALL_TIME = 2000; -static const float GOALIE_STOP_BALL_RECOVER_TIME = 2000; - -static const float GOALIE_POSE_X_TOLLERANCE = 150; /// [mm] -static const float GOALIE_POSE_Y_TOLLERANCE = 150; /// [mm] -static const float GOALIE_POSE_ANGLE_TOLLERANCE = 10; /// [deg] -static const float GOALIE_POSE_X_TOLLERANCE_AFTER_DIVE = 150; /// [mm] -static const float GOALIE_POSE_Y_TOLLERANCE_AFTER_DIVE = 150; /// [mm] - -static const float GOALIE_DIVE_TIME_TOLERANCE = 100; /// [ms] - -static const float GOALIE_MOVING_BALL_MIN_VELOCITY = 10; /// [mm/s] -static const float GOALIE_EPSILON_COLLINEAR = 0.001; /// [??] -static const float GOALIE_FAR_LIMIT_Y = 800; /// a little more than goal post //TODO take this from FieldDimensions -static const float GOALIE_CLOSE_LIMIT_Y = 200; /// dont-dive distance //TODO take this from FieldDimensions -static const unsigned int GOALIE_MIN_TIME_WHEN_LAST_SEEN = 500; /// [ms] -static const float GOALIE_MIN_BALL_DIST_FROM_POST = 500; - -static const float GOALIE_MAX_DIST_BALL_IN_RANGE_ABS = 500; /// [mm] -} - diff --git a/machineLearning/DiveHandle.h b/machineLearning/DiveHandle.h index 1d75bf9..876473e 100644 --- a/machineLearning/DiveHandle.h +++ b/machineLearning/DiveHandle.h @@ -1,6 +1,7 @@ #pragma once #include "Tools/Math/Vector2.h" +#include "Tools/Enum.h" class DiveHandle : public Streamable { @@ -20,18 +21,14 @@ class DiveHandle : public Streamable } public: - enum Dive - { - none = 1, - lDive, - rDive, - lcloseDive, - rcloseDive, - stopBall - }; - - typedef int Dive; - + ENUM(Dive, + none = 1, + lDive, + rDive, + lcloseDive, + rcloseDive, + stopBall); + float diveTime; float ballProjectionEstimate; Dive diveType; diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index a06bed5..61e67ac 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -16,21 +16,20 @@ #include #include -#include +#include +#include "Tools/Enum.h" #include "DiveHandler.h" // Uncomment to have debug information //#define DIVEHANDLER_DEBUG -#define DIVEHANDLER_TRAINING_DEBUG -#define DIVEHANDLER_TRAINING +//#define DIVEHANDLER_TRAINING_DEBUG +//#define DIVEHANDLER_TRAINING //#define RAND_PERMUTATIONS #define NEGATIVE_REWARD -1.0 #define POSITIVE_REWARD 1.5 -#define REWARD_WORST 999999.9 - // Debug messages template #define SPQR_ERR(x) std::cerr << "\033[22;31;1m" <<"[DiveHandler] " << x << "\033[0m"<< std::endl; #define SPQR_INFO(x) std::cerr << "\033[22;34;1m" <<"[DiveHandler] " << x << "\033[0m" << std::endl; @@ -42,13 +41,16 @@ else if(x == 2) std::cerr << "\033[22;34;1m"<<"Learner state: paused (waiting for reward). "<<"\033[0m" << std::endl; \ else if(x == 3) std::cerr << "\033[22;34;1m"<<"Learner state: enabled. 
"<<"\033[0m" << std::endl; \ +bool stamp =false; +bool tooEarly=false; +bool estimatedTime=false; MAKE_MODULE(DiveHandler, SPQR-Modules) // Shortcut to compute the magnitude of a vector -double magnitude(std::vector v) +float magnitude(std::vector v) { - double m = 0.0; + float m = 0.0; for (unsigned int i = 0; i < v.size(); ++i) m += v.at(i) * v.at(i); @@ -62,12 +64,12 @@ double magnitude(std::vector v) /* * Simple setters for the learner's parameters and coefficients. */ -void DiveHandler::CoeffsLearner::setCoeffs(const std::vector& _coeffs) +void DiveHandler::CoeffsLearner::setCoeffs(const std::vector& _coeffs) { coeffs = _coeffs; } -void DiveHandler::CoeffsLearner::setParam(const std::string& _key, double _value) +void DiveHandler::CoeffsLearner::setParam(const std::string& _key, float _value) { params[_key] = _value; } @@ -85,16 +87,16 @@ void DiveHandler::CoeffsLearner::setParam(const std::string& _key, double _value * - An initial value for the learning coefficients (or an upper bound for the random initialization of those); * - A flag indicating whether a fixed or random initialization has to be performed. */ -DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, double _epsilon, int _T, double _initValue, bool randomize ): +DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, float _epsilon, int _T, float _initValue, bool randomize ): // Initialize the base class CoeffsLearner(_nCoeffs, _initValue, _dhPtr), // Initialize the gradient estimate - coeffsGradient(_nCoeffs, 0.0), coeffsBest(_nCoeffs, 0.0) + coeffsGradient(_nCoeffs, 0.0) { // Initializing reward scores reward_score = 0.0; reward_norm = 1.0; - rewardBest = REWARD_WORST; + coeffsBest = coeffs; // Initializing coefficients if(randomize) @@ -102,7 +104,7 @@ DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, double _ep // Random initialization in [0, INIT_VALUE] srand(time(NULL)); for( int i=0; i<_nCoeffs; ++i) - coeffs.at(i) = (static_cast(rand()%101)/100 ) *_initValue; + coeffs.at(i) = (static_cast(rand()%101)/100 ) *_initValue; } // Initializing parameters @@ -126,7 +128,7 @@ bool DiveHandler::PGLearner::converged() { // Compute variations mean // Delta previous to current step - double avg_variation = (magnitude(coeffs) - magnitude(coeffsBuffer.front()))/coeffsBuffer.size() ; + float avg_variation = (magnitude(coeffs) - magnitude(coeffsBuffer.front()))/coeffsBuffer.size() ; // Iterate over the whole buffer and compute deltas from step i-1 to i PGbuffer::const_iterator i = coeffsBuffer.begin(); PGbuffer::const_iterator j = coeffsBuffer.begin(); ++j; @@ -138,7 +140,7 @@ bool DiveHandler::PGLearner::converged() // Compute variations standard deviation // Delta previous to current step - double std_variation = pow(magnitude(coeffs)-magnitude(coeffsBuffer.front()) - avg_variation, 2) / coeffsBuffer.size(); + float std_variation = pow(magnitude(coeffs)-magnitude(coeffsBuffer.front()) - avg_variation, 2) / coeffsBuffer.size(); // Iterate over the whole buffer and compute deltas from step i-1 to i PGbuffer::const_iterator k = coeffsBuffer.begin(); PGbuffer::const_iterator t = coeffsBuffer.begin(); ++t; @@ -177,7 +179,7 @@ void DiveHandler::PGLearner::generatePerturbations() for(int i=0; i perturbation(coeffs); + std::vector perturbation(coeffs); for(unsigned int j=0; j perturbation (coeffs.size(),0.0); + std::vector perturbation (coeffs.size(),0.0); // Generate all possible combinations recursively generatePerturbations(&perturbation, 0); @@ -210,7 +212,7 @@ void 
DiveHandler::PGLearner::generatePerturbations() } /* TOTEST&COMMENT */ -void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_perturbation, unsigned int index) +void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_perturbation, unsigned int index) { if (index == partial_perturbation->size()-1) { @@ -218,7 +220,7 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_ for (int perturbation_type = -1; perturbation_type <= 1; ++perturbation_type) { // Compute last index and generate the final perturbation - std::vector perturbation (*partial_perturbation); + std::vector perturbation (*partial_perturbation); perturbation.at(index) = coeffs.at(index) + perturbation_type * params["epsilon"]; // Update the perturbations buffer @@ -239,29 +241,22 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_ } /* TOCOMMENT */ -double DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) +float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) { // Dimensions check assert(R.size() == coeffs.size()); - - if (R.at(0) == 0.0 || R.at(1) == 0.0) - return REWARD_WORST; - // Generate perturbated policy and call the DiveHandler object for evaluation - double tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(R.at(0), R.at(1)); +// float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); - // Attractor - std::vector distanceToBest(2); - distanceToBest.at(0) = coeffsBest.at(0) - R.at(0); - distanceToBest.at(1) = coeffsBest.at(1) - R.at(1); - -#ifdef DIVEHANDLER_TRAINING_DEBUG - SPQR_INFO("Perturbated policy: [" << R.at(0) << ", " << R.at(1) - << "], Score: " << ((1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal-tDiveAndRecover)+LAMBDA1*magnitude(distanceToBest))); -#endif + // Perturbated coefficients + std::vector new_coeffs(2); + new_coeffs.at(0) = coeffs.at(0) + R.at(0); + new_coeffs.at(1) = coeffs.at(1) + R.at(1); - return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + - LAMBDA1*magnitude(distanceToBest); + return (diveHandler_ptr->estimatedInterval - ( R.at(0)*diveHandler_ptr->tBall2Goal))* + (diveHandler_ptr->estimatedInterval - ( R.at(0)*diveHandler_ptr->tBall2Goal)) ; +// return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + +// LAMBDA1*fabs(magnitude(coeffs) - magnitude(coeffsBest)); // return (1.0-LAMBDA1-LAMBDA2)*fabs(tDiveAndRecover) + // LAMBDA1*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + @@ -271,7 +266,7 @@ double DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) /* TOTEST&COMMENT */ -void DiveHandler::PGLearner::updateParams(const std::list& rewards) +void DiveHandler::PGLearner::updateParams(const std::list& rewards) { // Re-initialize reward scores reward_score = 0.0; @@ -279,7 +274,7 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) int discount_exp = 0; int positives = 0; - std::list::const_iterator i = rewards.begin(); + std::list::const_iterator i = rewards.begin(); while (i != rewards.end()) { // Counting positives @@ -292,22 +287,19 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) ++i; ++discount_exp; } - //Adjusting PG parameters according to the obtained score - setParam("epsilon", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("epsilon")); - - // Update best performance - if (rewardGradient < rewardBest) - { - rewardBest = rewardGradient; - coeffsBest = coeffs; - } #ifdef DIVEHANDLER_TRAINING_DEBUG 
SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); SPQR_INFO("Reward total score: " << reward_score); - SPQR_INFO("Best evaluation so far: [ " << coeffsBest.at(0) << ", " << coeffsBest.at(1) << " ] with score: " << rewardBest); #endif + //Adjusting PG parameters according to the obtained score + setParam("epsilon", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("epsilon")); + + // Update best performance + if (rewards.front() == POSITIVE_REWARD) + coeffsBest = coeffs; + #ifdef DIVEHANDLER_TRAINING SPQR_INFO( "Epsilon value changed to: " << getParam("epsilon") << " according to the obtained rewards. "); #endif @@ -321,31 +313,33 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) /* TOTEST&COMMENT */ bool DiveHandler::PGLearner::updateCoeffs() { + +#ifdef DIVEHANDLER_TRAINING + SPQR_INFO( "PG algorithm, iteration " << iter_count << "... " ); +#endif + if( iter_count == MAX_ITER || converged() ) return false; else { -#ifdef DIVEHANDLER_TRAINING - SPQR_INFO( "PG algorithm, iteration " << iter_count << "... " ); -#endif // First generate the set of random perturbation for the current coefficients generatePerturbations(); // For each perturbation, evaluate with the objective function and store the result in a temporary container - std::vector evaluatedPerturbations (perturbationsBuffer.size()); + std::vector evaluatedPerturbations (perturbationsBuffer.size()); PGbuffer::const_iterator evaluator; for(evaluator = perturbationsBuffer.begin(); evaluator != perturbationsBuffer.end(); ++evaluator) evaluatedPerturbations.push_back( evaluatePerturbation(*evaluator) ); // Compute the average 'gradient' for the current coefficients - std::vector coeffs_avgGradient(coeffs.size()); + std::vector coeffs_avgGradient(coeffs.size()); #ifdef RAND_PERMUTATIONS // For each coefficient, compute the average score to determine the correspondent 'gradient' entry PGbuffer::const_iterator current_perturbation = perturbationsBuffer.begin(); for( unsigned int n = 0; n < coeffs.size(); ++n ) { - std::vector score_plus, score_minus, score_zero; + std::vector score_plus, score_minus, score_zero; // Keep track of the perturbation type and store each score in a container for( unsigned int i = 0; i < evaluatedPerturbations.size(); ++i ) @@ -361,17 +355,17 @@ bool DiveHandler::PGLearner::updateCoeffs() } // Sum up all positive perturbation scores - double avg_plus = 0.0; + float avg_plus = 0.0; for (unsigned int j = 0; j < score_plus.size(); ++j) avg_plus += score_plus.at(j) / score_plus.size(); // Sum up all negative perturbation scores - double avg_minus = 0.0; + float avg_minus = 0.0; for (unsigned int j = 0; j < score_minus.size(); ++j) avg_minus += score_minus.at(j) / score_minus.size(); // Sum up all null perturbation scores - double avg_zero = 0.0; + float avg_zero = 0.0; for (unsigned int j = 0; j < score_zero.size(); ++j) avg_zero += score_zero.at(j) / score_zero.size(); @@ -385,12 +379,12 @@ bool DiveHandler::PGLearner::updateCoeffs() for( unsigned int n = 0; n < coeffs.size(); ++n ) { int avg_selector = 0; - double avg_minus = 0.0 , avg_zero = 0.0, avg_plus = 0.0; + float avg_minus = 0.0 , avg_zero = 0.0, avg_plus = 0.0; for( unsigned int i = 0; i < evaluatedPerturbations.size(); i = i + pow(3,n) ) { for( unsigned int k = i; k < i + pow(3,n); ++k ) { - double evaluation = evaluatedPerturbations.at(k) / (evaluatedPerturbations.size()/3); + float evaluation = 
evaluatedPerturbations.at(k) / (evaluatedPerturbations.size()/3); if( (avg_selector)%3 == 0 ) avg_minus += evaluation; if( (avg_selector)%3 == 1 ) avg_zero += evaluation; @@ -405,11 +399,8 @@ bool DiveHandler::PGLearner::updateCoeffs() coeffs_avgGradient.at(coeffs.size() - (n +1)) = avg_plus - avg_minus; } #endif - // Evaluate the gradient - rewardGradient = evaluatePerturbation(coeffs_avgGradient); - // Avoid 'nan' when the gradient is zeroed - double normalization = 1.0; + float normalization = 1.0; if (magnitude(coeffs_avgGradient) != 0) normalization = magnitude(coeffs_avgGradient); @@ -417,10 +408,9 @@ bool DiveHandler::PGLearner::updateCoeffs() #ifdef DIVEHANDLER_TRAINING SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization << ", " << coeffs_avgGradient.at(1)/normalization << " ]"); - SPQR_INFO("Gradient score (before normalization): " << rewardGradient); #endif // Weight new gradient estimate and previous one according to the reward score - std::vector newGradient (coeffsGradient.size()); + std::vector newGradient (coeffsGradient.size()); for( unsigned int j=0; j(SPQR::GOALIE_LEARNING_STATE)), - learner(new PGLearner(this, 2, EPSILON, T)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y), - tDive(0.0), tBackInPose(0.0), ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) + diveType(DiveHandle::none), state(static_cast(SPQR::GOALIE_LEARNING_STATE)), + learner(new PGLearner(this, 2, EPSILON, T, 1.0, false)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y), + tDive(0.0), tBackInPose(0.0), estimatedInterval(0), + ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) { #ifdef DIVEHANDLER_TRAINING SPQR_INFO("Initializing PGlearner..."); - std::vector coeffs = learner->getCoeffs(); + std::vector coeffs = learner->getCoeffs(); SPQR_INFO("Coefficients: alpha 1 = " << coeffs.at(0) << ", alpha 2 = " << coeffs.at(1)); SPQR_INFO("Parameters: epsilon = " << learner->getParam("epsilon") << ", T = " << learner->getParam("T")); #endif @@ -481,7 +472,7 @@ DiveHandler::DiveHandler(): */ DiveHandler::~DiveHandler() { - if (learner) delete learner; + if(learner) delete learner; } /* @@ -493,18 +484,18 @@ DiveHandler::~DiveHandler() void DiveHandler::estimateBallProjection() { // Ball path line - double A1 = (theBallModel.estimate.position.y - theBallModel.estimate.velocity.y) - theBallModel.estimate.position.y; - double B1 = theBallModel.estimate.position.x - (theBallModel.estimate.position.x - theBallModel.estimate.velocity.x); - double C1 = A1*theBallModel.estimate.position.x + B1*theBallModel.estimate.position.y; + float A1 = (theBallModel.estimate.position.y - theBallModel.estimate.velocity.y) - theBallModel.estimate.position.y; + float B1 = theBallModel.estimate.position.x - (theBallModel.estimate.position.x - theBallModel.estimate.velocity.x); + float C1 = A1*theBallModel.estimate.position.x + B1*theBallModel.estimate.position.y; // Goal line - double A2 = SPQR::GOALIE_FAR_LIMIT_Y - -SPQR::GOALIE_FAR_LIMIT_Y; + float A2 = SPQR::GOALIE_FAR_LIMIT_Y - -SPQR::GOALIE_FAR_LIMIT_Y; // Cross product/determinant - double det = - A2*B1; + float det = - A2*B1; // Y-intercept initialized with the maximum value possible - double yIntercept = SPQR::FIELD_DIMENSION_Y; + float yIntercept = SPQR::FIELD_DIMENSION_Y; // Non-singular case if( fabs(det) > SPQR::GOALIE_EPSILON_COLLINEAR ) @@ -513,31 +504,33 @@ void DiveHandler::estimateBallProjection() yIntercept = (- A2*C1) / det; // Devising the 
type of dive to be performed - if( yIntercept > ( SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept < SPQR::GOALIE_FAR_LIMIT_Y ) - // Close intercept on the left - diveType = lcloseDive; - else if( yIntercept > SPQR::GOALIE_FAR_LIMIT_Y ) - // Far intercept on the left - diveType = lDive; - else if( yIntercept < (-SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept > -SPQR::GOALIE_FAR_LIMIT_Y ) - // Close intercept on the right - diveType = rcloseDive; - else if( yIntercept < -SPQR::GOALIE_FAR_LIMIT_Y ) - // Far intercept on the right - diveType = rDive; - else if( fabs(yIntercept) < SPQR::GOALIE_CLOSE_LIMIT_Y/2) - diveType = stopBall; - else - // Any other case: no dive at all - diveType = none; - } + + if( yIntercept > ( SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept < SPQR::GOALIE_FAR_LIMIT_Y ) + // Close intercept on the left + diveType = DiveHandle::lcloseDive; + else if( yIntercept > SPQR::GOALIE_FAR_LIMIT_Y ) + // Far intercept on the left + diveType = DiveHandle::lDive; + else if( yIntercept < (-SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept > -SPQR::GOALIE_FAR_LIMIT_Y ) + // Close intercept on the right + diveType = DiveHandle::rcloseDive; + else if( yIntercept < -SPQR::GOALIE_FAR_LIMIT_Y ) + // Far intercept on the right + diveType = DiveHandle::rDive; + + else if( fabs(yIntercept) < SPQR::GOALIE_CLOSE_LIMIT_Y/2) + diveType = DiveHandle::stopBall; + else + // Any other case: no dive at all + diveType = DiveHandle::none; + } // Using the appropriate estimate for the dive time - if (diveType == lDive || diveType == rDive ) + if (diveType == DiveHandle::lDive || diveType == DiveHandle::rDive ) tDive = SPQR::GOALIE_DIVE_TIME; - else if (diveType == lcloseDive || diveType == rcloseDive ) + else if (diveType == DiveHandle::lcloseDive || diveType == DiveHandle::rcloseDive ) tDive = SPQR::GOALIE_CLOSE_DIVE_TIME; - else if (diveType == stopBall ) + else if (diveType == DiveHandle::stopBall ) tDive = SPQR::GOALIE_STOP_BALL_TIME; else tDive = 0.0; @@ -546,10 +539,11 @@ void DiveHandler::estimateBallProjection() ballProjectionIntercept = yIntercept; // Computing the distance vector from the ball to the goal - double delta_x = -SPQR::FIELD_DIMENSION_X - theGlobalBallEstimation.singleRobotX; - double delta_y = ballProjectionIntercept - theGlobalBallEstimation.singleRobotY; +// float delta_x = -SPQR::FIELD_DIMENSION_X - theGlobalBallEstimation.singleRobotX; +// float delta_y = ballProjectionIntercept - theGlobalBallEstimation.singleRobotY; // Estimated distance from the ball - distanceBall2Goal = sqrt( delta_x*delta_x + delta_y*delta_y); +// distanceBall2Goal = sqrt( delta_x*delta_x + delta_y*delta_y); + distanceBall2Goal = theBallModel.estimate.position.x; } /* @@ -569,18 +563,18 @@ void DiveHandler::estimateDiveTimes() tBall2Goal = -1.0; // Using the appropriate estimates for recover and reposition times - double tRecover = 0.0; - double tReposition = 0.0; - if( diveType == rcloseDive || diveType == lcloseDive ) + float tRecover = 0.0; + float tReposition = 0.0; + if( diveType == DiveHandle::rcloseDive || diveType == DiveHandle::lcloseDive ) // Close dive: no need to back up to the original position tRecover = SPQR::GOALIE_CLOSE_DIVE_RECOVER_TIME; - else if( diveType == rDive || diveType == lDive ) + else if( diveType == DiveHandle::rDive || diveType == DiveHandle::lDive ) { // Long dive: the robot has to stand up and reposition tRecover = SPQR::GOALIE_DIVE_RECOVER_TIME; tReposition = SPQR::GOALIE_DIVE_REPOSITION_TIME; } - else if( diveType == stopBall ) + else if( diveType == DiveHandle::stopBall ) { // stop 
ball: the robot has to stand up and stop the ball tRecover = SPQR::GOALIE_STOP_BALL_RECOVER_TIME; @@ -591,7 +585,7 @@ void DiveHandler::estimateDiveTimes() } /* TOCOMMENT */ -inline double DiveHandler::computeDiveAndRecoverTime(double alpha1, double alpha2) +inline float DiveHandler::computeDiveAndRecoverTime(float alpha1, float alpha2) { return alpha2*( alpha1*tBall2Goal - tDive ); } @@ -611,21 +605,86 @@ inline double DiveHandler::computeDiveAndRecoverTime(double alpha1, double alpha */ void DiveHandler::update(DiveHandle& diveHandle) { -// theOpponentTeamInfo.score; // Check you're actually the goalie... if (theRobotInfo.number == 1) - { - // Compute the ball projection estimate + { + // Compute the ball projection estimate estimateBallProjection(); // Update the DiveHandle diveHandle.ballProjectionEstimate = ballProjectionIntercept; - // Check whether the ball is close enough - if( (distanceBall2Goal < SPQR::FIELD_DIMENSION_X) && (fabs(ballProjectionIntercept) < SPQR::FIELD_DIMENSION_Y) ) + if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 10001 && + ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 10050 && + (int) timer.fallen != 0) +// SPQR_SUCCESS("TooEarly time window START..."); + if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 14971 && + ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 14999 && + (int) timer.fallen != 0) +// SPQR_SUCCESS("TooEarly time window END."); + + if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 10000 && + ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 15000 && + (int) timer.fallen != 0) + { + if(opponentScore != (int)theOpponentTeamInfo.score) + tooEarly=true; + } + // Check whether the ball is close enough + if( (distanceBall2Goal < SPQR::FIELD_DIMENSION_X) && (fabs(ballProjectionIntercept) < SPQR::FIELD_DIMENSION_Y) ) { // Estimate all temporal parameters estimateDiveTimes(); + if(state != notLearning) + { + // if not in playing state + if(theGameInfo.state != STATE_PLAYING) + timer.reset(); + else + { + // if the ball is moving enough fast then set the timer + if( !timer.setTimer && (theBallModel.estimate.velocity.abs() > SPQR::MOVING_BALL_MIN_VELOCITY && + theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) < 1000) ) + timer.set(clock()); + // else reset it... 
+ if( timer.setTimer && (theBallModel.estimate.velocity.abs() < SPQR::MOVING_BALL_MIN_VELOCITY || + theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) > 1000) ) + timer.reset(); + + // if the goalie dives + if( (int)theFallDownState.state == (int)FallDownState::fallen ) + { + timer.fallen=clock(); + estimatedInterval = (int) (clock() - timer.start)/(CLOCKS_PER_SEC/1000); + } + + if(opponentScore != (int)theOpponentTeamInfo.score && !estimatedTime) + { + if( tooEarly ) + { + SPQR_FAILURE("too FAST dude!"); + estimatedInterval += 2000; + tooEarly=false; + } + else + { + SPQR_FAILURE("too SLOW dude!"); + estimatedInterval += (int)(clock() - timer.fallen)/(CLOCKS_PER_SEC/1000) - 500; + } + estimatedTime=true; + + } + // if the goalie succeeded + else if(ownScore != (int)theOwnTeamInfo.score && !estimatedTime) + { + SPQR_SUCCESS("SUPER!"); + estimatedInterval -= 100; + estimatedTime=true; + } + + } + } + #ifdef DIVEHANDLER_DEBUG SPQR_INFO("Ball projection: " << ballProjectionIntercept); SPQR_INFO("PAPO time: " << tBall2Goal); @@ -635,7 +694,7 @@ void DiveHandler::update(DiveHandle& diveHandle) #endif // The module is in the learning state and a reward has been received - if( (state == learning) ) + if( state == learning ) { // Perform a single iteration of the learning algorithm if( learner->updateCoeffs() ) @@ -654,7 +713,7 @@ void DiveHandler::update(DiveHandle& diveHandle) else if( state == waitReward ) { // The opponent team scores: the goalie failed and gets a negative reward - if(opponentScore != (int)theOpponentTeamInfo.score) + if(opponentScore != (int)theOpponentTeamInfo.score && estimatedTime) { // The learner obtains a negative reward rewardHistory.push_front(NEGATIVE_REWARD); @@ -666,16 +725,19 @@ void DiveHandler::update(DiveHandle& diveHandle) opponentScore = (int)theOpponentTeamInfo.score; #ifdef DIVEHANDLER_TRAINING - SPQR_FAILURE("The opponent team scored! Negative reward for the learner. "); + SPQR_FAILURE("The opponent team scored! 
Negative reward for the learner."); #endif // A reward has been received: re-enable learning state = learning; - // Clear the pending reward + // Clear the pending rewardelse if(!diveHandle.rewardAck) diveHandle.rewardAck = true; + + estimatedTime=false; + stamp =true; } // The own team scores: user-guided move to provide the goalie a positive reward - else if(ownScore != (int)theOwnTeamInfo.score) + else if(ownScore != (int)theOwnTeamInfo.score && estimatedTime) { // The learner obtains a positive reward rewardHistory.push_front(POSITIVE_REWARD); @@ -694,6 +756,9 @@ void DiveHandler::update(DiveHandle& diveHandle) // Clear the pending reward if(!diveHandle.rewardAck) diveHandle.rewardAck = true; + + estimatedTime=false; + stamp=true; } } @@ -701,8 +766,19 @@ void DiveHandler::update(DiveHandle& diveHandle) if( state == learning ) learner->updateParams(rewardHistory); - // Compute the dive time using the current coefficients as T = alpha2 * (alpha1*T_PAPO - T_dive) - double diveTime = (learner->getCoeffs()).at(1) * ( (learner->getCoeffs()).at(0) * tBall2Goal - tDive ); + // Compute the dive time using the current coefficients as T = alpha2 * (alpha1*T_PAPO - T_dive) + float diveTime = ( (learner->getCoeffs()).at(0) * tBall2Goal ); + +#ifdef DIVEHANDLER_TRAINING + if(stamp) + { + SPQR_INFO("diveTime: " << diveTime ); + SPQR_INFO("estimated time interval: " << estimatedInterval ); + SPQR_ERR("TimeError: "<< (estimatedInterval - diveTime)*(estimatedInterval - diveTime)); + SPQR_INFO("/-----------------------------------------/\n"); + stamp = false; + } +#endif #ifdef DIVEHANDLER_DEBUG SPQR_INFO( "Estimated overall time to dive and recover position: " << @@ -712,16 +788,16 @@ void DiveHandler::update(DiveHandle& diveHandle) // Update the DiveHandle if (diveTime > 0.0) - diveHandle.diveTime = diveTime; + diveHandle.diveTime = diveTime -tDive; else diveHandle.diveTime = -1.0; #ifdef DIVEHANDLER_TRAINING - if (diveTime > 0.0) - { - if(diveHandle.diveTime < SPQR::GOALIE_DIVE_TIME_TOLERANCE) - SPQR_INFO("Dive now! "); - } +// if (diveTime > 0.0) +// { +// if(diveHandle.diveTime < SPQR::GOALIE_DIVE_TIME_TOLERANCE) +// SPQR_INFO("Dive now! 
"); +// } #endif } @@ -729,7 +805,8 @@ void DiveHandler::update(DiveHandle& diveHandle) else { diveHandle.diveTime = -1; - diveHandle.diveType = diveType; + diveHandle.diveType = diveType; + timer.reset(); } } } diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index fa104b7..d593b7a 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -22,27 +22,30 @@ #include #include #include +#include #include "Tools/Module/Module.h" #include "Representations/Modeling/BallModel.h" #include "Representations/Infrastructure/TeamInfo.h" #include "Representations/Infrastructure/FrameInfo.h" +#include "Representations/Infrastructure/GameInfo.h" #include "Representations/Infrastructure/RobotInfo.h" +#include "Representations/Sensing/FallDownState.h" #include "Representations/SPQR-Representations/ConfigurationParameters.h" #include "Representations/SPQR-Representations/RobotPoseSpqrFiltered.h" #include "Representations/SPQR-Representations/GlobalBallEstimation.h" #include "Representations/SPQR-Representations/DiveHandle.h" -#include "SPQR-Libraries/PTracking/src/Utils/AgentPacket.h" +#include "Utils/AgentPacket.h" // Module definition - - MODULE(DiveHandler) REQUIRES(OpponentTeamInfo) REQUIRES(OwnTeamInfo) REQUIRES(FrameInfo) - REQUIRES(RobotInfo) + REQUIRES(GameInfo) + REQUIRES(FallDownState) + REQUIRES(RobotInfo) REQUIRES(RobotPoseSpqrFiltered) REQUIRES(BallModel) REQUIRES(GlobalBallEstimation) @@ -51,60 +54,40 @@ END_MODULE // Termination conditions -#define MAX_ITER 300 +#define MAX_ITER 15 #define CONVERGENCE_THRESHOLD 0.01 // PG parameters #define GAMMA 0.5 #define BUFFER_DIM 10 -#define REWARDS_HISTORY_SIZE 15 -#define EPSILON 0.10 +#define REWARDS_HISTORY_SIZE 10 +#define EPSILON 0.05 #define T 15 // Evaluation weight -#define LAMBDA1 0.7 +#define LAMBDA1 0.9 //#define LAMBDA2 0.3 // Module class declaration - - class DiveHandler : public DiveHandlerBase { // Learning state - enum LearningState - { + ENUM( LearningState, // Learning disabled notLearning = 1, // Learning paused, expecting reward waitReward, // Learning active learning - }; - - // Dive type - enum Dive - { - // No dive at all - none = 1, - // Long dive on the left - lDive, - // Long dive on the right - rDive, - // Close dive on the left - lcloseDive, - // Close dive on the right - rcloseDive, - // Stop the ball without diving - stopBall - }; + ); // Inner base class modeling the learning agent class CoeffsLearner { protected: // Set of coefficients representing the learning objective - std::vector coeffs; + std::vector coeffs; // Set of fixed parameters defining the cost funcion - std::map params; + std::map params; // Iteration counter int iter_count; @@ -114,45 +97,43 @@ class DiveHandler : public DiveHandlerBase public: // Default constructor - CoeffsLearner(int _nCoeffs, double _initValue, DiveHandler* _dhPtr): + CoeffsLearner(int _nCoeffs, float _initValue, DiveHandler* _dhPtr): coeffs(_nCoeffs, _initValue), iter_count(0), diveHandler_ptr(_dhPtr) { } + virtual ~CoeffsLearner(){} + // Setter/getter for the coefficients - void setCoeffs(const std::vector& _coeffs); - inline std::vector getCoeffs(){ return coeffs; } + void setCoeffs(const std::vector& _coeffs); + inline std::vector getCoeffs(){ return coeffs; } // Setter/getter for the parameters - void setParam(const std::string& _key, double _value); - inline double getParam(std::string _key){ return params[_key]; } + void setParam(const std::string& _key, float _value); + inline 
float getParam(std::string _key){ return params[_key]; } // Update coefficients performing a step of the learning algorithm virtual bool updateCoeffs() = 0; // Use the obtained rewards to adjust the algorithm parameters - virtual void updateParams(const std::list& rewards) = 0; + virtual void updateParams(const std::list& rewards) = 0; }; // Inner class modeling a PolicyGradient-based learning agent class PGLearner : public CoeffsLearner { - typedef std::list< std::vector > PGbuffer; + typedef std::list< std::vector > PGbuffer; private: // Current estimate for the coefficients gradient - std::vector coeffsGradient; + std::vector coeffsGradient; // Best individual performance achieved so far - std::vector coeffsBest; + std::vector coeffsBest; // Current reward score - double reward_score; + float reward_score; // Current reward normalization factor - double reward_norm; - // Score of the current gradient estimate - double rewardGradient; - // Best gradient score so far - double rewardBest; + float reward_norm; // Memory buffer for the PG algorithm PGbuffer coeffsBuffer; @@ -163,22 +144,22 @@ class DiveHandler : public DiveHandlerBase bool converged(); // Recursive perturbation generator - void generatePerturbations(std::vector* partial_perturbation, unsigned int index); + void generatePerturbations(std::vector* partial_perturbation, unsigned int index); public: // Default constructor - PGLearner(DiveHandler* _dhPtr, int _nCoeffs, double _epsilon = EPSILON, - int _T = T, double _initValue = 1.0, bool randomize = false); + PGLearner(DiveHandler* _dhPtr, int _nCoeffs, float _epsilon = EPSILON, + int _T = T, float _initValue = 1.0, bool randomize = false); // Generate a set of perturbations for the current policy void generatePerturbations(); // Evaluate a single policy perturbation with the cost function - double evaluatePerturbation( std::vector R ); + float evaluatePerturbation( std::vector R ); // Update the PG parameters according to the obtained rewards - void updateParams(const std::list& rewards); + void updateParams(const std::list& rewards); // Update coefficients performing a step of the learning algorithm virtual bool updateCoeffs(); @@ -197,14 +178,14 @@ class DiveHandler : public DiveHandlerBase private: // Dive type currently selected - Dive diveType; + DiveHandle::Dive diveType; // Current learning state LearningState state; // Learning agent CoeffsLearner* learner; // Obtained rewards - std::list rewardHistory; + std::list rewardHistory; // Current scores int opponentScore; @@ -212,23 +193,54 @@ class DiveHandler : public DiveHandlerBase // Estimated time the ball needs to reach the goal // a.k.a. Tpapo (historical reasons) - double tBall2Goal; + float tBall2Goal; // Estimated time needed for the current dive action to be performed - double tDive; + float tDive; // Estimated time the goalie needs to back up to its original position - double tBackInPose; + float tBackInPose; + + // Timer + class Timer + { + public: + clock_t start; + clock_t fallen; + bool setTimer; + + Timer():start(0), fallen(0), setTimer(false){} + inline void set(clock_t startTime) + { + if(!setTimer) + { + start = startTime; + setTimer = true; +// std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; + } + } + inline void reset() + { + if(setTimer) + { + setTimer = false; +// std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset Timer!" 
<< "\033[0m" << std::endl; + } + } + }; + + Timer timer; + unsigned int estimatedInterval; // Estimated intersection between the ball projection and the goal line - double ballProjectionIntercept; + float ballProjectionIntercept; // Estimated distance of the ball from the own goal - double distanceBall2Goal; + float distanceBall2Goal; // Computes parameters using the ball estimated position and velocity void estimateDiveTimes(); void estimateBallProjection(); // Compute the overall time the goalie needs to dive and then recover its position - inline double computeDiveAndRecoverTime(double alpha1, double alpha2); + inline float computeDiveAndRecoverTime(float alpha1, float alpha2); public: @@ -238,7 +250,7 @@ class DiveHandler : public DiveHandlerBase ~DiveHandler(); // Setter for the reward list - inline const std::list& getRewardList() const + inline const std::list& getRewardList() const { return rewardHistory; } From d9ecaa6351e75284a4e9ccba07c1de3c31e52c96 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Tue, 11 Mar 2014 23:21:28 +0100 Subject: [PATCH 09/17] updates --- machineLearning/DiveHandler/DiveHandler.cpp | 31 ++++++++------------- machineLearning/DiveHandler/DiveHandler.h | 6 ++-- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 61e67ac..62d4c38 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -23,8 +23,8 @@ // Uncomment to have debug information //#define DIVEHANDLER_DEBUG -//#define DIVEHANDLER_TRAINING_DEBUG -//#define DIVEHANDLER_TRAINING +#define DIVEHANDLER_TRAINING_DEBUG +#define DIVEHANDLER_TRAINING //#define RAND_PERMUTATIONS #define NEGATIVE_REWARD -1.0 @@ -253,8 +253,7 @@ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) new_coeffs.at(0) = coeffs.at(0) + R.at(0); new_coeffs.at(1) = coeffs.at(1) + R.at(1); - return (diveHandler_ptr->estimatedInterval - ( R.at(0)*diveHandler_ptr->tBall2Goal))* - (diveHandler_ptr->estimatedInterval - ( R.at(0)*diveHandler_ptr->tBall2Goal)) ; + return ( std::abs(diveHandler_ptr->estimatedInterval - ( R.at(0)*diveHandler_ptr->tBall2Goal)) ) ; // return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + // LAMBDA1*fabs(magnitude(coeffs) - magnitude(coeffsBest)); @@ -616,12 +615,15 @@ void DiveHandler::update(DiveHandle& diveHandle) if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 10001 && ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 10050 && (int) timer.fallen != 0) -// SPQR_SUCCESS("TooEarly time window START..."); +#ifdef DIVEHANDLER_TRAINING + SPQR_SUCCESS("TooEarly time window START..."); +#endif if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 14971 && ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 14999 && (int) timer.fallen != 0) -// SPQR_SUCCESS("TooEarly time window END."); - +#ifdef DIVEHANDLER_TRAINING + SPQR_SUCCESS("TooEarly time window END."); +#endif if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 10000 && ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 15000 && (int) timer.fallen != 0) @@ -663,7 +665,7 @@ void DiveHandler::update(DiveHandle& diveHandle) if( tooEarly ) { SPQR_FAILURE("too FAST dude!"); - estimatedInterval += 2000; + estimatedInterval = timer.fallen + 3000; tooEarly=false; } else @@ -678,7 +680,7 @@ void DiveHandler::update(DiveHandle& diveHandle) else if(ownScore != (int)theOwnTeamInfo.score && !estimatedTime) { 
SPQR_SUCCESS("SUPER!"); - estimatedInterval -= 100; + estimatedInterval -= 200; estimatedTime=true; } @@ -774,7 +776,7 @@ void DiveHandler::update(DiveHandle& diveHandle) { SPQR_INFO("diveTime: " << diveTime ); SPQR_INFO("estimated time interval: " << estimatedInterval ); - SPQR_ERR("TimeError: "<< (estimatedInterval - diveTime)*(estimatedInterval - diveTime)); + SPQR_ERR("TimeError: "<< std::abs(estimatedInterval - diveTime) ); SPQR_INFO("/-----------------------------------------/\n"); stamp = false; } @@ -791,15 +793,6 @@ void DiveHandler::update(DiveHandle& diveHandle) diveHandle.diveTime = diveTime -tDive; else diveHandle.diveTime = -1.0; - -#ifdef DIVEHANDLER_TRAINING -// if (diveTime > 0.0) -// { -// if(diveHandle.diveTime < SPQR::GOALIE_DIVE_TIME_TOLERANCE) -// SPQR_INFO("Dive now! "); -// } -#endif - } // If the ball is far away or completely off target, no dive has to performed else diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index d593b7a..4154b03 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -61,7 +61,7 @@ END_MODULE #define BUFFER_DIM 10 #define REWARDS_HISTORY_SIZE 10 #define EPSILON 0.05 -#define T 15 +#define T 5 // Evaluation weight #define LAMBDA1 0.9 //#define LAMBDA2 0.3 @@ -214,7 +214,7 @@ class DiveHandler : public DiveHandlerBase { start = startTime; setTimer = true; -// std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; } } inline void reset() @@ -222,7 +222,7 @@ class DiveHandler : public DiveHandlerBase if(setTimer) { setTimer = false; -// std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset Timer!" << "\033[0m" << std::endl; + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset Timer!" 
<< "\033[0m" << std::endl; } } }; From 94b6fc2e3d21fa936736709d26fef9b5b4aaeff3 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Wed, 12 Mar 2014 20:28:13 +0100 Subject: [PATCH 10/17] too early too late too jesus --- machineLearning/DiveHandler/DiveHandler.cpp | 108 +++++++++++++------- machineLearning/DiveHandler/DiveHandler.h | 16 +-- 2 files changed, 81 insertions(+), 43 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 62d4c38..906440f 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -44,6 +44,7 @@ bool stamp =false; bool tooEarly=false; bool estimatedTime=false; +bool goalDetected=false; MAKE_MODULE(DiveHandler, SPQR-Modules) @@ -612,25 +613,42 @@ void DiveHandler::update(DiveHandle& diveHandle) // Update the DiveHandle diveHandle.ballProjectionEstimate = ballProjectionIntercept; - if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 10001 && - ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 10050 && - (int) timer.fallen != 0) #ifdef DIVEHANDLER_TRAINING + if( timer.getTimeSince(timer.fallen) > 10000 && timer.getTimeSince(timer.fallen) < 10050 && timer.fallen != 0) SPQR_SUCCESS("TooEarly time window START..."); #endif - if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 14971 && - ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 14999 && - (int) timer.fallen != 0) #ifdef DIVEHANDLER_TRAINING + if( timer.getTimeSince(timer.fallen) > 14971 && timer.getTimeSince(timer.fallen) < 14999 && timer.fallen != 0) SPQR_SUCCESS("TooEarly time window END."); #endif - if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 10000 && - ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 15000 && - (int) timer.fallen != 0) + + if(opponentScore != (int)theOpponentTeamInfo.score && !goalDetected) { - if(opponentScore != (int)theOpponentTeamInfo.score) - tooEarly=true; + if( timer.getTimeSince(timer.fallen) > 10000 && timer.getTimeSince(timer.fallen) < 15000 && + (unsigned int) timer.fallen != 0) + { +#ifdef DIVEHANDLER_TRAINING + SPQR_FAILURE("too FAST dude!"); +#endif + estimatedInterval += 3000; + } + else + { +// if(goalTimer.setTimer) + { +#ifdef DIVEHANDLER_TRAINING + SPQR_FAILURE("too SLOW dude!"); +#endif + estimatedInterval = goalTimer.getTimeSince(goalTimer.start) -500; + } + } + estimatedTime=true; + goalDetected=true; } + + if(theGameInfo.state == STATE_SET) + goalTimer.reset(); + // Check whether the ball is close enough if( (distanceBall2Goal < SPQR::FIELD_DIMENSION_X) && (fabs(ballProjectionIntercept) < SPQR::FIELD_DIMENSION_Y) ) { @@ -640,50 +658,67 @@ void DiveHandler::update(DiveHandle& diveHandle) if(state != notLearning) { // if not in playing state + if(theGameInfo.state != STATE_PLAYING) timer.reset(); else { +// if(goalTimer.setTimer) +// SPQR_INFO("time: "<< goalTimer.getTimeSince(goalTimer.start)); + // if the ball is moving enough fast then set the timer - if( !timer.setTimer && (theBallModel.estimate.velocity.abs() > SPQR::MOVING_BALL_MIN_VELOCITY && + if( (theBallModel.estimate.velocity.abs() > SPQR::MOVING_BALL_MIN_VELOCITY && theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) < 1000) ) - timer.set(clock()); - // else reset it... 
- if( timer.setTimer && (theBallModel.estimate.velocity.abs() < SPQR::MOVING_BALL_MIN_VELOCITY || - theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) > 1000) ) - timer.reset(); - - // if the goalie dives - if( (int)theFallDownState.state == (int)FallDownState::fallen ) { - timer.fallen=clock(); - estimatedInterval = (int) (clock() - timer.start)/(CLOCKS_PER_SEC/1000); + if(!timer.setTimer) + { + timer.set(clock()); +#ifdef DIVEHANDLER_TRAINING + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; +#endif + goalTimer.set(clock()); +#ifdef DIVEHANDLER_TRAINING + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set goal Timer!" << "\033[0m" << std::endl; +#endif + } } - - if(opponentScore != (int)theOpponentTeamInfo.score && !estimatedTime) + // else reset it... + if( (theBallModel.estimate.velocity.abs() < SPQR::MOVING_BALL_MIN_VELOCITY || + theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) > 4000) ) { - if( tooEarly ) + if(timer.setTimer) { - SPQR_FAILURE("too FAST dude!"); - estimatedInterval = timer.fallen + 3000; - tooEarly=false; + timer.reset(); +#ifdef DIVEHANDLER_TRAINING + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset Timer!" << "\033[0m" << std::endl; +#endif } - else + if(goalTimer.setTimer) { - SPQR_FAILURE("too SLOW dude!"); - estimatedInterval += (int)(clock() - timer.fallen)/(CLOCKS_PER_SEC/1000) - 500; + goalTimer.reset(); +#ifdef DIVEHANDLER_TRAINING + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset goal Timer!" << "\033[0m" << std::endl; +#endif } - estimatedTime=true; - } + // if the goalie succeeded - else if(ownScore != (int)theOwnTeamInfo.score && !estimatedTime) + if(ownScore != (int)theOwnTeamInfo.score && !estimatedTime) { +#ifdef DIVEHANDLER_TRAINING SPQR_SUCCESS("SUPER!"); +#endif estimatedInterval -= 200; estimatedTime=true; } + // if the goalie dives + if( (int)theFallDownState.state == (int)FallDownState::fallen ) + { + timer.fallen=clock(); + estimatedInterval = timer.getTimeSince(timer.start); + } + } } @@ -715,7 +750,7 @@ void DiveHandler::update(DiveHandle& diveHandle) else if( state == waitReward ) { // The opponent team scores: the goalie failed and gets a negative reward - if(opponentScore != (int)theOpponentTeamInfo.score && estimatedTime) + if(goalDetected && estimatedTime) { // The learner obtains a negative reward rewardHistory.push_front(NEGATIVE_REWARD); @@ -731,10 +766,11 @@ void DiveHandler::update(DiveHandle& diveHandle) #endif // A reward has been received: re-enable learning state = learning; - // Clear the pending rewardelse + // Clear the pending reward if(!diveHandle.rewardAck) diveHandle.rewardAck = true; + goalDetected=false; estimatedTime=false; stamp =true; } diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 4154b03..723d9b6 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -61,7 +61,7 @@ END_MODULE #define BUFFER_DIM 10 #define REWARDS_HISTORY_SIZE 10 #define EPSILON 0.05 -#define T 5 +#define T 15 // Evaluation weight #define LAMBDA1 0.9 //#define LAMBDA2 0.3 @@ -208,26 +208,28 @@ class DiveHandler : public DiveHandlerBase bool setTimer; Timer():start(0), fallen(0), setTimer(false){} + + inline unsigned int getTimeSince(clock_t startTime) + { + return (unsigned int) ((clock() - startTime)/(CLOCKS_PER_SEC/1000)); + } inline void set(clock_t startTime) { - if(!setTimer) +// if(!setTimer) { start = startTime; setTimer = true; - std::cerr << "\033[33;1m" 
<<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; } } inline void reset() { - if(setTimer) - { +// if(setTimer) setTimer = false; - std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset Timer!" << "\033[0m" << std::endl; - } } }; Timer timer; + Timer goalTimer; unsigned int estimatedInterval; // Estimated intersection between the ball projection and the goal line From da5e8cb47cfd164aa4d3c440159044d2d3a04276 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Wed, 12 Mar 2014 20:34:42 +0100 Subject: [PATCH 11/17] jump --- machineLearning/keeperJumpLeft.mof | 44 ++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 machineLearning/keeperJumpLeft.mof diff --git a/machineLearning/keeperJumpLeft.mof b/machineLearning/keeperJumpLeft.mof new file mode 100644 index 0000000..b63cdcd --- /dev/null +++ b/machineLearning/keeperJumpLeft.mof @@ -0,0 +1,44 @@ +motion_id = keeperJumpLeft + +label start + +hardness 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 50 + +"riseHand +47.6 15 75 0 0 0 -75 * * * * * * * * * * * * * * * 0 500 + +"turnLeftAnkle +- - * - - - * - - - - 20 * * - -30 - - * * - 22 1 400 + +// try no to sit at the initial phase of the match +// (1) HeadYaw [-119/119] (2) HeadPitch [29/-38] (3) LShoulderPitch [-119/119] (4) LShoulderRoll [-18/76] +// (5) LElbowYaw [-119/119] (6) LElbowRoll [-88/0] (7) RShoulderPitch [119/-119] (8) RShoulderRoll [-76/18] +// (9) RElbowYaw [119/-119] (10) RElbowRoll [88/0] (11) LHipYawPitch [-65/42] (12) LHipRoll [-21/45] +// (13) LHipPitch [-88/27] (14) LKneePitch [-5/121] (15) LAnklePitch [52/-68] (16) LAnkleRoll [-22/44] +// (17) RHipYawPitch [-65/42] (18) RHipRoll [-45/21] (19) RHipPitch [27/-88] (20) RKneePitch [121/-5] +// (21) RAnklePitch [53/-67] (22) RAnkleRoll [-44/22] +//"HY HP LSP LSR LEY LER RSP RSR REY RER LHYP LHR LHP LKP LAP LAR RHYP RHR RHP RKP RAP RAR Int Dur +//- - - - - - - - - - - - - - - - - - - - - - 0 100 + +"deactivate joints +- - - - - - - - - - - - - - - - - - - - - - 0 800 + +hardness 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 50 + +//- - - - - - - 0 0 0 -12.3 20 -71.5 122.3 -67.2 -6.5 0 -20 -34.8 66.9 -33 4.6 1 100 +//- - - - - - - 0 0 0 - 45 - - - -21.5 - -45 - 30 - 22 1 200 + +//- - 117.9 16.3 2.7 -9.8 -87.4 18.2 0.3 -22.6 11.4 6.5 -75.4 123.3 -69 1.7 11.4 8.2 11.5 -4.6 -32.4 2.8 1 500 + +//- - -5.2 28.7 -6.5 -1.9 -111.4 12.2 11.4 -9 -21.4 8.8 -76 122.4 -69.7 -1.9 -21.4 2.5 28.7 -7.5 -27.7 -6 1 350 +//- - 37.6 61.4 -18.2 -1.4 -95.3 15 11.1 -9.7 -34 -9.3 -62.1 122.6 -69.8 -3.6 -34 6.3 29.5 18.5 -27.8 -5.4 1 200 +//- - -90.6 15.2 0.1 -18.2 -91.1 12.7 -0.3 -16.7 0 0.2 -7 28.6 -21.5 - 0 0.2 -7 28.5 -21.3 - 1 300 + +label repeat +- - - - - - - - - - - - - - - - - - - - - - 0 100 + +transition keeperJumpLeft keeperJumpLeft repeat +transition standUpFrontNao standUpFrontNao start +transition standUpBackNao standUpBackNao start + +transition allMotions extern start From 633fc75f1460b5078bbde159822ad4a67d994938 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Thu, 13 Mar 2014 00:17:58 +0100 Subject: [PATCH 12/17] ... 
--- machineLearning/DiveHandler/DiveHandler.cpp | 55 ++++++++++----------- machineLearning/DiveHandler/DiveHandler.h | 3 +- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 906440f..71dbcd6 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -254,7 +254,7 @@ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) new_coeffs.at(0) = coeffs.at(0) + R.at(0); new_coeffs.at(1) = coeffs.at(1) + R.at(1); - return ( std::abs(diveHandler_ptr->estimatedInterval - ( R.at(0)*diveHandler_ptr->tBall2Goal)) ) ; + return ( std::abs(diveHandler_ptr->tBAGO - ( R.at(0)*diveHandler_ptr->tBAGOestimate)) ) ; // return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + // LAMBDA1*fabs(magnitude(coeffs) - magnitude(coeffsBest)); @@ -315,7 +315,7 @@ bool DiveHandler::PGLearner::updateCoeffs() { #ifdef DIVEHANDLER_TRAINING - SPQR_INFO( "PG algorithm, iteration " << iter_count << "... " ); + SPQR_INFO( "\nPG algorithm, iteration " << iter_count << "... " ); #endif if( iter_count == MAX_ITER || converged() ) @@ -455,8 +455,8 @@ bool DiveHandler::PGLearner::updateCoeffs() */ DiveHandler::DiveHandler(): diveType(DiveHandle::none), state(static_cast(SPQR::GOALIE_LEARNING_STATE)), - learner(new PGLearner(this, 2, EPSILON, T, 1.0, false)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y), - tDive(0.0), tBackInPose(0.0), estimatedInterval(0), + learner(new PGLearner(this, 2, EPSILON, T, 1.0, false)), opponentScore(0), tBall2Goal(-1), + tDive(0.0), tBackInPose(0.0), tBAGO(0), ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) { #ifdef DIVEHANDLER_TRAINING @@ -538,12 +538,8 @@ void DiveHandler::estimateBallProjection() // Updating the class parameters with the obtained value ballProjectionIntercept = yIntercept; - // Computing the distance vector from the ball to the goal -// float delta_x = -SPQR::FIELD_DIMENSION_X - theGlobalBallEstimation.singleRobotX; -// float delta_y = ballProjectionIntercept - theGlobalBallEstimation.singleRobotY; // Estimated distance from the ball -// distanceBall2Goal = sqrt( delta_x*delta_x + delta_y*delta_y); - distanceBall2Goal = theBallModel.estimate.position.x; + distanceBall2Goal = theBallModel.estimate.position.abs(); } /* @@ -554,13 +550,13 @@ void DiveHandler::estimateBallProjection() void DiveHandler::estimateDiveTimes() { // Check whether the ball is actually moving toward the goal - if ( (theBallModel.estimate.velocity.abs() != 0.0) - && (theBallModel.estimate.velocity.x < 0.0) ) + if ( (theBallModel.estimate.velocity.abs() != 0.0) && + (theBallModel.estimate.velocity.x < 0.0) ) // Use a constant velocity approximation to the estimate the time interval - tBall2Goal = 1000.0 * ( distanceBall2Goal / theBallModel.estimate.velocity.abs() ); + tBall2Goal = 1000.0 * ( distanceBall2Goal / theBallModel.estimate.velocity.abs() ); else // Otherwise, set the parameter to a meaningless value - tBall2Goal = -1.0; + tBall2Goal = -1.0; // Using the appropriate estimates for recover and reposition times float tRecover = 0.0; @@ -614,23 +610,22 @@ void DiveHandler::update(DiveHandle& diveHandle) diveHandle.ballProjectionEstimate = ballProjectionIntercept; #ifdef DIVEHANDLER_TRAINING - if( timer.getTimeSince(timer.fallen) > 10000 && timer.getTimeSince(timer.fallen) < 10050 && timer.fallen != 0) + if( timer.getTimeSince(timer.fallen) > 5000 && 
timer.getTimeSince(timer.fallen) < 5040 && timer.fallen != 0) SPQR_SUCCESS("TooEarly time window START..."); -#endif -#ifdef DIVEHANDLER_TRAINING - if( timer.getTimeSince(timer.fallen) > 14971 && timer.getTimeSince(timer.fallen) < 14999 && timer.fallen != 0) + + if( timer.getTimeSince(timer.fallen) > 9961 && timer.getTimeSince(timer.fallen) < 9999 && timer.fallen != 0) SPQR_SUCCESS("TooEarly time window END."); #endif if(opponentScore != (int)theOpponentTeamInfo.score && !goalDetected) { - if( timer.getTimeSince(timer.fallen) > 10000 && timer.getTimeSince(timer.fallen) < 15000 && + if( timer.getTimeSince(timer.fallen) > 5000 && timer.getTimeSince(timer.fallen) < 10000 && (unsigned int) timer.fallen != 0) { #ifdef DIVEHANDLER_TRAINING SPQR_FAILURE("too FAST dude!"); #endif - estimatedInterval += 3000; + tBAGO += 3000; } else { @@ -639,7 +634,7 @@ void DiveHandler::update(DiveHandle& diveHandle) #ifdef DIVEHANDLER_TRAINING SPQR_FAILURE("too SLOW dude!"); #endif - estimatedInterval = goalTimer.getTimeSince(goalTimer.start) -500; + tBAGO = goalTimer.getTimeSince(goalTimer.start) -1500; } } estimatedTime=true; @@ -647,7 +642,10 @@ void DiveHandler::update(DiveHandle& diveHandle) } if(theGameInfo.state == STATE_SET) + { + tBAGOestimate=0; goalTimer.reset(); + } // Check whether the ball is close enough if( (distanceBall2Goal < SPQR::FIELD_DIMENSION_X) && (fabs(ballProjectionIntercept) < SPQR::FIELD_DIMENSION_Y) ) @@ -673,11 +671,10 @@ void DiveHandler::update(DiveHandle& diveHandle) if(!timer.setTimer) { timer.set(clock()); -#ifdef DIVEHANDLER_TRAINING - std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; -#endif goalTimer.set(clock()); + tBAGOestimate=tBall2Goal; #ifdef DIVEHANDLER_TRAINING + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set goal Timer!" << "\033[0m" << std::endl; #endif } @@ -699,6 +696,7 @@ void DiveHandler::update(DiveHandle& diveHandle) #ifdef DIVEHANDLER_TRAINING std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset goal Timer!" 
<< "\033[0m" << std::endl; #endif + tBAGOestimate=0; } } @@ -708,7 +706,7 @@ void DiveHandler::update(DiveHandle& diveHandle) #ifdef DIVEHANDLER_TRAINING SPQR_SUCCESS("SUPER!"); #endif - estimatedInterval -= 200; + tBAGO -= 200; estimatedTime=true; } @@ -716,7 +714,7 @@ void DiveHandler::update(DiveHandle& diveHandle) if( (int)theFallDownState.state == (int)FallDownState::fallen ) { timer.fallen=clock(); - estimatedInterval = timer.getTimeSince(timer.start); + tBAGO = timer.getTimeSince(timer.start); } } @@ -810,10 +808,9 @@ void DiveHandler::update(DiveHandle& diveHandle) #ifdef DIVEHANDLER_TRAINING if(stamp) { - SPQR_INFO("diveTime: " << diveTime ); - SPQR_INFO("estimated time interval: " << estimatedInterval ); - SPQR_ERR("TimeError: "<< std::abs(estimatedInterval - diveTime) ); - SPQR_INFO("/-----------------------------------------/\n"); + SPQR_INFO("BAGO: " << tBAGO ); + SPQR_INFO("BAGO estimate: " << tBAGOestimate ); + SPQR_ERR("BAGO error: "<< std::abs(tBAGO - tBAGOestimate) ); stamp = false; } #endif diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 723d9b6..26d6056 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -230,7 +230,8 @@ class DiveHandler : public DiveHandlerBase Timer timer; Timer goalTimer; - unsigned int estimatedInterval; + unsigned int tBAGO; + float tBAGOestimate; // Estimated intersection between the ball projection and the goal line float ballProjectionIntercept; From 6ee974735dfe95306ba77811d771e4102e6155c5 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Sat, 15 Mar 2014 16:30:54 +0100 Subject: [PATCH 13/17] BAGO --- machineLearning/DiveHandler/DiveHandler.cpp | 25 ++++++++++++++++++++- machineLearning/DiveHandler/DiveHandler.h | 2 ++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 71dbcd6..c5c19a1 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -644,6 +644,8 @@ void DiveHandler::update(DiveHandle& diveHandle) if(theGameInfo.state == STATE_SET) { tBAGOestimate=0; + dBAGOestimate=0; + sampledVelocities.clear(); goalTimer.reset(); } @@ -668,11 +670,13 @@ void DiveHandler::update(DiveHandle& diveHandle) if( (theBallModel.estimate.velocity.abs() > SPQR::MOVING_BALL_MIN_VELOCITY && theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) < 1000) ) { + sampledVelocities.push_back( theBallModel.estimate.velocity.abs() ); if(!timer.setTimer) { timer.set(clock()); goalTimer.set(clock()); - tBAGOestimate=tBall2Goal; + dBAGOestimate=distanceBall2Goal; +// tBAGOestimate=tBall2Goal; #ifdef DIVEHANDLER_TRAINING std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set goal Timer!" << "\033[0m" << std::endl; @@ -697,6 +701,8 @@ void DiveHandler::update(DiveHandle& diveHandle) std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset goal Timer!" 
<< "\033[0m" << std::endl; #endif tBAGOestimate=0; + dBAGOestimate=0; + sampledVelocities.clear(); } } @@ -716,10 +722,27 @@ void DiveHandler::update(DiveHandle& diveHandle) timer.fallen=clock(); tBAGO = timer.getTimeSince(timer.start); } + } + } + if(estimatedTime) + { + float velocityMean=0; + float velocityMax=0; + std::list::const_iterator it=sampledVelocities.begin(); + for(; it != sampledVelocities.end(); ++it) + { + if((*it) > velocityMax) velocityMax=(*it); + velocityMean += (*it) /sampledVelocities.size(); } + + tBAGOestimate = 1000*(dBAGOestimate / velocityMax); + SPQR_INFO("distance: " << dBAGOestimate); + SPQR_INFO("velocity: " << (.75f*velocityMax)/1000); + SPQR_INFO("tBAGO: " << tBAGOestimate); } + #ifdef DIVEHANDLER_DEBUG SPQR_INFO("Ball projection: " << ballProjectionIntercept); SPQR_INFO("PAPO time: " << tBall2Goal); diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 26d6056..bd6b49f 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -232,11 +232,13 @@ class DiveHandler : public DiveHandlerBase Timer goalTimer; unsigned int tBAGO; float tBAGOestimate; + float dBAGOestimate; // Estimated intersection between the ball projection and the goal line float ballProjectionIntercept; // Estimated distance of the ball from the own goal float distanceBall2Goal; + std::list sampledVelocities; // Computes parameters using the ball estimated position and velocity void estimateDiveTimes(); From 687089a31dc1fe5e36a04ce58cfd781887fc5e2f Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Sun, 16 Mar 2014 20:08:15 +0100 Subject: [PATCH 14/17] GA --- machineLearning/DiveHandler/DiveHandler.cpp | 244 ++++++++++++++++++-- machineLearning/DiveHandler/DiveHandler.h | 65 +++++- 2 files changed, 281 insertions(+), 28 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index c5c19a1..0f443c5 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -245,23 +245,9 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_p float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) { // Dimensions check - assert(R.size() == coeffs.size()); - // Generate perturbated policy and call the DiveHandler object for evaluation -// float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); - - // Perturbated coefficients - std::vector new_coeffs(2); - new_coeffs.at(0) = coeffs.at(0) + R.at(0); - new_coeffs.at(1) = coeffs.at(1) + R.at(1); + assert(R.size() == coeffs.size()); return ( std::abs(diveHandler_ptr->tBAGO - ( R.at(0)*diveHandler_ptr->tBAGOestimate)) ) ; -// return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + -// LAMBDA1*fabs(magnitude(coeffs) - magnitude(coeffsBest)); - -// return (1.0-LAMBDA1-LAMBDA2)*fabs(tDiveAndRecover) + -// LAMBDA1*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + -// LAMBDA2*fabs(1.0 - ((coeffs.at(0) + R.at(0))+(coeffs.at(1) + R.at(1)))); - } @@ -447,6 +433,221 @@ bool DiveHandler::PGLearner::updateCoeffs() } +/** --------------------- CoeffsLearner: Genetic Algorithm --------------------- */ +DiveHandler::GALearner::GALearner( DiveHandler* _dhPtr, int _nCoeffs, float _initValue ): + CoeffsLearner(_nCoeffs, _initValue, _dhPtr), + reward_score(.0f), reward_norm(.0f) +{ + setParam("selection", SELECTION); + setParam("crossover", CROSSOVER); + 
setParam("mutation", MUTATION); + + setParam("elite", ELITE_SIZE); + + srand(time(NULL)); + for(unsigned int i=0; i< POPULATION_SIZE; ++i) + population.insert( Individual( (rand()%600) + 500) ); + +} + +float DiveHandler::GALearner::evaluate(Individual i) +{ + return ( std::abs(diveHandler_ptr->tBAGO - ( i.hypothesis.to_ulong()*diveHandler_ptr->tBAGOestimate)) ); +} + +DiveHandler::GALearner::Individual DiveHandler::GALearner::rnd_mutate(Individual i) +{ + srand(time(NULL)); + unsigned int n_flips = rand()%3+1; + for(unsigned int j=0; j< n_flips; ++j ) + (i.hypothesis).flip(rand()%(INDIVIDUAL_SIZE-1)); + + return i; +} + +DiveHandler::GALearner::Individual DiveHandler::GALearner::crossover(Individual mommy, const Individual& daddy) +{ + srand(time(NULL)); + int crossover_point = rand()%INDIVIDUAL_SIZE; + for(unsigned int i = crossover_point+1; i::const_iterator i = fitnessBuffer.begin(); + std::list::const_iterator j = fitnessBuffer.begin(); ++j; + while (j != fitnessBuffer.end()) + { + avg_variation += ( (*i) - (*j) )/fitnessBuffer.size(); + ++i; ++j; + } + + // Compute variations standard deviation + float std_variation = .0f; + // Iterate over the whole buffer and compute deltas from step i-1 to i + std::list::const_iterator k = fitnessBuffer.begin(); + std::list::const_iterator t = fitnessBuffer.begin(); ++t; + while (t != fitnessBuffer.end()) + { + std_variation += ( pow((*k)-(*t) - avg_variation, 2) ) / fitnessBuffer.size(); + ++k; ++t; + } + std_variation = sqrt(std_variation); + + // Check result against variation threshold + if ((avg_variation < CONVERGENCE_THRESHOLD) && (std_variation < CONVERGENCE_THRESHOLD)) + { + #ifdef DIVEHANDLER_TRAINING + SPQR_SUCCESS("GALearner converged!"); + SPQR_SUCCESS("Coefficients values:"); + for (unsigned int i = 0; i < coeffs.size(); ++i) + SPQR_SUCCESS("\t" << coeffs.at(i)); + #endif + return true; + } + else + return false; + } +} + +void DiveHandler::GALearner::evolutionStep() +{ + std::set previousPopulation(population); + population.clear(); + + int sel = 0; + std::set::iterator selector = previousPopulation.begin(); + std::set::iterator partner = previousPopulation.end(); + for(; selector != previousPopulation.end(); ++selector, ++sel) + { + if(sel < round(getParam("selection")*POPULATION_SIZE)) + population.insert(Individual(evaluate(*selector), (*selector).hypothesis.to_string())); + else + { + srand(time(NULL)); + + if( rand()/RAND_MAX < getParam("mutation") ) + population.insert( Individual(evaluate(rnd_mutate( *selector )), (rnd_mutate( *selector )).hypothesis.to_string()) ); + else if( rand()/RAND_MAX < sqrt(getParam("crossover")) ) + { + if(partner == previousPopulation.end()) + partner = selector; + else + { + population.insert(Individual(evaluate(crossover( *selector, *partner )), (crossover( *selector, *partner )).hypothesis.to_string())); + population.insert(Individual(evaluate(crossover( *partner, *selector )), (crossover( *partner, *selector )).hypothesis.to_string())); + partner = previousPopulation.end(); + } + } + else + population.insert(Individual(evaluate( *selector ), ( *selector ).hypothesis.to_string())); + } + } + +} + +void DiveHandler::GALearner::updateParams(const std::list& rewards) +{ + // Re-initialize reward scores + reward_score = 0.0; + if (!rewards.empty()) reward_norm = 0.0; + int discount_exp = 0; + int positives = 0; + + std::list::const_iterator i = rewards.begin(); + while (i != rewards.end()) + { + // Counting positives + if (*i == POSITIVE_REWARD) + ++positives; + + // Computing discounted rewards + 
reward_score += (*i) * pow(GAMMA, discount_exp); + reward_norm += fabs((*i) * pow(GAMMA, discount_exp)); + ++i; ++discount_exp; + } + +#ifdef DIVEHANDLER_TRAINING_DEBUG + SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); + SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); + SPQR_INFO("Reward total score: " << reward_score); +#endif + + //Adjusting GA parameters according to the obtained score + if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("mutation") >= 1.0) + setParam("mutation", 1.0); + else + setParam("mutation", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("mutation")); + + if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover") >= 1.0) + setParam("crossover", 1.0); + else + setParam("crossover", exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover")); + + if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("elite") >= 1.0) + setParam("elite", 1.0); + else + setParam("elite", exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("elite")); + +#ifdef DIVEHANDLER_TRAINING + SPQR_INFO( "Mutation rate value changed to: " << getParam("mutation") << " according to the obtained rewards. "); + SPQR_INFO( "Crossover rate value changed to: " << getParam("crossover") << " according to the obtained rewards. "); + SPQR_INFO( "Elite percentage changed to: " << getParam("elite") << " according to the obtained rewards. "); +#endif + +} + +bool DiveHandler::GALearner::updateCoeffs() +{ +#ifdef DIVEHANDLER_TRAINING + SPQR_INFO( "\nGA algorithm, iteration " << iter_count << "... " ); +#endif + + if( iter_count == MAX_ITER || converged() ) + return false; + else + { + evolutionStep(); + + float avg_fitness=.0f; + float avg_coeff=.0f; + std::set::iterator evaluator = population.begin(); + for( unsigned int sel=0; selfitness / round(getParam("elite")*POPULATION_SIZE); + avg_coeff += (evaluator->hypothesis.to_ulong()) / (1000*round(getParam("elite")*POPULATION_SIZE)); + } + + fitnessBuffer.push_front(avg_fitness); + + // Crop buffer + if (fitnessBuffer.size() > BUFFER_DIM) + fitnessBuffer.resize(BUFFER_DIM); + + coeffs.at(0) = avg_coeff; + +#ifdef DIVEHANDLER_TRAINING + SPQR_INFO("New coefficients: [ " << coeffs.at(0) << " ]"); +#endif + ++iter_count; + + return true; + } +} + + /** --------------------------- Dive Handler ---------------------------- */ @@ -455,8 +656,8 @@ bool DiveHandler::PGLearner::updateCoeffs() */ DiveHandler::DiveHandler(): diveType(DiveHandle::none), state(static_cast(SPQR::GOALIE_LEARNING_STATE)), - learner(new PGLearner(this, 2, EPSILON, T, 1.0, false)), opponentScore(0), tBall2Goal(-1), - tDive(0.0), tBackInPose(0.0), tBAGO(0), + learner(new PGLearner(this, 2, EPSILON, T, 1.0, false)), + opponentScore(0), tBall2Goal(-1), tDive(0.0), tBackInPose(0.0), tBAGO(0), tBAGOestimate(0), ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) { #ifdef DIVEHANDLER_TRAINING @@ -481,6 +682,7 @@ DiveHandler::~DiveHandler() * at which the ball is expected to reach the goal. * Then, the diveTime and the diveType parameters are defined accordingly. 
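* (Illustrative note: with the line coefficients used in this function, A1 = -v_y, B1 = v_x and
* C1 = v_x*p_y - v_y*p_x for ball position p = (p_x, p_y) and velocity v = (v_x, v_y), while the
* goal line reduces to x = 0 in the goalie frame; the intersection therefore simplifies to
* yIntercept = p_y - p_x * v_y / v_x, guarded by the determinant check against near-collinearity.)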
*/ + void DiveHandler::estimateBallProjection() { // Ball path line @@ -658,7 +860,6 @@ void DiveHandler::update(DiveHandle& diveHandle) if(state != notLearning) { // if not in playing state - if(theGameInfo.state != STATE_PLAYING) timer.reset(); else @@ -667,7 +868,7 @@ void DiveHandler::update(DiveHandle& diveHandle) // SPQR_INFO("time: "<< goalTimer.getTimeSince(goalTimer.start)); // if the ball is moving enough fast then set the timer - if( (theBallModel.estimate.velocity.abs() > SPQR::MOVING_BALL_MIN_VELOCITY && + if( (theBallModel.estimate.velocity.abs() > SPQR::GOALIE_MOVING_BALL_MIN_VELOCITY && theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) < 1000) ) { sampledVelocities.push_back( theBallModel.estimate.velocity.abs() ); @@ -684,7 +885,7 @@ void DiveHandler::update(DiveHandle& diveHandle) } } // else reset it... - if( (theBallModel.estimate.velocity.abs() < SPQR::MOVING_BALL_MIN_VELOCITY || + if( (theBallModel.estimate.velocity.abs() < SPQR::GOALIE_MOVING_BALL_MIN_VELOCITY || theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) > 4000) ) { if(timer.setTimer) @@ -736,10 +937,9 @@ void DiveHandler::update(DiveHandle& diveHandle) velocityMean += (*it) /sampledVelocities.size(); } - tBAGOestimate = 1000*(dBAGOestimate / velocityMax); + tBAGOestimate = 1000*(dBAGOestimate / (.75f*velocityMax)); SPQR_INFO("distance: " << dBAGOestimate); SPQR_INFO("velocity: " << (.75f*velocityMax)/1000); - SPQR_INFO("tBAGO: " << tBAGOestimate); } diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index bd6b49f..a7a3549 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include "Tools/Module/Module.h" @@ -52,7 +54,6 @@ MODULE(DiveHandler) PROVIDES(DiveHandle) END_MODULE - // Termination conditions #define MAX_ITER 15 #define CONVERGENCE_THRESHOLD 0.01 @@ -62,10 +63,14 @@ END_MODULE #define REWARDS_HISTORY_SIZE 10 #define EPSILON 0.05 #define T 15 -// Evaluation weight -#define LAMBDA1 0.9 -//#define LAMBDA2 0.3 +// GA parameters +#define POPULATION_SIZE 100 +#define INDIVIDUAL_SIZE 11 +#define SELECTION 0.1 +#define CROSSOVER 0.5 +#define MUTATION 0.3 +#define ELITE_SIZE 0.2 // Module class declaration class DiveHandler : public DiveHandlerBase @@ -171,9 +176,57 @@ class DiveHandler : public DiveHandlerBase } }; + -// class GALearner : public CoeffsLearner -// {}; + class GALearner : public CoeffsLearner + { + private: + // Current reward score + float reward_score; + // Current reward normalization factor + float reward_norm; + + std::list fitnessBuffer; + + class Individual + { + public: + float fitness; + std::bitset hypothesis; + Individual( std::string id): fitness(.0f), hypothesis(id){} + Individual( float f, std::string id): fitness(f), hypothesis(id){} + Individual( unsigned int id): fitness(.0f), hypothesis(id){} + inline bool operator<(const Individual& right) const { return (this->fitness) <= right.fitness; } + }; + + struct cmp + { + bool operator()(const Individual& left, const Individual& right) const + { + return left < right; + } + }; + std::set population; + + float evaluate(Individual i); + Individual rnd_mutate(Individual i); + Individual crossover(Individual mommy, const Individual& daddy); + + // Check for convergence of the algorithm + bool converged(); + + public: + GALearner( DiveHandler* _dhPtr, int _nCoeffs, float _initValue ); + + void evolutionStep(); + + // Update the GA parameters according 
to the obtained rewards + void updateParams(const std::list& rewards); + + // Update coefficients performing a step of the learning algorithm + virtual bool updateCoeffs(); + }; + private: From fd1104ca8036421b4cc429ed172ac139cdf6678c Mon Sep 17 00:00:00 2001 From: Claudio Delli Bovi Date: Mon, 17 Mar 2014 00:38:15 +0100 Subject: [PATCH 15/17] GALearner debug (almost) done --- machineLearning/DiveHandler/DiveHandler.cpp | 887 +++++++++++--------- 1 file changed, 473 insertions(+), 414 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 0f443c5..6aaa6bf 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -46,6 +46,11 @@ bool tooEarly=false; bool estimatedTime=false; bool goalDetected=false; +#ifdef DIVEHANDLER_TRAINING_DEBUG +int n_mutation = 0; +int n_crossover = 0; +#endif + MAKE_MODULE(DiveHandler, SPQR-Modules) // Shortcut to compute the magnitude of a vector @@ -155,12 +160,12 @@ bool DiveHandler::PGLearner::converged() // Check result against variation threshold if ((avg_variation < CONVERGENCE_THRESHOLD) && (std_variation < CONVERGENCE_THRESHOLD)) { - #ifdef DIVEHANDLER_TRAINING +#ifdef DIVEHANDLER_TRAINING SPQR_SUCCESS("PGLearner converged!"); SPQR_SUCCESS("Coefficients values:"); for (unsigned int i = 0; i < coeffs.size(); ++i) SPQR_SUCCESS("\t" << coeffs.at(i)); - #endif +#endif return true; } else @@ -245,9 +250,9 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_p float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) { // Dimensions check - assert(R.size() == coeffs.size()); + assert(R.size() == coeffs.size()); - return ( std::abs(diveHandler_ptr->tBAGO - ( R.at(0)*diveHandler_ptr->tBAGOestimate)) ) ; + return ( std::abs(diveHandler_ptr->tBAGO - ( R.at(0)*diveHandler_ptr->tBAGOestimate)) ) ; } @@ -301,310 +306,361 @@ bool DiveHandler::PGLearner::updateCoeffs() { #ifdef DIVEHANDLER_TRAINING - SPQR_INFO( "\nPG algorithm, iteration " << iter_count << "... " ); + SPQR_INFO( "\nPG algorithm, iteration " << iter_count << "... 
" ); #endif if( iter_count == MAX_ITER || converged() ) return false; else - { - // First generate the set of random perturbation for the current coefficients - generatePerturbations(); + { + // First generate the set of random perturbation for the current coefficients + generatePerturbations(); - // For each perturbation, evaluate with the objective function and store the result in a temporary container - std::vector evaluatedPerturbations (perturbationsBuffer.size()); - PGbuffer::const_iterator evaluator; - for(evaluator = perturbationsBuffer.begin(); evaluator != perturbationsBuffer.end(); ++evaluator) - evaluatedPerturbations.push_back( evaluatePerturbation(*evaluator) ); + // For each perturbation, evaluate with the objective function and store the result in a temporary container + std::vector evaluatedPerturbations (perturbationsBuffer.size()); + PGbuffer::const_iterator evaluator; + for(evaluator = perturbationsBuffer.begin(); evaluator != perturbationsBuffer.end(); ++evaluator) + evaluatedPerturbations.push_back( evaluatePerturbation(*evaluator) ); - // Compute the average 'gradient' for the current coefficients - std::vector coeffs_avgGradient(coeffs.size()); + // Compute the average 'gradient' for the current coefficients + std::vector coeffs_avgGradient(coeffs.size()); #ifdef RAND_PERMUTATIONS - // For each coefficient, compute the average score to determine the correspondent 'gradient' entry - PGbuffer::const_iterator current_perturbation = perturbationsBuffer.begin(); - for( unsigned int n = 0; n < coeffs.size(); ++n ) + // For each coefficient, compute the average score to determine the correspondent 'gradient' entry + PGbuffer::const_iterator current_perturbation = perturbationsBuffer.begin(); + for( unsigned int n = 0; n < coeffs.size(); ++n ) + { + std::vector score_plus, score_minus, score_zero; + + // Keep track of the perturbation type and store each score in a container + for( unsigned int i = 0; i < evaluatedPerturbations.size(); ++i ) { - std::vector score_plus, score_minus, score_zero; + if ( ((*current_perturbation).at(n) - coeffs.at(n)) > 0 ) + score_plus.push_back(evaluatedPerturbations.at(i)); + else if ( ((*current_perturbation).at(n) - coeffs.at(n)) < 0 ) + score_minus.push_back(evaluatedPerturbations.at(i)); + else + score_zero.push_back(evaluatedPerturbations.at(i)); - // Keep track of the perturbation type and store each score in a container - for( unsigned int i = 0; i < evaluatedPerturbations.size(); ++i ) - { - if ( ((*current_perturbation).at(n) - coeffs.at(n)) > 0 ) - score_plus.push_back(evaluatedPerturbations.at(i)); - else if ( ((*current_perturbation).at(n) - coeffs.at(n)) < 0 ) - score_minus.push_back(evaluatedPerturbations.at(i)); - else - score_zero.push_back(evaluatedPerturbations.at(i)); - - ++current_perturbation; - } + ++current_perturbation; + } - // Sum up all positive perturbation scores - float avg_plus = 0.0; - for (unsigned int j = 0; j < score_plus.size(); ++j) - avg_plus += score_plus.at(j) / score_plus.size(); + // Sum up all positive perturbation scores + float avg_plus = 0.0; + for (unsigned int j = 0; j < score_plus.size(); ++j) + avg_plus += score_plus.at(j) / score_plus.size(); - // Sum up all negative perturbation scores - float avg_minus = 0.0; - for (unsigned int j = 0; j < score_minus.size(); ++j) - avg_minus += score_minus.at(j) / score_minus.size(); + // Sum up all negative perturbation scores + float avg_minus = 0.0; + for (unsigned int j = 0; j < score_minus.size(); ++j) + avg_minus += score_minus.at(j) / 
score_minus.size(); - // Sum up all null perturbation scores - float avg_zero = 0.0; - for (unsigned int j = 0; j < score_zero.size(); ++j) - avg_zero += score_zero.at(j) / score_zero.size(); + // Sum up all null perturbation scores + float avg_zero = 0.0; + for (unsigned int j = 0; j < score_zero.size(); ++j) + avg_zero += score_zero.at(j) / score_zero.size(); - if( avg_zero <= avg_plus && avg_zero<= avg_minus ) - coeffs_avgGradient.at(n) = 0.0; - else - coeffs_avgGradient.at(n) = avg_plus - avg_minus; - } + if( avg_zero <= avg_plus && avg_zero<= avg_minus ) + coeffs_avgGradient.at(n) = 0.0; + else + coeffs_avgGradient.at(n) = avg_plus - avg_minus; + } #else - // For each coefficient, compute different averages to determine the correspondent 'gradient' entry - for( unsigned int n = 0; n < coeffs.size(); ++n ) + // For each coefficient, compute different averages to determine the correspondent 'gradient' entry + for( unsigned int n = 0; n < coeffs.size(); ++n ) + { + int avg_selector = 0; + float avg_minus = 0.0 , avg_zero = 0.0, avg_plus = 0.0; + for( unsigned int i = 0; i < evaluatedPerturbations.size(); i = i + pow(3,n) ) { - int avg_selector = 0; - float avg_minus = 0.0 , avg_zero = 0.0, avg_plus = 0.0; - for( unsigned int i = 0; i < evaluatedPerturbations.size(); i = i + pow(3,n) ) + for( unsigned int k = i; k < i + pow(3,n); ++k ) { - for( unsigned int k = i; k < i + pow(3,n); ++k ) - { - float evaluation = evaluatedPerturbations.at(k) / (evaluatedPerturbations.size()/3); + float evaluation = evaluatedPerturbations.at(k) / (evaluatedPerturbations.size()/3); - if( (avg_selector)%3 == 0 ) avg_minus += evaluation; - if( (avg_selector)%3 == 1 ) avg_zero += evaluation; - if( (avg_selector)%3 == 2 ) avg_plus += evaluation; - } - ++avg_selector; + if( (avg_selector)%3 == 0 ) avg_minus += evaluation; + if( (avg_selector)%3 == 1 ) avg_zero += evaluation; + if( (avg_selector)%3 == 2 ) avg_plus += evaluation; } - // evaluate An - if( avg_zero <= avg_plus && avg_zero<= avg_minus ) - coeffs_avgGradient.at(coeffs.size() - (n +1)) = 0.0; - else - coeffs_avgGradient.at(coeffs.size() - (n +1)) = avg_plus - avg_minus; + ++avg_selector; } + // evaluate An + if( avg_zero <= avg_plus && avg_zero<= avg_minus ) + coeffs_avgGradient.at(coeffs.size() - (n +1)) = 0.0; + else + coeffs_avgGradient.at(coeffs.size() - (n +1)) = avg_plus - avg_minus; + } #endif - // Avoid 'nan' when the gradient is zeroed - float normalization = 1.0; - if (magnitude(coeffs_avgGradient) != 0) - normalization = magnitude(coeffs_avgGradient); + // Avoid 'nan' when the gradient is zeroed + float normalization = 1.0; + if (magnitude(coeffs_avgGradient) != 0) + normalization = magnitude(coeffs_avgGradient); #ifdef DIVEHANDLER_TRAINING - SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization - << ", " << coeffs_avgGradient.at(1)/normalization << " ]"); + SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization + << ", " << coeffs_avgGradient.at(1)/normalization << " ]"); #endif - // Weight new gradient estimate and previous one according to the reward score - std::vector newGradient (coeffsGradient.size()); - for( unsigned int j=0; j newGradient (coeffsGradient.size()); + for( unsigned int j=0; j BUFFER_DIM) - coeffsBuffer.resize(BUFFER_DIM); + // Update coefficients history + coeffsBuffer.push_front(coeffs); + // Crop buffer + if (coeffsBuffer.size() > BUFFER_DIM) + coeffsBuffer.resize(BUFFER_DIM); - // Update the coefficients following the gradient direction - for( unsigned int i=0; 
i::iterator i = population.begin(); + for(; i != population.end(); ++i) + SPQR_INFO("Individual, encoding: " << (*i).hypothesis.to_string() << ", value: " << (((float)(*i).hypothesis.to_ulong())/1000)); - srand(time(NULL)); - for(unsigned int i=0; i< POPULATION_SIZE; ++i) - population.insert( Individual( (rand()%600) + 500) ); +#endif } float DiveHandler::GALearner::evaluate(Individual i) { - return ( std::abs(diveHandler_ptr->tBAGO - ( i.hypothesis.to_ulong()*diveHandler_ptr->tBAGOestimate)) ); + return ( std::abs(diveHandler_ptr->tBAGO - ( (((float)i.hypothesis.to_ulong())/1000)*diveHandler_ptr->tBAGOestimate)) ); } DiveHandler::GALearner::Individual DiveHandler::GALearner::rnd_mutate(Individual i) { - srand(time(NULL)); - unsigned int n_flips = rand()%3+1; - for(unsigned int j=0; j< n_flips; ++j ) - (i.hypothesis).flip(rand()%(INDIVIDUAL_SIZE-1)); +#ifdef DIVEHANDLER_TRAINING_DEBUG + SPQR_INFO("Individual " << (((float)i.hypothesis.to_ulong())/1000) << " mutates into: "); + ++n_mutation; +#endif - return i; + srand(time(NULL)); + unsigned int n_flips = rand()%3+1; + for(unsigned int j=0; j< n_flips; ++j ) + (i.hypothesis).flip(rand()%(INDIVIDUAL_SIZE-1)); + +#ifdef DIVEHANDLER_TRAINING_DEBUG + SPQR_INFO(((float)i.hypothesis.to_ulong())/1000); +#endif + + return i; } DiveHandler::GALearner::Individual DiveHandler::GALearner::crossover(Individual mommy, const Individual& daddy) { - srand(time(NULL)); - int crossover_point = rand()%INDIVIDUAL_SIZE; - for(unsigned int i = crossover_point+1; i::const_iterator i = fitnessBuffer.begin(); - std::list::const_iterator j = fitnessBuffer.begin(); ++j; - while (j != fitnessBuffer.end()) - { - avg_variation += ( (*i) - (*j) )/fitnessBuffer.size(); - ++i; ++j; - } - - // Compute variations standard deviation - float std_variation = .0f; - // Iterate over the whole buffer and compute deltas from step i-1 to i - std::list::const_iterator k = fitnessBuffer.begin(); - std::list::const_iterator t = fitnessBuffer.begin(); ++t; - while (t != fitnessBuffer.end()) - { - std_variation += ( pow((*k)-(*t) - avg_variation, 2) ) / fitnessBuffer.size(); - ++k; ++t; - } - std_variation = sqrt(std_variation); - - // Check result against variation threshold - if ((avg_variation < CONVERGENCE_THRESHOLD) && (std_variation < CONVERGENCE_THRESHOLD)) - { - #ifdef DIVEHANDLER_TRAINING - SPQR_SUCCESS("GALearner converged!"); - SPQR_SUCCESS("Coefficients values:"); - for (unsigned int i = 0; i < coeffs.size(); ++i) - SPQR_SUCCESS("\t" << coeffs.at(i)); - #endif - return true; - } - else - return false; - } + // Skip convergence check if the buffer is not full + if (fitnessBuffer.size() < BUFFER_DIM) + return false; + // Average every coefficients variation across the buffer + else + { + // Compute variations mean + float avg_variation = .0f; + // Iterate over the whole buffer and compute deltas from step i-1 to i + std::list::const_iterator i = fitnessBuffer.begin(); + std::list::const_iterator j = fitnessBuffer.begin(); ++j; + while (j != fitnessBuffer.end()) + { + avg_variation += ( (*i) - (*j) )/fitnessBuffer.size(); + ++i; ++j; + } + + // Compute variations standard deviation + float std_variation = .0f; + // Iterate over the whole buffer and compute deltas from step i-1 to i + std::list::const_iterator k = fitnessBuffer.begin(); + std::list::const_iterator t = fitnessBuffer.begin(); ++t; + while (t != fitnessBuffer.end()) + { + std_variation += ( pow((*k)-(*t) - avg_variation, 2) ) / fitnessBuffer.size(); + ++k; ++t; + } + std_variation = sqrt(std_variation); + + // Check 
result against variation threshold + if ((avg_variation < CONVERGENCE_THRESHOLD) && (std_variation < CONVERGENCE_THRESHOLD)) + { +#ifdef DIVEHANDLER_TRAINING + SPQR_SUCCESS("GALearner converged!"); + SPQR_SUCCESS("Coefficients values:"); + for (unsigned int i = 0; i < coeffs.size(); ++i) + SPQR_SUCCESS("\t" << coeffs.at(i)); +#endif + return true; + } + else + return false; + } } void DiveHandler::GALearner::evolutionStep() { - std::set previousPopulation(population); - population.clear(); - - int sel = 0; - std::set::iterator selector = previousPopulation.begin(); - std::set::iterator partner = previousPopulation.end(); - for(; selector != previousPopulation.end(); ++selector, ++sel) - { - if(sel < round(getParam("selection")*POPULATION_SIZE)) - population.insert(Individual(evaluate(*selector), (*selector).hypothesis.to_string())); - else - { - srand(time(NULL)); - - if( rand()/RAND_MAX < getParam("mutation") ) - population.insert( Individual(evaluate(rnd_mutate( *selector )), (rnd_mutate( *selector )).hypothesis.to_string()) ); - else if( rand()/RAND_MAX < sqrt(getParam("crossover")) ) - { - if(partner == previousPopulation.end()) - partner = selector; - else - { - population.insert(Individual(evaluate(crossover( *selector, *partner )), (crossover( *selector, *partner )).hypothesis.to_string())); - population.insert(Individual(evaluate(crossover( *partner, *selector )), (crossover( *partner, *selector )).hypothesis.to_string())); - partner = previousPopulation.end(); - } - } - else - population.insert(Individual(evaluate( *selector ), ( *selector ).hypothesis.to_string())); - } - } +#ifdef DIVEHANDLER_DEBUG + SPQR_INFO("Population before:"); + std::set::iterator i = population.begin(); + for(; i != population.end(); ++i) + SPQR_INFO("Individual, value: " << (((float)(*i).hypothesis.to_ulong())/1000) << ", fitness: " << ((*i).fitness)); + +#endif + std::set previousPopulation(population); + population.clear(); + + int sel = 0; + std::set::iterator selector = previousPopulation.begin(); + std::set::iterator partner = previousPopulation.end(); + for(; selector != previousPopulation.end(); ++selector, ++sel) + { + if(sel < round(getParam("selection")*POPULATION_SIZE)) + population.insert(Individual(evaluate(*selector), (*selector).hypothesis.to_string())); + else + { + if( ((float)rand())/RAND_MAX < getParam("mutation") ) + { + Individual mutated (rnd_mutate( *selector )); + population.insert( Individual(evaluate(mutated), (mutated).hypothesis.to_string()) ); + } + else if( ((float)rand())/RAND_MAX < sqrt(getParam("crossover")) ) + { + if(partner == previousPopulation.end()) + partner = selector; + else + { + Individual first_child (crossover( *selector, *partner )); + Individual second_child (crossover( *partner, *selector )); + population.insert(Individual(evaluate(first_child), first_child.hypothesis.to_string())); + population.insert(Individual(evaluate(second_child), second_child.hypothesis.to_string())); + partner = previousPopulation.end(); + } + } + else + population.insert(Individual(evaluate( *selector ), ( *selector ).hypothesis.to_string())); + } + } + +#ifdef DIVEHANDLER_TRAINING_DEBUG + SPQR_INFO("Number of mutations: " << n_mutation); + SPQR_INFO("Number of crossover: " << n_crossover); + n_mutation = 0; n_crossover = 0; + + SPQR_INFO("New population:"); + std::set::iterator i = population.begin(); + for(; i != population.end(); ++i) + SPQR_INFO("Individual, value: " << (((float)(*i).hypothesis.to_ulong())/1000) << ", fitness: " << ((*i).fitness)); + +#endif } void 
DiveHandler::GALearner::updateParams(const std::list& rewards) { - // Re-initialize reward scores - reward_score = 0.0; - if (!rewards.empty()) reward_norm = 0.0; - int discount_exp = 0; - int positives = 0; - - std::list::const_iterator i = rewards.begin(); - while (i != rewards.end()) - { - // Counting positives - if (*i == POSITIVE_REWARD) - ++positives; - - // Computing discounted rewards - reward_score += (*i) * pow(GAMMA, discount_exp); - reward_norm += fabs((*i) * pow(GAMMA, discount_exp)); - ++i; ++discount_exp; - } + // Re-initialize reward scores + reward_score = 0.0; + if (!rewards.empty()) reward_norm = 0.0; + int discount_exp = 0; + int positives = 0; + + std::list::const_iterator i = rewards.begin(); + while (i != rewards.end()) + { + // Counting positives + if (*i == POSITIVE_REWARD) + ++positives; + + // Computing discounted rewards + reward_score += (*i) * pow(GAMMA, discount_exp); + reward_norm += fabs((*i) * pow(GAMMA, discount_exp)); + ++i; ++discount_exp; + } #ifdef DIVEHANDLER_TRAINING_DEBUG - SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); - SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); - SPQR_INFO("Reward total score: " << reward_score); + SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); + SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); + SPQR_INFO("Reward total score: " << reward_score); #endif - //Adjusting GA parameters according to the obtained score - if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("mutation") >= 1.0) - setParam("mutation", 1.0); - else - setParam("mutation", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("mutation")); + //Adjusting GA parameters according to the obtained score + if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("mutation") >= 1.0) + setParam("mutation", 1.0); + else + setParam("mutation", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("mutation")); - if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover") >= 1.0) - setParam("crossover", 1.0); - else - setParam("crossover", exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover")); + if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover") >= 1.0) + setParam("crossover", 1.0); + else + setParam("crossover", exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover")); - if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("elite") >= 1.0) - setParam("elite", 1.0); - else - setParam("elite", exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("elite")); + if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("elite") >= 1.0) + setParam("elite", 1.0); + else + setParam("elite", exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("elite")); #ifdef DIVEHANDLER_TRAINING - SPQR_INFO( "Mutation rate value changed to: " << getParam("mutation") << " according to the obtained rewards. "); - SPQR_INFO( "Crossover rate value changed to: " << getParam("crossover") << " according to the obtained rewards. "); - SPQR_INFO( "Elite percentage changed to: " << getParam("elite") << " according to the obtained rewards. "); + SPQR_INFO( "Mutation rate value changed to: " << getParam("mutation") << " according to the obtained rewards. "); + SPQR_INFO( "Crossover rate value changed to: " << getParam("crossover") << " according to the obtained rewards. 
"); + SPQR_INFO( "Elite percentage changed to: " << getParam("elite") << " according to the obtained rewards. "); #endif } @@ -612,39 +668,39 @@ void DiveHandler::GALearner::updateParams(const std::list& rewards) bool DiveHandler::GALearner::updateCoeffs() { #ifdef DIVEHANDLER_TRAINING - SPQR_INFO( "\nGA algorithm, iteration " << iter_count << "... " ); + SPQR_INFO( "\nGA algorithm, iteration " << iter_count << "... " ); #endif - if( iter_count == MAX_ITER || converged() ) - return false; - else - { - evolutionStep(); + if( iter_count == MAX_ITER || converged() ) + return false; + else + { + evolutionStep(); - float avg_fitness=.0f; - float avg_coeff=.0f; - std::set::iterator evaluator = population.begin(); - for( unsigned int sel=0; selfitness / round(getParam("elite")*POPULATION_SIZE); - avg_coeff += (evaluator->hypothesis.to_ulong()) / (1000*round(getParam("elite")*POPULATION_SIZE)); - } + float avg_fitness=.0f; + float avg_coeff=.0f; + std::set::iterator evaluator = population.begin(); + for( unsigned int sel=0; selfitness / round(getParam("elite")*POPULATION_SIZE); + avg_coeff += ((float)evaluator->hypothesis.to_ulong()) / (1000*round(getParam("elite")*POPULATION_SIZE)); + } - fitnessBuffer.push_front(avg_fitness); + fitnessBuffer.push_front(avg_fitness); - // Crop buffer - if (fitnessBuffer.size() > BUFFER_DIM) - fitnessBuffer.resize(BUFFER_DIM); + // Crop buffer + if (fitnessBuffer.size() > BUFFER_DIM) + fitnessBuffer.resize(BUFFER_DIM); - coeffs.at(0) = avg_coeff; + coeffs.at(0) = avg_coeff; #ifdef DIVEHANDLER_TRAINING - SPQR_INFO("New coefficients: [ " << coeffs.at(0) << " ]"); + SPQR_INFO("New coefficients: [ " << coeffs.at(0) << " ]"); #endif - ++iter_count; + ++iter_count; - return true; - } + return true; + } } @@ -656,15 +712,15 @@ bool DiveHandler::GALearner::updateCoeffs() */ DiveHandler::DiveHandler(): diveType(DiveHandle::none), state(static_cast(SPQR::GOALIE_LEARNING_STATE)), - learner(new PGLearner(this, 2, EPSILON, T, 1.0, false)), - opponentScore(0), tBall2Goal(-1), tDive(0.0), tBackInPose(0.0), tBAGO(0), tBAGOestimate(0), - ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) + learner(new GALearner(this, 1, 1.0)), + opponentScore(0), tBall2Goal(-1), tDive(0.0), tBackInPose(0.0), tBAGO(0), tBAGOestimate(0), + ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) { #ifdef DIVEHANDLER_TRAINING - SPQR_INFO("Initializing PGlearner..."); + SPQR_INFO("Initializing GAlearner..."); std::vector coeffs = learner->getCoeffs(); - SPQR_INFO("Coefficients: alpha 1 = " << coeffs.at(0) << ", alpha 2 = " << coeffs.at(1)); - SPQR_INFO("Parameters: epsilon = " << learner->getParam("epsilon") << ", T = " << learner->getParam("T")); + SPQR_INFO("Coefficient alpha = " << coeffs.at(0)); + // SPQR_INFO("Parameters: epsilon = " << learner->getParam("epsilon") << ", T = " << learner->getParam("T")); #endif } @@ -707,25 +763,25 @@ void DiveHandler::estimateBallProjection() // Devising the type of dive to be performed - if( yIntercept > ( SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept < SPQR::GOALIE_FAR_LIMIT_Y ) - // Close intercept on the left - diveType = DiveHandle::lcloseDive; - else if( yIntercept > SPQR::GOALIE_FAR_LIMIT_Y ) - // Far intercept on the left - diveType = DiveHandle::lDive; - else if( yIntercept < (-SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept > -SPQR::GOALIE_FAR_LIMIT_Y ) - // Close intercept on the right - diveType = DiveHandle::rcloseDive; - else if( yIntercept < -SPQR::GOALIE_FAR_LIMIT_Y ) - // 
Far intercept on the right - diveType = DiveHandle::rDive; - - else if( fabs(yIntercept) < SPQR::GOALIE_CLOSE_LIMIT_Y/2) - diveType = DiveHandle::stopBall; - else - // Any other case: no dive at all - diveType = DiveHandle::none; - } + if( yIntercept > ( SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept < SPQR::GOALIE_FAR_LIMIT_Y ) + // Close intercept on the left + diveType = DiveHandle::lcloseDive; + else if( yIntercept > SPQR::GOALIE_FAR_LIMIT_Y ) + // Far intercept on the left + diveType = DiveHandle::lDive; + else if( yIntercept < (-SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept > -SPQR::GOALIE_FAR_LIMIT_Y ) + // Close intercept on the right + diveType = DiveHandle::rcloseDive; + else if( yIntercept < -SPQR::GOALIE_FAR_LIMIT_Y ) + // Far intercept on the right + diveType = DiveHandle::rDive; + + else if( fabs(yIntercept) < SPQR::GOALIE_CLOSE_LIMIT_Y/2) + diveType = DiveHandle::stopBall; + else + // Any other case: no dive at all + diveType = DiveHandle::none; + } // Using the appropriate estimate for the dive time if (diveType == DiveHandle::lDive || diveType == DiveHandle::rDive ) @@ -741,7 +797,7 @@ void DiveHandler::estimateBallProjection() ballProjectionIntercept = yIntercept; // Estimated distance from the ball - distanceBall2Goal = theBallModel.estimate.position.abs(); + distanceBall2Goal = theBallModel.estimate.position.abs(); } /* @@ -752,13 +808,13 @@ void DiveHandler::estimateBallProjection() void DiveHandler::estimateDiveTimes() { // Check whether the ball is actually moving toward the goal - if ( (theBallModel.estimate.velocity.abs() != 0.0) && - (theBallModel.estimate.velocity.x < 0.0) ) + if ( (theBallModel.estimate.velocity.abs() != 0.0) && + (theBallModel.estimate.velocity.x < 0.0) ) // Use a constant velocity approximation to the estimate the time interval - tBall2Goal = 1000.0 * ( distanceBall2Goal / theBallModel.estimate.velocity.abs() ); + tBall2Goal = 1000.0 * ( distanceBall2Goal / theBallModel.estimate.velocity.abs() ); else // Otherwise, set the parameter to a meaningless value - tBall2Goal = -1.0; + tBall2Goal = -1.0; // Using the appropriate estimates for recover and reposition times float tRecover = 0.0; @@ -803,144 +859,147 @@ inline float DiveHandler::computeDiveAndRecoverTime(float alpha1, float alpha2) */ void DiveHandler::update(DiveHandle& diveHandle) { + if ( time(NULL) % 6 == 0 ) + srand(time(NULL)); + // Check you're actually the goalie... 
if (theRobotInfo.number == 1) - { - // Compute the ball projection estimate + { + // Compute the ball projection estimate estimateBallProjection(); // Update the DiveHandle diveHandle.ballProjectionEstimate = ballProjectionIntercept; #ifdef DIVEHANDLER_TRAINING - if( timer.getTimeSince(timer.fallen) > 5000 && timer.getTimeSince(timer.fallen) < 5040 && timer.fallen != 0) - SPQR_SUCCESS("TooEarly time window START..."); + if( timer.getTimeSince(timer.fallen) > 5000 && timer.getTimeSince(timer.fallen) < 5040 && timer.fallen != 0) + SPQR_SUCCESS("TooEarly time window START..."); - if( timer.getTimeSince(timer.fallen) > 9961 && timer.getTimeSince(timer.fallen) < 9999 && timer.fallen != 0) - SPQR_SUCCESS("TooEarly time window END."); + if( timer.getTimeSince(timer.fallen) > 9961 && timer.getTimeSince(timer.fallen) < 9999 && timer.fallen != 0) + SPQR_SUCCESS("TooEarly time window END."); #endif - if(opponentScore != (int)theOpponentTeamInfo.score && !goalDetected) - { - if( timer.getTimeSince(timer.fallen) > 5000 && timer.getTimeSince(timer.fallen) < 10000 && - (unsigned int) timer.fallen != 0) - { + if(opponentScore != (int)theOpponentTeamInfo.score && !goalDetected) + { + if( timer.getTimeSince(timer.fallen) > 5000 && timer.getTimeSince(timer.fallen) < 10000 && + (unsigned int) timer.fallen != 0) + { #ifdef DIVEHANDLER_TRAINING - SPQR_FAILURE("too FAST dude!"); + SPQR_FAILURE("too FAST dude!"); #endif - tBAGO += 3000; - } - else - { -// if(goalTimer.setTimer) - { + tBAGO += 3000; + } + else + { + // if(goalTimer.setTimer) + { #ifdef DIVEHANDLER_TRAINING - SPQR_FAILURE("too SLOW dude!"); + SPQR_FAILURE("too SLOW dude!"); #endif - tBAGO = goalTimer.getTimeSince(goalTimer.start) -1500; - } - } - estimatedTime=true; - goalDetected=true; - } - - if(theGameInfo.state == STATE_SET) - { - tBAGOestimate=0; - dBAGOestimate=0; - sampledVelocities.clear(); - goalTimer.reset(); - } - - // Check whether the ball is close enough - if( (distanceBall2Goal < SPQR::FIELD_DIMENSION_X) && (fabs(ballProjectionIntercept) < SPQR::FIELD_DIMENSION_Y) ) + tBAGO = goalTimer.getTimeSince(goalTimer.start) -1500; + } + } + estimatedTime=true; + goalDetected=true; + } + + if(theGameInfo.state == STATE_SET) + { + tBAGOestimate=0; + dBAGOestimate=0; + sampledVelocities.clear(); + goalTimer.reset(); + } + + // Check whether the ball is close enough + if( (distanceBall2Goal < SPQR::FIELD_DIMENSION_X) && (fabs(ballProjectionIntercept) < SPQR::FIELD_DIMENSION_Y) ) { // Estimate all temporal parameters estimateDiveTimes(); - if(state != notLearning) - { - // if not in playing state - if(theGameInfo.state != STATE_PLAYING) - timer.reset(); - else - { -// if(goalTimer.setTimer) -// SPQR_INFO("time: "<< goalTimer.getTimeSince(goalTimer.start)); - - // if the ball is moving enough fast then set the timer - if( (theBallModel.estimate.velocity.abs() > SPQR::GOALIE_MOVING_BALL_MIN_VELOCITY && - theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) < 1000) ) - { - sampledVelocities.push_back( theBallModel.estimate.velocity.abs() ); - if(!timer.setTimer) - { - timer.set(clock()); - goalTimer.set(clock()); - dBAGOestimate=distanceBall2Goal; -// tBAGOestimate=tBall2Goal; + if(state != notLearning) + { + // if not in playing state + if(theGameInfo.state != STATE_PLAYING) + timer.reset(); + else + { + // if(goalTimer.setTimer) + // SPQR_INFO("time: "<< goalTimer.getTimeSince(goalTimer.start)); + + // if the ball is moving enough fast then set the timer + if( (theBallModel.estimate.velocity.abs() > SPQR::GOALIE_MOVING_BALL_MIN_VELOCITY && + 
theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) < 1000) ) + { + sampledVelocities.push_back( theBallModel.estimate.velocity.abs() ); + if(!timer.setTimer) + { + timer.set(clock()); + goalTimer.set(clock()); + dBAGOestimate=distanceBall2Goal; + // tBAGOestimate=tBall2Goal; #ifdef DIVEHANDLER_TRAINING - std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; - std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set goal Timer!" << "\033[0m" << std::endl; + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set goal Timer!" << "\033[0m" << std::endl; #endif - } - } - // else reset it... - if( (theBallModel.estimate.velocity.abs() < SPQR::GOALIE_MOVING_BALL_MIN_VELOCITY || - theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) > 4000) ) - { - if(timer.setTimer) - { - timer.reset(); + } + } + // else reset it... + if( (theBallModel.estimate.velocity.abs() < SPQR::GOALIE_MOVING_BALL_MIN_VELOCITY || + theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) > 4000) ) + { + if(timer.setTimer) + { + timer.reset(); #ifdef DIVEHANDLER_TRAINING - std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset Timer!" << "\033[0m" << std::endl; + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset Timer!" << "\033[0m" << std::endl; #endif - } - if(goalTimer.setTimer) - { - goalTimer.reset(); + } + if(goalTimer.setTimer) + { + goalTimer.reset(); #ifdef DIVEHANDLER_TRAINING - std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset goal Timer!" << "\033[0m" << std::endl; + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset goal Timer!" << "\033[0m" << std::endl; #endif - tBAGOestimate=0; - dBAGOestimate=0; - sampledVelocities.clear(); - } - } - - // if the goalie succeeded - if(ownScore != (int)theOwnTeamInfo.score && !estimatedTime) - { + tBAGOestimate=0; + dBAGOestimate=0; + sampledVelocities.clear(); + } + } + + // if the goalie succeeded + if(ownScore != (int)theOwnTeamInfo.score && !estimatedTime) + { #ifdef DIVEHANDLER_TRAINING - SPQR_SUCCESS("SUPER!"); + SPQR_SUCCESS("SUPER!"); #endif - tBAGO -= 200; - estimatedTime=true; - } - - // if the goalie dives - if( (int)theFallDownState.state == (int)FallDownState::fallen ) - { - timer.fallen=clock(); - tBAGO = timer.getTimeSince(timer.start); - } - } - } - - if(estimatedTime) - { - float velocityMean=0; - float velocityMax=0; - std::list::const_iterator it=sampledVelocities.begin(); - for(; it != sampledVelocities.end(); ++it) - { - if((*it) > velocityMax) velocityMax=(*it); - velocityMean += (*it) /sampledVelocities.size(); - } - - tBAGOestimate = 1000*(dBAGOestimate / (.75f*velocityMax)); - SPQR_INFO("distance: " << dBAGOestimate); - SPQR_INFO("velocity: " << (.75f*velocityMax)/1000); - } + tBAGO -= 200; + estimatedTime=true; + } + + // if the goalie dives + if( (int)theFallDownState.state == (int)FallDownState::fallen ) + { + timer.fallen=clock(); + tBAGO = timer.getTimeSince(timer.start); + } + } + } + + if(estimatedTime) + { + float velocityMean=0; + float velocityMax=0; + std::list::const_iterator it=sampledVelocities.begin(); + for(; it != sampledVelocities.end(); ++it) + { + if((*it) > velocityMax) velocityMax=(*it); + velocityMean += (*it) /sampledVelocities.size(); + } + + tBAGOestimate = 1000*(dBAGOestimate / (.75f*velocityMax)); + SPQR_INFO("distance: " << dBAGOestimate); + SPQR_INFO("velocity: " << (.75f*velocityMax)/1000); + } #ifdef DIVEHANDLER_DEBUG @@ -971,7 +1030,7 @@ void DiveHandler::update(DiveHandle& 
diveHandle) else if( state == waitReward ) { // The opponent team scores: the goalie failed and gets a negative reward - if(goalDetected && estimatedTime) + if(goalDetected && estimatedTime) { // The learner obtains a negative reward rewardHistory.push_front(NEGATIVE_REWARD); @@ -983,20 +1042,20 @@ void DiveHandler::update(DiveHandle& diveHandle) opponentScore = (int)theOpponentTeamInfo.score; #ifdef DIVEHANDLER_TRAINING - SPQR_FAILURE("The opponent team scored! Negative reward for the learner."); + SPQR_FAILURE("The opponent team scored! Negative reward for the learner."); #endif // A reward has been received: re-enable learning state = learning; - // Clear the pending reward + // Clear the pending reward if(!diveHandle.rewardAck) diveHandle.rewardAck = true; - goalDetected=false; - estimatedTime=false; - stamp =true; + goalDetected=false; + estimatedTime=false; + stamp =true; } // The own team scores: user-guided move to provide the goalie a positive reward - else if(ownScore != (int)theOwnTeamInfo.score && estimatedTime) + else if(ownScore != (int)theOwnTeamInfo.score && estimatedTime) { // The learner obtains a positive reward rewardHistory.push_front(POSITIVE_REWARD); @@ -1016,8 +1075,8 @@ void DiveHandler::update(DiveHandle& diveHandle) if(!diveHandle.rewardAck) diveHandle.rewardAck = true; - estimatedTime=false; - stamp=true; + estimatedTime=false; + stamp=true; } } @@ -1025,28 +1084,28 @@ void DiveHandler::update(DiveHandle& diveHandle) if( state == learning ) learner->updateParams(rewardHistory); - // Compute the dive time using the current coefficients as T = alpha2 * (alpha1*T_PAPO - T_dive) - float diveTime = ( (learner->getCoeffs()).at(0) * tBall2Goal ); + // Compute the dive time using the current coefficients as T = alpha2 * (alpha1*T_PAPO - T_dive) + float diveTime = ( (learner->getCoeffs()).at(0) * tBall2Goal ); #ifdef DIVEHANDLER_TRAINING - if(stamp) - { - SPQR_INFO("BAGO: " << tBAGO ); - SPQR_INFO("BAGO estimate: " << tBAGOestimate ); - SPQR_ERR("BAGO error: "<< std::abs(tBAGO - tBAGOestimate) ); - stamp = false; - } + if(stamp) + { + SPQR_INFO("BAGO: " << tBAGO ); + SPQR_INFO("BAGO estimate: " << tBAGOestimate ); + SPQR_ERR("BAGO error: "<< std::abs(tBAGO - tBAGOestimate) ); + stamp = false; + } #endif #ifdef DIVEHANDLER_DEBUG SPQR_INFO( "Estimated overall time to dive and recover position: " << - computeDiveAndRecoverTime( (learner->getCoeffs()).at(0), (learner->getCoeffs()).at(1) ) ); + computeDiveAndRecoverTime( (learner->getCoeffs()).at(0), (learner->getCoeffs()).at(1) ) ); SPQR_INFO("Suggested dive in " << diveTime << " ms. 
"); #endif // Update the DiveHandle if (diveTime > 0.0) - diveHandle.diveTime = diveTime -tDive; + diveHandle.diveTime = diveTime -tDive; else diveHandle.diveTime = -1.0; } @@ -1054,8 +1113,8 @@ void DiveHandler::update(DiveHandle& diveHandle) else { diveHandle.diveTime = -1; - diveHandle.diveType = diveType; - timer.reset(); + diveHandle.diveType = diveType; + timer.reset(); } } } From bf7f68ddbebc62b57b7eb0667e1d206aeb1fd3a4 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Wed, 19 Mar 2014 23:31:47 +0100 Subject: [PATCH 16/17] GA tuning --- machineLearning/DiveHandler/DiveHandler.cpp | 45 ++++++++++++--------- machineLearning/DiveHandler/DiveHandler.h | 4 +- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 6aaa6bf..678e65c 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -23,8 +23,8 @@ // Uncomment to have debug information //#define DIVEHANDLER_DEBUG -#define DIVEHANDLER_TRAINING_DEBUG -#define DIVEHANDLER_TRAINING +//#define DIVEHANDLER_TRAINING_DEBUG +//#define DIVEHANDLER_TRAINING //#define RAND_PERMUTATIONS #define NEGATIVE_REWARD -1.0 @@ -46,7 +46,7 @@ bool tooEarly=false; bool estimatedTime=false; bool goalDetected=false; -#ifdef DIVEHANDLER_TRAINING_DEBUG +#ifdef DIVEHANDLER_TRAINING int n_mutation = 0; int n_crossover = 0; #endif @@ -451,7 +451,7 @@ DiveHandler::GALearner::GALearner( DiveHandler* _dhPtr, int _nCoeffs, float _ini srand(time(NULL)); for(unsigned int i=0; i< POPULATION_SIZE; ++i) - population.insert( Individual( (rand()%600) + 500) ); + population.insert( Individual( (rand()%600) + 600) ); #ifdef DIVEHANDLER_DEBUG std::set::iterator i = population.begin(); @@ -471,13 +471,16 @@ DiveHandler::GALearner::Individual DiveHandler::GALearner::rnd_mutate(Individual { #ifdef DIVEHANDLER_TRAINING_DEBUG SPQR_INFO("Individual " << (((float)i.hypothesis.to_ulong())/1000) << " mutates into: "); - ++n_mutation; #endif - srand(time(NULL)); +#ifdef DIVEHANDLER_TRAINING + ++n_mutation; +#endif + +// srand(time(NULL)); unsigned int n_flips = rand()%3+1; for(unsigned int j=0; j< n_flips; ++j ) - (i.hypothesis).flip(rand()%(INDIVIDUAL_SIZE-1)); + (i.hypothesis).flip(rand()%(INDIVIDUAL_SIZE-7) + 2); #ifdef DIVEHANDLER_TRAINING_DEBUG SPQR_INFO(((float)i.hypothesis.to_ulong())/1000); @@ -489,12 +492,15 @@ DiveHandler::GALearner::Individual DiveHandler::GALearner::rnd_mutate(Individual DiveHandler::GALearner::Individual DiveHandler::GALearner::crossover(Individual mommy, const Individual& daddy) { #ifdef DIVEHANDLER_TRAINING_DEBUG - ++n_crossover; SPQR_INFO("Couple " << ((float)mommy.hypothesis.to_ulong())/1000 << " and " << ((float)daddy.hypothesis.to_ulong())/1000); #endif +#ifdef DIVEHANDLER_TRAINING + ++n_crossover; +#endif + // srand(time(NULL)); - int crossover_point = rand() % (INDIVIDUAL_SIZE-5) +2; + int crossover_point = rand() % (INDIVIDUAL_SIZE-7) +2; #ifdef DIVEHANDLER_TRAINING_DEBUG SPQR_INFO("Crossover point: " << crossover_point); @@ -600,10 +606,13 @@ void DiveHandler::GALearner::evolutionStep() } } +#ifdef DIVEHANDLER_TRAINING + SPQR_INFO("Number of mutations: " << n_mutation); + SPQR_INFO("Number of crossover: " << n_crossover); + n_mutation = 0; n_crossover = 0; +#endif + #ifdef DIVEHANDLER_TRAINING_DEBUG - SPQR_INFO("Number of mutations: " << n_mutation); - SPQR_INFO("Number of crossover: " << n_crossover); - n_mutation = 0; n_crossover = 0; SPQR_INFO("New population:"); std::set::iterator i = 
population.begin(); @@ -635,7 +644,7 @@ void DiveHandler::GALearner::updateParams(const std::list& rewards) ++i; ++discount_exp; } -#ifdef DIVEHANDLER_TRAINING_DEBUG +#ifdef DIVEHANDLER_TRAINING SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); SPQR_INFO("Reward total score: " << reward_score); @@ -650,7 +659,7 @@ void DiveHandler::GALearner::updateParams(const std::list& rewards) if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover") >= 1.0) setParam("crossover", 1.0); else - setParam("crossover", exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover")); + setParam("crossover", exp( -reward_score / (REWARDS_HISTORY_SIZE) ) * getParam("crossover")); if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("elite") >= 1.0) setParam("elite", 1.0); @@ -738,7 +747,6 @@ DiveHandler::~DiveHandler() * at which the ball is expected to reach the goal. * Then, the diveTime and the diveType parameters are defined accordingly. */ - void DiveHandler::estimateBallProjection() { // Ball path line @@ -859,7 +867,7 @@ inline float DiveHandler::computeDiveAndRecoverTime(float alpha1, float alpha2) */ void DiveHandler::update(DiveHandle& diveHandle) { - if ( time(NULL) % 6 == 0 ) + if ( time(NULL) % 30 == 0 ) srand(time(NULL)); // Check you're actually the goalie... @@ -896,6 +904,7 @@ void DiveHandler::update(DiveHandle& diveHandle) SPQR_FAILURE("too SLOW dude!"); #endif tBAGO = goalTimer.getTimeSince(goalTimer.start) -1500; + if(tBAGO > 4000000000) tBAGO=1000; } } estimatedTime=true; @@ -997,8 +1006,8 @@ void DiveHandler::update(DiveHandle& diveHandle) } tBAGOestimate = 1000*(dBAGOestimate / (.75f*velocityMax)); - SPQR_INFO("distance: " << dBAGOestimate); - SPQR_INFO("velocity: " << (.75f*velocityMax)/1000); +// SPQR_INFO("distance: " << dBAGOestimate); +// SPQR_INFO("velocity: " << (.75f*velocityMax)/1000); } diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index a7a3549..0136149 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -68,8 +68,8 @@ END_MODULE #define INDIVIDUAL_SIZE 11 #define SELECTION 0.1 -#define CROSSOVER 0.5 -#define MUTATION 0.3 +#define CROSSOVER 0.3 +#define MUTATION 0.2 #define ELITE_SIZE 0.2 // Module class declaration From f81a5fae72ac2340d70aac149a88c8063e9416f4 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Tue, 25 Mar 2014 18:13:25 +0100 Subject: [PATCH 17/17] updates --- machineLearning/DiveHandler/DiveHandler.cpp | 42 ++++++++++++++------- machineLearning/DiveHandler/DiveHandler.h | 4 ++ 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 678e65c..4141169 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -24,7 +24,7 @@ // Uncomment to have debug information //#define DIVEHANDLER_DEBUG //#define DIVEHANDLER_TRAINING_DEBUG -//#define DIVEHANDLER_TRAINING +#define DIVEHANDLER_TRAINING //#define RAND_PERMUTATIONS #define NEGATIVE_REWARD -1.0 @@ -398,7 +398,7 @@ bool DiveHandler::PGLearner::updateCoeffs() #ifdef DIVEHANDLER_TRAINING SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization - << ", " << coeffs_avgGradient.at(1)/normalization << " ]"); + /*<< ", " << coeffs_avgGradient.at(1)/normalization */<< " ]"); 
#endif // Weight new gradient estimate and previous one according to the reward score std::vector newGradient (coeffsGradient.size()); @@ -407,7 +407,7 @@ bool DiveHandler::PGLearner::updateCoeffs() #ifdef DIVEHANDLER_TRAINING SPQR_INFO("New policy gradient: [ " << newGradient.at(0) - << ", " << newGradient.at(1) << " ]"); + << /*", " << newGradient.at(1) << */" ]"); #endif // Update coefficients history @@ -429,7 +429,7 @@ bool DiveHandler::PGLearner::updateCoeffs() } #ifdef DIVEHANDLER_TRAINING - SPQR_INFO("New coefficients: [ " << coeffs.at(0) << ", " << coeffs.at(1) << " ]"); + SPQR_INFO("New coefficients: [ " << coeffs.at(0) << /*", " << coeffs.at(1) <<*/ " ]"); #endif ++iter_count; @@ -574,9 +574,9 @@ void DiveHandler::GALearner::evolutionStep() std::set previousPopulation(population); population.clear(); - int sel = 0; + int sel = 0; std::set::iterator selector = previousPopulation.begin(); - std::set::iterator partner = previousPopulation.end(); + std::set::iterator partner = previousPopulation.end(); for(; selector != previousPopulation.end(); ++selector, ++sel) { if(sel < round(getParam("selection")*POPULATION_SIZE)) @@ -588,17 +588,17 @@ void DiveHandler::GALearner::evolutionStep() Individual mutated (rnd_mutate( *selector )); population.insert( Individual(evaluate(mutated), (mutated).hypothesis.to_string()) ); } - else if( ((float)rand())/RAND_MAX < sqrt(getParam("crossover")) ) + else if( ((float)rand())/RAND_MAX < sqrt(getParam("crossover"))) { - if(partner == previousPopulation.end()) - partner = selector; + if(partner == previousPopulation.end()) + partner = selector; else { Individual first_child (crossover( *selector, *partner )); Individual second_child (crossover( *partner, *selector )); population.insert(Individual(evaluate(first_child), first_child.hypothesis.to_string())); population.insert(Individual(evaluate(second_child), second_child.hypothesis.to_string())); - partner = previousPopulation.end(); + partner = previousPopulation.end(); } } else @@ -607,6 +607,7 @@ void DiveHandler::GALearner::evolutionStep() } #ifdef DIVEHANDLER_TRAINING + SPQR_INFO("Population size: " << population.size()); SPQR_INFO("Number of mutations: " << n_mutation); SPQR_INFO("Number of crossover: " << n_crossover); n_mutation = 0; n_crossover = 0; @@ -721,7 +722,11 @@ bool DiveHandler::GALearner::updateCoeffs() */ DiveHandler::DiveHandler(): diveType(DiveHandle::none), state(static_cast(SPQR::GOALIE_LEARNING_STATE)), - learner(new GALearner(this, 1, 1.0)), +#ifdef PG_LEARNER + learner(new PGLearner(this, 1, 1.0)), +#else + learner(new GALearner(this, 1, 1.0)), +#endif opponentScore(0), tBall2Goal(-1), tDive(0.0), tBackInPose(0.0), tBAGO(0), tBAGOestimate(0), ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) { @@ -1005,7 +1010,8 @@ void DiveHandler::update(DiveHandle& diveHandle) velocityMean += (*it) /sampledVelocities.size(); } - tBAGOestimate = 1000*(dBAGOestimate / (.75f*velocityMax)); + if(velocityMax != .0f) + tBAGOestimate = 1000*(dBAGOestimate / (.75f*velocityMax)); // SPQR_INFO("distance: " << dBAGOestimate); // SPQR_INFO("velocity: " << (.75f*velocityMax)/1000); } @@ -1020,13 +1026,15 @@ void DiveHandler::update(DiveHandle& diveHandle) #endif // The module is in the learning state and a reward has been received - if( state == learning ) + if( clock() % 240 && state == learning ) { // Perform a single iteration of the learning algorithm if( learner->updateCoeffs() ) { +#ifdef PG_LEARNER // Change the state in 'waiting for reward' state = 
waitReward; +#endif // Flag a pending reward to the goalie behavior diveHandle.rewardAck = false; } @@ -1036,7 +1044,9 @@ void DiveHandler::update(DiveHandle& diveHandle) } // The module is in the learning state, waiting for the next reward - else if( state == waitReward ) +#ifdef PG_LEARNER + else if( state == waitReward ) +#endif { // The opponent team scores: the goalie failed and gets a negative reward if(goalDetected && estimatedTime) @@ -1090,7 +1100,11 @@ void DiveHandler::update(DiveHandle& diveHandle) } // Use the reward to adjust the algorithm parameters +#ifdef PG_LEARNER if( state == learning ) +#else + if( state == learning && diveHandle.rewardAck ) +#endif learner->updateParams(rewardHistory); // Compute the dive time using the current coefficients as T = alpha2 * (alpha1*T_PAPO - T_dive) diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 0136149..720161b 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -57,12 +57,16 @@ END_MODULE // Termination conditions #define MAX_ITER 15 #define CONVERGENCE_THRESHOLD 0.01 +// PG algorithm define, if commented the module performs a GA algorithm instead +#define PG_LEARNER + // PG parameters #define GAMMA 0.5 #define BUFFER_DIM 10 #define REWARDS_HISTORY_SIZE 10 #define EPSILON 0.05 #define T 15 + // GA parameters #define POPULATION_SIZE 100 #define INDIVIDUAL_SIZE 11