From 36a3532c471953d2489058fc327532e39d5151e7 Mon Sep 17 00:00:00 2001 From: Claudio Delli Bovi Date: Tue, 28 Jan 2014 18:28:04 +0100 Subject: [PATCH 01/17] TOFIX: negative coeffs + rewards --- machineLearning/DiveHandler/DiveHandler.cpp | 23 +++++++++++++-------- machineLearning/DiveHandler/DiveHandler.h | 2 +- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index df6d44d..9f5d5bb 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -1,7 +1,7 @@ /** * @file DiveHandler.cpp * -* This header file contains the implementation of a module working as a dive handler for the goalie. +* This source file contains the implementation of a module working as a dive handler for the goalie. * Such handler is activated when the ball gets in the own field side, and it computes an estimate of its projection toward the goal * with respect to the goalie reference frame. It also provides estimates for the amount of time needed to dive, save the ball and * then get back to the goalie position. This measure is compared against the estimated time the ball needs to reach the goal. @@ -230,7 +230,6 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_p } } - /* TOCOMMENT */ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) { @@ -356,10 +355,14 @@ bool DiveHandler::PGLearner::updateCoeffs() coeffs_avgGradient.at(coeffs.size() - (n +1)) = avg_plus - avg_minus; } #endif + // Avoid 'nan' when the gradient is zeroed + float normalization = 1.0; + if (magnitude(coeffs_avgGradient) != 0) + normalization = magnitude(coeffs_avgGradient); #ifdef DIVEHANDLER_TRAINING - SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/magnitude(coeffs_avgGradient) - << ", " << coeffs_avgGradient.at(1)/magnitude(coeffs_avgGradient) << " ]"); + SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization + << ", " << coeffs_avgGradient.at(1)/normalization << " ]"); #endif // Update coefficients history @@ -370,7 +373,7 @@ bool DiveHandler::PGLearner::updateCoeffs() // Update the coefficients following the gradient direction for( unsigned int i=0; i REWARDS_HISTORY_SIZE) rewardHistory.resize(REWARDS_HISTORY_SIZE); + // Update own score + ownScore = (int)theOwnTeamInfo.score; + #ifdef DIVEHANDLER_TRAINING SPQR_SUCCESS("The goalie has succeeded! Positive reward for the learner. 
"); #endif @@ -616,7 +622,6 @@ void DiveHandler::update(DiveHandle& diveHandle) // Clear the pending reward if(!diveHandle.rewardAck) diveHandle.rewardAck = true; - } } diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 7b4f763..88fc986 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -185,7 +185,7 @@ class DiveHandler : public DiveHandlerBase // Obtained rewards std::list rewardHistory; - // Opponent team current score + // Current scores int opponentScore; int ownScore; From c01ff3c007722b53c077baccec4c97237b40d107 Mon Sep 17 00:00:00 2001 From: Claudio Delli Bovi Date: Tue, 28 Jan 2014 21:01:56 +0100 Subject: [PATCH 02/17] TOFIX: reward values --- machineLearning/DiveHandler/DiveHandler.cpp | 32 +++++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 9f5d5bb..076358d 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -22,6 +22,7 @@ // Uncomment to have debug information //#define DIVEHANDLER_DEBUG +#define DIVEHANDLER_TRAINING_DEBUG #define DIVEHANDLER_TRAINING //#define RAND_PERMUTATIONS @@ -66,9 +67,10 @@ void DiveHandler::CoeffsLearner::setCoeffs(const std::vector& _coeffs) void DiveHandler::CoeffsLearner::setParam(const std::string& _key, float _value) { - // "Smart" insertion procedure using iterators (C++ 11) - std::map::iterator iter = params.begin(); - params.insert( iter, std::pair< std::string, float >(_key, _value) ); + params[_key] = _value; +// // "Smart" insertion procedure using iterators (C++ 11) +// std::map::iterator iter = params.begin(); +// params.insert( std::pair< std::string, float >(_key, _value) ); } @@ -245,23 +247,36 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) { float reward_score = 0.0; int discount_exp = 0; +#ifdef DIVEHANDLER_TRAINING_DEBUG + int positives = 0, negatives = 0; +#endif + std::list::const_iterator i = rewards.begin(); while (i != rewards.end()) { +#ifdef DIVEHANDLER_TRAINING_DEBUG + if (*i == POSITIVE_REWARD) ++positives; + else ++ negatives; +#endif // Computing discounted rewards reward_score += (*i) * pow(GAMMA, discount_exp); - ++i; ++discount_exp; + ++i; ++discount_exp; } +#ifdef DIVEHANDLER_TRAINING_DEBUG + SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); + SPQR_INFO("Negative rewards: " << negatives << " out of " << rewards.size()); + SPQR_INFO("Reward total score: " << reward_score); +#endif //Adjusting PG parameters according to the obtained score - setParam("epsilon", exp( reward_score / rewards.size() ) * getParam("epsilon")); + setParam("epsilon", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("epsilon")); #ifdef DIVEHANDLER_TRAINING SPQR_INFO( "Epsilon value changed to: " << getParam("epsilon") << " according to the obtained rewards. 
"); #endif #ifdef RAND_PERMUTATIONS - setParam("T", exp( reward_score / rewards.size() ) * getParam("T")); + setParam("T", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("T")); #endif } @@ -373,8 +388,13 @@ bool DiveHandler::PGLearner::updateCoeffs() // Update the coefficients following the gradient direction for( unsigned int i=0; i Date: Wed, 29 Jan 2014 20:02:19 +0100 Subject: [PATCH 03/17] TOFIX: Evaluation of hypotheses --- machineLearning/DiveHandler/DiveHandler.cpp | 49 ++++++++++++++------- machineLearning/DiveHandler/DiveHandler.h | 9 +++- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 076358d..4037c45 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -26,8 +26,8 @@ #define DIVEHANDLER_TRAINING //#define RAND_PERMUTATIONS -#define NEGATIVE_REWARD -1.0 -#define POSITIVE_REWARD 1.0 +#define NEGATIVE_REWARD -0.5 +#define POSITIVE_REWARD 1.5 // Debug messages template #define SPQR_ERR(x) std::cerr << "\033[22;31;1m" <<"[DiveHandler] " << x << "\033[0m"<< std::endl; @@ -68,9 +68,6 @@ void DiveHandler::CoeffsLearner::setCoeffs(const std::vector& _coeffs) void DiveHandler::CoeffsLearner::setParam(const std::string& _key, float _value) { params[_key] = _value; -// // "Smart" insertion procedure using iterators (C++ 11) -// std::map::iterator iter = params.begin(); -// params.insert( std::pair< std::string, float >(_key, _value) ); } @@ -88,7 +85,9 @@ void DiveHandler::CoeffsLearner::setParam(const std::string& _key, float _value) */ DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, float _epsilon, int _T, float _initValue, bool randomize ): // Initialize the base class - CoeffsLearner(_nCoeffs, _initValue, _dhPtr) + CoeffsLearner(_nCoeffs, _initValue, _dhPtr), + // Initialize the gradient estimate + coeffsGradient(_nCoeffs, 0.0), positivesWeight(0.0) { // Initializing coefficients if(randomize) @@ -238,7 +237,8 @@ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) // Dimensions check assert(R.size() == coeffs.size()); // Generate perturbated policy and call the DiveHandler object for evaluation - return diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); + float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); + return LAMBDA*fabs(tDiveAndRecover) + (1-LAMBDA)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover); } @@ -247,24 +247,24 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) { float reward_score = 0.0; int discount_exp = 0; -#ifdef DIVEHANDLER_TRAINING_DEBUG - int positives = 0, negatives = 0; -#endif + int positives = 0; std::list::const_iterator i = rewards.begin(); while (i != rewards.end()) { -#ifdef DIVEHANDLER_TRAINING_DEBUG - if (*i == POSITIVE_REWARD) ++positives; - else ++ negatives; -#endif + // Counting positives + if (*i == POSITIVE_REWARD) + ++positives; + // Computing discounted rewards reward_score += (*i) * pow(GAMMA, discount_exp); ++i; ++discount_exp; } + positivesWeight = static_cast(positives)/rewards.size(); + #ifdef DIVEHANDLER_TRAINING_DEBUG SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); - SPQR_INFO("Negative rewards: " << negatives << " out of " << rewards.size()); + SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); SPQR_INFO("Reward total score: " 
<< reward_score); #endif @@ -375,10 +375,22 @@ bool DiveHandler::PGLearner::updateCoeffs() if (magnitude(coeffs_avgGradient) != 0) normalization = magnitude(coeffs_avgGradient); + #ifdef DIVEHANDLER_TRAINING SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization << ", " << coeffs_avgGradient.at(1)/normalization << " ]"); #endif + // Weight new gradient estimate and previous one according to the reward score + std::vector newGradient (coeffsGradient.size()); + for( unsigned int j=0; j coeffsGradient; + // Weight of the current gradient estimate + float positivesWeight; + // Memory buffer for the PG algorithm PGbuffer coeffsBuffer; // Set of perturbations to be performed From 40ffbc53e7c69ebfca8301428fbb3d34f8d2857c Mon Sep 17 00:00:00 2001 From: Claudio Delli Bovi Date: Thu, 30 Jan 2014 18:07:39 +0100 Subject: [PATCH 04/17] TOTUNE: Evaluation of hypotheses --- machineLearning/DiveHandler/DiveHandler.cpp | 7 +++++-- machineLearning/DiveHandler/DiveHandler.h | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 4037c45..ba646ee 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -238,7 +238,10 @@ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) assert(R.size() == coeffs.size()); // Generate perturbated policy and call the DiveHandler object for evaluation float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); - return LAMBDA*fabs(tDiveAndRecover) + (1-LAMBDA)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover); + + return (1.0-LAMBDA1-LAMBDA2)*fabs(tDiveAndRecover) + + LAMBDA1*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + + LAMBDA2*fabs(1.0 - ((coeffs.at(0) + R.at(0))+(coeffs.at(1) + R.at(1)))); } @@ -260,7 +263,7 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) reward_score += (*i) * pow(GAMMA, discount_exp); ++i; ++discount_exp; } - positivesWeight = static_cast(positives)/rewards.size(); + positivesWeight = (POSITIVE_REWARD*static_cast(positives))/(positives*POSITIVE_REWARD + (rewards.size()-positives)*fabs(NEGATIVE_REWARD)); #ifdef DIVEHANDLER_TRAINING_DEBUG SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 15095d1..402a699 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -60,7 +60,8 @@ END_MODULE #define EPSILON 0.15 #define T 15 // Evaluation weight -#define LAMBDA 0.15 +#define LAMBDA1 0.6 +#define LAMBDA2 0.3 // Module class declaration From 2874684d8896a23040ec835758bf7513420813aa Mon Sep 17 00:00:00 2001 From: Claudio Delli Bovi Date: Sat, 8 Feb 2014 20:04:31 +0100 Subject: [PATCH 05/17] TOTRY: Evaluation function with individual best as attractor --- machineLearning/DiveHandler/DiveHandler.cpp | 32 ++++++++++++++------- machineLearning/DiveHandler/DiveHandler.h | 21 ++++++++++---- 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index ba646ee..321654c 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -26,7 +26,7 @@ #define DIVEHANDLER_TRAINING //#define RAND_PERMUTATIONS -#define NEGATIVE_REWARD -0.5 +#define NEGATIVE_REWARD -1.0 
#define POSITIVE_REWARD 1.5 // Debug messages template @@ -87,8 +87,11 @@ DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, float _eps // Initialize the base class CoeffsLearner(_nCoeffs, _initValue, _dhPtr), // Initialize the gradient estimate - coeffsGradient(_nCoeffs, 0.0), positivesWeight(0.0) + coeffsGradient(_nCoeffs, 0.0) { + reward_score = 0.0; + reward_norm = 1.0; + // Initializing coefficients if(randomize) { @@ -238,17 +241,26 @@ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) assert(R.size() == coeffs.size()); // Generate perturbated policy and call the DiveHandler object for evaluation float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); + // Perturbated coefficients + std::vector new_coeffs(2); + new_coeffs.at(0) = coeffs.at(0) + R.at(0); + new_coeffs.at(1) = coeffs.at(1) + R.at(1); + +// return (1.0-LAMBDA1-LAMBDA2)*fabs(tDiveAndRecover) + +// LAMBDA1*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + +// LAMBDA2*fabs(1.0 - ((coeffs.at(0) + R.at(0))+(coeffs.at(1) + R.at(1)))); - return (1.0-LAMBDA1-LAMBDA2)*fabs(tDiveAndRecover) + - LAMBDA1*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + - LAMBDA2*fabs(1.0 - ((coeffs.at(0) + R.at(0))+(coeffs.at(1) + R.at(1)))); + return (1.0 - fabs(reward_score/reward_norm))*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + + fabs(reward_score/reward_norm)*fabs(magnitude(coeffs) - magnitude(new_coeffs)); } /* TOTEST&COMMENT */ void DiveHandler::PGLearner::updateParams(const std::list& rewards) { - float reward_score = 0.0; + reward_score = 0.0; + if (!rewards.empty()) reward_norm = 0.0; + int discount_exp = 0; int positives = 0; @@ -261,9 +273,9 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) // Computing discounted rewards reward_score += (*i) * pow(GAMMA, discount_exp); + reward_norm += fabs((*i) * pow(GAMMA, discount_exp)); ++i; ++discount_exp; } - positivesWeight = (POSITIVE_REWARD*static_cast(positives))/(positives*POSITIVE_REWARD + (rewards.size()-positives)*fabs(NEGATIVE_REWARD)); #ifdef DIVEHANDLER_TRAINING_DEBUG SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); @@ -386,11 +398,9 @@ bool DiveHandler::PGLearner::updateCoeffs() // Weight new gradient estimate and previous one according to the reward score std::vector newGradient (coeffsGradient.size()); for( unsigned int j=0; j(SPQR::GOALIE_LEARNING_STATE)), - learner(new PGLearner(this, 2, EPSILON, T, 1.0, true)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y), + learner(new PGLearner(this, 2, EPSILON, T, 1.0, false)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y), tDive(0.0), tBackInPose(0.0), ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) { #ifdef DIVEHANDLER_TRAINING diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 402a699..351dbbf 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -52,15 +52,15 @@ END_MODULE // Termination conditions #define MAX_ITER 300 -#define CONVERGENCE_THRESHOLD 0.05 +#define CONVERGENCE_THRESHOLD 0.01 // PG parameters #define GAMMA 0.5 #define BUFFER_DIM 10 #define REWARDS_HISTORY_SIZE 15 -#define EPSILON 0.15 +#define EPSILON 0.10 #define T 15 // Evaluation weight -#define LAMBDA1 0.6 +#define LAMBDA1 0.7 #define LAMBDA2 0.3 @@ -142,8 +142,11 @@ class DiveHandler : public DiveHandlerBase // Current estimate for the 
coefficients gradient std::vector coeffsGradient; - // Weight of the current gradient estimate - float positivesWeight; + + // Current reward score + float reward_score; + // Current reward normalization factor + float reward_norm; // Memory buffer for the PG algorithm PGbuffer coeffsBuffer; @@ -160,7 +163,7 @@ class DiveHandler : public DiveHandlerBase // Default constructor PGLearner(DiveHandler* _dhPtr, int _nCoeffs, float _epsilon = EPSILON, - int _T = T, float _initValue = 0.0, bool randomize = false); + int _T = T, float _initValue = 1.0, bool randomize = false); // Generate a set of perturbations for the current policy void generatePerturbations(); @@ -222,6 +225,12 @@ class DiveHandler : public DiveHandlerBase // Destructor ~DiveHandler(); + // Setter for the reward list + inline const std::list& getRewardList() const + { + return rewardHistory; + } + // Update the DiveHandle for the goalie behavior void update(DiveHandle& diveHandle); From 5170928866df8fc29f53105b238d8089ccad1a5a Mon Sep 17 00:00:00 2001 From: Claudio Delli Bovi Date: Tue, 11 Feb 2014 17:02:35 +0100 Subject: [PATCH 06/17] TOTRY: Evaluation function with overall best as attractor --- machineLearning/DiveHandler/DiveHandler.cpp | 16 ++++++++++++---- machineLearning/DiveHandler/DiveHandler.h | 12 ++++++++++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 321654c..7a900d4 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -89,8 +89,10 @@ DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, float _eps // Initialize the gradient estimate coeffsGradient(_nCoeffs, 0.0) { + // Initializing reward scores reward_score = 0.0; reward_norm = 1.0; + coeffsBest = coeffs; // Initializing coefficients if(randomize) @@ -241,26 +243,28 @@ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) assert(R.size() == coeffs.size()); // Generate perturbated policy and call the DiveHandler object for evaluation float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); + // Perturbated coefficients std::vector new_coeffs(2); new_coeffs.at(0) = coeffs.at(0) + R.at(0); new_coeffs.at(1) = coeffs.at(1) + R.at(1); + return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + + LAMBDA1*fabs(magnitude(coeffs) - magnitude(coeffsBest)); + // return (1.0-LAMBDA1-LAMBDA2)*fabs(tDiveAndRecover) + // LAMBDA1*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + // LAMBDA2*fabs(1.0 - ((coeffs.at(0) + R.at(0))+(coeffs.at(1) + R.at(1)))); - return (1.0 - fabs(reward_score/reward_norm))*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + - fabs(reward_score/reward_norm)*fabs(magnitude(coeffs) - magnitude(new_coeffs)); } /* TOTEST&COMMENT */ void DiveHandler::PGLearner::updateParams(const std::list& rewards) { + // Re-initialize reward scores reward_score = 0.0; if (!rewards.empty()) reward_norm = 0.0; - int discount_exp = 0; int positives = 0; @@ -286,6 +290,10 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) //Adjusting PG parameters according to the obtained score setParam("epsilon", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("epsilon")); + // Update best performance + if (rewards.front() == POSITIVE_REWARD) + coeffsBest = coeffs; + #ifdef DIVEHANDLER_TRAINING SPQR_INFO( "Epsilon value changed to: " << getParam("epsilon") << " according to the obtained 
rewards. "); #endif @@ -569,7 +577,7 @@ void DiveHandler::estimateDiveTimes() /* TOCOMMENT */ inline float DiveHandler::computeDiveAndRecoverTime(float alpha1, float alpha2) { - return alpha2*( alpha1*tBall2Goal - tDive ) + tBackInPose; + return alpha2*( alpha1*tBall2Goal - tDive ); } /* TOTEST&COMMENT */ diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 351dbbf..5fc43ec 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -60,8 +60,8 @@ END_MODULE #define EPSILON 0.10 #define T 15 // Evaluation weight -#define LAMBDA1 0.7 -#define LAMBDA2 0.3 +#define LAMBDA1 0.9 +//#define LAMBDA2 0.3 // Module class declaration @@ -142,6 +142,8 @@ class DiveHandler : public DiveHandlerBase // Current estimate for the coefficients gradient std::vector coeffsGradient; + // Best individual performance achieved so far + std::vector coeffsBest; // Current reward score float reward_score; @@ -177,6 +179,12 @@ class DiveHandler : public DiveHandlerBase // Update coefficients performing a step of the learning algorithm virtual bool updateCoeffs(); + // Update the best coefficient setting so far + inline void updateCoeffsBest() + { + coeffsBest = coeffs; + } + }; // class GALearner : public CoeffsLearner From ded204dfa03fe1306f66a77f33cc4f163e18c996 Mon Sep 17 00:00:00 2001 From: Claudio Delli Bovi Date: Wed, 5 Mar 2014 22:49:33 +0100 Subject: [PATCH 07/17] last update --- machineLearning/DiveHandler/DiveHandler.cpp | 134 +++++++++++--------- machineLearning/DiveHandler/DiveHandler.h | 58 +++++---- 2 files changed, 106 insertions(+), 86 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 7a900d4..a06bed5 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -29,6 +29,8 @@ #define NEGATIVE_REWARD -1.0 #define POSITIVE_REWARD 1.5 +#define REWARD_WORST 999999.9 + // Debug messages template #define SPQR_ERR(x) std::cerr << "\033[22;31;1m" <<"[DiveHandler] " << x << "\033[0m"<< std::endl; #define SPQR_INFO(x) std::cerr << "\033[22;34;1m" <<"[DiveHandler] " << x << "\033[0m" << std::endl; @@ -44,9 +46,9 @@ MAKE_MODULE(DiveHandler, SPQR-Modules) // Shortcut to compute the magnitude of a vector -float magnitude(std::vector v) +double magnitude(std::vector v) { - float m = 0.0; + double m = 0.0; for (unsigned int i = 0; i < v.size(); ++i) m += v.at(i) * v.at(i); @@ -60,12 +62,12 @@ float magnitude(std::vector v) /* * Simple setters for the learner's parameters and coefficients. */ -void DiveHandler::CoeffsLearner::setCoeffs(const std::vector& _coeffs) +void DiveHandler::CoeffsLearner::setCoeffs(const std::vector& _coeffs) { coeffs = _coeffs; } -void DiveHandler::CoeffsLearner::setParam(const std::string& _key, float _value) +void DiveHandler::CoeffsLearner::setParam(const std::string& _key, double _value) { params[_key] = _value; } @@ -83,16 +85,16 @@ void DiveHandler::CoeffsLearner::setParam(const std::string& _key, float _value) * - An initial value for the learning coefficients (or an upper bound for the random initialization of those); * - A flag indicating whether a fixed or random initialization has to be performed. 
*/ -DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, float _epsilon, int _T, float _initValue, bool randomize ): +DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, double _epsilon, int _T, double _initValue, bool randomize ): // Initialize the base class CoeffsLearner(_nCoeffs, _initValue, _dhPtr), // Initialize the gradient estimate - coeffsGradient(_nCoeffs, 0.0) + coeffsGradient(_nCoeffs, 0.0), coeffsBest(_nCoeffs, 0.0) { // Initializing reward scores reward_score = 0.0; reward_norm = 1.0; - coeffsBest = coeffs; + rewardBest = REWARD_WORST; // Initializing coefficients if(randomize) @@ -100,7 +102,7 @@ DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, float _eps // Random initialization in [0, INIT_VALUE] srand(time(NULL)); for( int i=0; i<_nCoeffs; ++i) - coeffs.at(i) = (static_cast(rand()%101)/100 ) *_initValue; + coeffs.at(i) = (static_cast(rand()%101)/100 ) *_initValue; } // Initializing parameters @@ -124,7 +126,7 @@ bool DiveHandler::PGLearner::converged() { // Compute variations mean // Delta previous to current step - float avg_variation = (magnitude(coeffs) - magnitude(coeffsBuffer.front()))/coeffsBuffer.size() ; + double avg_variation = (magnitude(coeffs) - magnitude(coeffsBuffer.front()))/coeffsBuffer.size() ; // Iterate over the whole buffer and compute deltas from step i-1 to i PGbuffer::const_iterator i = coeffsBuffer.begin(); PGbuffer::const_iterator j = coeffsBuffer.begin(); ++j; @@ -136,7 +138,7 @@ bool DiveHandler::PGLearner::converged() // Compute variations standard deviation // Delta previous to current step - float std_variation = pow(magnitude(coeffs)-magnitude(coeffsBuffer.front()) - avg_variation, 2) / coeffsBuffer.size(); + double std_variation = pow(magnitude(coeffs)-magnitude(coeffsBuffer.front()) - avg_variation, 2) / coeffsBuffer.size(); // Iterate over the whole buffer and compute deltas from step i-1 to i PGbuffer::const_iterator k = coeffsBuffer.begin(); PGbuffer::const_iterator t = coeffsBuffer.begin(); ++t; @@ -175,7 +177,7 @@ void DiveHandler::PGLearner::generatePerturbations() for(int i=0; i perturbation(coeffs); + std::vector perturbation(coeffs); for(unsigned int j=0; j perturbation (coeffs.size(),0.0); + std::vector perturbation (coeffs.size(),0.0); // Generate all possible combinations recursively generatePerturbations(&perturbation, 0); @@ -208,7 +210,7 @@ void DiveHandler::PGLearner::generatePerturbations() } /* TOTEST&COMMENT */ -void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_perturbation, unsigned int index) +void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_perturbation, unsigned int index) { if (index == partial_perturbation->size()-1) { @@ -216,7 +218,7 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_p for (int perturbation_type = -1; perturbation_type <= 1; ++perturbation_type) { // Compute last index and generate the final perturbation - std::vector perturbation (*partial_perturbation); + std::vector perturbation (*partial_perturbation); perturbation.at(index) = coeffs.at(index) + perturbation_type * params["epsilon"]; // Update the perturbations buffer @@ -237,20 +239,29 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_p } /* TOCOMMENT */ -float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) +double DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) { // Dimensions check assert(R.size() == coeffs.size()); + + if (R.at(0) == 0.0 || R.at(1) == 0.0) + 
return REWARD_WORST; + // Generate perturbated policy and call the DiveHandler object for evaluation - float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); + double tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(R.at(0), R.at(1)); - // Perturbated coefficients - std::vector new_coeffs(2); - new_coeffs.at(0) = coeffs.at(0) + R.at(0); - new_coeffs.at(1) = coeffs.at(1) + R.at(1); + // Attractor + std::vector distanceToBest(2); + distanceToBest.at(0) = coeffsBest.at(0) - R.at(0); + distanceToBest.at(1) = coeffsBest.at(1) - R.at(1); + +#ifdef DIVEHANDLER_TRAINING_DEBUG + SPQR_INFO("Perturbated policy: [" << R.at(0) << ", " << R.at(1) + << "], Score: " << ((1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal-tDiveAndRecover)+LAMBDA1*magnitude(distanceToBest))); +#endif return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + - LAMBDA1*fabs(magnitude(coeffs) - magnitude(coeffsBest)); + LAMBDA1*magnitude(distanceToBest); // return (1.0-LAMBDA1-LAMBDA2)*fabs(tDiveAndRecover) + // LAMBDA1*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + @@ -260,7 +271,7 @@ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) /* TOTEST&COMMENT */ -void DiveHandler::PGLearner::updateParams(const std::list& rewards) +void DiveHandler::PGLearner::updateParams(const std::list& rewards) { // Re-initialize reward scores reward_score = 0.0; @@ -268,7 +279,7 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) int discount_exp = 0; int positives = 0; - std::list::const_iterator i = rewards.begin(); + std::list::const_iterator i = rewards.begin(); while (i != rewards.end()) { // Counting positives @@ -281,18 +292,21 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) ++i; ++discount_exp; } -#ifdef DIVEHANDLER_TRAINING_DEBUG - SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); - SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); - SPQR_INFO("Reward total score: " << reward_score); -#endif - //Adjusting PG parameters according to the obtained score setParam("epsilon", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("epsilon")); // Update best performance - if (rewards.front() == POSITIVE_REWARD) + if (rewardGradient < rewardBest) + { + rewardBest = rewardGradient; coeffsBest = coeffs; + } +#ifdef DIVEHANDLER_TRAINING_DEBUG + SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); + SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); + SPQR_INFO("Reward total score: " << reward_score); + SPQR_INFO("Best evaluation so far: [ " << coeffsBest.at(0) << ", " << coeffsBest.at(1) << " ] with score: " << rewardBest); +#endif #ifdef DIVEHANDLER_TRAINING SPQR_INFO( "Epsilon value changed to: " << getParam("epsilon") << " according to the obtained rewards. "); @@ -307,33 +321,31 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) /* TOTEST&COMMENT */ bool DiveHandler::PGLearner::updateCoeffs() { - -#ifdef DIVEHANDLER_TRAINING - SPQR_INFO( "PG algorithm, iteration " << iter_count << "... " ); -#endif - if( iter_count == MAX_ITER || converged() ) return false; else { +#ifdef DIVEHANDLER_TRAINING + SPQR_INFO( "PG algorithm, iteration " << iter_count << "... 
" ); +#endif // First generate the set of random perturbation for the current coefficients generatePerturbations(); // For each perturbation, evaluate with the objective function and store the result in a temporary container - std::vector evaluatedPerturbations (perturbationsBuffer.size()); + std::vector evaluatedPerturbations (perturbationsBuffer.size()); PGbuffer::const_iterator evaluator; for(evaluator = perturbationsBuffer.begin(); evaluator != perturbationsBuffer.end(); ++evaluator) evaluatedPerturbations.push_back( evaluatePerturbation(*evaluator) ); // Compute the average 'gradient' for the current coefficients - std::vector coeffs_avgGradient(coeffs.size()); + std::vector coeffs_avgGradient(coeffs.size()); #ifdef RAND_PERMUTATIONS // For each coefficient, compute the average score to determine the correspondent 'gradient' entry PGbuffer::const_iterator current_perturbation = perturbationsBuffer.begin(); for( unsigned int n = 0; n < coeffs.size(); ++n ) { - std::vector score_plus, score_minus, score_zero; + std::vector score_plus, score_minus, score_zero; // Keep track of the perturbation type and store each score in a container for( unsigned int i = 0; i < evaluatedPerturbations.size(); ++i ) @@ -349,17 +361,17 @@ bool DiveHandler::PGLearner::updateCoeffs() } // Sum up all positive perturbation scores - float avg_plus = 0.0; + double avg_plus = 0.0; for (unsigned int j = 0; j < score_plus.size(); ++j) avg_plus += score_plus.at(j) / score_plus.size(); // Sum up all negative perturbation scores - float avg_minus = 0.0; + double avg_minus = 0.0; for (unsigned int j = 0; j < score_minus.size(); ++j) avg_minus += score_minus.at(j) / score_minus.size(); // Sum up all null perturbation scores - float avg_zero = 0.0; + double avg_zero = 0.0; for (unsigned int j = 0; j < score_zero.size(); ++j) avg_zero += score_zero.at(j) / score_zero.size(); @@ -373,12 +385,12 @@ bool DiveHandler::PGLearner::updateCoeffs() for( unsigned int n = 0; n < coeffs.size(); ++n ) { int avg_selector = 0; - float avg_minus = 0.0 , avg_zero = 0.0, avg_plus = 0.0; + double avg_minus = 0.0 , avg_zero = 0.0, avg_plus = 0.0; for( unsigned int i = 0; i < evaluatedPerturbations.size(); i = i + pow(3,n) ) { for( unsigned int k = i; k < i + pow(3,n); ++k ) { - float evaluation = evaluatedPerturbations.at(k) / (evaluatedPerturbations.size()/3); + double evaluation = evaluatedPerturbations.at(k) / (evaluatedPerturbations.size()/3); if( (avg_selector)%3 == 0 ) avg_minus += evaluation; if( (avg_selector)%3 == 1 ) avg_zero += evaluation; @@ -393,8 +405,11 @@ bool DiveHandler::PGLearner::updateCoeffs() coeffs_avgGradient.at(coeffs.size() - (n +1)) = avg_plus - avg_minus; } #endif + // Evaluate the gradient + rewardGradient = evaluatePerturbation(coeffs_avgGradient); + // Avoid 'nan' when the gradient is zeroed - float normalization = 1.0; + double normalization = 1.0; if (magnitude(coeffs_avgGradient) != 0) normalization = magnitude(coeffs_avgGradient); @@ -402,9 +417,10 @@ bool DiveHandler::PGLearner::updateCoeffs() #ifdef DIVEHANDLER_TRAINING SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization << ", " << coeffs_avgGradient.at(1)/normalization << " ]"); + SPQR_INFO("Gradient score (before normalization): " << rewardGradient); #endif // Weight new gradient estimate and previous one according to the reward score - std::vector newGradient (coeffsGradient.size()); + std::vector newGradient (coeffsGradient.size()); for( unsigned int j=0; j(SPQR::GOALIE_LEARNING_STATE)), - learner(new PGLearner(this, 2, 
EPSILON, T, 1.0, false)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y), + learner(new PGLearner(this, 2, EPSILON, T)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y), tDive(0.0), tBackInPose(0.0), ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) { #ifdef DIVEHANDLER_TRAINING SPQR_INFO("Initializing PGlearner..."); - std::vector coeffs = learner->getCoeffs(); + std::vector coeffs = learner->getCoeffs(); SPQR_INFO("Coefficients: alpha 1 = " << coeffs.at(0) << ", alpha 2 = " << coeffs.at(1)); SPQR_INFO("Parameters: epsilon = " << learner->getParam("epsilon") << ", T = " << learner->getParam("T")); #endif @@ -477,18 +493,18 @@ DiveHandler::~DiveHandler() void DiveHandler::estimateBallProjection() { // Ball path line - float A1 = (theBallModel.estimate.position.y - theBallModel.estimate.velocity.y) - theBallModel.estimate.position.y; - float B1 = theBallModel.estimate.position.x - (theBallModel.estimate.position.x - theBallModel.estimate.velocity.x); - float C1 = A1*theBallModel.estimate.position.x + B1*theBallModel.estimate.position.y; + double A1 = (theBallModel.estimate.position.y - theBallModel.estimate.velocity.y) - theBallModel.estimate.position.y; + double B1 = theBallModel.estimate.position.x - (theBallModel.estimate.position.x - theBallModel.estimate.velocity.x); + double C1 = A1*theBallModel.estimate.position.x + B1*theBallModel.estimate.position.y; // Goal line - float A2 = SPQR::GOALIE_FAR_LIMIT_Y - -SPQR::GOALIE_FAR_LIMIT_Y; + double A2 = SPQR::GOALIE_FAR_LIMIT_Y - -SPQR::GOALIE_FAR_LIMIT_Y; // Cross product/determinant - float det = - A2*B1; + double det = - A2*B1; // Y-intercept initialized with the maximum value possible - float yIntercept = SPQR::FIELD_DIMENSION_Y; + double yIntercept = SPQR::FIELD_DIMENSION_Y; // Non-singular case if( fabs(det) > SPQR::GOALIE_EPSILON_COLLINEAR ) @@ -530,8 +546,8 @@ void DiveHandler::estimateBallProjection() ballProjectionIntercept = yIntercept; // Computing the distance vector from the ball to the goal - float delta_x = -SPQR::FIELD_DIMENSION_X - theGlobalBallEstimation.singleRobotX; - float delta_y = ballProjectionIntercept - theGlobalBallEstimation.singleRobotY; + double delta_x = -SPQR::FIELD_DIMENSION_X - theGlobalBallEstimation.singleRobotX; + double delta_y = ballProjectionIntercept - theGlobalBallEstimation.singleRobotY; // Estimated distance from the ball distanceBall2Goal = sqrt( delta_x*delta_x + delta_y*delta_y); } @@ -553,8 +569,8 @@ void DiveHandler::estimateDiveTimes() tBall2Goal = -1.0; // Using the appropriate estimates for recover and reposition times - float tRecover = 0.0; - float tReposition = 0.0; + double tRecover = 0.0; + double tReposition = 0.0; if( diveType == rcloseDive || diveType == lcloseDive ) // Close dive: no need to back up to the original position tRecover = SPQR::GOALIE_CLOSE_DIVE_RECOVER_TIME; @@ -575,7 +591,7 @@ void DiveHandler::estimateDiveTimes() } /* TOCOMMENT */ -inline float DiveHandler::computeDiveAndRecoverTime(float alpha1, float alpha2) +inline double DiveHandler::computeDiveAndRecoverTime(double alpha1, double alpha2) { return alpha2*( alpha1*tBall2Goal - tDive ); } @@ -686,7 +702,7 @@ void DiveHandler::update(DiveHandle& diveHandle) learner->updateParams(rewardHistory); // Compute the dive time using the current coefficients as T = alpha2 * (alpha1*T_PAPO - T_dive) - float diveTime = (learner->getCoeffs()).at(1) * ( (learner->getCoeffs()).at(0) * tBall2Goal - tDive ); + double diveTime = (learner->getCoeffs()).at(1) * ( 
(learner->getCoeffs()).at(0) * tBall2Goal - tDive ); #ifdef DIVEHANDLER_DEBUG SPQR_INFO( "Estimated overall time to dive and recover position: " << diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 5fc43ec..fa104b7 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -60,7 +60,7 @@ END_MODULE #define EPSILON 0.10 #define T 15 // Evaluation weight -#define LAMBDA1 0.9 +#define LAMBDA1 0.7 //#define LAMBDA2 0.3 @@ -102,9 +102,9 @@ class DiveHandler : public DiveHandlerBase { protected: // Set of coefficients representing the learning objective - std::vector coeffs; + std::vector coeffs; // Set of fixed parameters defining the cost funcion - std::map params; + std::map params; // Iteration counter int iter_count; @@ -114,41 +114,45 @@ class DiveHandler : public DiveHandlerBase public: // Default constructor - CoeffsLearner(int _nCoeffs, float _initValue, DiveHandler* _dhPtr): + CoeffsLearner(int _nCoeffs, double _initValue, DiveHandler* _dhPtr): coeffs(_nCoeffs, _initValue), iter_count(0), diveHandler_ptr(_dhPtr) { } // Setter/getter for the coefficients - void setCoeffs(const std::vector& _coeffs); - inline std::vector getCoeffs(){ return coeffs; } + void setCoeffs(const std::vector& _coeffs); + inline std::vector getCoeffs(){ return coeffs; } // Setter/getter for the parameters - void setParam(const std::string& _key, float _value); - inline float getParam(std::string _key){ return params[_key]; } + void setParam(const std::string& _key, double _value); + inline double getParam(std::string _key){ return params[_key]; } // Update coefficients performing a step of the learning algorithm virtual bool updateCoeffs() = 0; // Use the obtained rewards to adjust the algorithm parameters - virtual void updateParams(const std::list& rewards) = 0; + virtual void updateParams(const std::list& rewards) = 0; }; // Inner class modeling a PolicyGradient-based learning agent class PGLearner : public CoeffsLearner { - typedef std::list< std::vector > PGbuffer; + typedef std::list< std::vector > PGbuffer; private: // Current estimate for the coefficients gradient - std::vector coeffsGradient; + std::vector coeffsGradient; // Best individual performance achieved so far - std::vector coeffsBest; + std::vector coeffsBest; // Current reward score - float reward_score; + double reward_score; // Current reward normalization factor - float reward_norm; + double reward_norm; + // Score of the current gradient estimate + double rewardGradient; + // Best gradient score so far + double rewardBest; // Memory buffer for the PG algorithm PGbuffer coeffsBuffer; @@ -159,22 +163,22 @@ class DiveHandler : public DiveHandlerBase bool converged(); // Recursive perturbation generator - void generatePerturbations(std::vector* partial_perturbation, unsigned int index); + void generatePerturbations(std::vector* partial_perturbation, unsigned int index); public: // Default constructor - PGLearner(DiveHandler* _dhPtr, int _nCoeffs, float _epsilon = EPSILON, - int _T = T, float _initValue = 1.0, bool randomize = false); + PGLearner(DiveHandler* _dhPtr, int _nCoeffs, double _epsilon = EPSILON, + int _T = T, double _initValue = 1.0, bool randomize = false); // Generate a set of perturbations for the current policy void generatePerturbations(); // Evaluate a single policy perturbation with the cost function - float evaluatePerturbation( std::vector R ); + double evaluatePerturbation( std::vector R ); // Update the PG parameters 
according to the obtained rewards - void updateParams(const std::list& rewards); + void updateParams(const std::list& rewards); // Update coefficients performing a step of the learning algorithm virtual bool updateCoeffs(); @@ -200,7 +204,7 @@ class DiveHandler : public DiveHandlerBase // Learning agent CoeffsLearner* learner; // Obtained rewards - std::list rewardHistory; + std::list rewardHistory; // Current scores int opponentScore; @@ -208,23 +212,23 @@ class DiveHandler : public DiveHandlerBase // Estimated time the ball needs to reach the goal // a.k.a. Tpapo (historical reasons) - float tBall2Goal; + double tBall2Goal; // Estimated time needed for the current dive action to be performed - float tDive; + double tDive; // Estimated time the goalie needs to back up to its original position - float tBackInPose; + double tBackInPose; // Estimated intersection between the ball projection and the goal line - float ballProjectionIntercept; + double ballProjectionIntercept; // Estimated distance of the ball from the own goal - float distanceBall2Goal; + double distanceBall2Goal; // Computes parameters using the ball estimated position and velocity void estimateDiveTimes(); void estimateBallProjection(); // Compute the overall time the goalie needs to dive and then recover its position - inline float computeDiveAndRecoverTime(float alpha1, float alpha2); + inline double computeDiveAndRecoverTime(double alpha1, double alpha2); public: @@ -234,7 +238,7 @@ class DiveHandler : public DiveHandlerBase ~DiveHandler(); // Setter for the reward list - inline const std::list& getRewardList() const + inline const std::list& getRewardList() const { return rewardHistory; } From eede2eeccbeb7424c5a089fc50e4bc536c86c7c6 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Tue, 11 Mar 2014 20:35:16 +0100 Subject: [PATCH 08/17] SUPER --- machineLearning/ConfigurationParameters.h | 112 ------- machineLearning/DiveHandle.h | 21 +- machineLearning/DiveHandler/DiveHandler.cpp | 327 ++++++++++++-------- machineLearning/DiveHandler/DiveHandler.h | 134 ++++---- 4 files changed, 284 insertions(+), 310 deletions(-) delete mode 100644 machineLearning/ConfigurationParameters.h diff --git a/machineLearning/ConfigurationParameters.h b/machineLearning/ConfigurationParameters.h deleted file mode 100644 index 9c8bb8a..0000000 --- a/machineLearning/ConfigurationParameters.h +++ /dev/null @@ -1,112 +0,0 @@ -#pragma once - -#include - -namespace SPQR -{ - /************ GAME CONTROLLER ************/ - static const std::string IP_GOALIE = "10.0.19.14"; - static const int CHEST_BUTTON_MANUAL_GAME_CONTROLLER_PORT = 18003; - static const int FIELD_DIMENSION_X = 3000; - static const int FIELD_DIMENSION_Y = 2000; - - static const unsigned int POLICY = 0; ///{STABLE ="0", S_POSITIONIG_X ="1", S_POSITIONIG_XY ="2", WALL ="3", TANK ="4", STATIC_POSITIONG="5"}; - static const unsigned int STRATEGY = 0; ///{DRIBBLING ="0", PASSING ="1"}; - - static const float TURN_VALID_THS = 10; /// degree - static const float TURN_EXCESS = 10; - - static const int COORDINATION_PORT_NUMBER = 11937; - static const int MAXIMUM_DISTANCE_BALL_VIEWED = 6000; - static const int MAXIMUM_DISTANCE_ON_THE_FIELD = 11000; - static const unsigned int TABLE_ROWS = 5; /// TABLE_ROWS also equals to the number of roles. 
- static const unsigned int ACTIVE_ROLES = 5; /// Active roles (including the goalie) => max 5 (goalie, defender, supporter, jolly, striker) - static const unsigned int TABLE_COLUMNS = ACTIVE_ROLES+4; - static const unsigned int DEAD_ROBOT_TIME_THRESHOLD = 5000; - static const unsigned int HYSTERESIS_PERIOD_IN_CYCLES = 100; - static const unsigned int COORDINATION_INFORMATION_NETWORK_FREQUENCY = 10; /// FREQUENCY! - static const unsigned int FALL_DOWN_PENALTY = 200; - static const unsigned int TIME_TO_GET_UP = 10000; - static const unsigned int MOVING_BALL_MIN_VELOCITY = 10; /// [mm/s] - static const unsigned int SUPPORTER_MIN_TIME_WHEN_LAST_SEEN = 500; /// [ms] - static const unsigned int DEFENDER_MIN_TIME_WHEN_LAST_SEEN = 500; /// [ms] - static const unsigned int JOLLY_MIN_TIME_WHEN_LAST_SEEN = 500; /// [ms] - static const int MINIMUM_PASSING_DISTANCE = 1000; /// [mm] - static const int HYSTERESIS_BOUND_DISTANCE = 300; /// [mm] - - static const float DEFENDER_KICKOFF_DEFAULT_POSITION_X = -0.55 * FIELD_DIMENSION_X; - static const float DEFENDER_KICKOFF_DEFAULT_POSITION_Y = 0.13 * FIELD_DIMENSION_Y; - static const float DEFENDER_NO_KICKOFF_DEFAULT_POSITION_X = -0.55 * FIELD_DIMENSION_X; - static const float DEFENDER_NO_KICKOFF_DEFAULT_POSITION_Y = 0.13 * FIELD_DIMENSION_Y; - - static const float SUPPORTER_KICKOFF_DEFAULT_POSITION_X = -0.27 * FIELD_DIMENSION_X; - static const float SUPPORTER_KICKOFF_DEFAULT_POSITION_Y = 0.33 * FIELD_DIMENSION_Y; - static const float SUPPORTER_NO_KICKOFF_DEFAULT_POSITION_X = -0.27 * FIELD_DIMENSION_X; - static const float SUPPORTER_NO_KICKOFF_DEFAULT_POSITION_Y = 0.33 * FIELD_DIMENSION_Y; - - static const float JOLLY_KICKOFF_DEFAULT_POSITION_X = -0.27 * FIELD_DIMENSION_X; - static const float JOLLY_KICKOFF_DEFAULT_POSITION_Y = -0.33 * FIELD_DIMENSION_Y; - static const float JOLLY_NO_KICKOFF_DEFAULT_POSITION_X = -0.27 * FIELD_DIMENSION_X; - static const float JOLLY_NO_KICKOFF_DEFAULT_POSITION_Y = -0.33 * FIELD_DIMENSION_Y; - - static const float STRIKER_KICKOFF_POSITION_X = -220.0; - static const float STRIKER_KICKOFF_POSITION_Y = 0.0; - static const float STRIKER_NO_KICKOFF_POSITION_X = -1200.0; - static const float STRIKER_NO_KICKOFF_POSITION_Y = 0.0; - - static const float SPEED_X = 0.6; - static const float SPEED_Y = 0.6; - static const float HEAD_ROTATION = 8.0; - static const float TIME_BEFORE_STARTING_TO_COORD_SEARCH = 7000.0; - - /************ WALL ************/ - static const float DEFENDER_KICKOFF_WALL_POSITION_X = -0.75 * FIELD_DIMENSION_X; - static const float DEFENDER_KICKOFF_WALL_POSITION_Y = 0.16 * FIELD_DIMENSION_Y; - static const float SUPPORTER_KICKOFF_WALL_POSITION_X = -0.75 * FIELD_DIMENSION_X; - static const float SUPPORTER_KICKOFF_WALL_POSITION_Y = 0.45 * FIELD_DIMENSION_Y; - static const float JOLLY_KICKOFF_WALL_POSITION_X = -0.75 * FIELD_DIMENSION_X; - static const float JOLLY_KICKOFF_WALL_POSITION_Y = -0.30 * FIELD_DIMENSION_Y; - - /************ NO BALL ************/ - static const float DEFENDER_KICKOFF_NO_BALL_POSITION_X = -0.75 * FIELD_DIMENSION_X; - static const float DEFENDER_KICKOFF_NO_BALL_POSITION_Y = 0.75 * FIELD_DIMENSION_Y; - static const float SUPPORTER_KICKOFF_NO_BALL_POSITION_X = -0.75 * FIELD_DIMENSION_X; - static const float SUPPORTER_KICKOFF_NO_BALL_POSITION_Y = -0.75 * FIELD_DIMENSION_Y; - static const float JOLLY_KICKOFF_NO_BALL_POSITION_X = 0.75 * FIELD_DIMENSION_X; - static const float JOLLY_KICKOFF_NO_BALL_POSITION_Y = -0.50 * FIELD_DIMENSION_Y; - - /************ GOALIE ************/ - static const float 
GOALIE_BASE_POSITION_X = -FIELD_DIMENSION_X + 250; /// [mm] //TODO take this from theFieldDimensions - static const float GOALIE_BASE_POSITION_Y = 0; /// [mm] - static const float GOALIE_BASE_POSITION_BEARING = 0; /// [mm] - - static const int GOALIE_LEARNING_STATE = 3; /// 1 = learning disabled, 3 = learning enabled - - static const float GOALIE_DIVE_TIME = 3000; - static const float GOALIE_DIVE_RECOVER_TIME = 3000; - static const float GOALIE_DIVE_REPOSITION_TIME = 3000; - -static const float GOALIE_CLOSE_DIVE_TIME = 1500; -static const float GOALIE_CLOSE_DIVE_RECOVER_TIME = 1500; - -static const float GOALIE_STOP_BALL_TIME = 2000; -static const float GOALIE_STOP_BALL_RECOVER_TIME = 2000; - -static const float GOALIE_POSE_X_TOLLERANCE = 150; /// [mm] -static const float GOALIE_POSE_Y_TOLLERANCE = 150; /// [mm] -static const float GOALIE_POSE_ANGLE_TOLLERANCE = 10; /// [deg] -static const float GOALIE_POSE_X_TOLLERANCE_AFTER_DIVE = 150; /// [mm] -static const float GOALIE_POSE_Y_TOLLERANCE_AFTER_DIVE = 150; /// [mm] - -static const float GOALIE_DIVE_TIME_TOLERANCE = 100; /// [ms] - -static const float GOALIE_MOVING_BALL_MIN_VELOCITY = 10; /// [mm/s] -static const float GOALIE_EPSILON_COLLINEAR = 0.001; /// [??] -static const float GOALIE_FAR_LIMIT_Y = 800; /// a little more than goal post //TODO take this from FieldDimensions -static const float GOALIE_CLOSE_LIMIT_Y = 200; /// dont-dive distance //TODO take this from FieldDimensions -static const unsigned int GOALIE_MIN_TIME_WHEN_LAST_SEEN = 500; /// [ms] -static const float GOALIE_MIN_BALL_DIST_FROM_POST = 500; - -static const float GOALIE_MAX_DIST_BALL_IN_RANGE_ABS = 500; /// [mm] -} - diff --git a/machineLearning/DiveHandle.h b/machineLearning/DiveHandle.h index 1d75bf9..876473e 100644 --- a/machineLearning/DiveHandle.h +++ b/machineLearning/DiveHandle.h @@ -1,6 +1,7 @@ #pragma once #include "Tools/Math/Vector2.h" +#include "Tools/Enum.h" class DiveHandle : public Streamable { @@ -20,18 +21,14 @@ class DiveHandle : public Streamable } public: - enum Dive - { - none = 1, - lDive, - rDive, - lcloseDive, - rcloseDive, - stopBall - }; - - typedef int Dive; - + ENUM(Dive, + none = 1, + lDive, + rDive, + lcloseDive, + rcloseDive, + stopBall); + float diveTime; float ballProjectionEstimate; Dive diveType; diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index a06bed5..61e67ac 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -16,21 +16,20 @@ #include #include -#include +#include +#include "Tools/Enum.h" #include "DiveHandler.h" // Uncomment to have debug information //#define DIVEHANDLER_DEBUG -#define DIVEHANDLER_TRAINING_DEBUG -#define DIVEHANDLER_TRAINING +//#define DIVEHANDLER_TRAINING_DEBUG +//#define DIVEHANDLER_TRAINING //#define RAND_PERMUTATIONS #define NEGATIVE_REWARD -1.0 #define POSITIVE_REWARD 1.5 -#define REWARD_WORST 999999.9 - // Debug messages template #define SPQR_ERR(x) std::cerr << "\033[22;31;1m" <<"[DiveHandler] " << x << "\033[0m"<< std::endl; #define SPQR_INFO(x) std::cerr << "\033[22;34;1m" <<"[DiveHandler] " << x << "\033[0m" << std::endl; @@ -42,13 +41,16 @@ else if(x == 2) std::cerr << "\033[22;34;1m"<<"Learner state: paused (waiting for reward). "<<"\033[0m" << std::endl; \ else if(x == 3) std::cerr << "\033[22;34;1m"<<"Learner state: enabled. 
"<<"\033[0m" << std::endl; \ +bool stamp =false; +bool tooEarly=false; +bool estimatedTime=false; MAKE_MODULE(DiveHandler, SPQR-Modules) // Shortcut to compute the magnitude of a vector -double magnitude(std::vector v) +float magnitude(std::vector v) { - double m = 0.0; + float m = 0.0; for (unsigned int i = 0; i < v.size(); ++i) m += v.at(i) * v.at(i); @@ -62,12 +64,12 @@ double magnitude(std::vector v) /* * Simple setters for the learner's parameters and coefficients. */ -void DiveHandler::CoeffsLearner::setCoeffs(const std::vector& _coeffs) +void DiveHandler::CoeffsLearner::setCoeffs(const std::vector& _coeffs) { coeffs = _coeffs; } -void DiveHandler::CoeffsLearner::setParam(const std::string& _key, double _value) +void DiveHandler::CoeffsLearner::setParam(const std::string& _key, float _value) { params[_key] = _value; } @@ -85,16 +87,16 @@ void DiveHandler::CoeffsLearner::setParam(const std::string& _key, double _value * - An initial value for the learning coefficients (or an upper bound for the random initialization of those); * - A flag indicating whether a fixed or random initialization has to be performed. */ -DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, double _epsilon, int _T, double _initValue, bool randomize ): +DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, float _epsilon, int _T, float _initValue, bool randomize ): // Initialize the base class CoeffsLearner(_nCoeffs, _initValue, _dhPtr), // Initialize the gradient estimate - coeffsGradient(_nCoeffs, 0.0), coeffsBest(_nCoeffs, 0.0) + coeffsGradient(_nCoeffs, 0.0) { // Initializing reward scores reward_score = 0.0; reward_norm = 1.0; - rewardBest = REWARD_WORST; + coeffsBest = coeffs; // Initializing coefficients if(randomize) @@ -102,7 +104,7 @@ DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, double _ep // Random initialization in [0, INIT_VALUE] srand(time(NULL)); for( int i=0; i<_nCoeffs; ++i) - coeffs.at(i) = (static_cast(rand()%101)/100 ) *_initValue; + coeffs.at(i) = (static_cast(rand()%101)/100 ) *_initValue; } // Initializing parameters @@ -126,7 +128,7 @@ bool DiveHandler::PGLearner::converged() { // Compute variations mean // Delta previous to current step - double avg_variation = (magnitude(coeffs) - magnitude(coeffsBuffer.front()))/coeffsBuffer.size() ; + float avg_variation = (magnitude(coeffs) - magnitude(coeffsBuffer.front()))/coeffsBuffer.size() ; // Iterate over the whole buffer and compute deltas from step i-1 to i PGbuffer::const_iterator i = coeffsBuffer.begin(); PGbuffer::const_iterator j = coeffsBuffer.begin(); ++j; @@ -138,7 +140,7 @@ bool DiveHandler::PGLearner::converged() // Compute variations standard deviation // Delta previous to current step - double std_variation = pow(magnitude(coeffs)-magnitude(coeffsBuffer.front()) - avg_variation, 2) / coeffsBuffer.size(); + float std_variation = pow(magnitude(coeffs)-magnitude(coeffsBuffer.front()) - avg_variation, 2) / coeffsBuffer.size(); // Iterate over the whole buffer and compute deltas from step i-1 to i PGbuffer::const_iterator k = coeffsBuffer.begin(); PGbuffer::const_iterator t = coeffsBuffer.begin(); ++t; @@ -177,7 +179,7 @@ void DiveHandler::PGLearner::generatePerturbations() for(int i=0; i perturbation(coeffs); + std::vector perturbation(coeffs); for(unsigned int j=0; j perturbation (coeffs.size(),0.0); + std::vector perturbation (coeffs.size(),0.0); // Generate all possible combinations recursively generatePerturbations(&perturbation, 0); @@ -210,7 +212,7 @@ void 
DiveHandler::PGLearner::generatePerturbations() } /* TOTEST&COMMENT */ -void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_perturbation, unsigned int index) +void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_perturbation, unsigned int index) { if (index == partial_perturbation->size()-1) { @@ -218,7 +220,7 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_ for (int perturbation_type = -1; perturbation_type <= 1; ++perturbation_type) { // Compute last index and generate the final perturbation - std::vector perturbation (*partial_perturbation); + std::vector perturbation (*partial_perturbation); perturbation.at(index) = coeffs.at(index) + perturbation_type * params["epsilon"]; // Update the perturbations buffer @@ -239,29 +241,22 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_ } /* TOCOMMENT */ -double DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) +float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) { // Dimensions check assert(R.size() == coeffs.size()); - - if (R.at(0) == 0.0 || R.at(1) == 0.0) - return REWARD_WORST; - // Generate perturbated policy and call the DiveHandler object for evaluation - double tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(R.at(0), R.at(1)); +// float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); - // Attractor - std::vector distanceToBest(2); - distanceToBest.at(0) = coeffsBest.at(0) - R.at(0); - distanceToBest.at(1) = coeffsBest.at(1) - R.at(1); - -#ifdef DIVEHANDLER_TRAINING_DEBUG - SPQR_INFO("Perturbated policy: [" << R.at(0) << ", " << R.at(1) - << "], Score: " << ((1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal-tDiveAndRecover)+LAMBDA1*magnitude(distanceToBest))); -#endif + // Perturbated coefficients + std::vector new_coeffs(2); + new_coeffs.at(0) = coeffs.at(0) + R.at(0); + new_coeffs.at(1) = coeffs.at(1) + R.at(1); - return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + - LAMBDA1*magnitude(distanceToBest); + return (diveHandler_ptr->estimatedInterval - ( R.at(0)*diveHandler_ptr->tBall2Goal))* + (diveHandler_ptr->estimatedInterval - ( R.at(0)*diveHandler_ptr->tBall2Goal)) ; +// return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + +// LAMBDA1*fabs(magnitude(coeffs) - magnitude(coeffsBest)); // return (1.0-LAMBDA1-LAMBDA2)*fabs(tDiveAndRecover) + // LAMBDA1*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + @@ -271,7 +266,7 @@ double DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) /* TOTEST&COMMENT */ -void DiveHandler::PGLearner::updateParams(const std::list& rewards) +void DiveHandler::PGLearner::updateParams(const std::list& rewards) { // Re-initialize reward scores reward_score = 0.0; @@ -279,7 +274,7 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) int discount_exp = 0; int positives = 0; - std::list::const_iterator i = rewards.begin(); + std::list::const_iterator i = rewards.begin(); while (i != rewards.end()) { // Counting positives @@ -292,22 +287,19 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) ++i; ++discount_exp; } - //Adjusting PG parameters according to the obtained score - setParam("epsilon", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("epsilon")); - - // Update best performance - if (rewardGradient < rewardBest) - { - rewardBest = rewardGradient; - coeffsBest = coeffs; - } #ifdef DIVEHANDLER_TRAINING_DEBUG 
SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); SPQR_INFO("Reward total score: " << reward_score); - SPQR_INFO("Best evaluation so far: [ " << coeffsBest.at(0) << ", " << coeffsBest.at(1) << " ] with score: " << rewardBest); #endif + //Adjusting PG parameters according to the obtained score + setParam("epsilon", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("epsilon")); + + // Update best performance + if (rewards.front() == POSITIVE_REWARD) + coeffsBest = coeffs; + #ifdef DIVEHANDLER_TRAINING SPQR_INFO( "Epsilon value changed to: " << getParam("epsilon") << " according to the obtained rewards. "); #endif @@ -321,31 +313,33 @@ void DiveHandler::PGLearner::updateParams(const std::list& rewards) /* TOTEST&COMMENT */ bool DiveHandler::PGLearner::updateCoeffs() { + +#ifdef DIVEHANDLER_TRAINING + SPQR_INFO( "PG algorithm, iteration " << iter_count << "... " ); +#endif + if( iter_count == MAX_ITER || converged() ) return false; else { -#ifdef DIVEHANDLER_TRAINING - SPQR_INFO( "PG algorithm, iteration " << iter_count << "... " ); -#endif // First generate the set of random perturbation for the current coefficients generatePerturbations(); // For each perturbation, evaluate with the objective function and store the result in a temporary container - std::vector evaluatedPerturbations (perturbationsBuffer.size()); + std::vector evaluatedPerturbations (perturbationsBuffer.size()); PGbuffer::const_iterator evaluator; for(evaluator = perturbationsBuffer.begin(); evaluator != perturbationsBuffer.end(); ++evaluator) evaluatedPerturbations.push_back( evaluatePerturbation(*evaluator) ); // Compute the average 'gradient' for the current coefficients - std::vector coeffs_avgGradient(coeffs.size()); + std::vector coeffs_avgGradient(coeffs.size()); #ifdef RAND_PERMUTATIONS // For each coefficient, compute the average score to determine the correspondent 'gradient' entry PGbuffer::const_iterator current_perturbation = perturbationsBuffer.begin(); for( unsigned int n = 0; n < coeffs.size(); ++n ) { - std::vector score_plus, score_minus, score_zero; + std::vector score_plus, score_minus, score_zero; // Keep track of the perturbation type and store each score in a container for( unsigned int i = 0; i < evaluatedPerturbations.size(); ++i ) @@ -361,17 +355,17 @@ bool DiveHandler::PGLearner::updateCoeffs() } // Sum up all positive perturbation scores - double avg_plus = 0.0; + float avg_plus = 0.0; for (unsigned int j = 0; j < score_plus.size(); ++j) avg_plus += score_plus.at(j) / score_plus.size(); // Sum up all negative perturbation scores - double avg_minus = 0.0; + float avg_minus = 0.0; for (unsigned int j = 0; j < score_minus.size(); ++j) avg_minus += score_minus.at(j) / score_minus.size(); // Sum up all null perturbation scores - double avg_zero = 0.0; + float avg_zero = 0.0; for (unsigned int j = 0; j < score_zero.size(); ++j) avg_zero += score_zero.at(j) / score_zero.size(); @@ -385,12 +379,12 @@ bool DiveHandler::PGLearner::updateCoeffs() for( unsigned int n = 0; n < coeffs.size(); ++n ) { int avg_selector = 0; - double avg_minus = 0.0 , avg_zero = 0.0, avg_plus = 0.0; + float avg_minus = 0.0 , avg_zero = 0.0, avg_plus = 0.0; for( unsigned int i = 0; i < evaluatedPerturbations.size(); i = i + pow(3,n) ) { for( unsigned int k = i; k < i + pow(3,n); ++k ) { - double evaluation = evaluatedPerturbations.at(k) / (evaluatedPerturbations.size()/3); + float evaluation = 
evaluatedPerturbations.at(k) / (evaluatedPerturbations.size()/3); if( (avg_selector)%3 == 0 ) avg_minus += evaluation; if( (avg_selector)%3 == 1 ) avg_zero += evaluation; @@ -405,11 +399,8 @@ bool DiveHandler::PGLearner::updateCoeffs() coeffs_avgGradient.at(coeffs.size() - (n +1)) = avg_plus - avg_minus; } #endif - // Evaluate the gradient - rewardGradient = evaluatePerturbation(coeffs_avgGradient); - // Avoid 'nan' when the gradient is zeroed - double normalization = 1.0; + float normalization = 1.0; if (magnitude(coeffs_avgGradient) != 0) normalization = magnitude(coeffs_avgGradient); @@ -417,10 +408,9 @@ bool DiveHandler::PGLearner::updateCoeffs() #ifdef DIVEHANDLER_TRAINING SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization << ", " << coeffs_avgGradient.at(1)/normalization << " ]"); - SPQR_INFO("Gradient score (before normalization): " << rewardGradient); #endif // Weight new gradient estimate and previous one according to the reward score - std::vector newGradient (coeffsGradient.size()); + std::vector newGradient (coeffsGradient.size()); for( unsigned int j=0; j(SPQR::GOALIE_LEARNING_STATE)), - learner(new PGLearner(this, 2, EPSILON, T)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y), - tDive(0.0), tBackInPose(0.0), ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) + diveType(DiveHandle::none), state(static_cast(SPQR::GOALIE_LEARNING_STATE)), + learner(new PGLearner(this, 2, EPSILON, T, 1.0, false)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y), + tDive(0.0), tBackInPose(0.0), estimatedInterval(0), + ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) { #ifdef DIVEHANDLER_TRAINING SPQR_INFO("Initializing PGlearner..."); - std::vector coeffs = learner->getCoeffs(); + std::vector coeffs = learner->getCoeffs(); SPQR_INFO("Coefficients: alpha 1 = " << coeffs.at(0) << ", alpha 2 = " << coeffs.at(1)); SPQR_INFO("Parameters: epsilon = " << learner->getParam("epsilon") << ", T = " << learner->getParam("T")); #endif @@ -481,7 +472,7 @@ DiveHandler::DiveHandler(): */ DiveHandler::~DiveHandler() { - if (learner) delete learner; + if(learner) delete learner; } /* @@ -493,18 +484,18 @@ DiveHandler::~DiveHandler() void DiveHandler::estimateBallProjection() { // Ball path line - double A1 = (theBallModel.estimate.position.y - theBallModel.estimate.velocity.y) - theBallModel.estimate.position.y; - double B1 = theBallModel.estimate.position.x - (theBallModel.estimate.position.x - theBallModel.estimate.velocity.x); - double C1 = A1*theBallModel.estimate.position.x + B1*theBallModel.estimate.position.y; + float A1 = (theBallModel.estimate.position.y - theBallModel.estimate.velocity.y) - theBallModel.estimate.position.y; + float B1 = theBallModel.estimate.position.x - (theBallModel.estimate.position.x - theBallModel.estimate.velocity.x); + float C1 = A1*theBallModel.estimate.position.x + B1*theBallModel.estimate.position.y; // Goal line - double A2 = SPQR::GOALIE_FAR_LIMIT_Y - -SPQR::GOALIE_FAR_LIMIT_Y; + float A2 = SPQR::GOALIE_FAR_LIMIT_Y - -SPQR::GOALIE_FAR_LIMIT_Y; // Cross product/determinant - double det = - A2*B1; + float det = - A2*B1; // Y-intercept initialized with the maximum value possible - double yIntercept = SPQR::FIELD_DIMENSION_Y; + float yIntercept = SPQR::FIELD_DIMENSION_Y; // Non-singular case if( fabs(det) > SPQR::GOALIE_EPSILON_COLLINEAR ) @@ -513,31 +504,33 @@ void DiveHandler::estimateBallProjection() yIntercept = (- A2*C1) / det; // Devising the 
type of dive to be performed - if( yIntercept > ( SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept < SPQR::GOALIE_FAR_LIMIT_Y ) - // Close intercept on the left - diveType = lcloseDive; - else if( yIntercept > SPQR::GOALIE_FAR_LIMIT_Y ) - // Far intercept on the left - diveType = lDive; - else if( yIntercept < (-SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept > -SPQR::GOALIE_FAR_LIMIT_Y ) - // Close intercept on the right - diveType = rcloseDive; - else if( yIntercept < -SPQR::GOALIE_FAR_LIMIT_Y ) - // Far intercept on the right - diveType = rDive; - else if( fabs(yIntercept) < SPQR::GOALIE_CLOSE_LIMIT_Y/2) - diveType = stopBall; - else - // Any other case: no dive at all - diveType = none; - } + + if( yIntercept > ( SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept < SPQR::GOALIE_FAR_LIMIT_Y ) + // Close intercept on the left + diveType = DiveHandle::lcloseDive; + else if( yIntercept > SPQR::GOALIE_FAR_LIMIT_Y ) + // Far intercept on the left + diveType = DiveHandle::lDive; + else if( yIntercept < (-SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept > -SPQR::GOALIE_FAR_LIMIT_Y ) + // Close intercept on the right + diveType = DiveHandle::rcloseDive; + else if( yIntercept < -SPQR::GOALIE_FAR_LIMIT_Y ) + // Far intercept on the right + diveType = DiveHandle::rDive; + + else if( fabs(yIntercept) < SPQR::GOALIE_CLOSE_LIMIT_Y/2) + diveType = DiveHandle::stopBall; + else + // Any other case: no dive at all + diveType = DiveHandle::none; + } // Using the appropriate estimate for the dive time - if (diveType == lDive || diveType == rDive ) + if (diveType == DiveHandle::lDive || diveType == DiveHandle::rDive ) tDive = SPQR::GOALIE_DIVE_TIME; - else if (diveType == lcloseDive || diveType == rcloseDive ) + else if (diveType == DiveHandle::lcloseDive || diveType == DiveHandle::rcloseDive ) tDive = SPQR::GOALIE_CLOSE_DIVE_TIME; - else if (diveType == stopBall ) + else if (diveType == DiveHandle::stopBall ) tDive = SPQR::GOALIE_STOP_BALL_TIME; else tDive = 0.0; @@ -546,10 +539,11 @@ void DiveHandler::estimateBallProjection() ballProjectionIntercept = yIntercept; // Computing the distance vector from the ball to the goal - double delta_x = -SPQR::FIELD_DIMENSION_X - theGlobalBallEstimation.singleRobotX; - double delta_y = ballProjectionIntercept - theGlobalBallEstimation.singleRobotY; +// float delta_x = -SPQR::FIELD_DIMENSION_X - theGlobalBallEstimation.singleRobotX; +// float delta_y = ballProjectionIntercept - theGlobalBallEstimation.singleRobotY; // Estimated distance from the ball - distanceBall2Goal = sqrt( delta_x*delta_x + delta_y*delta_y); +// distanceBall2Goal = sqrt( delta_x*delta_x + delta_y*delta_y); + distanceBall2Goal = theBallModel.estimate.position.x; } /* @@ -569,18 +563,18 @@ void DiveHandler::estimateDiveTimes() tBall2Goal = -1.0; // Using the appropriate estimates for recover and reposition times - double tRecover = 0.0; - double tReposition = 0.0; - if( diveType == rcloseDive || diveType == lcloseDive ) + float tRecover = 0.0; + float tReposition = 0.0; + if( diveType == DiveHandle::rcloseDive || diveType == DiveHandle::lcloseDive ) // Close dive: no need to back up to the original position tRecover = SPQR::GOALIE_CLOSE_DIVE_RECOVER_TIME; - else if( diveType == rDive || diveType == lDive ) + else if( diveType == DiveHandle::rDive || diveType == DiveHandle::lDive ) { // Long dive: the robot has to stand up and reposition tRecover = SPQR::GOALIE_DIVE_RECOVER_TIME; tReposition = SPQR::GOALIE_DIVE_REPOSITION_TIME; } - else if( diveType == stopBall ) + else if( diveType == DiveHandle::stopBall ) { // stop 
ball: the robot has to stand up and stop the ball tRecover = SPQR::GOALIE_STOP_BALL_RECOVER_TIME; @@ -591,7 +585,7 @@ void DiveHandler::estimateDiveTimes() } /* TOCOMMENT */ -inline double DiveHandler::computeDiveAndRecoverTime(double alpha1, double alpha2) +inline float DiveHandler::computeDiveAndRecoverTime(float alpha1, float alpha2) { return alpha2*( alpha1*tBall2Goal - tDive ); } @@ -611,21 +605,86 @@ inline double DiveHandler::computeDiveAndRecoverTime(double alpha1, double alpha */ void DiveHandler::update(DiveHandle& diveHandle) { -// theOpponentTeamInfo.score; // Check you're actually the goalie... if (theRobotInfo.number == 1) - { - // Compute the ball projection estimate + { + // Compute the ball projection estimate estimateBallProjection(); // Update the DiveHandle diveHandle.ballProjectionEstimate = ballProjectionIntercept; - // Check whether the ball is close enough - if( (distanceBall2Goal < SPQR::FIELD_DIMENSION_X) && (fabs(ballProjectionIntercept) < SPQR::FIELD_DIMENSION_Y) ) + if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 10001 && + ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 10050 && + (int) timer.fallen != 0) +// SPQR_SUCCESS("TooEarly time window START..."); + if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 14971 && + ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 14999 && + (int) timer.fallen != 0) +// SPQR_SUCCESS("TooEarly time window END."); + + if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 10000 && + ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 15000 && + (int) timer.fallen != 0) + { + if(opponentScore != (int)theOpponentTeamInfo.score) + tooEarly=true; + } + // Check whether the ball is close enough + if( (distanceBall2Goal < SPQR::FIELD_DIMENSION_X) && (fabs(ballProjectionIntercept) < SPQR::FIELD_DIMENSION_Y) ) { // Estimate all temporal parameters estimateDiveTimes(); + if(state != notLearning) + { + // if not in playing state + if(theGameInfo.state != STATE_PLAYING) + timer.reset(); + else + { + // if the ball is moving enough fast then set the timer + if( !timer.setTimer && (theBallModel.estimate.velocity.abs() > SPQR::MOVING_BALL_MIN_VELOCITY && + theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) < 1000) ) + timer.set(clock()); + // else reset it... 
+ if( timer.setTimer && (theBallModel.estimate.velocity.abs() < SPQR::MOVING_BALL_MIN_VELOCITY || + theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) > 1000) ) + timer.reset(); + + // if the goalie dives + if( (int)theFallDownState.state == (int)FallDownState::fallen ) + { + timer.fallen=clock(); + estimatedInterval = (int) (clock() - timer.start)/(CLOCKS_PER_SEC/1000); + } + + if(opponentScore != (int)theOpponentTeamInfo.score && !estimatedTime) + { + if( tooEarly ) + { + SPQR_FAILURE("too FAST dude!"); + estimatedInterval += 2000; + tooEarly=false; + } + else + { + SPQR_FAILURE("too SLOW dude!"); + estimatedInterval += (int)(clock() - timer.fallen)/(CLOCKS_PER_SEC/1000) - 500; + } + estimatedTime=true; + + } + // if the goalie succeeded + else if(ownScore != (int)theOwnTeamInfo.score && !estimatedTime) + { + SPQR_SUCCESS("SUPER!"); + estimatedInterval -= 100; + estimatedTime=true; + } + + } + } + #ifdef DIVEHANDLER_DEBUG SPQR_INFO("Ball projection: " << ballProjectionIntercept); SPQR_INFO("PAPO time: " << tBall2Goal); @@ -635,7 +694,7 @@ void DiveHandler::update(DiveHandle& diveHandle) #endif // The module is in the learning state and a reward has been received - if( (state == learning) ) + if( state == learning ) { // Perform a single iteration of the learning algorithm if( learner->updateCoeffs() ) @@ -654,7 +713,7 @@ void DiveHandler::update(DiveHandle& diveHandle) else if( state == waitReward ) { // The opponent team scores: the goalie failed and gets a negative reward - if(opponentScore != (int)theOpponentTeamInfo.score) + if(opponentScore != (int)theOpponentTeamInfo.score && estimatedTime) { // The learner obtains a negative reward rewardHistory.push_front(NEGATIVE_REWARD); @@ -666,16 +725,19 @@ void DiveHandler::update(DiveHandle& diveHandle) opponentScore = (int)theOpponentTeamInfo.score; #ifdef DIVEHANDLER_TRAINING - SPQR_FAILURE("The opponent team scored! Negative reward for the learner. "); + SPQR_FAILURE("The opponent team scored! 
Negative reward for the learner."); #endif // A reward has been received: re-enable learning state = learning; - // Clear the pending reward + // Clear the pending rewardelse if(!diveHandle.rewardAck) diveHandle.rewardAck = true; + + estimatedTime=false; + stamp =true; } // The own team scores: user-guided move to provide the goalie a positive reward - else if(ownScore != (int)theOwnTeamInfo.score) + else if(ownScore != (int)theOwnTeamInfo.score && estimatedTime) { // The learner obtains a positive reward rewardHistory.push_front(POSITIVE_REWARD); @@ -694,6 +756,9 @@ void DiveHandler::update(DiveHandle& diveHandle) // Clear the pending reward if(!diveHandle.rewardAck) diveHandle.rewardAck = true; + + estimatedTime=false; + stamp=true; } } @@ -701,8 +766,19 @@ void DiveHandler::update(DiveHandle& diveHandle) if( state == learning ) learner->updateParams(rewardHistory); - // Compute the dive time using the current coefficients as T = alpha2 * (alpha1*T_PAPO - T_dive) - double diveTime = (learner->getCoeffs()).at(1) * ( (learner->getCoeffs()).at(0) * tBall2Goal - tDive ); + // Compute the dive time using the current coefficients as T = alpha2 * (alpha1*T_PAPO - T_dive) + float diveTime = ( (learner->getCoeffs()).at(0) * tBall2Goal ); + +#ifdef DIVEHANDLER_TRAINING + if(stamp) + { + SPQR_INFO("diveTime: " << diveTime ); + SPQR_INFO("estimated time interval: " << estimatedInterval ); + SPQR_ERR("TimeError: "<< (estimatedInterval - diveTime)*(estimatedInterval - diveTime)); + SPQR_INFO("/-----------------------------------------/\n"); + stamp = false; + } +#endif #ifdef DIVEHANDLER_DEBUG SPQR_INFO( "Estimated overall time to dive and recover position: " << @@ -712,16 +788,16 @@ void DiveHandler::update(DiveHandle& diveHandle) // Update the DiveHandle if (diveTime > 0.0) - diveHandle.diveTime = diveTime; + diveHandle.diveTime = diveTime -tDive; else diveHandle.diveTime = -1.0; #ifdef DIVEHANDLER_TRAINING - if (diveTime > 0.0) - { - if(diveHandle.diveTime < SPQR::GOALIE_DIVE_TIME_TOLERANCE) - SPQR_INFO("Dive now! "); - } +// if (diveTime > 0.0) +// { +// if(diveHandle.diveTime < SPQR::GOALIE_DIVE_TIME_TOLERANCE) +// SPQR_INFO("Dive now! 
"); +// } #endif } @@ -729,7 +805,8 @@ void DiveHandler::update(DiveHandle& diveHandle) else { diveHandle.diveTime = -1; - diveHandle.diveType = diveType; + diveHandle.diveType = diveType; + timer.reset(); } } } diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index fa104b7..d593b7a 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -22,27 +22,30 @@ #include #include #include +#include #include "Tools/Module/Module.h" #include "Representations/Modeling/BallModel.h" #include "Representations/Infrastructure/TeamInfo.h" #include "Representations/Infrastructure/FrameInfo.h" +#include "Representations/Infrastructure/GameInfo.h" #include "Representations/Infrastructure/RobotInfo.h" +#include "Representations/Sensing/FallDownState.h" #include "Representations/SPQR-Representations/ConfigurationParameters.h" #include "Representations/SPQR-Representations/RobotPoseSpqrFiltered.h" #include "Representations/SPQR-Representations/GlobalBallEstimation.h" #include "Representations/SPQR-Representations/DiveHandle.h" -#include "SPQR-Libraries/PTracking/src/Utils/AgentPacket.h" +#include "Utils/AgentPacket.h" // Module definition - - MODULE(DiveHandler) REQUIRES(OpponentTeamInfo) REQUIRES(OwnTeamInfo) REQUIRES(FrameInfo) - REQUIRES(RobotInfo) + REQUIRES(GameInfo) + REQUIRES(FallDownState) + REQUIRES(RobotInfo) REQUIRES(RobotPoseSpqrFiltered) REQUIRES(BallModel) REQUIRES(GlobalBallEstimation) @@ -51,60 +54,40 @@ END_MODULE // Termination conditions -#define MAX_ITER 300 +#define MAX_ITER 15 #define CONVERGENCE_THRESHOLD 0.01 // PG parameters #define GAMMA 0.5 #define BUFFER_DIM 10 -#define REWARDS_HISTORY_SIZE 15 -#define EPSILON 0.10 +#define REWARDS_HISTORY_SIZE 10 +#define EPSILON 0.05 #define T 15 // Evaluation weight -#define LAMBDA1 0.7 +#define LAMBDA1 0.9 //#define LAMBDA2 0.3 // Module class declaration - - class DiveHandler : public DiveHandlerBase { // Learning state - enum LearningState - { + ENUM( LearningState, // Learning disabled notLearning = 1, // Learning paused, expecting reward waitReward, // Learning active learning - }; - - // Dive type - enum Dive - { - // No dive at all - none = 1, - // Long dive on the left - lDive, - // Long dive on the right - rDive, - // Close dive on the left - lcloseDive, - // Close dive on the right - rcloseDive, - // Stop the ball without diving - stopBall - }; + ); // Inner base class modeling the learning agent class CoeffsLearner { protected: // Set of coefficients representing the learning objective - std::vector coeffs; + std::vector coeffs; // Set of fixed parameters defining the cost funcion - std::map params; + std::map params; // Iteration counter int iter_count; @@ -114,45 +97,43 @@ class DiveHandler : public DiveHandlerBase public: // Default constructor - CoeffsLearner(int _nCoeffs, double _initValue, DiveHandler* _dhPtr): + CoeffsLearner(int _nCoeffs, float _initValue, DiveHandler* _dhPtr): coeffs(_nCoeffs, _initValue), iter_count(0), diveHandler_ptr(_dhPtr) { } + virtual ~CoeffsLearner(){} + // Setter/getter for the coefficients - void setCoeffs(const std::vector& _coeffs); - inline std::vector getCoeffs(){ return coeffs; } + void setCoeffs(const std::vector& _coeffs); + inline std::vector getCoeffs(){ return coeffs; } // Setter/getter for the parameters - void setParam(const std::string& _key, double _value); - inline double getParam(std::string _key){ return params[_key]; } + void setParam(const std::string& _key, float _value); + inline 
float getParam(std::string _key){ return params[_key]; } // Update coefficients performing a step of the learning algorithm virtual bool updateCoeffs() = 0; // Use the obtained rewards to adjust the algorithm parameters - virtual void updateParams(const std::list& rewards) = 0; + virtual void updateParams(const std::list& rewards) = 0; }; // Inner class modeling a PolicyGradient-based learning agent class PGLearner : public CoeffsLearner { - typedef std::list< std::vector > PGbuffer; + typedef std::list< std::vector > PGbuffer; private: // Current estimate for the coefficients gradient - std::vector coeffsGradient; + std::vector coeffsGradient; // Best individual performance achieved so far - std::vector coeffsBest; + std::vector coeffsBest; // Current reward score - double reward_score; + float reward_score; // Current reward normalization factor - double reward_norm; - // Score of the current gradient estimate - double rewardGradient; - // Best gradient score so far - double rewardBest; + float reward_norm; // Memory buffer for the PG algorithm PGbuffer coeffsBuffer; @@ -163,22 +144,22 @@ class DiveHandler : public DiveHandlerBase bool converged(); // Recursive perturbation generator - void generatePerturbations(std::vector* partial_perturbation, unsigned int index); + void generatePerturbations(std::vector* partial_perturbation, unsigned int index); public: // Default constructor - PGLearner(DiveHandler* _dhPtr, int _nCoeffs, double _epsilon = EPSILON, - int _T = T, double _initValue = 1.0, bool randomize = false); + PGLearner(DiveHandler* _dhPtr, int _nCoeffs, float _epsilon = EPSILON, + int _T = T, float _initValue = 1.0, bool randomize = false); // Generate a set of perturbations for the current policy void generatePerturbations(); // Evaluate a single policy perturbation with the cost function - double evaluatePerturbation( std::vector R ); + float evaluatePerturbation( std::vector R ); // Update the PG parameters according to the obtained rewards - void updateParams(const std::list& rewards); + void updateParams(const std::list& rewards); // Update coefficients performing a step of the learning algorithm virtual bool updateCoeffs(); @@ -197,14 +178,14 @@ class DiveHandler : public DiveHandlerBase private: // Dive type currently selected - Dive diveType; + DiveHandle::Dive diveType; // Current learning state LearningState state; // Learning agent CoeffsLearner* learner; // Obtained rewards - std::list rewardHistory; + std::list rewardHistory; // Current scores int opponentScore; @@ -212,23 +193,54 @@ class DiveHandler : public DiveHandlerBase // Estimated time the ball needs to reach the goal // a.k.a. Tpapo (historical reasons) - double tBall2Goal; + float tBall2Goal; // Estimated time needed for the current dive action to be performed - double tDive; + float tDive; // Estimated time the goalie needs to back up to its original position - double tBackInPose; + float tBackInPose; + + // Timer + class Timer + { + public: + clock_t start; + clock_t fallen; + bool setTimer; + + Timer():start(0), fallen(0), setTimer(false){} + inline void set(clock_t startTime) + { + if(!setTimer) + { + start = startTime; + setTimer = true; +// std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; + } + } + inline void reset() + { + if(setTimer) + { + setTimer = false; +// std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset Timer!" 
<< "\033[0m" << std::endl; + } + } + }; + + Timer timer; + unsigned int estimatedInterval; // Estimated intersection between the ball projection and the goal line - double ballProjectionIntercept; + float ballProjectionIntercept; // Estimated distance of the ball from the own goal - double distanceBall2Goal; + float distanceBall2Goal; // Computes parameters using the ball estimated position and velocity void estimateDiveTimes(); void estimateBallProjection(); // Compute the overall time the goalie needs to dive and then recover its position - inline double computeDiveAndRecoverTime(double alpha1, double alpha2); + inline float computeDiveAndRecoverTime(float alpha1, float alpha2); public: @@ -238,7 +250,7 @@ class DiveHandler : public DiveHandlerBase ~DiveHandler(); // Setter for the reward list - inline const std::list& getRewardList() const + inline const std::list& getRewardList() const { return rewardHistory; } From d9ecaa6351e75284a4e9ccba07c1de3c31e52c96 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Tue, 11 Mar 2014 23:21:28 +0100 Subject: [PATCH 09/17] updates --- machineLearning/DiveHandler/DiveHandler.cpp | 31 ++++++++------------- machineLearning/DiveHandler/DiveHandler.h | 6 ++-- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 61e67ac..62d4c38 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -23,8 +23,8 @@ // Uncomment to have debug information //#define DIVEHANDLER_DEBUG -//#define DIVEHANDLER_TRAINING_DEBUG -//#define DIVEHANDLER_TRAINING +#define DIVEHANDLER_TRAINING_DEBUG +#define DIVEHANDLER_TRAINING //#define RAND_PERMUTATIONS #define NEGATIVE_REWARD -1.0 @@ -253,8 +253,7 @@ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) new_coeffs.at(0) = coeffs.at(0) + R.at(0); new_coeffs.at(1) = coeffs.at(1) + R.at(1); - return (diveHandler_ptr->estimatedInterval - ( R.at(0)*diveHandler_ptr->tBall2Goal))* - (diveHandler_ptr->estimatedInterval - ( R.at(0)*diveHandler_ptr->tBall2Goal)) ; + return ( std::abs(diveHandler_ptr->estimatedInterval - ( R.at(0)*diveHandler_ptr->tBall2Goal)) ) ; // return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + // LAMBDA1*fabs(magnitude(coeffs) - magnitude(coeffsBest)); @@ -616,12 +615,15 @@ void DiveHandler::update(DiveHandle& diveHandle) if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 10001 && ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 10050 && (int) timer.fallen != 0) -// SPQR_SUCCESS("TooEarly time window START..."); +#ifdef DIVEHANDLER_TRAINING + SPQR_SUCCESS("TooEarly time window START..."); +#endif if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 14971 && ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 14999 && (int) timer.fallen != 0) -// SPQR_SUCCESS("TooEarly time window END."); - +#ifdef DIVEHANDLER_TRAINING + SPQR_SUCCESS("TooEarly time window END."); +#endif if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 10000 && ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 15000 && (int) timer.fallen != 0) @@ -663,7 +665,7 @@ void DiveHandler::update(DiveHandle& diveHandle) if( tooEarly ) { SPQR_FAILURE("too FAST dude!"); - estimatedInterval += 2000; + estimatedInterval = timer.fallen + 3000; tooEarly=false; } else @@ -678,7 +680,7 @@ void DiveHandler::update(DiveHandle& diveHandle) else if(ownScore != (int)theOwnTeamInfo.score && !estimatedTime) { 
SPQR_SUCCESS("SUPER!"); - estimatedInterval -= 100; + estimatedInterval -= 200; estimatedTime=true; } @@ -774,7 +776,7 @@ void DiveHandler::update(DiveHandle& diveHandle) { SPQR_INFO("diveTime: " << diveTime ); SPQR_INFO("estimated time interval: " << estimatedInterval ); - SPQR_ERR("TimeError: "<< (estimatedInterval - diveTime)*(estimatedInterval - diveTime)); + SPQR_ERR("TimeError: "<< std::abs(estimatedInterval - diveTime) ); SPQR_INFO("/-----------------------------------------/\n"); stamp = false; } @@ -791,15 +793,6 @@ void DiveHandler::update(DiveHandle& diveHandle) diveHandle.diveTime = diveTime -tDive; else diveHandle.diveTime = -1.0; - -#ifdef DIVEHANDLER_TRAINING -// if (diveTime > 0.0) -// { -// if(diveHandle.diveTime < SPQR::GOALIE_DIVE_TIME_TOLERANCE) -// SPQR_INFO("Dive now! "); -// } -#endif - } // If the ball is far away or completely off target, no dive has to performed else diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index d593b7a..4154b03 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -61,7 +61,7 @@ END_MODULE #define BUFFER_DIM 10 #define REWARDS_HISTORY_SIZE 10 #define EPSILON 0.05 -#define T 15 +#define T 5 // Evaluation weight #define LAMBDA1 0.9 //#define LAMBDA2 0.3 @@ -214,7 +214,7 @@ class DiveHandler : public DiveHandlerBase { start = startTime; setTimer = true; -// std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; } } inline void reset() @@ -222,7 +222,7 @@ class DiveHandler : public DiveHandlerBase if(setTimer) { setTimer = false; -// std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset Timer!" << "\033[0m" << std::endl; + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset Timer!" 
<< "\033[0m" << std::endl; } } }; From 94b6fc2e3d21fa936736709d26fef9b5b4aaeff3 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Wed, 12 Mar 2014 20:28:13 +0100 Subject: [PATCH 10/17] too early too late too jesus --- machineLearning/DiveHandler/DiveHandler.cpp | 108 +++++++++++++------- machineLearning/DiveHandler/DiveHandler.h | 16 +-- 2 files changed, 81 insertions(+), 43 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 62d4c38..906440f 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -44,6 +44,7 @@ bool stamp =false; bool tooEarly=false; bool estimatedTime=false; +bool goalDetected=false; MAKE_MODULE(DiveHandler, SPQR-Modules) @@ -612,25 +613,42 @@ void DiveHandler::update(DiveHandle& diveHandle) // Update the DiveHandle diveHandle.ballProjectionEstimate = ballProjectionIntercept; - if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 10001 && - ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 10050 && - (int) timer.fallen != 0) #ifdef DIVEHANDLER_TRAINING + if( timer.getTimeSince(timer.fallen) > 10000 && timer.getTimeSince(timer.fallen) < 10050 && timer.fallen != 0) SPQR_SUCCESS("TooEarly time window START..."); #endif - if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 14971 && - ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 14999 && - (int) timer.fallen != 0) #ifdef DIVEHANDLER_TRAINING + if( timer.getTimeSince(timer.fallen) > 14971 && timer.getTimeSince(timer.fallen) < 14999 && timer.fallen != 0) SPQR_SUCCESS("TooEarly time window END."); #endif - if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 10000 && - ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 15000 && - (int) timer.fallen != 0) + + if(opponentScore != (int)theOpponentTeamInfo.score && !goalDetected) { - if(opponentScore != (int)theOpponentTeamInfo.score) - tooEarly=true; + if( timer.getTimeSince(timer.fallen) > 10000 && timer.getTimeSince(timer.fallen) < 15000 && + (unsigned int) timer.fallen != 0) + { +#ifdef DIVEHANDLER_TRAINING + SPQR_FAILURE("too FAST dude!"); +#endif + estimatedInterval += 3000; + } + else + { +// if(goalTimer.setTimer) + { +#ifdef DIVEHANDLER_TRAINING + SPQR_FAILURE("too SLOW dude!"); +#endif + estimatedInterval = goalTimer.getTimeSince(goalTimer.start) -500; + } + } + estimatedTime=true; + goalDetected=true; } + + if(theGameInfo.state == STATE_SET) + goalTimer.reset(); + // Check whether the ball is close enough if( (distanceBall2Goal < SPQR::FIELD_DIMENSION_X) && (fabs(ballProjectionIntercept) < SPQR::FIELD_DIMENSION_Y) ) { @@ -640,50 +658,67 @@ void DiveHandler::update(DiveHandle& diveHandle) if(state != notLearning) { // if not in playing state + if(theGameInfo.state != STATE_PLAYING) timer.reset(); else { +// if(goalTimer.setTimer) +// SPQR_INFO("time: "<< goalTimer.getTimeSince(goalTimer.start)); + // if the ball is moving enough fast then set the timer - if( !timer.setTimer && (theBallModel.estimate.velocity.abs() > SPQR::MOVING_BALL_MIN_VELOCITY && + if( (theBallModel.estimate.velocity.abs() > SPQR::MOVING_BALL_MIN_VELOCITY && theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) < 1000) ) - timer.set(clock()); - // else reset it... 
- if( timer.setTimer && (theBallModel.estimate.velocity.abs() < SPQR::MOVING_BALL_MIN_VELOCITY || - theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) > 1000) ) - timer.reset(); - - // if the goalie dives - if( (int)theFallDownState.state == (int)FallDownState::fallen ) { - timer.fallen=clock(); - estimatedInterval = (int) (clock() - timer.start)/(CLOCKS_PER_SEC/1000); + if(!timer.setTimer) + { + timer.set(clock()); +#ifdef DIVEHANDLER_TRAINING + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; +#endif + goalTimer.set(clock()); +#ifdef DIVEHANDLER_TRAINING + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set goal Timer!" << "\033[0m" << std::endl; +#endif + } } - - if(opponentScore != (int)theOpponentTeamInfo.score && !estimatedTime) + // else reset it... + if( (theBallModel.estimate.velocity.abs() < SPQR::MOVING_BALL_MIN_VELOCITY || + theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) > 4000) ) { - if( tooEarly ) + if(timer.setTimer) { - SPQR_FAILURE("too FAST dude!"); - estimatedInterval = timer.fallen + 3000; - tooEarly=false; + timer.reset(); +#ifdef DIVEHANDLER_TRAINING + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset Timer!" << "\033[0m" << std::endl; +#endif } - else + if(goalTimer.setTimer) { - SPQR_FAILURE("too SLOW dude!"); - estimatedInterval += (int)(clock() - timer.fallen)/(CLOCKS_PER_SEC/1000) - 500; + goalTimer.reset(); +#ifdef DIVEHANDLER_TRAINING + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset goal Timer!" << "\033[0m" << std::endl; +#endif } - estimatedTime=true; - } + // if the goalie succeeded - else if(ownScore != (int)theOwnTeamInfo.score && !estimatedTime) + if(ownScore != (int)theOwnTeamInfo.score && !estimatedTime) { +#ifdef DIVEHANDLER_TRAINING SPQR_SUCCESS("SUPER!"); +#endif estimatedInterval -= 200; estimatedTime=true; } + // if the goalie dives + if( (int)theFallDownState.state == (int)FallDownState::fallen ) + { + timer.fallen=clock(); + estimatedInterval = timer.getTimeSince(timer.start); + } + } } @@ -715,7 +750,7 @@ void DiveHandler::update(DiveHandle& diveHandle) else if( state == waitReward ) { // The opponent team scores: the goalie failed and gets a negative reward - if(opponentScore != (int)theOpponentTeamInfo.score && estimatedTime) + if(goalDetected && estimatedTime) { // The learner obtains a negative reward rewardHistory.push_front(NEGATIVE_REWARD); @@ -731,10 +766,11 @@ void DiveHandler::update(DiveHandle& diveHandle) #endif // A reward has been received: re-enable learning state = learning; - // Clear the pending rewardelse + // Clear the pending reward if(!diveHandle.rewardAck) diveHandle.rewardAck = true; + goalDetected=false; estimatedTime=false; stamp =true; } diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 4154b03..723d9b6 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -61,7 +61,7 @@ END_MODULE #define BUFFER_DIM 10 #define REWARDS_HISTORY_SIZE 10 #define EPSILON 0.05 -#define T 5 +#define T 15 // Evaluation weight #define LAMBDA1 0.9 //#define LAMBDA2 0.3 @@ -208,26 +208,28 @@ class DiveHandler : public DiveHandlerBase bool setTimer; Timer():start(0), fallen(0), setTimer(false){} + + inline unsigned int getTimeSince(clock_t startTime) + { + return (unsigned int) ((clock() - startTime)/(CLOCKS_PER_SEC/1000)); + } inline void set(clock_t startTime) { - if(!setTimer) +// if(!setTimer) { start = startTime; setTimer = true; - std::cerr << "\033[33;1m" 
<<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; } } inline void reset() { - if(setTimer) - { +// if(setTimer) setTimer = false; - std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset Timer!" << "\033[0m" << std::endl; - } } }; Timer timer; + Timer goalTimer; unsigned int estimatedInterval; // Estimated intersection between the ball projection and the goal line From da5e8cb47cfd164aa4d3c440159044d2d3a04276 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Wed, 12 Mar 2014 20:34:42 +0100 Subject: [PATCH 11/17] jump --- machineLearning/keeperJumpLeft.mof | 44 ++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 machineLearning/keeperJumpLeft.mof diff --git a/machineLearning/keeperJumpLeft.mof b/machineLearning/keeperJumpLeft.mof new file mode 100644 index 0000000..b63cdcd --- /dev/null +++ b/machineLearning/keeperJumpLeft.mof @@ -0,0 +1,44 @@ +motion_id = keeperJumpLeft + +label start + +hardness 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 50 + +"riseHand +47.6 15 75 0 0 0 -75 * * * * * * * * * * * * * * * 0 500 + +"turnLeftAnkle +- - * - - - * - - - - 20 * * - -30 - - * * - 22 1 400 + +// try no to sit at the initial phase of the match +// (1) HeadYaw [-119/119] (2) HeadPitch [29/-38] (3) LShoulderPitch [-119/119] (4) LShoulderRoll [-18/76] +// (5) LElbowYaw [-119/119] (6) LElbowRoll [-88/0] (7) RShoulderPitch [119/-119] (8) RShoulderRoll [-76/18] +// (9) RElbowYaw [119/-119] (10) RElbowRoll [88/0] (11) LHipYawPitch [-65/42] (12) LHipRoll [-21/45] +// (13) LHipPitch [-88/27] (14) LKneePitch [-5/121] (15) LAnklePitch [52/-68] (16) LAnkleRoll [-22/44] +// (17) RHipYawPitch [-65/42] (18) RHipRoll [-45/21] (19) RHipPitch [27/-88] (20) RKneePitch [121/-5] +// (21) RAnklePitch [53/-67] (22) RAnkleRoll [-44/22] +//"HY HP LSP LSR LEY LER RSP RSR REY RER LHYP LHR LHP LKP LAP LAR RHYP RHR RHP RKP RAP RAR Int Dur +//- - - - - - - - - - - - - - - - - - - - - - 0 100 + +"deactivate joints +- - - - - - - - - - - - - - - - - - - - - - 0 800 + +hardness 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 75 50 + +//- - - - - - - 0 0 0 -12.3 20 -71.5 122.3 -67.2 -6.5 0 -20 -34.8 66.9 -33 4.6 1 100 +//- - - - - - - 0 0 0 - 45 - - - -21.5 - -45 - 30 - 22 1 200 + +//- - 117.9 16.3 2.7 -9.8 -87.4 18.2 0.3 -22.6 11.4 6.5 -75.4 123.3 -69 1.7 11.4 8.2 11.5 -4.6 -32.4 2.8 1 500 + +//- - -5.2 28.7 -6.5 -1.9 -111.4 12.2 11.4 -9 -21.4 8.8 -76 122.4 -69.7 -1.9 -21.4 2.5 28.7 -7.5 -27.7 -6 1 350 +//- - 37.6 61.4 -18.2 -1.4 -95.3 15 11.1 -9.7 -34 -9.3 -62.1 122.6 -69.8 -3.6 -34 6.3 29.5 18.5 -27.8 -5.4 1 200 +//- - -90.6 15.2 0.1 -18.2 -91.1 12.7 -0.3 -16.7 0 0.2 -7 28.6 -21.5 - 0 0.2 -7 28.5 -21.3 - 1 300 + +label repeat +- - - - - - - - - - - - - - - - - - - - - - 0 100 + +transition keeperJumpLeft keeperJumpLeft repeat +transition standUpFrontNao standUpFrontNao start +transition standUpBackNao standUpBackNao start + +transition allMotions extern start From 633fc75f1460b5078bbde159822ad4a67d994938 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Thu, 13 Mar 2014 00:17:58 +0100 Subject: [PATCH 12/17] ... 
--- machineLearning/DiveHandler/DiveHandler.cpp | 55 ++++++++++----------- machineLearning/DiveHandler/DiveHandler.h | 3 +- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 906440f..71dbcd6 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -254,7 +254,7 @@ float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) new_coeffs.at(0) = coeffs.at(0) + R.at(0); new_coeffs.at(1) = coeffs.at(1) + R.at(1); - return ( std::abs(diveHandler_ptr->estimatedInterval - ( R.at(0)*diveHandler_ptr->tBall2Goal)) ) ; + return ( std::abs(diveHandler_ptr->tBAGO - ( R.at(0)*diveHandler_ptr->tBAGOestimate)) ) ; // return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + // LAMBDA1*fabs(magnitude(coeffs) - magnitude(coeffsBest)); @@ -315,7 +315,7 @@ bool DiveHandler::PGLearner::updateCoeffs() { #ifdef DIVEHANDLER_TRAINING - SPQR_INFO( "PG algorithm, iteration " << iter_count << "... " ); + SPQR_INFO( "\nPG algorithm, iteration " << iter_count << "... " ); #endif if( iter_count == MAX_ITER || converged() ) @@ -455,8 +455,8 @@ bool DiveHandler::PGLearner::updateCoeffs() */ DiveHandler::DiveHandler(): diveType(DiveHandle::none), state(static_cast(SPQR::GOALIE_LEARNING_STATE)), - learner(new PGLearner(this, 2, EPSILON, T, 1.0, false)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y), - tDive(0.0), tBackInPose(0.0), estimatedInterval(0), + learner(new PGLearner(this, 2, EPSILON, T, 1.0, false)), opponentScore(0), tBall2Goal(-1), + tDive(0.0), tBackInPose(0.0), tBAGO(0), ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) { #ifdef DIVEHANDLER_TRAINING @@ -538,12 +538,8 @@ void DiveHandler::estimateBallProjection() // Updating the class parameters with the obtained value ballProjectionIntercept = yIntercept; - // Computing the distance vector from the ball to the goal -// float delta_x = -SPQR::FIELD_DIMENSION_X - theGlobalBallEstimation.singleRobotX; -// float delta_y = ballProjectionIntercept - theGlobalBallEstimation.singleRobotY; // Estimated distance from the ball -// distanceBall2Goal = sqrt( delta_x*delta_x + delta_y*delta_y); - distanceBall2Goal = theBallModel.estimate.position.x; + distanceBall2Goal = theBallModel.estimate.position.abs(); } /* @@ -554,13 +550,13 @@ void DiveHandler::estimateBallProjection() void DiveHandler::estimateDiveTimes() { // Check whether the ball is actually moving toward the goal - if ( (theBallModel.estimate.velocity.abs() != 0.0) - && (theBallModel.estimate.velocity.x < 0.0) ) + if ( (theBallModel.estimate.velocity.abs() != 0.0) && + (theBallModel.estimate.velocity.x < 0.0) ) // Use a constant velocity approximation to the estimate the time interval - tBall2Goal = 1000.0 * ( distanceBall2Goal / theBallModel.estimate.velocity.abs() ); + tBall2Goal = 1000.0 * ( distanceBall2Goal / theBallModel.estimate.velocity.abs() ); else // Otherwise, set the parameter to a meaningless value - tBall2Goal = -1.0; + tBall2Goal = -1.0; // Using the appropriate estimates for recover and reposition times float tRecover = 0.0; @@ -614,23 +610,22 @@ void DiveHandler::update(DiveHandle& diveHandle) diveHandle.ballProjectionEstimate = ballProjectionIntercept; #ifdef DIVEHANDLER_TRAINING - if( timer.getTimeSince(timer.fallen) > 10000 && timer.getTimeSince(timer.fallen) < 10050 && timer.fallen != 0) + if( timer.getTimeSince(timer.fallen) > 5000 && 
timer.getTimeSince(timer.fallen) < 5040 && timer.fallen != 0) SPQR_SUCCESS("TooEarly time window START..."); -#endif -#ifdef DIVEHANDLER_TRAINING - if( timer.getTimeSince(timer.fallen) > 14971 && timer.getTimeSince(timer.fallen) < 14999 && timer.fallen != 0) + + if( timer.getTimeSince(timer.fallen) > 9961 && timer.getTimeSince(timer.fallen) < 9999 && timer.fallen != 0) SPQR_SUCCESS("TooEarly time window END."); #endif if(opponentScore != (int)theOpponentTeamInfo.score && !goalDetected) { - if( timer.getTimeSince(timer.fallen) > 10000 && timer.getTimeSince(timer.fallen) < 15000 && + if( timer.getTimeSince(timer.fallen) > 5000 && timer.getTimeSince(timer.fallen) < 10000 && (unsigned int) timer.fallen != 0) { #ifdef DIVEHANDLER_TRAINING SPQR_FAILURE("too FAST dude!"); #endif - estimatedInterval += 3000; + tBAGO += 3000; } else { @@ -639,7 +634,7 @@ void DiveHandler::update(DiveHandle& diveHandle) #ifdef DIVEHANDLER_TRAINING SPQR_FAILURE("too SLOW dude!"); #endif - estimatedInterval = goalTimer.getTimeSince(goalTimer.start) -500; + tBAGO = goalTimer.getTimeSince(goalTimer.start) -1500; } } estimatedTime=true; @@ -647,7 +642,10 @@ void DiveHandler::update(DiveHandle& diveHandle) } if(theGameInfo.state == STATE_SET) + { + tBAGOestimate=0; goalTimer.reset(); + } // Check whether the ball is close enough if( (distanceBall2Goal < SPQR::FIELD_DIMENSION_X) && (fabs(ballProjectionIntercept) < SPQR::FIELD_DIMENSION_Y) ) @@ -673,11 +671,10 @@ void DiveHandler::update(DiveHandle& diveHandle) if(!timer.setTimer) { timer.set(clock()); -#ifdef DIVEHANDLER_TRAINING - std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; -#endif goalTimer.set(clock()); + tBAGOestimate=tBall2Goal; #ifdef DIVEHANDLER_TRAINING + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set goal Timer!" << "\033[0m" << std::endl; #endif } @@ -699,6 +696,7 @@ void DiveHandler::update(DiveHandle& diveHandle) #ifdef DIVEHANDLER_TRAINING std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset goal Timer!" 
<< "\033[0m" << std::endl; #endif + tBAGOestimate=0; } } @@ -708,7 +706,7 @@ void DiveHandler::update(DiveHandle& diveHandle) #ifdef DIVEHANDLER_TRAINING SPQR_SUCCESS("SUPER!"); #endif - estimatedInterval -= 200; + tBAGO -= 200; estimatedTime=true; } @@ -716,7 +714,7 @@ void DiveHandler::update(DiveHandle& diveHandle) if( (int)theFallDownState.state == (int)FallDownState::fallen ) { timer.fallen=clock(); - estimatedInterval = timer.getTimeSince(timer.start); + tBAGO = timer.getTimeSince(timer.start); } } @@ -810,10 +808,9 @@ void DiveHandler::update(DiveHandle& diveHandle) #ifdef DIVEHANDLER_TRAINING if(stamp) { - SPQR_INFO("diveTime: " << diveTime ); - SPQR_INFO("estimated time interval: " << estimatedInterval ); - SPQR_ERR("TimeError: "<< std::abs(estimatedInterval - diveTime) ); - SPQR_INFO("/-----------------------------------------/\n"); + SPQR_INFO("BAGO: " << tBAGO ); + SPQR_INFO("BAGO estimate: " << tBAGOestimate ); + SPQR_ERR("BAGO error: "<< std::abs(tBAGO - tBAGOestimate) ); stamp = false; } #endif diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 723d9b6..26d6056 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -230,7 +230,8 @@ class DiveHandler : public DiveHandlerBase Timer timer; Timer goalTimer; - unsigned int estimatedInterval; + unsigned int tBAGO; + float tBAGOestimate; // Estimated intersection between the ball projection and the goal line float ballProjectionIntercept; From 6ee974735dfe95306ba77811d771e4102e6155c5 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Sat, 15 Mar 2014 16:30:54 +0100 Subject: [PATCH 13/17] BAGO --- machineLearning/DiveHandler/DiveHandler.cpp | 25 ++++++++++++++++++++- machineLearning/DiveHandler/DiveHandler.h | 2 ++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 71dbcd6..c5c19a1 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -644,6 +644,8 @@ void DiveHandler::update(DiveHandle& diveHandle) if(theGameInfo.state == STATE_SET) { tBAGOestimate=0; + dBAGOestimate=0; + sampledVelocities.clear(); goalTimer.reset(); } @@ -668,11 +670,13 @@ void DiveHandler::update(DiveHandle& diveHandle) if( (theBallModel.estimate.velocity.abs() > SPQR::MOVING_BALL_MIN_VELOCITY && theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) < 1000) ) { + sampledVelocities.push_back( theBallModel.estimate.velocity.abs() ); if(!timer.setTimer) { timer.set(clock()); goalTimer.set(clock()); - tBAGOestimate=tBall2Goal; + dBAGOestimate=distanceBall2Goal; +// tBAGOestimate=tBall2Goal; #ifdef DIVEHANDLER_TRAINING std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set goal Timer!" << "\033[0m" << std::endl; @@ -697,6 +701,8 @@ void DiveHandler::update(DiveHandle& diveHandle) std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset goal Timer!" 
<< "\033[0m" << std::endl; #endif tBAGOestimate=0; + dBAGOestimate=0; + sampledVelocities.clear(); } } @@ -716,10 +722,27 @@ void DiveHandler::update(DiveHandle& diveHandle) timer.fallen=clock(); tBAGO = timer.getTimeSince(timer.start); } + } + } + if(estimatedTime) + { + float velocityMean=0; + float velocityMax=0; + std::list::const_iterator it=sampledVelocities.begin(); + for(; it != sampledVelocities.end(); ++it) + { + if((*it) > velocityMax) velocityMax=(*it); + velocityMean += (*it) /sampledVelocities.size(); } + + tBAGOestimate = 1000*(dBAGOestimate / velocityMax); + SPQR_INFO("distance: " << dBAGOestimate); + SPQR_INFO("velocity: " << (.75f*velocityMax)/1000); + SPQR_INFO("tBAGO: " << tBAGOestimate); } + #ifdef DIVEHANDLER_DEBUG SPQR_INFO("Ball projection: " << ballProjectionIntercept); SPQR_INFO("PAPO time: " << tBall2Goal); diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 26d6056..bd6b49f 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -232,11 +232,13 @@ class DiveHandler : public DiveHandlerBase Timer goalTimer; unsigned int tBAGO; float tBAGOestimate; + float dBAGOestimate; // Estimated intersection between the ball projection and the goal line float ballProjectionIntercept; // Estimated distance of the ball from the own goal float distanceBall2Goal; + std::list sampledVelocities; // Computes parameters using the ball estimated position and velocity void estimateDiveTimes(); From 687089a31dc1fe5e36a04ce58cfd781887fc5e2f Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Sun, 16 Mar 2014 20:08:15 +0100 Subject: [PATCH 14/17] GA --- machineLearning/DiveHandler/DiveHandler.cpp | 244 ++++++++++++++++++-- machineLearning/DiveHandler/DiveHandler.h | 65 +++++- 2 files changed, 281 insertions(+), 28 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index c5c19a1..0f443c5 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -245,23 +245,9 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_p float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) { // Dimensions check - assert(R.size() == coeffs.size()); - // Generate perturbated policy and call the DiveHandler object for evaluation -// float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1)); - - // Perturbated coefficients - std::vector new_coeffs(2); - new_coeffs.at(0) = coeffs.at(0) + R.at(0); - new_coeffs.at(1) = coeffs.at(1) + R.at(1); + assert(R.size() == coeffs.size()); return ( std::abs(diveHandler_ptr->tBAGO - ( R.at(0)*diveHandler_ptr->tBAGOestimate)) ) ; -// return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + -// LAMBDA1*fabs(magnitude(coeffs) - magnitude(coeffsBest)); - -// return (1.0-LAMBDA1-LAMBDA2)*fabs(tDiveAndRecover) + -// LAMBDA1*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) + -// LAMBDA2*fabs(1.0 - ((coeffs.at(0) + R.at(0))+(coeffs.at(1) + R.at(1)))); - } @@ -447,6 +433,221 @@ bool DiveHandler::PGLearner::updateCoeffs() } +/** --------------------- CoeffsLearner: Genetic Algorithm --------------------- */ +DiveHandler::GALearner::GALearner( DiveHandler* _dhPtr, int _nCoeffs, float _initValue ): + CoeffsLearner(_nCoeffs, _initValue, _dhPtr), + reward_score(.0f), reward_norm(.0f) +{ + setParam("selection", SELECTION); + setParam("crossover", CROSSOVER); + 
setParam("mutation", MUTATION); + + setParam("elite", ELITE_SIZE); + + srand(time(NULL)); + for(unsigned int i=0; i< POPULATION_SIZE; ++i) + population.insert( Individual( (rand()%600) + 500) ); + +} + +float DiveHandler::GALearner::evaluate(Individual i) +{ + return ( std::abs(diveHandler_ptr->tBAGO - ( i.hypothesis.to_ulong()*diveHandler_ptr->tBAGOestimate)) ); +} + +DiveHandler::GALearner::Individual DiveHandler::GALearner::rnd_mutate(Individual i) +{ + srand(time(NULL)); + unsigned int n_flips = rand()%3+1; + for(unsigned int j=0; j< n_flips; ++j ) + (i.hypothesis).flip(rand()%(INDIVIDUAL_SIZE-1)); + + return i; +} + +DiveHandler::GALearner::Individual DiveHandler::GALearner::crossover(Individual mommy, const Individual& daddy) +{ + srand(time(NULL)); + int crossover_point = rand()%INDIVIDUAL_SIZE; + for(unsigned int i = crossover_point+1; i::const_iterator i = fitnessBuffer.begin(); + std::list::const_iterator j = fitnessBuffer.begin(); ++j; + while (j != fitnessBuffer.end()) + { + avg_variation += ( (*i) - (*j) )/fitnessBuffer.size(); + ++i; ++j; + } + + // Compute variations standard deviation + float std_variation = .0f; + // Iterate over the whole buffer and compute deltas from step i-1 to i + std::list::const_iterator k = fitnessBuffer.begin(); + std::list::const_iterator t = fitnessBuffer.begin(); ++t; + while (t != fitnessBuffer.end()) + { + std_variation += ( pow((*k)-(*t) - avg_variation, 2) ) / fitnessBuffer.size(); + ++k; ++t; + } + std_variation = sqrt(std_variation); + + // Check result against variation threshold + if ((avg_variation < CONVERGENCE_THRESHOLD) && (std_variation < CONVERGENCE_THRESHOLD)) + { + #ifdef DIVEHANDLER_TRAINING + SPQR_SUCCESS("GALearner converged!"); + SPQR_SUCCESS("Coefficients values:"); + for (unsigned int i = 0; i < coeffs.size(); ++i) + SPQR_SUCCESS("\t" << coeffs.at(i)); + #endif + return true; + } + else + return false; + } +} + +void DiveHandler::GALearner::evolutionStep() +{ + std::set previousPopulation(population); + population.clear(); + + int sel = 0; + std::set::iterator selector = previousPopulation.begin(); + std::set::iterator partner = previousPopulation.end(); + for(; selector != previousPopulation.end(); ++selector, ++sel) + { + if(sel < round(getParam("selection")*POPULATION_SIZE)) + population.insert(Individual(evaluate(*selector), (*selector).hypothesis.to_string())); + else + { + srand(time(NULL)); + + if( rand()/RAND_MAX < getParam("mutation") ) + population.insert( Individual(evaluate(rnd_mutate( *selector )), (rnd_mutate( *selector )).hypothesis.to_string()) ); + else if( rand()/RAND_MAX < sqrt(getParam("crossover")) ) + { + if(partner == previousPopulation.end()) + partner = selector; + else + { + population.insert(Individual(evaluate(crossover( *selector, *partner )), (crossover( *selector, *partner )).hypothesis.to_string())); + population.insert(Individual(evaluate(crossover( *partner, *selector )), (crossover( *partner, *selector )).hypothesis.to_string())); + partner = previousPopulation.end(); + } + } + else + population.insert(Individual(evaluate( *selector ), ( *selector ).hypothesis.to_string())); + } + } + +} + +void DiveHandler::GALearner::updateParams(const std::list& rewards) +{ + // Re-initialize reward scores + reward_score = 0.0; + if (!rewards.empty()) reward_norm = 0.0; + int discount_exp = 0; + int positives = 0; + + std::list::const_iterator i = rewards.begin(); + while (i != rewards.end()) + { + // Counting positives + if (*i == POSITIVE_REWARD) + ++positives; + + // Computing discounted rewards + 
reward_score += (*i) * pow(GAMMA, discount_exp); + reward_norm += fabs((*i) * pow(GAMMA, discount_exp)); + ++i; ++discount_exp; + } + +#ifdef DIVEHANDLER_TRAINING_DEBUG + SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); + SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); + SPQR_INFO("Reward total score: " << reward_score); +#endif + + //Adjusting GA parameters according to the obtained score + if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("mutation") >= 1.0) + setParam("mutation", 1.0); + else + setParam("mutation", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("mutation")); + + if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover") >= 1.0) + setParam("crossover", 1.0); + else + setParam("crossover", exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover")); + + if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("elite") >= 1.0) + setParam("elite", 1.0); + else + setParam("elite", exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("elite")); + +#ifdef DIVEHANDLER_TRAINING + SPQR_INFO( "Mutation rate value changed to: " << getParam("mutation") << " according to the obtained rewards. "); + SPQR_INFO( "Crossover rate value changed to: " << getParam("crossover") << " according to the obtained rewards. "); + SPQR_INFO( "Elite percentage changed to: " << getParam("elite") << " according to the obtained rewards. "); +#endif + +} + +bool DiveHandler::GALearner::updateCoeffs() +{ +#ifdef DIVEHANDLER_TRAINING + SPQR_INFO( "\nGA algorithm, iteration " << iter_count << "... " ); +#endif + + if( iter_count == MAX_ITER || converged() ) + return false; + else + { + evolutionStep(); + + float avg_fitness=.0f; + float avg_coeff=.0f; + std::set::iterator evaluator = population.begin(); + for( unsigned int sel=0; selfitness / round(getParam("elite")*POPULATION_SIZE); + avg_coeff += (evaluator->hypothesis.to_ulong()) / (1000*round(getParam("elite")*POPULATION_SIZE)); + } + + fitnessBuffer.push_front(avg_fitness); + + // Crop buffer + if (fitnessBuffer.size() > BUFFER_DIM) + fitnessBuffer.resize(BUFFER_DIM); + + coeffs.at(0) = avg_coeff; + +#ifdef DIVEHANDLER_TRAINING + SPQR_INFO("New coefficients: [ " << coeffs.at(0) << " ]"); +#endif + ++iter_count; + + return true; + } +} + + /** --------------------------- Dive Handler ---------------------------- */ @@ -455,8 +656,8 @@ bool DiveHandler::PGLearner::updateCoeffs() */ DiveHandler::DiveHandler(): diveType(DiveHandle::none), state(static_cast(SPQR::GOALIE_LEARNING_STATE)), - learner(new PGLearner(this, 2, EPSILON, T, 1.0, false)), opponentScore(0), tBall2Goal(-1), - tDive(0.0), tBackInPose(0.0), tBAGO(0), + learner(new PGLearner(this, 2, EPSILON, T, 1.0, false)), + opponentScore(0), tBall2Goal(-1), tDive(0.0), tBackInPose(0.0), tBAGO(0), tBAGOestimate(0), ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) { #ifdef DIVEHANDLER_TRAINING @@ -481,6 +682,7 @@ DiveHandler::~DiveHandler() * at which the ball is expected to reach the goal. * Then, the diveTime and the diveType parameters are defined accordingly. 
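* (Illustrative note: with the line coefficients used in this function, A1 = -v_y, B1 = v_x and
* C1 = v_x*p_y - v_y*p_x for ball position p = (p_x, p_y) and velocity v = (v_x, v_y), while the
* goal line reduces to x = 0 in the goalie frame; the intersection therefore simplifies to
* yIntercept = p_y - p_x * v_y / v_x, guarded by the determinant check against near-collinearity.)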
*/ + void DiveHandler::estimateBallProjection() { // Ball path line @@ -658,7 +860,6 @@ void DiveHandler::update(DiveHandle& diveHandle) if(state != notLearning) { // if not in playing state - if(theGameInfo.state != STATE_PLAYING) timer.reset(); else @@ -667,7 +868,7 @@ void DiveHandler::update(DiveHandle& diveHandle) // SPQR_INFO("time: "<< goalTimer.getTimeSince(goalTimer.start)); // if the ball is moving enough fast then set the timer - if( (theBallModel.estimate.velocity.abs() > SPQR::MOVING_BALL_MIN_VELOCITY && + if( (theBallModel.estimate.velocity.abs() > SPQR::GOALIE_MOVING_BALL_MIN_VELOCITY && theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) < 1000) ) { sampledVelocities.push_back( theBallModel.estimate.velocity.abs() ); @@ -684,7 +885,7 @@ void DiveHandler::update(DiveHandle& diveHandle) } } // else reset it... - if( (theBallModel.estimate.velocity.abs() < SPQR::MOVING_BALL_MIN_VELOCITY || + if( (theBallModel.estimate.velocity.abs() < SPQR::GOALIE_MOVING_BALL_MIN_VELOCITY || theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) > 4000) ) { if(timer.setTimer) @@ -736,10 +937,9 @@ void DiveHandler::update(DiveHandle& diveHandle) velocityMean += (*it) /sampledVelocities.size(); } - tBAGOestimate = 1000*(dBAGOestimate / velocityMax); + tBAGOestimate = 1000*(dBAGOestimate / (.75f*velocityMax)); SPQR_INFO("distance: " << dBAGOestimate); SPQR_INFO("velocity: " << (.75f*velocityMax)/1000); - SPQR_INFO("tBAGO: " << tBAGOestimate); } diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index bd6b49f..a7a3549 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include "Tools/Module/Module.h" @@ -52,7 +54,6 @@ MODULE(DiveHandler) PROVIDES(DiveHandle) END_MODULE - // Termination conditions #define MAX_ITER 15 #define CONVERGENCE_THRESHOLD 0.01 @@ -62,10 +63,14 @@ END_MODULE #define REWARDS_HISTORY_SIZE 10 #define EPSILON 0.05 #define T 15 -// Evaluation weight -#define LAMBDA1 0.9 -//#define LAMBDA2 0.3 +// GA parameters +#define POPULATION_SIZE 100 +#define INDIVIDUAL_SIZE 11 +#define SELECTION 0.1 +#define CROSSOVER 0.5 +#define MUTATION 0.3 +#define ELITE_SIZE 0.2 // Module class declaration class DiveHandler : public DiveHandlerBase @@ -171,9 +176,57 @@ class DiveHandler : public DiveHandlerBase } }; + -// class GALearner : public CoeffsLearner -// {}; + class GALearner : public CoeffsLearner + { + private: + // Current reward score + float reward_score; + // Current reward normalization factor + float reward_norm; + + std::list fitnessBuffer; + + class Individual + { + public: + float fitness; + std::bitset hypothesis; + Individual( std::string id): fitness(.0f), hypothesis(id){} + Individual( float f, std::string id): fitness(f), hypothesis(id){} + Individual( unsigned int id): fitness(.0f), hypothesis(id){} + inline bool operator<(const Individual& right) const { return (this->fitness) <= right.fitness; } + }; + + struct cmp + { + bool operator()(const Individual& left, const Individual& right) const + { + return left < right; + } + }; + std::set population; + + float evaluate(Individual i); + Individual rnd_mutate(Individual i); + Individual crossover(Individual mommy, const Individual& daddy); + + // Check for convergence of the algorithm + bool converged(); + + public: + GALearner( DiveHandler* _dhPtr, int _nCoeffs, float _initValue ); + + void evolutionStep(); + + // Update the GA parameters according 
to the obtained rewards + void updateParams(const std::list& rewards); + + // Update coefficients performing a step of the learning algorithm + virtual bool updateCoeffs(); + }; + private: From fd1104ca8036421b4cc429ed172ac139cdf6678c Mon Sep 17 00:00:00 2001 From: Claudio Delli Bovi Date: Mon, 17 Mar 2014 00:38:15 +0100 Subject: [PATCH 15/17] GALearner debug (almost) done --- machineLearning/DiveHandler/DiveHandler.cpp | 887 +++++++++++--------- 1 file changed, 473 insertions(+), 414 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 0f443c5..6aaa6bf 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -46,6 +46,11 @@ bool tooEarly=false; bool estimatedTime=false; bool goalDetected=false; +#ifdef DIVEHANDLER_TRAINING_DEBUG +int n_mutation = 0; +int n_crossover = 0; +#endif + MAKE_MODULE(DiveHandler, SPQR-Modules) // Shortcut to compute the magnitude of a vector @@ -155,12 +160,12 @@ bool DiveHandler::PGLearner::converged() // Check result against variation threshold if ((avg_variation < CONVERGENCE_THRESHOLD) && (std_variation < CONVERGENCE_THRESHOLD)) { - #ifdef DIVEHANDLER_TRAINING +#ifdef DIVEHANDLER_TRAINING SPQR_SUCCESS("PGLearner converged!"); SPQR_SUCCESS("Coefficients values:"); for (unsigned int i = 0; i < coeffs.size(); ++i) SPQR_SUCCESS("\t" << coeffs.at(i)); - #endif +#endif return true; } else @@ -245,9 +250,9 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector* partial_p float DiveHandler::PGLearner::evaluatePerturbation( std::vector R ) { // Dimensions check - assert(R.size() == coeffs.size()); + assert(R.size() == coeffs.size()); - return ( std::abs(diveHandler_ptr->tBAGO - ( R.at(0)*diveHandler_ptr->tBAGOestimate)) ) ; + return ( std::abs(diveHandler_ptr->tBAGO - ( R.at(0)*diveHandler_ptr->tBAGOestimate)) ) ; } @@ -301,310 +306,361 @@ bool DiveHandler::PGLearner::updateCoeffs() { #ifdef DIVEHANDLER_TRAINING - SPQR_INFO( "\nPG algorithm, iteration " << iter_count << "... " ); + SPQR_INFO( "\nPG algorithm, iteration " << iter_count << "... 
" ); #endif if( iter_count == MAX_ITER || converged() ) return false; else - { - // First generate the set of random perturbation for the current coefficients - generatePerturbations(); + { + // First generate the set of random perturbation for the current coefficients + generatePerturbations(); - // For each perturbation, evaluate with the objective function and store the result in a temporary container - std::vector evaluatedPerturbations (perturbationsBuffer.size()); - PGbuffer::const_iterator evaluator; - for(evaluator = perturbationsBuffer.begin(); evaluator != perturbationsBuffer.end(); ++evaluator) - evaluatedPerturbations.push_back( evaluatePerturbation(*evaluator) ); + // For each perturbation, evaluate with the objective function and store the result in a temporary container + std::vector evaluatedPerturbations (perturbationsBuffer.size()); + PGbuffer::const_iterator evaluator; + for(evaluator = perturbationsBuffer.begin(); evaluator != perturbationsBuffer.end(); ++evaluator) + evaluatedPerturbations.push_back( evaluatePerturbation(*evaluator) ); - // Compute the average 'gradient' for the current coefficients - std::vector coeffs_avgGradient(coeffs.size()); + // Compute the average 'gradient' for the current coefficients + std::vector coeffs_avgGradient(coeffs.size()); #ifdef RAND_PERMUTATIONS - // For each coefficient, compute the average score to determine the correspondent 'gradient' entry - PGbuffer::const_iterator current_perturbation = perturbationsBuffer.begin(); - for( unsigned int n = 0; n < coeffs.size(); ++n ) + // For each coefficient, compute the average score to determine the correspondent 'gradient' entry + PGbuffer::const_iterator current_perturbation = perturbationsBuffer.begin(); + for( unsigned int n = 0; n < coeffs.size(); ++n ) + { + std::vector score_plus, score_minus, score_zero; + + // Keep track of the perturbation type and store each score in a container + for( unsigned int i = 0; i < evaluatedPerturbations.size(); ++i ) { - std::vector score_plus, score_minus, score_zero; + if ( ((*current_perturbation).at(n) - coeffs.at(n)) > 0 ) + score_plus.push_back(evaluatedPerturbations.at(i)); + else if ( ((*current_perturbation).at(n) - coeffs.at(n)) < 0 ) + score_minus.push_back(evaluatedPerturbations.at(i)); + else + score_zero.push_back(evaluatedPerturbations.at(i)); - // Keep track of the perturbation type and store each score in a container - for( unsigned int i = 0; i < evaluatedPerturbations.size(); ++i ) - { - if ( ((*current_perturbation).at(n) - coeffs.at(n)) > 0 ) - score_plus.push_back(evaluatedPerturbations.at(i)); - else if ( ((*current_perturbation).at(n) - coeffs.at(n)) < 0 ) - score_minus.push_back(evaluatedPerturbations.at(i)); - else - score_zero.push_back(evaluatedPerturbations.at(i)); - - ++current_perturbation; - } + ++current_perturbation; + } - // Sum up all positive perturbation scores - float avg_plus = 0.0; - for (unsigned int j = 0; j < score_plus.size(); ++j) - avg_plus += score_plus.at(j) / score_plus.size(); + // Sum up all positive perturbation scores + float avg_plus = 0.0; + for (unsigned int j = 0; j < score_plus.size(); ++j) + avg_plus += score_plus.at(j) / score_plus.size(); - // Sum up all negative perturbation scores - float avg_minus = 0.0; - for (unsigned int j = 0; j < score_minus.size(); ++j) - avg_minus += score_minus.at(j) / score_minus.size(); + // Sum up all negative perturbation scores + float avg_minus = 0.0; + for (unsigned int j = 0; j < score_minus.size(); ++j) + avg_minus += score_minus.at(j) / 
score_minus.size(); - // Sum up all null perturbation scores - float avg_zero = 0.0; - for (unsigned int j = 0; j < score_zero.size(); ++j) - avg_zero += score_zero.at(j) / score_zero.size(); + // Sum up all null perturbation scores + float avg_zero = 0.0; + for (unsigned int j = 0; j < score_zero.size(); ++j) + avg_zero += score_zero.at(j) / score_zero.size(); - if( avg_zero <= avg_plus && avg_zero<= avg_minus ) - coeffs_avgGradient.at(n) = 0.0; - else - coeffs_avgGradient.at(n) = avg_plus - avg_minus; - } + if( avg_zero <= avg_plus && avg_zero<= avg_minus ) + coeffs_avgGradient.at(n) = 0.0; + else + coeffs_avgGradient.at(n) = avg_plus - avg_minus; + } #else - // For each coefficient, compute different averages to determine the correspondent 'gradient' entry - for( unsigned int n = 0; n < coeffs.size(); ++n ) + // For each coefficient, compute different averages to determine the correspondent 'gradient' entry + for( unsigned int n = 0; n < coeffs.size(); ++n ) + { + int avg_selector = 0; + float avg_minus = 0.0 , avg_zero = 0.0, avg_plus = 0.0; + for( unsigned int i = 0; i < evaluatedPerturbations.size(); i = i + pow(3,n) ) { - int avg_selector = 0; - float avg_minus = 0.0 , avg_zero = 0.0, avg_plus = 0.0; - for( unsigned int i = 0; i < evaluatedPerturbations.size(); i = i + pow(3,n) ) + for( unsigned int k = i; k < i + pow(3,n); ++k ) { - for( unsigned int k = i; k < i + pow(3,n); ++k ) - { - float evaluation = evaluatedPerturbations.at(k) / (evaluatedPerturbations.size()/3); + float evaluation = evaluatedPerturbations.at(k) / (evaluatedPerturbations.size()/3); - if( (avg_selector)%3 == 0 ) avg_minus += evaluation; - if( (avg_selector)%3 == 1 ) avg_zero += evaluation; - if( (avg_selector)%3 == 2 ) avg_plus += evaluation; - } - ++avg_selector; + if( (avg_selector)%3 == 0 ) avg_minus += evaluation; + if( (avg_selector)%3 == 1 ) avg_zero += evaluation; + if( (avg_selector)%3 == 2 ) avg_plus += evaluation; } - // evaluate An - if( avg_zero <= avg_plus && avg_zero<= avg_minus ) - coeffs_avgGradient.at(coeffs.size() - (n +1)) = 0.0; - else - coeffs_avgGradient.at(coeffs.size() - (n +1)) = avg_plus - avg_minus; + ++avg_selector; } + // evaluate An + if( avg_zero <= avg_plus && avg_zero<= avg_minus ) + coeffs_avgGradient.at(coeffs.size() - (n +1)) = 0.0; + else + coeffs_avgGradient.at(coeffs.size() - (n +1)) = avg_plus - avg_minus; + } #endif - // Avoid 'nan' when the gradient is zeroed - float normalization = 1.0; - if (magnitude(coeffs_avgGradient) != 0) - normalization = magnitude(coeffs_avgGradient); + // Avoid 'nan' when the gradient is zeroed + float normalization = 1.0; + if (magnitude(coeffs_avgGradient) != 0) + normalization = magnitude(coeffs_avgGradient); #ifdef DIVEHANDLER_TRAINING - SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization - << ", " << coeffs_avgGradient.at(1)/normalization << " ]"); + SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization + << ", " << coeffs_avgGradient.at(1)/normalization << " ]"); #endif - // Weight new gradient estimate and previous one according to the reward score - std::vector newGradient (coeffsGradient.size()); - for( unsigned int j=0; j newGradient (coeffsGradient.size()); + for( unsigned int j=0; j BUFFER_DIM) - coeffsBuffer.resize(BUFFER_DIM); + // Update coefficients history + coeffsBuffer.push_front(coeffs); + // Crop buffer + if (coeffsBuffer.size() > BUFFER_DIM) + coeffsBuffer.resize(BUFFER_DIM); - // Update the coefficients following the gradient direction - for( unsigned int i=0; 
i::iterator i = population.begin(); + for(; i != population.end(); ++i) + SPQR_INFO("Individual, encoding: " << (*i).hypothesis.to_string() << ", value: " << (((float)(*i).hypothesis.to_ulong())/1000)); - srand(time(NULL)); - for(unsigned int i=0; i< POPULATION_SIZE; ++i) - population.insert( Individual( (rand()%600) + 500) ); +#endif } float DiveHandler::GALearner::evaluate(Individual i) { - return ( std::abs(diveHandler_ptr->tBAGO - ( i.hypothesis.to_ulong()*diveHandler_ptr->tBAGOestimate)) ); + return ( std::abs(diveHandler_ptr->tBAGO - ( (((float)i.hypothesis.to_ulong())/1000)*diveHandler_ptr->tBAGOestimate)) ); } DiveHandler::GALearner::Individual DiveHandler::GALearner::rnd_mutate(Individual i) { - srand(time(NULL)); - unsigned int n_flips = rand()%3+1; - for(unsigned int j=0; j< n_flips; ++j ) - (i.hypothesis).flip(rand()%(INDIVIDUAL_SIZE-1)); +#ifdef DIVEHANDLER_TRAINING_DEBUG + SPQR_INFO("Individual " << (((float)i.hypothesis.to_ulong())/1000) << " mutates into: "); + ++n_mutation; +#endif - return i; + srand(time(NULL)); + unsigned int n_flips = rand()%3+1; + for(unsigned int j=0; j< n_flips; ++j ) + (i.hypothesis).flip(rand()%(INDIVIDUAL_SIZE-1)); + +#ifdef DIVEHANDLER_TRAINING_DEBUG + SPQR_INFO(((float)i.hypothesis.to_ulong())/1000); +#endif + + return i; } DiveHandler::GALearner::Individual DiveHandler::GALearner::crossover(Individual mommy, const Individual& daddy) { - srand(time(NULL)); - int crossover_point = rand()%INDIVIDUAL_SIZE; - for(unsigned int i = crossover_point+1; i::const_iterator i = fitnessBuffer.begin(); - std::list::const_iterator j = fitnessBuffer.begin(); ++j; - while (j != fitnessBuffer.end()) - { - avg_variation += ( (*i) - (*j) )/fitnessBuffer.size(); - ++i; ++j; - } - - // Compute variations standard deviation - float std_variation = .0f; - // Iterate over the whole buffer and compute deltas from step i-1 to i - std::list::const_iterator k = fitnessBuffer.begin(); - std::list::const_iterator t = fitnessBuffer.begin(); ++t; - while (t != fitnessBuffer.end()) - { - std_variation += ( pow((*k)-(*t) - avg_variation, 2) ) / fitnessBuffer.size(); - ++k; ++t; - } - std_variation = sqrt(std_variation); - - // Check result against variation threshold - if ((avg_variation < CONVERGENCE_THRESHOLD) && (std_variation < CONVERGENCE_THRESHOLD)) - { - #ifdef DIVEHANDLER_TRAINING - SPQR_SUCCESS("GALearner converged!"); - SPQR_SUCCESS("Coefficients values:"); - for (unsigned int i = 0; i < coeffs.size(); ++i) - SPQR_SUCCESS("\t" << coeffs.at(i)); - #endif - return true; - } - else - return false; - } + // Skip convergence check if the buffer is not full + if (fitnessBuffer.size() < BUFFER_DIM) + return false; + // Average every coefficients variation across the buffer + else + { + // Compute variations mean + float avg_variation = .0f; + // Iterate over the whole buffer and compute deltas from step i-1 to i + std::list::const_iterator i = fitnessBuffer.begin(); + std::list::const_iterator j = fitnessBuffer.begin(); ++j; + while (j != fitnessBuffer.end()) + { + avg_variation += ( (*i) - (*j) )/fitnessBuffer.size(); + ++i; ++j; + } + + // Compute variations standard deviation + float std_variation = .0f; + // Iterate over the whole buffer and compute deltas from step i-1 to i + std::list::const_iterator k = fitnessBuffer.begin(); + std::list::const_iterator t = fitnessBuffer.begin(); ++t; + while (t != fitnessBuffer.end()) + { + std_variation += ( pow((*k)-(*t) - avg_variation, 2) ) / fitnessBuffer.size(); + ++k; ++t; + } + std_variation = sqrt(std_variation); + + // Check 
result against variation threshold + if ((avg_variation < CONVERGENCE_THRESHOLD) && (std_variation < CONVERGENCE_THRESHOLD)) + { +#ifdef DIVEHANDLER_TRAINING + SPQR_SUCCESS("GALearner converged!"); + SPQR_SUCCESS("Coefficients values:"); + for (unsigned int i = 0; i < coeffs.size(); ++i) + SPQR_SUCCESS("\t" << coeffs.at(i)); +#endif + return true; + } + else + return false; + } } void DiveHandler::GALearner::evolutionStep() { - std::set previousPopulation(population); - population.clear(); - - int sel = 0; - std::set::iterator selector = previousPopulation.begin(); - std::set::iterator partner = previousPopulation.end(); - for(; selector != previousPopulation.end(); ++selector, ++sel) - { - if(sel < round(getParam("selection")*POPULATION_SIZE)) - population.insert(Individual(evaluate(*selector), (*selector).hypothesis.to_string())); - else - { - srand(time(NULL)); - - if( rand()/RAND_MAX < getParam("mutation") ) - population.insert( Individual(evaluate(rnd_mutate( *selector )), (rnd_mutate( *selector )).hypothesis.to_string()) ); - else if( rand()/RAND_MAX < sqrt(getParam("crossover")) ) - { - if(partner == previousPopulation.end()) - partner = selector; - else - { - population.insert(Individual(evaluate(crossover( *selector, *partner )), (crossover( *selector, *partner )).hypothesis.to_string())); - population.insert(Individual(evaluate(crossover( *partner, *selector )), (crossover( *partner, *selector )).hypothesis.to_string())); - partner = previousPopulation.end(); - } - } - else - population.insert(Individual(evaluate( *selector ), ( *selector ).hypothesis.to_string())); - } - } +#ifdef DIVEHANDLER_DEBUG + SPQR_INFO("Population before:"); + std::set::iterator i = population.begin(); + for(; i != population.end(); ++i) + SPQR_INFO("Individual, value: " << (((float)(*i).hypothesis.to_ulong())/1000) << ", fitness: " << ((*i).fitness)); + +#endif + std::set previousPopulation(population); + population.clear(); + + int sel = 0; + std::set::iterator selector = previousPopulation.begin(); + std::set::iterator partner = previousPopulation.end(); + for(; selector != previousPopulation.end(); ++selector, ++sel) + { + if(sel < round(getParam("selection")*POPULATION_SIZE)) + population.insert(Individual(evaluate(*selector), (*selector).hypothesis.to_string())); + else + { + if( ((float)rand())/RAND_MAX < getParam("mutation") ) + { + Individual mutated (rnd_mutate( *selector )); + population.insert( Individual(evaluate(mutated), (mutated).hypothesis.to_string()) ); + } + else if( ((float)rand())/RAND_MAX < sqrt(getParam("crossover")) ) + { + if(partner == previousPopulation.end()) + partner = selector; + else + { + Individual first_child (crossover( *selector, *partner )); + Individual second_child (crossover( *partner, *selector )); + population.insert(Individual(evaluate(first_child), first_child.hypothesis.to_string())); + population.insert(Individual(evaluate(second_child), second_child.hypothesis.to_string())); + partner = previousPopulation.end(); + } + } + else + population.insert(Individual(evaluate( *selector ), ( *selector ).hypothesis.to_string())); + } + } + +#ifdef DIVEHANDLER_TRAINING_DEBUG + SPQR_INFO("Number of mutations: " << n_mutation); + SPQR_INFO("Number of crossover: " << n_crossover); + n_mutation = 0; n_crossover = 0; + + SPQR_INFO("New population:"); + std::set::iterator i = population.begin(); + for(; i != population.end(); ++i) + SPQR_INFO("Individual, value: " << (((float)(*i).hypothesis.to_ulong())/1000) << ", fitness: " << ((*i).fitness)); + +#endif } void 
DiveHandler::GALearner::updateParams(const std::list& rewards) { - // Re-initialize reward scores - reward_score = 0.0; - if (!rewards.empty()) reward_norm = 0.0; - int discount_exp = 0; - int positives = 0; - - std::list::const_iterator i = rewards.begin(); - while (i != rewards.end()) - { - // Counting positives - if (*i == POSITIVE_REWARD) - ++positives; - - // Computing discounted rewards - reward_score += (*i) * pow(GAMMA, discount_exp); - reward_norm += fabs((*i) * pow(GAMMA, discount_exp)); - ++i; ++discount_exp; - } + // Re-initialize reward scores + reward_score = 0.0; + if (!rewards.empty()) reward_norm = 0.0; + int discount_exp = 0; + int positives = 0; + + std::list::const_iterator i = rewards.begin(); + while (i != rewards.end()) + { + // Counting positives + if (*i == POSITIVE_REWARD) + ++positives; + + // Computing discounted rewards + reward_score += (*i) * pow(GAMMA, discount_exp); + reward_norm += fabs((*i) * pow(GAMMA, discount_exp)); + ++i; ++discount_exp; + } #ifdef DIVEHANDLER_TRAINING_DEBUG - SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); - SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); - SPQR_INFO("Reward total score: " << reward_score); + SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); + SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); + SPQR_INFO("Reward total score: " << reward_score); #endif - //Adjusting GA parameters according to the obtained score - if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("mutation") >= 1.0) - setParam("mutation", 1.0); - else - setParam("mutation", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("mutation")); + //Adjusting GA parameters according to the obtained score + if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("mutation") >= 1.0) + setParam("mutation", 1.0); + else + setParam("mutation", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("mutation")); - if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover") >= 1.0) - setParam("crossover", 1.0); - else - setParam("crossover", exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover")); + if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover") >= 1.0) + setParam("crossover", 1.0); + else + setParam("crossover", exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover")); - if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("elite") >= 1.0) - setParam("elite", 1.0); - else - setParam("elite", exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("elite")); + if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("elite") >= 1.0) + setParam("elite", 1.0); + else + setParam("elite", exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("elite")); #ifdef DIVEHANDLER_TRAINING - SPQR_INFO( "Mutation rate value changed to: " << getParam("mutation") << " according to the obtained rewards. "); - SPQR_INFO( "Crossover rate value changed to: " << getParam("crossover") << " according to the obtained rewards. "); - SPQR_INFO( "Elite percentage changed to: " << getParam("elite") << " according to the obtained rewards. "); + SPQR_INFO( "Mutation rate value changed to: " << getParam("mutation") << " according to the obtained rewards. "); + SPQR_INFO( "Crossover rate value changed to: " << getParam("crossover") << " according to the obtained rewards. 
"); + SPQR_INFO( "Elite percentage changed to: " << getParam("elite") << " according to the obtained rewards. "); #endif } @@ -612,39 +668,39 @@ void DiveHandler::GALearner::updateParams(const std::list& rewards) bool DiveHandler::GALearner::updateCoeffs() { #ifdef DIVEHANDLER_TRAINING - SPQR_INFO( "\nGA algorithm, iteration " << iter_count << "... " ); + SPQR_INFO( "\nGA algorithm, iteration " << iter_count << "... " ); #endif - if( iter_count == MAX_ITER || converged() ) - return false; - else - { - evolutionStep(); + if( iter_count == MAX_ITER || converged() ) + return false; + else + { + evolutionStep(); - float avg_fitness=.0f; - float avg_coeff=.0f; - std::set::iterator evaluator = population.begin(); - for( unsigned int sel=0; selfitness / round(getParam("elite")*POPULATION_SIZE); - avg_coeff += (evaluator->hypothesis.to_ulong()) / (1000*round(getParam("elite")*POPULATION_SIZE)); - } + float avg_fitness=.0f; + float avg_coeff=.0f; + std::set::iterator evaluator = population.begin(); + for( unsigned int sel=0; selfitness / round(getParam("elite")*POPULATION_SIZE); + avg_coeff += ((float)evaluator->hypothesis.to_ulong()) / (1000*round(getParam("elite")*POPULATION_SIZE)); + } - fitnessBuffer.push_front(avg_fitness); + fitnessBuffer.push_front(avg_fitness); - // Crop buffer - if (fitnessBuffer.size() > BUFFER_DIM) - fitnessBuffer.resize(BUFFER_DIM); + // Crop buffer + if (fitnessBuffer.size() > BUFFER_DIM) + fitnessBuffer.resize(BUFFER_DIM); - coeffs.at(0) = avg_coeff; + coeffs.at(0) = avg_coeff; #ifdef DIVEHANDLER_TRAINING - SPQR_INFO("New coefficients: [ " << coeffs.at(0) << " ]"); + SPQR_INFO("New coefficients: [ " << coeffs.at(0) << " ]"); #endif - ++iter_count; + ++iter_count; - return true; - } + return true; + } } @@ -656,15 +712,15 @@ bool DiveHandler::GALearner::updateCoeffs() */ DiveHandler::DiveHandler(): diveType(DiveHandle::none), state(static_cast(SPQR::GOALIE_LEARNING_STATE)), - learner(new PGLearner(this, 2, EPSILON, T, 1.0, false)), - opponentScore(0), tBall2Goal(-1), tDive(0.0), tBackInPose(0.0), tBAGO(0), tBAGOestimate(0), - ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) + learner(new GALearner(this, 1, 1.0)), + opponentScore(0), tBall2Goal(-1), tDive(0.0), tBackInPose(0.0), tBAGO(0), tBAGOestimate(0), + ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) { #ifdef DIVEHANDLER_TRAINING - SPQR_INFO("Initializing PGlearner..."); + SPQR_INFO("Initializing GAlearner..."); std::vector coeffs = learner->getCoeffs(); - SPQR_INFO("Coefficients: alpha 1 = " << coeffs.at(0) << ", alpha 2 = " << coeffs.at(1)); - SPQR_INFO("Parameters: epsilon = " << learner->getParam("epsilon") << ", T = " << learner->getParam("T")); + SPQR_INFO("Coefficient alpha = " << coeffs.at(0)); + // SPQR_INFO("Parameters: epsilon = " << learner->getParam("epsilon") << ", T = " << learner->getParam("T")); #endif } @@ -707,25 +763,25 @@ void DiveHandler::estimateBallProjection() // Devising the type of dive to be performed - if( yIntercept > ( SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept < SPQR::GOALIE_FAR_LIMIT_Y ) - // Close intercept on the left - diveType = DiveHandle::lcloseDive; - else if( yIntercept > SPQR::GOALIE_FAR_LIMIT_Y ) - // Far intercept on the left - diveType = DiveHandle::lDive; - else if( yIntercept < (-SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept > -SPQR::GOALIE_FAR_LIMIT_Y ) - // Close intercept on the right - diveType = DiveHandle::rcloseDive; - else if( yIntercept < -SPQR::GOALIE_FAR_LIMIT_Y ) - // 
Far intercept on the right - diveType = DiveHandle::rDive; - - else if( fabs(yIntercept) < SPQR::GOALIE_CLOSE_LIMIT_Y/2) - diveType = DiveHandle::stopBall; - else - // Any other case: no dive at all - diveType = DiveHandle::none; - } + if( yIntercept > ( SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept < SPQR::GOALIE_FAR_LIMIT_Y ) + // Close intercept on the left + diveType = DiveHandle::lcloseDive; + else if( yIntercept > SPQR::GOALIE_FAR_LIMIT_Y ) + // Far intercept on the left + diveType = DiveHandle::lDive; + else if( yIntercept < (-SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept > -SPQR::GOALIE_FAR_LIMIT_Y ) + // Close intercept on the right + diveType = DiveHandle::rcloseDive; + else if( yIntercept < -SPQR::GOALIE_FAR_LIMIT_Y ) + // Far intercept on the right + diveType = DiveHandle::rDive; + + else if( fabs(yIntercept) < SPQR::GOALIE_CLOSE_LIMIT_Y/2) + diveType = DiveHandle::stopBall; + else + // Any other case: no dive at all + diveType = DiveHandle::none; + } // Using the appropriate estimate for the dive time if (diveType == DiveHandle::lDive || diveType == DiveHandle::rDive ) @@ -741,7 +797,7 @@ void DiveHandler::estimateBallProjection() ballProjectionIntercept = yIntercept; // Estimated distance from the ball - distanceBall2Goal = theBallModel.estimate.position.abs(); + distanceBall2Goal = theBallModel.estimate.position.abs(); } /* @@ -752,13 +808,13 @@ void DiveHandler::estimateBallProjection() void DiveHandler::estimateDiveTimes() { // Check whether the ball is actually moving toward the goal - if ( (theBallModel.estimate.velocity.abs() != 0.0) && - (theBallModel.estimate.velocity.x < 0.0) ) + if ( (theBallModel.estimate.velocity.abs() != 0.0) && + (theBallModel.estimate.velocity.x < 0.0) ) // Use a constant velocity approximation to the estimate the time interval - tBall2Goal = 1000.0 * ( distanceBall2Goal / theBallModel.estimate.velocity.abs() ); + tBall2Goal = 1000.0 * ( distanceBall2Goal / theBallModel.estimate.velocity.abs() ); else // Otherwise, set the parameter to a meaningless value - tBall2Goal = -1.0; + tBall2Goal = -1.0; // Using the appropriate estimates for recover and reposition times float tRecover = 0.0; @@ -803,144 +859,147 @@ inline float DiveHandler::computeDiveAndRecoverTime(float alpha1, float alpha2) */ void DiveHandler::update(DiveHandle& diveHandle) { + if ( time(NULL) % 6 == 0 ) + srand(time(NULL)); + // Check you're actually the goalie... 
if (theRobotInfo.number == 1) - { - // Compute the ball projection estimate + { + // Compute the ball projection estimate estimateBallProjection(); // Update the DiveHandle diveHandle.ballProjectionEstimate = ballProjectionIntercept; #ifdef DIVEHANDLER_TRAINING - if( timer.getTimeSince(timer.fallen) > 5000 && timer.getTimeSince(timer.fallen) < 5040 && timer.fallen != 0) - SPQR_SUCCESS("TooEarly time window START..."); + if( timer.getTimeSince(timer.fallen) > 5000 && timer.getTimeSince(timer.fallen) < 5040 && timer.fallen != 0) + SPQR_SUCCESS("TooEarly time window START..."); - if( timer.getTimeSince(timer.fallen) > 9961 && timer.getTimeSince(timer.fallen) < 9999 && timer.fallen != 0) - SPQR_SUCCESS("TooEarly time window END."); + if( timer.getTimeSince(timer.fallen) > 9961 && timer.getTimeSince(timer.fallen) < 9999 && timer.fallen != 0) + SPQR_SUCCESS("TooEarly time window END."); #endif - if(opponentScore != (int)theOpponentTeamInfo.score && !goalDetected) - { - if( timer.getTimeSince(timer.fallen) > 5000 && timer.getTimeSince(timer.fallen) < 10000 && - (unsigned int) timer.fallen != 0) - { + if(opponentScore != (int)theOpponentTeamInfo.score && !goalDetected) + { + if( timer.getTimeSince(timer.fallen) > 5000 && timer.getTimeSince(timer.fallen) < 10000 && + (unsigned int) timer.fallen != 0) + { #ifdef DIVEHANDLER_TRAINING - SPQR_FAILURE("too FAST dude!"); + SPQR_FAILURE("too FAST dude!"); #endif - tBAGO += 3000; - } - else - { -// if(goalTimer.setTimer) - { + tBAGO += 3000; + } + else + { + // if(goalTimer.setTimer) + { #ifdef DIVEHANDLER_TRAINING - SPQR_FAILURE("too SLOW dude!"); + SPQR_FAILURE("too SLOW dude!"); #endif - tBAGO = goalTimer.getTimeSince(goalTimer.start) -1500; - } - } - estimatedTime=true; - goalDetected=true; - } - - if(theGameInfo.state == STATE_SET) - { - tBAGOestimate=0; - dBAGOestimate=0; - sampledVelocities.clear(); - goalTimer.reset(); - } - - // Check whether the ball is close enough - if( (distanceBall2Goal < SPQR::FIELD_DIMENSION_X) && (fabs(ballProjectionIntercept) < SPQR::FIELD_DIMENSION_Y) ) + tBAGO = goalTimer.getTimeSince(goalTimer.start) -1500; + } + } + estimatedTime=true; + goalDetected=true; + } + + if(theGameInfo.state == STATE_SET) + { + tBAGOestimate=0; + dBAGOestimate=0; + sampledVelocities.clear(); + goalTimer.reset(); + } + + // Check whether the ball is close enough + if( (distanceBall2Goal < SPQR::FIELD_DIMENSION_X) && (fabs(ballProjectionIntercept) < SPQR::FIELD_DIMENSION_Y) ) { // Estimate all temporal parameters estimateDiveTimes(); - if(state != notLearning) - { - // if not in playing state - if(theGameInfo.state != STATE_PLAYING) - timer.reset(); - else - { -// if(goalTimer.setTimer) -// SPQR_INFO("time: "<< goalTimer.getTimeSince(goalTimer.start)); - - // if the ball is moving enough fast then set the timer - if( (theBallModel.estimate.velocity.abs() > SPQR::GOALIE_MOVING_BALL_MIN_VELOCITY && - theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) < 1000) ) - { - sampledVelocities.push_back( theBallModel.estimate.velocity.abs() ); - if(!timer.setTimer) - { - timer.set(clock()); - goalTimer.set(clock()); - dBAGOestimate=distanceBall2Goal; -// tBAGOestimate=tBall2Goal; + if(state != notLearning) + { + // if not in playing state + if(theGameInfo.state != STATE_PLAYING) + timer.reset(); + else + { + // if(goalTimer.setTimer) + // SPQR_INFO("time: "<< goalTimer.getTimeSince(goalTimer.start)); + + // if the ball is moving enough fast then set the timer + if( (theBallModel.estimate.velocity.abs() > SPQR::GOALIE_MOVING_BALL_MIN_VELOCITY && + 
theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) < 1000) ) + { + sampledVelocities.push_back( theBallModel.estimate.velocity.abs() ); + if(!timer.setTimer) + { + timer.set(clock()); + goalTimer.set(clock()); + dBAGOestimate=distanceBall2Goal; + // tBAGOestimate=tBall2Goal; #ifdef DIVEHANDLER_TRAINING - std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; - std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set goal Timer!" << "\033[0m" << std::endl; + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl; + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set goal Timer!" << "\033[0m" << std::endl; #endif - } - } - // else reset it... - if( (theBallModel.estimate.velocity.abs() < SPQR::GOALIE_MOVING_BALL_MIN_VELOCITY || - theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) > 4000) ) - { - if(timer.setTimer) - { - timer.reset(); + } + } + // else reset it... + if( (theBallModel.estimate.velocity.abs() < SPQR::GOALIE_MOVING_BALL_MIN_VELOCITY || + theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) > 4000) ) + { + if(timer.setTimer) + { + timer.reset(); #ifdef DIVEHANDLER_TRAINING - std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset Timer!" << "\033[0m" << std::endl; + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset Timer!" << "\033[0m" << std::endl; #endif - } - if(goalTimer.setTimer) - { - goalTimer.reset(); + } + if(goalTimer.setTimer) + { + goalTimer.reset(); #ifdef DIVEHANDLER_TRAINING - std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset goal Timer!" << "\033[0m" << std::endl; + std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset goal Timer!" << "\033[0m" << std::endl; #endif - tBAGOestimate=0; - dBAGOestimate=0; - sampledVelocities.clear(); - } - } - - // if the goalie succeeded - if(ownScore != (int)theOwnTeamInfo.score && !estimatedTime) - { + tBAGOestimate=0; + dBAGOestimate=0; + sampledVelocities.clear(); + } + } + + // if the goalie succeeded + if(ownScore != (int)theOwnTeamInfo.score && !estimatedTime) + { #ifdef DIVEHANDLER_TRAINING - SPQR_SUCCESS("SUPER!"); + SPQR_SUCCESS("SUPER!"); #endif - tBAGO -= 200; - estimatedTime=true; - } - - // if the goalie dives - if( (int)theFallDownState.state == (int)FallDownState::fallen ) - { - timer.fallen=clock(); - tBAGO = timer.getTimeSince(timer.start); - } - } - } - - if(estimatedTime) - { - float velocityMean=0; - float velocityMax=0; - std::list::const_iterator it=sampledVelocities.begin(); - for(; it != sampledVelocities.end(); ++it) - { - if((*it) > velocityMax) velocityMax=(*it); - velocityMean += (*it) /sampledVelocities.size(); - } - - tBAGOestimate = 1000*(dBAGOestimate / (.75f*velocityMax)); - SPQR_INFO("distance: " << dBAGOestimate); - SPQR_INFO("velocity: " << (.75f*velocityMax)/1000); - } + tBAGO -= 200; + estimatedTime=true; + } + + // if the goalie dives + if( (int)theFallDownState.state == (int)FallDownState::fallen ) + { + timer.fallen=clock(); + tBAGO = timer.getTimeSince(timer.start); + } + } + } + + if(estimatedTime) + { + float velocityMean=0; + float velocityMax=0; + std::list::const_iterator it=sampledVelocities.begin(); + for(; it != sampledVelocities.end(); ++it) + { + if((*it) > velocityMax) velocityMax=(*it); + velocityMean += (*it) /sampledVelocities.size(); + } + + tBAGOestimate = 1000*(dBAGOestimate / (.75f*velocityMax)); + SPQR_INFO("distance: " << dBAGOestimate); + SPQR_INFO("velocity: " << (.75f*velocityMax)/1000); + } #ifdef DIVEHANDLER_DEBUG @@ -971,7 +1030,7 @@ void DiveHandler::update(DiveHandle& 
diveHandle) else if( state == waitReward ) { // The opponent team scores: the goalie failed and gets a negative reward - if(goalDetected && estimatedTime) + if(goalDetected && estimatedTime) { // The learner obtains a negative reward rewardHistory.push_front(NEGATIVE_REWARD); @@ -983,20 +1042,20 @@ void DiveHandler::update(DiveHandle& diveHandle) opponentScore = (int)theOpponentTeamInfo.score; #ifdef DIVEHANDLER_TRAINING - SPQR_FAILURE("The opponent team scored! Negative reward for the learner."); + SPQR_FAILURE("The opponent team scored! Negative reward for the learner."); #endif // A reward has been received: re-enable learning state = learning; - // Clear the pending reward + // Clear the pending reward if(!diveHandle.rewardAck) diveHandle.rewardAck = true; - goalDetected=false; - estimatedTime=false; - stamp =true; + goalDetected=false; + estimatedTime=false; + stamp =true; } // The own team scores: user-guided move to provide the goalie a positive reward - else if(ownScore != (int)theOwnTeamInfo.score && estimatedTime) + else if(ownScore != (int)theOwnTeamInfo.score && estimatedTime) { // The learner obtains a positive reward rewardHistory.push_front(POSITIVE_REWARD); @@ -1016,8 +1075,8 @@ void DiveHandler::update(DiveHandle& diveHandle) if(!diveHandle.rewardAck) diveHandle.rewardAck = true; - estimatedTime=false; - stamp=true; + estimatedTime=false; + stamp=true; } } @@ -1025,28 +1084,28 @@ void DiveHandler::update(DiveHandle& diveHandle) if( state == learning ) learner->updateParams(rewardHistory); - // Compute the dive time using the current coefficients as T = alpha2 * (alpha1*T_PAPO - T_dive) - float diveTime = ( (learner->getCoeffs()).at(0) * tBall2Goal ); + // Compute the dive time using the current coefficients as T = alpha2 * (alpha1*T_PAPO - T_dive) + float diveTime = ( (learner->getCoeffs()).at(0) * tBall2Goal ); #ifdef DIVEHANDLER_TRAINING - if(stamp) - { - SPQR_INFO("BAGO: " << tBAGO ); - SPQR_INFO("BAGO estimate: " << tBAGOestimate ); - SPQR_ERR("BAGO error: "<< std::abs(tBAGO - tBAGOestimate) ); - stamp = false; - } + if(stamp) + { + SPQR_INFO("BAGO: " << tBAGO ); + SPQR_INFO("BAGO estimate: " << tBAGOestimate ); + SPQR_ERR("BAGO error: "<< std::abs(tBAGO - tBAGOestimate) ); + stamp = false; + } #endif #ifdef DIVEHANDLER_DEBUG SPQR_INFO( "Estimated overall time to dive and recover position: " << - computeDiveAndRecoverTime( (learner->getCoeffs()).at(0), (learner->getCoeffs()).at(1) ) ); + computeDiveAndRecoverTime( (learner->getCoeffs()).at(0), (learner->getCoeffs()).at(1) ) ); SPQR_INFO("Suggested dive in " << diveTime << " ms. 
"); #endif // Update the DiveHandle if (diveTime > 0.0) - diveHandle.diveTime = diveTime -tDive; + diveHandle.diveTime = diveTime -tDive; else diveHandle.diveTime = -1.0; } @@ -1054,8 +1113,8 @@ void DiveHandler::update(DiveHandle& diveHandle) else { diveHandle.diveTime = -1; - diveHandle.diveType = diveType; - timer.reset(); + diveHandle.diveType = diveType; + timer.reset(); } } } From bf7f68ddbebc62b57b7eb0667e1d206aeb1fd3a4 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Wed, 19 Mar 2014 23:31:47 +0100 Subject: [PATCH 16/17] GA tuning --- machineLearning/DiveHandler/DiveHandler.cpp | 45 ++++++++++++--------- machineLearning/DiveHandler/DiveHandler.h | 4 +- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 6aaa6bf..678e65c 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -23,8 +23,8 @@ // Uncomment to have debug information //#define DIVEHANDLER_DEBUG -#define DIVEHANDLER_TRAINING_DEBUG -#define DIVEHANDLER_TRAINING +//#define DIVEHANDLER_TRAINING_DEBUG +//#define DIVEHANDLER_TRAINING //#define RAND_PERMUTATIONS #define NEGATIVE_REWARD -1.0 @@ -46,7 +46,7 @@ bool tooEarly=false; bool estimatedTime=false; bool goalDetected=false; -#ifdef DIVEHANDLER_TRAINING_DEBUG +#ifdef DIVEHANDLER_TRAINING int n_mutation = 0; int n_crossover = 0; #endif @@ -451,7 +451,7 @@ DiveHandler::GALearner::GALearner( DiveHandler* _dhPtr, int _nCoeffs, float _ini srand(time(NULL)); for(unsigned int i=0; i< POPULATION_SIZE; ++i) - population.insert( Individual( (rand()%600) + 500) ); + population.insert( Individual( (rand()%600) + 600) ); #ifdef DIVEHANDLER_DEBUG std::set::iterator i = population.begin(); @@ -471,13 +471,16 @@ DiveHandler::GALearner::Individual DiveHandler::GALearner::rnd_mutate(Individual { #ifdef DIVEHANDLER_TRAINING_DEBUG SPQR_INFO("Individual " << (((float)i.hypothesis.to_ulong())/1000) << " mutates into: "); - ++n_mutation; #endif - srand(time(NULL)); +#ifdef DIVEHANDLER_TRAINING + ++n_mutation; +#endif + +// srand(time(NULL)); unsigned int n_flips = rand()%3+1; for(unsigned int j=0; j< n_flips; ++j ) - (i.hypothesis).flip(rand()%(INDIVIDUAL_SIZE-1)); + (i.hypothesis).flip(rand()%(INDIVIDUAL_SIZE-7) + 2); #ifdef DIVEHANDLER_TRAINING_DEBUG SPQR_INFO(((float)i.hypothesis.to_ulong())/1000); @@ -489,12 +492,15 @@ DiveHandler::GALearner::Individual DiveHandler::GALearner::rnd_mutate(Individual DiveHandler::GALearner::Individual DiveHandler::GALearner::crossover(Individual mommy, const Individual& daddy) { #ifdef DIVEHANDLER_TRAINING_DEBUG - ++n_crossover; SPQR_INFO("Couple " << ((float)mommy.hypothesis.to_ulong())/1000 << " and " << ((float)daddy.hypothesis.to_ulong())/1000); #endif +#ifdef DIVEHANDLER_TRAINING + ++n_crossover; +#endif + // srand(time(NULL)); - int crossover_point = rand() % (INDIVIDUAL_SIZE-5) +2; + int crossover_point = rand() % (INDIVIDUAL_SIZE-7) +2; #ifdef DIVEHANDLER_TRAINING_DEBUG SPQR_INFO("Crossover point: " << crossover_point); @@ -600,10 +606,13 @@ void DiveHandler::GALearner::evolutionStep() } } +#ifdef DIVEHANDLER_TRAINING + SPQR_INFO("Number of mutations: " << n_mutation); + SPQR_INFO("Number of crossover: " << n_crossover); + n_mutation = 0; n_crossover = 0; +#endif + #ifdef DIVEHANDLER_TRAINING_DEBUG - SPQR_INFO("Number of mutations: " << n_mutation); - SPQR_INFO("Number of crossover: " << n_crossover); - n_mutation = 0; n_crossover = 0; SPQR_INFO("New population:"); std::set::iterator i = 
population.begin(); @@ -635,7 +644,7 @@ void DiveHandler::GALearner::updateParams(const std::list& rewards) ++i; ++discount_exp; } -#ifdef DIVEHANDLER_TRAINING_DEBUG +#ifdef DIVEHANDLER_TRAINING SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size()); SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size()); SPQR_INFO("Reward total score: " << reward_score); @@ -650,7 +659,7 @@ void DiveHandler::GALearner::updateParams(const std::list& rewards) if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover") >= 1.0) setParam("crossover", 1.0); else - setParam("crossover", exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("crossover")); + setParam("crossover", exp( -reward_score / (REWARDS_HISTORY_SIZE) ) * getParam("crossover")); if(exp( -reward_score / (2*REWARDS_HISTORY_SIZE) ) * getParam("elite") >= 1.0) setParam("elite", 1.0); @@ -738,7 +747,6 @@ DiveHandler::~DiveHandler() * at which the ball is expected to reach the goal. * Then, the diveTime and the diveType parameters are defined accordingly. */ - void DiveHandler::estimateBallProjection() { // Ball path line @@ -859,7 +867,7 @@ inline float DiveHandler::computeDiveAndRecoverTime(float alpha1, float alpha2) */ void DiveHandler::update(DiveHandle& diveHandle) { - if ( time(NULL) % 6 == 0 ) + if ( time(NULL) % 30 == 0 ) srand(time(NULL)); // Check you're actually the goalie... @@ -896,6 +904,7 @@ void DiveHandler::update(DiveHandle& diveHandle) SPQR_FAILURE("too SLOW dude!"); #endif tBAGO = goalTimer.getTimeSince(goalTimer.start) -1500; + if(tBAGO > 4000000000) tBAGO=1000; } } estimatedTime=true; @@ -997,8 +1006,8 @@ void DiveHandler::update(DiveHandle& diveHandle) } tBAGOestimate = 1000*(dBAGOestimate / (.75f*velocityMax)); - SPQR_INFO("distance: " << dBAGOestimate); - SPQR_INFO("velocity: " << (.75f*velocityMax)/1000); +// SPQR_INFO("distance: " << dBAGOestimate); +// SPQR_INFO("velocity: " << (.75f*velocityMax)/1000); } diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index a7a3549..0136149 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -68,8 +68,8 @@ END_MODULE #define INDIVIDUAL_SIZE 11 #define SELECTION 0.1 -#define CROSSOVER 0.5 -#define MUTATION 0.3 +#define CROSSOVER 0.3 +#define MUTATION 0.2 #define ELITE_SIZE 0.2 // Module class declaration From f81a5fae72ac2340d70aac149a88c8063e9416f4 Mon Sep 17 00:00:00 2001 From: Francesco Riccio Date: Tue, 25 Mar 2014 18:13:25 +0100 Subject: [PATCH 17/17] updates --- machineLearning/DiveHandler/DiveHandler.cpp | 42 ++++++++++++++------- machineLearning/DiveHandler/DiveHandler.h | 4 ++ 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp index 678e65c..4141169 100644 --- a/machineLearning/DiveHandler/DiveHandler.cpp +++ b/machineLearning/DiveHandler/DiveHandler.cpp @@ -24,7 +24,7 @@ // Uncomment to have debug information //#define DIVEHANDLER_DEBUG //#define DIVEHANDLER_TRAINING_DEBUG -//#define DIVEHANDLER_TRAINING +#define DIVEHANDLER_TRAINING //#define RAND_PERMUTATIONS #define NEGATIVE_REWARD -1.0 @@ -398,7 +398,7 @@ bool DiveHandler::PGLearner::updateCoeffs() #ifdef DIVEHANDLER_TRAINING SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization - << ", " << coeffs_avgGradient.at(1)/normalization << " ]"); + /*<< ", " << coeffs_avgGradient.at(1)/normalization */<< " ]"); 
#endif // Weight new gradient estimate and previous one according to the reward score std::vector newGradient (coeffsGradient.size()); @@ -407,7 +407,7 @@ bool DiveHandler::PGLearner::updateCoeffs() #ifdef DIVEHANDLER_TRAINING SPQR_INFO("New policy gradient: [ " << newGradient.at(0) - << ", " << newGradient.at(1) << " ]"); + << /*", " << newGradient.at(1) << */" ]"); #endif // Update coefficients history @@ -429,7 +429,7 @@ bool DiveHandler::PGLearner::updateCoeffs() } #ifdef DIVEHANDLER_TRAINING - SPQR_INFO("New coefficients: [ " << coeffs.at(0) << ", " << coeffs.at(1) << " ]"); + SPQR_INFO("New coefficients: [ " << coeffs.at(0) << /*", " << coeffs.at(1) <<*/ " ]"); #endif ++iter_count; @@ -574,9 +574,9 @@ void DiveHandler::GALearner::evolutionStep() std::set previousPopulation(population); population.clear(); - int sel = 0; + int sel = 0; std::set::iterator selector = previousPopulation.begin(); - std::set::iterator partner = previousPopulation.end(); + std::set::iterator partner = previousPopulation.end(); for(; selector != previousPopulation.end(); ++selector, ++sel) { if(sel < round(getParam("selection")*POPULATION_SIZE)) @@ -588,17 +588,17 @@ void DiveHandler::GALearner::evolutionStep() Individual mutated (rnd_mutate( *selector )); population.insert( Individual(evaluate(mutated), (mutated).hypothesis.to_string()) ); } - else if( ((float)rand())/RAND_MAX < sqrt(getParam("crossover")) ) + else if( ((float)rand())/RAND_MAX < sqrt(getParam("crossover"))) { - if(partner == previousPopulation.end()) - partner = selector; + if(partner == previousPopulation.end()) + partner = selector; else { Individual first_child (crossover( *selector, *partner )); Individual second_child (crossover( *partner, *selector )); population.insert(Individual(evaluate(first_child), first_child.hypothesis.to_string())); population.insert(Individual(evaluate(second_child), second_child.hypothesis.to_string())); - partner = previousPopulation.end(); + partner = previousPopulation.end(); } } else @@ -607,6 +607,7 @@ void DiveHandler::GALearner::evolutionStep() } #ifdef DIVEHANDLER_TRAINING + SPQR_INFO("Population size: " << population.size()); SPQR_INFO("Number of mutations: " << n_mutation); SPQR_INFO("Number of crossover: " << n_crossover); n_mutation = 0; n_crossover = 0; @@ -721,7 +722,11 @@ bool DiveHandler::GALearner::updateCoeffs() */ DiveHandler::DiveHandler(): diveType(DiveHandle::none), state(static_cast(SPQR::GOALIE_LEARNING_STATE)), - learner(new GALearner(this, 1, 1.0)), +#ifdef PG_LEARNER + learner(new PGLearner(this, 1, 1.0)), +#else + learner(new GALearner(this, 1, 1.0)), +#endif opponentScore(0), tBall2Goal(-1), tDive(0.0), tBackInPose(0.0), tBAGO(0), tBAGOestimate(0), ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X) { @@ -1005,7 +1010,8 @@ void DiveHandler::update(DiveHandle& diveHandle) velocityMean += (*it) /sampledVelocities.size(); } - tBAGOestimate = 1000*(dBAGOestimate / (.75f*velocityMax)); + if(velocityMax != .0f) + tBAGOestimate = 1000*(dBAGOestimate / (.75f*velocityMax)); // SPQR_INFO("distance: " << dBAGOestimate); // SPQR_INFO("velocity: " << (.75f*velocityMax)/1000); } @@ -1020,13 +1026,15 @@ void DiveHandler::update(DiveHandle& diveHandle) #endif // The module is in the learning state and a reward has been received - if( state == learning ) + if( clock() % 240 && state == learning ) { // Perform a single iteration of the learning algorithm if( learner->updateCoeffs() ) { +#ifdef PG_LEARNER // Change the state in 'waiting for reward' state = 
waitReward; +#endif // Flag a pending reward to the goalie behavior diveHandle.rewardAck = false; } @@ -1036,7 +1044,9 @@ void DiveHandler::update(DiveHandle& diveHandle) } // The module is in the learning state, waiting for the next reward - else if( state == waitReward ) +#ifdef PG_LEARNER + else if( state == waitReward ) +#endif { // The opponent team scores: the goalie failed and gets a negative reward if(goalDetected && estimatedTime) @@ -1090,7 +1100,11 @@ void DiveHandler::update(DiveHandle& diveHandle) } // Use the reward to adjust the algorithm parameters +#ifdef PG_LEARNER if( state == learning ) +#else + if( state == learning && diveHandle.rewardAck ) +#endif learner->updateParams(rewardHistory); // Compute the dive time using the current coefficients as T = alpha2 * (alpha1*T_PAPO - T_dive) diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index 0136149..720161b 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -57,12 +57,16 @@ END_MODULE // Termination conditions #define MAX_ITER 15 #define CONVERGENCE_THRESHOLD 0.01 +// PG algorithm define, if commented the module performs a GA algorithm instead +#define PG_LEARNER + // PG parameters #define GAMMA 0.5 #define BUFFER_DIM 10 #define REWARDS_HISTORY_SIZE 10 #define EPSILON 0.05 #define T 15 + // GA parameters #define POPULATION_SIZE 100 #define INDIVIDUAL_SIZE 11