diff --git a/machineLearning/ConfigurationParameters.h b/machineLearning/ConfigurationParameters.h
deleted file mode 100644
index 9c8bb8a..0000000
--- a/machineLearning/ConfigurationParameters.h
+++ /dev/null
@@ -1,112 +0,0 @@
-#pragma once
-
-#include <string>
-
-namespace SPQR
-{
-    /************ GAME CONTROLLER ************/
-    static const std::string IP_GOALIE = "10.0.19.14";
-    static const int CHEST_BUTTON_MANUAL_GAME_CONTROLLER_PORT = 18003;
-    static const int FIELD_DIMENSION_X = 3000;
-    static const int FIELD_DIMENSION_Y = 2000;
-
-    static const unsigned int POLICY = 0;   ///{STABLE ="0", S_POSITIONIG_X ="1", S_POSITIONIG_XY ="2", WALL ="3", TANK ="4", STATIC_POSITIONG="5"};
-    static const unsigned int STRATEGY = 0; ///{DRIBBLING ="0", PASSING ="1"};
-
-    static const float TURN_VALID_THS = 10; /// degree
-    static const float TURN_EXCESS = 10;
-
-    static const int COORDINATION_PORT_NUMBER = 11937;
-    static const int MAXIMUM_DISTANCE_BALL_VIEWED = 6000;
-    static const int MAXIMUM_DISTANCE_ON_THE_FIELD = 11000;
-    static const unsigned int TABLE_ROWS = 5; /// TABLE_ROWS also equals to the number of roles.
-    static const unsigned int ACTIVE_ROLES = 5; /// Active roles (including the goalie) => max 5 (goalie, defender, supporter, jolly, striker)
-    static const unsigned int TABLE_COLUMNS = ACTIVE_ROLES+4;
-    static const unsigned int DEAD_ROBOT_TIME_THRESHOLD = 5000;
-    static const unsigned int HYSTERESIS_PERIOD_IN_CYCLES = 100;
-    static const unsigned int COORDINATION_INFORMATION_NETWORK_FREQUENCY = 10; /// FREQUENCY!
-    static const unsigned int FALL_DOWN_PENALTY = 200;
-    static const unsigned int TIME_TO_GET_UP = 10000;
-    static const unsigned int MOVING_BALL_MIN_VELOCITY = 10; /// [mm/s]
-    static const unsigned int SUPPORTER_MIN_TIME_WHEN_LAST_SEEN = 500; /// [ms]
-    static const unsigned int DEFENDER_MIN_TIME_WHEN_LAST_SEEN = 500; /// [ms]
-    static const unsigned int JOLLY_MIN_TIME_WHEN_LAST_SEEN = 500; /// [ms]
-    static const int MINIMUM_PASSING_DISTANCE = 1000; /// [mm]
-    static const int HYSTERESIS_BOUND_DISTANCE = 300; /// [mm]
-
-    static const float DEFENDER_KICKOFF_DEFAULT_POSITION_X = -0.55 * FIELD_DIMENSION_X;
-    static const float DEFENDER_KICKOFF_DEFAULT_POSITION_Y = 0.13 * FIELD_DIMENSION_Y;
-    static const float DEFENDER_NO_KICKOFF_DEFAULT_POSITION_X = -0.55 * FIELD_DIMENSION_X;
-    static const float DEFENDER_NO_KICKOFF_DEFAULT_POSITION_Y = 0.13 * FIELD_DIMENSION_Y;
-
-    static const float SUPPORTER_KICKOFF_DEFAULT_POSITION_X = -0.27 * FIELD_DIMENSION_X;
-    static const float SUPPORTER_KICKOFF_DEFAULT_POSITION_Y = 0.33 * FIELD_DIMENSION_Y;
-    static const float SUPPORTER_NO_KICKOFF_DEFAULT_POSITION_X = -0.27 * FIELD_DIMENSION_X;
-    static const float SUPPORTER_NO_KICKOFF_DEFAULT_POSITION_Y = 0.33 * FIELD_DIMENSION_Y;
-
-    static const float JOLLY_KICKOFF_DEFAULT_POSITION_X = -0.27 * FIELD_DIMENSION_X;
-    static const float JOLLY_KICKOFF_DEFAULT_POSITION_Y = -0.33 * FIELD_DIMENSION_Y;
-    static const float JOLLY_NO_KICKOFF_DEFAULT_POSITION_X = -0.27 * FIELD_DIMENSION_X;
-    static const float JOLLY_NO_KICKOFF_DEFAULT_POSITION_Y = -0.33 * FIELD_DIMENSION_Y;
-
-    static const float STRIKER_KICKOFF_POSITION_X = -220.0;
-    static const float STRIKER_KICKOFF_POSITION_Y = 0.0;
-    static const float STRIKER_NO_KICKOFF_POSITION_X = -1200.0;
-    static const float STRIKER_NO_KICKOFF_POSITION_Y = 0.0;
-
-    static const float SPEED_X = 0.6;
-    static const float SPEED_Y = 0.6;
-    static const float HEAD_ROTATION = 8.0;
-    static const float TIME_BEFORE_STARTING_TO_COORD_SEARCH = 7000.0;
-
-    /************ WALL ************/
-    static const float DEFENDER_KICKOFF_WALL_POSITION_X = -0.75 * FIELD_DIMENSION_X;
-    static const float DEFENDER_KICKOFF_WALL_POSITION_Y = 0.16 * FIELD_DIMENSION_Y;
-    static const float SUPPORTER_KICKOFF_WALL_POSITION_X = -0.75 * FIELD_DIMENSION_X;
-    static const float SUPPORTER_KICKOFF_WALL_POSITION_Y = 0.45 * FIELD_DIMENSION_Y;
-    static const float JOLLY_KICKOFF_WALL_POSITION_X = -0.75 * FIELD_DIMENSION_X;
-    static const float JOLLY_KICKOFF_WALL_POSITION_Y = -0.30 * FIELD_DIMENSION_Y;
-
-    /************ NO BALL ************/
-    static const float DEFENDER_KICKOFF_NO_BALL_POSITION_X = -0.75 * FIELD_DIMENSION_X;
-    static const float DEFENDER_KICKOFF_NO_BALL_POSITION_Y = 0.75 * FIELD_DIMENSION_Y;
-    static const float SUPPORTER_KICKOFF_NO_BALL_POSITION_X = -0.75 * FIELD_DIMENSION_X;
-    static const float SUPPORTER_KICKOFF_NO_BALL_POSITION_Y = -0.75 * FIELD_DIMENSION_Y;
-    static const float JOLLY_KICKOFF_NO_BALL_POSITION_X = 0.75 * FIELD_DIMENSION_X;
-    static const float JOLLY_KICKOFF_NO_BALL_POSITION_Y = -0.50 * FIELD_DIMENSION_Y;
-
-    /************ GOALIE ************/
-    static const float GOALIE_BASE_POSITION_X = -FIELD_DIMENSION_X + 250; /// [mm] //TODO take this from theFieldDimensions
-    static const float GOALIE_BASE_POSITION_Y = 0; /// [mm]
-    static const float GOALIE_BASE_POSITION_BEARING = 0; /// [mm]
-
-    static const int GOALIE_LEARNING_STATE = 3; /// 1 = learning disabled, 3 = learning enabled
-
-    static const float GOALIE_DIVE_TIME = 3000;
-    static const float GOALIE_DIVE_RECOVER_TIME = 3000;
-    static const float GOALIE_DIVE_REPOSITION_TIME = 3000;
-
-static const float GOALIE_CLOSE_DIVE_TIME = 1500;
-static const float GOALIE_CLOSE_DIVE_RECOVER_TIME = 1500;
-
-static const float GOALIE_STOP_BALL_TIME = 2000;
-static const float GOALIE_STOP_BALL_RECOVER_TIME = 2000;
-
-static const float GOALIE_POSE_X_TOLLERANCE = 150; /// [mm]
-static const float GOALIE_POSE_Y_TOLLERANCE = 150; /// [mm]
-static const float GOALIE_POSE_ANGLE_TOLLERANCE = 10; /// [deg]
-static const float GOALIE_POSE_X_TOLLERANCE_AFTER_DIVE = 150; /// [mm]
-static const float GOALIE_POSE_Y_TOLLERANCE_AFTER_DIVE = 150; /// [mm]
-
-static const float GOALIE_DIVE_TIME_TOLERANCE = 100; /// [ms]
-
-static const float GOALIE_MOVING_BALL_MIN_VELOCITY = 10; /// [mm/s]
-static const float GOALIE_EPSILON_COLLINEAR = 0.001; /// [??]
-static const float GOALIE_FAR_LIMIT_Y = 800; /// a little more than goal post //TODO take this from FieldDimensions
-static const float GOALIE_CLOSE_LIMIT_Y = 200; /// dont-dive distance //TODO take this from FieldDimensions
-static const unsigned int GOALIE_MIN_TIME_WHEN_LAST_SEEN = 500; /// [ms]
-static const float GOALIE_MIN_BALL_DIST_FROM_POST = 500;
-
-static const float GOALIE_MAX_DIST_BALL_IN_RANGE_ABS = 500; /// [mm]
-}
-
diff --git a/machineLearning/DiveHandle.h b/machineLearning/DiveHandle.h
index 1d75bf9..876473e 100644
--- a/machineLearning/DiveHandle.h
+++ b/machineLearning/DiveHandle.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "Tools/Math/Vector2.h"
+#include "Tools/Enum.h"
 
 class DiveHandle : public Streamable
 {
@@ -20,18 +21,14 @@ class DiveHandle : public Streamable
     }
 
 public:
-    enum Dive
-    {
-        none = 1,
-        lDive,
-        rDive,
-        lcloseDive,
-        rcloseDive,
-        stopBall
-    };
-
-    typedef int Dive;
-
+    ENUM(Dive,
+         none = 1,
+         lDive,
+         rDive,
+         lcloseDive,
+         rcloseDive,
+         stopBall);
+
     float diveTime;
     float ballProjectionEstimate;
     Dive diveType;
diff --git a/machineLearning/DiveHandler/DiveHandler.cpp b/machineLearning/DiveHandler/DiveHandler.cpp
index a06bed5..61e67ac 100644
--- a/machineLearning/DiveHandler/DiveHandler.cpp
+++ b/machineLearning/DiveHandler/DiveHandler.cpp
@@ -16,21 +16,20 @@
 #include
 #include
-#include
+#include
+#include "Tools/Enum.h"
 
 #include "DiveHandler.h"
 
 // Uncomment to have debug information
 //#define DIVEHANDLER_DEBUG
-#define DIVEHANDLER_TRAINING_DEBUG
-#define DIVEHANDLER_TRAINING
+//#define DIVEHANDLER_TRAINING_DEBUG
+//#define DIVEHANDLER_TRAINING
 //#define RAND_PERMUTATIONS
 
 #define NEGATIVE_REWARD -1.0
 #define POSITIVE_REWARD 1.5
 
-#define REWARD_WORST 999999.9
-
 // Debug messages template
 #define SPQR_ERR(x) std::cerr << "\033[22;31;1m" <<"[DiveHandler] " << x << "\033[0m"<< std::endl;
 #define SPQR_INFO(x) std::cerr << "\033[22;34;1m" <<"[DiveHandler] " << x << "\033[0m" << std::endl;
@@ -42,13 +41,16 @@
     else if(x == 2) std::cerr << "\033[22;34;1m"<<"Learner state: paused (waiting for reward). "<<"\033[0m" << std::endl; \
     else if(x == 3) std::cerr << "\033[22;34;1m"<<"Learner state: enabled. "<<"\033[0m" << std::endl; \
 
+bool stamp =false;
+bool tooEarly=false;
+bool estimatedTime=false;
 MAKE_MODULE(DiveHandler, SPQR-Modules)
 
 // Shortcut to compute the magnitude of a vector
-double magnitude(std::vector<double> v)
+float magnitude(std::vector<float> v)
 {
-    double m = 0.0;
+    float m = 0.0;
     for (unsigned int i = 0; i < v.size(); ++i)
         m += v.at(i) * v.at(i);
 
@@ -62,12 +64,12 @@
 /*
  * Simple setters for the learner's parameters and coefficients.
  */
-void DiveHandler::CoeffsLearner::setCoeffs(const std::vector<double>& _coeffs)
+void DiveHandler::CoeffsLearner::setCoeffs(const std::vector<float>& _coeffs)
 {
     coeffs = _coeffs;
 }
 
-void DiveHandler::CoeffsLearner::setParam(const std::string& _key, double _value)
+void DiveHandler::CoeffsLearner::setParam(const std::string& _key, float _value)
 {
     params[_key] = _value;
 }
@@ -85,16 +87,16 @@
 * - An initial value for the learning coefficients (or an upper bound for the random initialization of those);
 * - A flag indicating whether a fixed or random initialization has to be performed.
 */
-DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, double _epsilon, int _T, double _initValue, bool randomize ):
+DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, float _epsilon, int _T, float _initValue, bool randomize ):
     // Initialize the base class
     CoeffsLearner(_nCoeffs, _initValue, _dhPtr),
     // Initialize the gradient estimate
-    coeffsGradient(_nCoeffs, 0.0), coeffsBest(_nCoeffs, 0.0)
+    coeffsGradient(_nCoeffs, 0.0)
 {
     // Initializing reward scores
     reward_score = 0.0;
     reward_norm = 1.0;
-    rewardBest = REWARD_WORST;
+    coeffsBest = coeffs;
 
     // Initializing coefficients
     if(randomize)
@@ -102,7 +104,7 @@
         // Random initialization in [0, INIT_VALUE]
         srand(time(NULL));
         for( int i=0; i<_nCoeffs; ++i)
-            coeffs.at(i) = (static_cast<double>(rand()%101)/100 ) *_initValue;
+            coeffs.at(i) = (static_cast<float>(rand()%101)/100 ) *_initValue;
     }
 
     // Initializing parameters
@@ -126,7 +128,7 @@ bool DiveHandler::PGLearner::converged()
 {
     // Compute variations mean
     // Delta previous to current step
-    double avg_variation = (magnitude(coeffs) - magnitude(coeffsBuffer.front()))/coeffsBuffer.size() ;
+    float avg_variation = (magnitude(coeffs) - magnitude(coeffsBuffer.front()))/coeffsBuffer.size() ;
     // Iterate over the whole buffer and compute deltas from step i-1 to i
     PGbuffer::const_iterator i = coeffsBuffer.begin();
     PGbuffer::const_iterator j = coeffsBuffer.begin(); ++j;
@@ -138,7 +140,7 @@ bool DiveHandler::PGLearner::converged()
 
     // Compute variations standard deviation
     // Delta previous to current step
-    double std_variation = pow(magnitude(coeffs)-magnitude(coeffsBuffer.front()) - avg_variation, 2) / coeffsBuffer.size();
+    float std_variation = pow(magnitude(coeffs)-magnitude(coeffsBuffer.front()) - avg_variation, 2) / coeffsBuffer.size();
     // Iterate over the whole buffer and compute deltas from step i-1 to i
     PGbuffer::const_iterator k = coeffsBuffer.begin();
     PGbuffer::const_iterator t = coeffsBuffer.begin(); ++t;
@@ -177,7 +179,7 @@ void DiveHandler::PGLearner::generatePerturbations()
     for(int i=0; i<T; ++i)
     {
-        std::vector<double> perturbation(coeffs);
+        std::vector<float> perturbation(coeffs);
 
         for(unsigned int j=0; j<coeffs.size(); ++j)
@@ ... @@
-    std::vector<double> perturbation (coeffs.size(),0.0);
+    std::vector<float> perturbation (coeffs.size(),0.0);
 
     // Generate all possible combinations recursively
     generatePerturbations(&perturbation, 0);
@@ -210,7 +212,7 @@
 /* TOTEST&COMMENT */
-void DiveHandler::PGLearner::generatePerturbations(std::vector<double>* partial_perturbation, unsigned int index)
+void DiveHandler::PGLearner::generatePerturbations(std::vector<float>* partial_perturbation, unsigned int index)
 {
     if (index == partial_perturbation->size()-1)
     {
@@ -218,7 +220,7 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector<double>* partial_perturbation, unsigned int index)
         for (int perturbation_type = -1; perturbation_type <= 1; ++perturbation_type)
         {
             // Compute last index and generate the final perturbation
-            std::vector<double> perturbation (*partial_perturbation);
+            std::vector<float> perturbation (*partial_perturbation);
             perturbation.at(index) = coeffs.at(index) + perturbation_type * params["epsilon"];
 
             // Update the perturbations buffer
@@ -239,29 +241,22 @@
 }
 
 /* TOCOMMENT */
-double DiveHandler::PGLearner::evaluatePerturbation( std::vector<double> R )
+float DiveHandler::PGLearner::evaluatePerturbation( std::vector<float> R )
 {
     // Dimensions check
     assert(R.size() == coeffs.size());
-
-    if (R.at(0) == 0.0 || R.at(1) == 0.0)
-        return REWARD_WORST;
-
     // Generate perturbated policy and call the DiveHandler object for evaluation
-    double tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(R.at(0), R.at(1));
+//    float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1));
 
-    // Attractor
-    std::vector<double> distanceToBest(2);
-    distanceToBest.at(0) = coeffsBest.at(0) - R.at(0);
-    distanceToBest.at(1) = coeffsBest.at(1) - R.at(1);
-
-#ifdef DIVEHANDLER_TRAINING_DEBUG
-    SPQR_INFO("Perturbated policy: [" << R.at(0) << ", " << R.at(1)
-              << "], Score: " << ((1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal-tDiveAndRecover)+LAMBDA1*magnitude(distanceToBest)));
-#endif
+    // Perturbated coefficients
+    std::vector<float> new_coeffs(2);
+    new_coeffs.at(0) = coeffs.at(0) + R.at(0);
+    new_coeffs.at(1) = coeffs.at(1) + R.at(1);
 
-    return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) +
-            LAMBDA1*magnitude(distanceToBest);
+    return (diveHandler_ptr->estimatedInterval - ( R.at(0)*diveHandler_ptr->tBall2Goal))*
+           (diveHandler_ptr->estimatedInterval - ( R.at(0)*diveHandler_ptr->tBall2Goal)) ;
+//    return (1.0-LAMBDA1)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) +
+//            LAMBDA1*fabs(magnitude(coeffs) - magnitude(coeffsBest));
 //    return (1.0-LAMBDA1-LAMBDA2)*fabs(tDiveAndRecover) +
 //            LAMBDA1*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover) +
@@ -271,7 +266,7 @@
 }
 
 /* TOTEST&COMMENT */
-void DiveHandler::PGLearner::updateParams(const std::list<double>& rewards)
+void DiveHandler::PGLearner::updateParams(const std::list<float>& rewards)
 {
     // Re-initialize reward scores
     reward_score = 0.0;
@@ -279,7 +274,7 @@
     int discount_exp = 0;
     int positives = 0;
 
-    std::list<double>::const_iterator i = rewards.begin();
+    std::list<float>::const_iterator i = rewards.begin();
     while (i != rewards.end())
     {
         // Counting positives
@@ -292,22 +287,19 @@
         ++i; ++discount_exp;
     }
 
-    //Adjusting PG parameters according to the obtained score
-    setParam("epsilon", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("epsilon"));
-
-    // Update best performance
-    if (rewardGradient < rewardBest)
-    {
-        rewardBest = rewardGradient;
-        coeffsBest = coeffs;
-    }
 
#ifdef DIVEHANDLER_TRAINING_DEBUG
     SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size());
     SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size());
     SPQR_INFO("Reward total score: " << reward_score);
-    SPQR_INFO("Best evaluation so far: [ " << coeffsBest.at(0) << ", " << coeffsBest.at(1) << " ] with score: " << rewardBest);
#endif
+    //Adjusting PG parameters according to the obtained score
+    setParam("epsilon", exp( -reward_score / REWARDS_HISTORY_SIZE ) * getParam("epsilon"));
+
+    // Update best performance
+    if (rewards.front() == POSITIVE_REWARD)
+        coeffsBest = coeffs;
+
#ifdef DIVEHANDLER_TRAINING
     SPQR_INFO( "Epsilon value changed to: " << getParam("epsilon") << " according to the obtained rewards. ");
#endif
@@ -321,31 +313,33 @@
 /* TOTEST&COMMENT */
 bool DiveHandler::PGLearner::updateCoeffs()
 {
+
+#ifdef DIVEHANDLER_TRAINING
+    SPQR_INFO( "PG algorithm, iteration " << iter_count << "... " );
" ); +#endif + if( iter_count == MAX_ITER || converged() ) return false; else { -#ifdef DIVEHANDLER_TRAINING - SPQR_INFO( "PG algorithm, iteration " << iter_count << "... " ); -#endif // First generate the set of random perturbation for the current coefficients generatePerturbations(); // For each perturbation, evaluate with the objective function and store the result in a temporary container - std::vector evaluatedPerturbations (perturbationsBuffer.size()); + std::vector evaluatedPerturbations (perturbationsBuffer.size()); PGbuffer::const_iterator evaluator; for(evaluator = perturbationsBuffer.begin(); evaluator != perturbationsBuffer.end(); ++evaluator) evaluatedPerturbations.push_back( evaluatePerturbation(*evaluator) ); // Compute the average 'gradient' for the current coefficients - std::vector coeffs_avgGradient(coeffs.size()); + std::vector coeffs_avgGradient(coeffs.size()); #ifdef RAND_PERMUTATIONS // For each coefficient, compute the average score to determine the correspondent 'gradient' entry PGbuffer::const_iterator current_perturbation = perturbationsBuffer.begin(); for( unsigned int n = 0; n < coeffs.size(); ++n ) { - std::vector score_plus, score_minus, score_zero; + std::vector score_plus, score_minus, score_zero; // Keep track of the perturbation type and store each score in a container for( unsigned int i = 0; i < evaluatedPerturbations.size(); ++i ) @@ -361,17 +355,17 @@ bool DiveHandler::PGLearner::updateCoeffs() } // Sum up all positive perturbation scores - double avg_plus = 0.0; + float avg_plus = 0.0; for (unsigned int j = 0; j < score_plus.size(); ++j) avg_plus += score_plus.at(j) / score_plus.size(); // Sum up all negative perturbation scores - double avg_minus = 0.0; + float avg_minus = 0.0; for (unsigned int j = 0; j < score_minus.size(); ++j) avg_minus += score_minus.at(j) / score_minus.size(); // Sum up all null perturbation scores - double avg_zero = 0.0; + float avg_zero = 0.0; for (unsigned int j = 0; j < score_zero.size(); ++j) avg_zero += score_zero.at(j) / score_zero.size(); @@ -385,12 +379,12 @@ bool DiveHandler::PGLearner::updateCoeffs() for( unsigned int n = 0; n < coeffs.size(); ++n ) { int avg_selector = 0; - double avg_minus = 0.0 , avg_zero = 0.0, avg_plus = 0.0; + float avg_minus = 0.0 , avg_zero = 0.0, avg_plus = 0.0; for( unsigned int i = 0; i < evaluatedPerturbations.size(); i = i + pow(3,n) ) { for( unsigned int k = i; k < i + pow(3,n); ++k ) { - double evaluation = evaluatedPerturbations.at(k) / (evaluatedPerturbations.size()/3); + float evaluation = evaluatedPerturbations.at(k) / (evaluatedPerturbations.size()/3); if( (avg_selector)%3 == 0 ) avg_minus += evaluation; if( (avg_selector)%3 == 1 ) avg_zero += evaluation; @@ -405,11 +399,8 @@ bool DiveHandler::PGLearner::updateCoeffs() coeffs_avgGradient.at(coeffs.size() - (n +1)) = avg_plus - avg_minus; } #endif - // Evaluate the gradient - rewardGradient = evaluatePerturbation(coeffs_avgGradient); - // Avoid 'nan' when the gradient is zeroed - double normalization = 1.0; + float normalization = 1.0; if (magnitude(coeffs_avgGradient) != 0) normalization = magnitude(coeffs_avgGradient); @@ -417,10 +408,9 @@ bool DiveHandler::PGLearner::updateCoeffs() #ifdef DIVEHANDLER_TRAINING SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization << ", " << coeffs_avgGradient.at(1)/normalization << " ]"); - SPQR_INFO("Gradient score (before normalization): " << rewardGradient); #endif // Weight new gradient estimate and previous one according to the reward score - std::vector 
-        std::vector<double> newGradient (coeffsGradient.size());
+        std::vector<float> newGradient (coeffsGradient.size());
         for( unsigned int j=0; j<coeffsGradient.size(); ++j )
@@ ... @@
-    diveType(none), state(static_cast<LearningState>(SPQR::GOALIE_LEARNING_STATE)),
-    learner(new PGLearner(this, 2, EPSILON, T)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y),
-    tDive(0.0), tBackInPose(0.0), ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X)
+    diveType(DiveHandle::none), state(static_cast<LearningState>(SPQR::GOALIE_LEARNING_STATE)),
+    learner(new PGLearner(this, 2, EPSILON, T, 1.0, false)), opponentScore(0), tBall2Goal(SPQR::FIELD_DIMENSION_Y),
+    tDive(0.0), tBackInPose(0.0), estimatedInterval(0),
+    ballProjectionIntercept(SPQR::FIELD_DIMENSION_Y), distanceBall2Goal(SPQR::FIELD_DIMENSION_X)
 {
#ifdef DIVEHANDLER_TRAINING
     SPQR_INFO("Initializing PGlearner...");
-    std::vector<double> coeffs = learner->getCoeffs();
+    std::vector<float> coeffs = learner->getCoeffs();
     SPQR_INFO("Coefficients: alpha 1 = " << coeffs.at(0) << ", alpha 2 = " << coeffs.at(1));
     SPQR_INFO("Parameters: epsilon = " << learner->getParam("epsilon") << ", T = " << learner->getParam("T"));
#endif
@@ -481,7 +472,7 @@
  */
 DiveHandler::~DiveHandler()
 {
-    if (learner) delete learner;
+    if(learner) delete learner;
 }
 
 /*
@@ -493,18 +484,18 @@
 void DiveHandler::estimateBallProjection()
 {
     // Ball path line
-    double A1 = (theBallModel.estimate.position.y - theBallModel.estimate.velocity.y) - theBallModel.estimate.position.y;
-    double B1 = theBallModel.estimate.position.x - (theBallModel.estimate.position.x - theBallModel.estimate.velocity.x);
-    double C1 = A1*theBallModel.estimate.position.x + B1*theBallModel.estimate.position.y;
+    float A1 = (theBallModel.estimate.position.y - theBallModel.estimate.velocity.y) - theBallModel.estimate.position.y;
+    float B1 = theBallModel.estimate.position.x - (theBallModel.estimate.position.x - theBallModel.estimate.velocity.x);
+    float C1 = A1*theBallModel.estimate.position.x + B1*theBallModel.estimate.position.y;
 
     // Goal line
-    double A2 = SPQR::GOALIE_FAR_LIMIT_Y - -SPQR::GOALIE_FAR_LIMIT_Y;
+    float A2 = SPQR::GOALIE_FAR_LIMIT_Y - -SPQR::GOALIE_FAR_LIMIT_Y;
 
     // Cross product/determinant
-    double det = - A2*B1;
+    float det = - A2*B1;
 
     // Y-intercept initialized with the maximum value possible
-    double yIntercept = SPQR::FIELD_DIMENSION_Y;
+    float yIntercept = SPQR::FIELD_DIMENSION_Y;
 
     // Non-singular case
     if( fabs(det) > SPQR::GOALIE_EPSILON_COLLINEAR )
     {
@@ -513,31 +504,33 @@
         yIntercept = (- A2*C1) / det;
 
         // Devising the type of dive to be performed
-        if( yIntercept > ( SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept < SPQR::GOALIE_FAR_LIMIT_Y )
-            // Close intercept on the left
-            diveType = lcloseDive;
-        else if( yIntercept > SPQR::GOALIE_FAR_LIMIT_Y )
-            // Far intercept on the left
-            diveType = lDive;
-        else if( yIntercept < (-SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept > -SPQR::GOALIE_FAR_LIMIT_Y )
-            // Close intercept on the right
-            diveType = rcloseDive;
-        else if( yIntercept < -SPQR::GOALIE_FAR_LIMIT_Y )
-            // Far intercept on the right
-            diveType = rDive;
-        else if( fabs(yIntercept) < SPQR::GOALIE_CLOSE_LIMIT_Y/2)
-            diveType = stopBall;
-        else
-            // Any other case: no dive at all
-            diveType = none;
-    }
+
+        if( yIntercept > ( SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept < SPQR::GOALIE_FAR_LIMIT_Y )
+            // Close intercept on the left
+            diveType = DiveHandle::lcloseDive;
+        else if( yIntercept > SPQR::GOALIE_FAR_LIMIT_Y )
+            // Far intercept on the left
+            diveType = DiveHandle::lDive;
+        else if( yIntercept < (-SPQR::GOALIE_CLOSE_LIMIT_Y/2) && yIntercept > -SPQR::GOALIE_FAR_LIMIT_Y )
+            // Close intercept on the right
+            diveType = DiveHandle::rcloseDive;
+        else if( yIntercept < -SPQR::GOALIE_FAR_LIMIT_Y )
+            // Far intercept on the right
+            diveType = DiveHandle::rDive;
+
+        else if( fabs(yIntercept) < SPQR::GOALIE_CLOSE_LIMIT_Y/2)
+            diveType = DiveHandle::stopBall;
+        else
+            // Any other case: no dive at all
+            diveType = DiveHandle::none;
+    }
 
     // Using the appropriate estimate for the dive time
-    if (diveType == lDive || diveType == rDive )
+    if (diveType == DiveHandle::lDive || diveType == DiveHandle::rDive )
         tDive = SPQR::GOALIE_DIVE_TIME;
-    else if (diveType == lcloseDive || diveType == rcloseDive )
+    else if (diveType == DiveHandle::lcloseDive || diveType == DiveHandle::rcloseDive )
         tDive = SPQR::GOALIE_CLOSE_DIVE_TIME;
-    else if (diveType == stopBall )
+    else if (diveType == DiveHandle::stopBall )
         tDive = SPQR::GOALIE_STOP_BALL_TIME;
     else
         tDive = 0.0;
@@ -546,10 +539,11 @@
     ballProjectionIntercept = yIntercept;
 
     // Computing the distance vector from the ball to the goal
-    double delta_x = -SPQR::FIELD_DIMENSION_X - theGlobalBallEstimation.singleRobotX;
-    double delta_y = ballProjectionIntercept - theGlobalBallEstimation.singleRobotY;
+//    float delta_x = -SPQR::FIELD_DIMENSION_X - theGlobalBallEstimation.singleRobotX;
+//    float delta_y = ballProjectionIntercept - theGlobalBallEstimation.singleRobotY;
     // Estimated distance from the ball
-    distanceBall2Goal = sqrt( delta_x*delta_x + delta_y*delta_y);
+//    distanceBall2Goal = sqrt( delta_x*delta_x + delta_y*delta_y);
+    distanceBall2Goal = theBallModel.estimate.position.x;
 }
 
 /*
@@ -569,18 +563,18 @@ void DiveHandler::estimateDiveTimes()
         tBall2Goal = -1.0;
 
     // Using the appropriate estimates for recover and reposition times
-    double tRecover = 0.0;
-    double tReposition = 0.0;
-    if( diveType == rcloseDive || diveType == lcloseDive )
+    float tRecover = 0.0;
+    float tReposition = 0.0;
+    if( diveType == DiveHandle::rcloseDive || diveType == DiveHandle::lcloseDive )
         // Close dive: no need to back up to the original position
         tRecover = SPQR::GOALIE_CLOSE_DIVE_RECOVER_TIME;
-    else if( diveType == rDive || diveType == lDive )
+    else if( diveType == DiveHandle::rDive || diveType == DiveHandle::lDive )
     {
         // Long dive: the robot has to stand up and reposition
         tRecover = SPQR::GOALIE_DIVE_RECOVER_TIME;
         tReposition = SPQR::GOALIE_DIVE_REPOSITION_TIME;
     }
-    else if( diveType == stopBall )
+    else if( diveType == DiveHandle::stopBall )
     {
         // stop ball: the robot has to stand up and stop the ball
         tRecover = SPQR::GOALIE_STOP_BALL_RECOVER_TIME;
@@ -591,7 +585,7 @@
 }
 
 /* TOCOMMENT */
-inline double DiveHandler::computeDiveAndRecoverTime(double alpha1, double alpha2)
+inline float DiveHandler::computeDiveAndRecoverTime(float alpha1, float alpha2)
 {
     return alpha2*( alpha1*tBall2Goal - tDive );
 }
@@ -611,21 +605,86 @@
  */
 void DiveHandler::update(DiveHandle& diveHandle)
 {
-//    theOpponentTeamInfo.score;
 
     // Check you're actually the goalie...
     if (theRobotInfo.number == 1)
-    {
-        // Compute the ball projection estimate
+    {
+        // Compute the ball projection estimate
         estimateBallProjection();
 
         // Update the DiveHandle
         diveHandle.ballProjectionEstimate = ballProjectionIntercept;
 
-        // Check whether the ball is close enough
-        if( (distanceBall2Goal < SPQR::FIELD_DIMENSION_X) && (fabs(ballProjectionIntercept) < SPQR::FIELD_DIMENSION_Y) )
+        if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 10001 &&
+            ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 10050 &&
+            (int) timer.fallen != 0)
+//            SPQR_SUCCESS("TooEarly time window START...");
+        if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 14971 &&
+            ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 14999 &&
+            (int) timer.fallen != 0)
+//            SPQR_SUCCESS("TooEarly time window END.");
+
+        if( ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) > 10000 &&
+            ((int) (clock() - timer.fallen)/(CLOCKS_PER_SEC/1000)) < 15000 &&
+            (int) timer.fallen != 0)
+        {
+            if(opponentScore != (int)theOpponentTeamInfo.score)
+                tooEarly=true;
+        }
+        // Check whether the ball is close enough
+        if( (distanceBall2Goal < SPQR::FIELD_DIMENSION_X) && (fabs(ballProjectionIntercept) < SPQR::FIELD_DIMENSION_Y) )
         {
             // Estimate all temporal parameters
             estimateDiveTimes();
+            if(state != notLearning)
+            {
+                // if not in playing state
+                if(theGameInfo.state != STATE_PLAYING)
+                    timer.reset();
+                else
+                {
+                    // if the ball is moving enough fast then set the timer
+                    if( !timer.setTimer && (theBallModel.estimate.velocity.abs() > SPQR::MOVING_BALL_MIN_VELOCITY &&
+                                            theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) < 1000) )
+                        timer.set(clock());
+                    // else reset it...
+                    if( timer.setTimer && (theBallModel.estimate.velocity.abs() < SPQR::MOVING_BALL_MIN_VELOCITY ||
+                                           theFrameInfo.getTimeSince(theBallModel.timeWhenLastSeen) > 1000) )
+                        timer.reset();
+
+                    // if the goalie dives
+                    if( (int)theFallDownState.state == (int)FallDownState::fallen )
+                    {
+                        timer.fallen=clock();
+                        estimatedInterval = (int) (clock() - timer.start)/(CLOCKS_PER_SEC/1000);
+                    }
+
+                    if(opponentScore != (int)theOpponentTeamInfo.score && !estimatedTime)
+                    {
+                        if( tooEarly )
+                        {
+                            SPQR_FAILURE("too FAST dude!");
+                            estimatedInterval += 2000;
+                            tooEarly=false;
+                        }
+                        else
+                        {
+                            SPQR_FAILURE("too SLOW dude!");
+                            estimatedInterval += (int)(clock() - timer.fallen)/(CLOCKS_PER_SEC/1000) - 500;
+                        }
+                        estimatedTime=true;
+                    }
+                    // if the goalie succeeded
+                    else if(ownScore != (int)theOwnTeamInfo.score && !estimatedTime)
+                    {
+                        SPQR_SUCCESS("SUPER!");
+                        estimatedInterval -= 100;
+                        estimatedTime=true;
+                    }
+                }
+            }
+
#ifdef DIVEHANDLER_DEBUG
             SPQR_INFO("Ball projection: " << ballProjectionIntercept);
             SPQR_INFO("PAPO time: " << tBall2Goal);
@@ -635,7 +694,7 @@ void DiveHandler::update(DiveHandle& diveHandle)
#endif
 
             // The module is in the learning state and a reward has been received
-            if( (state == learning) )
+            if( state == learning )
             {
                 // Perform a single iteration of the learning algorithm
                 if( learner->updateCoeffs() )
@@ -654,7 +713,7 @@ void DiveHandler::update(DiveHandle& diveHandle)
             else if( state == waitReward )
             {
                 // The opponent team scores: the goalie failed and gets a negative reward
-                if(opponentScore != (int)theOpponentTeamInfo.score)
+                if(opponentScore != (int)theOpponentTeamInfo.score && estimatedTime)
                 {
                     // The learner obtains a negative reward
                     rewardHistory.push_front(NEGATIVE_REWARD);
@@ -666,16 +725,19 @@ void DiveHandler::update(DiveHandle& diveHandle)
                     opponentScore = (int)theOpponentTeamInfo.score;
#ifdef DIVEHANDLER_TRAINING
- SPQR_FAILURE("The opponent team scored! Negative reward for the learner. "); + SPQR_FAILURE("The opponent team scored! Negative reward for the learner."); #endif // A reward has been received: re-enable learning state = learning; - // Clear the pending reward + // Clear the pending rewardelse if(!diveHandle.rewardAck) diveHandle.rewardAck = true; + + estimatedTime=false; + stamp =true; } // The own team scores: user-guided move to provide the goalie a positive reward - else if(ownScore != (int)theOwnTeamInfo.score) + else if(ownScore != (int)theOwnTeamInfo.score && estimatedTime) { // The learner obtains a positive reward rewardHistory.push_front(POSITIVE_REWARD); @@ -694,6 +756,9 @@ void DiveHandler::update(DiveHandle& diveHandle) // Clear the pending reward if(!diveHandle.rewardAck) diveHandle.rewardAck = true; + + estimatedTime=false; + stamp=true; } } @@ -701,8 +766,19 @@ void DiveHandler::update(DiveHandle& diveHandle) if( state == learning ) learner->updateParams(rewardHistory); - // Compute the dive time using the current coefficients as T = alpha2 * (alpha1*T_PAPO - T_dive) - double diveTime = (learner->getCoeffs()).at(1) * ( (learner->getCoeffs()).at(0) * tBall2Goal - tDive ); + // Compute the dive time using the current coefficients as T = alpha2 * (alpha1*T_PAPO - T_dive) + float diveTime = ( (learner->getCoeffs()).at(0) * tBall2Goal ); + +#ifdef DIVEHANDLER_TRAINING + if(stamp) + { + SPQR_INFO("diveTime: " << diveTime ); + SPQR_INFO("estimated time interval: " << estimatedInterval ); + SPQR_ERR("TimeError: "<< (estimatedInterval - diveTime)*(estimatedInterval - diveTime)); + SPQR_INFO("/-----------------------------------------/\n"); + stamp = false; + } +#endif #ifdef DIVEHANDLER_DEBUG SPQR_INFO( "Estimated overall time to dive and recover position: " << @@ -712,16 +788,16 @@ void DiveHandler::update(DiveHandle& diveHandle) // Update the DiveHandle if (diveTime > 0.0) - diveHandle.diveTime = diveTime; + diveHandle.diveTime = diveTime -tDive; else diveHandle.diveTime = -1.0; #ifdef DIVEHANDLER_TRAINING - if (diveTime > 0.0) - { - if(diveHandle.diveTime < SPQR::GOALIE_DIVE_TIME_TOLERANCE) - SPQR_INFO("Dive now! "); - } +// if (diveTime > 0.0) +// { +// if(diveHandle.diveTime < SPQR::GOALIE_DIVE_TIME_TOLERANCE) +// SPQR_INFO("Dive now! 
"); +// } #endif } @@ -729,7 +805,8 @@ void DiveHandler::update(DiveHandle& diveHandle) else { diveHandle.diveTime = -1; - diveHandle.diveType = diveType; + diveHandle.diveType = diveType; + timer.reset(); } } } diff --git a/machineLearning/DiveHandler/DiveHandler.h b/machineLearning/DiveHandler/DiveHandler.h index fa104b7..d593b7a 100644 --- a/machineLearning/DiveHandler/DiveHandler.h +++ b/machineLearning/DiveHandler/DiveHandler.h @@ -22,27 +22,30 @@ #include #include #include +#include #include "Tools/Module/Module.h" #include "Representations/Modeling/BallModel.h" #include "Representations/Infrastructure/TeamInfo.h" #include "Representations/Infrastructure/FrameInfo.h" +#include "Representations/Infrastructure/GameInfo.h" #include "Representations/Infrastructure/RobotInfo.h" +#include "Representations/Sensing/FallDownState.h" #include "Representations/SPQR-Representations/ConfigurationParameters.h" #include "Representations/SPQR-Representations/RobotPoseSpqrFiltered.h" #include "Representations/SPQR-Representations/GlobalBallEstimation.h" #include "Representations/SPQR-Representations/DiveHandle.h" -#include "SPQR-Libraries/PTracking/src/Utils/AgentPacket.h" +#include "Utils/AgentPacket.h" // Module definition - - MODULE(DiveHandler) REQUIRES(OpponentTeamInfo) REQUIRES(OwnTeamInfo) REQUIRES(FrameInfo) - REQUIRES(RobotInfo) + REQUIRES(GameInfo) + REQUIRES(FallDownState) + REQUIRES(RobotInfo) REQUIRES(RobotPoseSpqrFiltered) REQUIRES(BallModel) REQUIRES(GlobalBallEstimation) @@ -51,60 +54,40 @@ END_MODULE // Termination conditions -#define MAX_ITER 300 +#define MAX_ITER 15 #define CONVERGENCE_THRESHOLD 0.01 // PG parameters #define GAMMA 0.5 #define BUFFER_DIM 10 -#define REWARDS_HISTORY_SIZE 15 -#define EPSILON 0.10 +#define REWARDS_HISTORY_SIZE 10 +#define EPSILON 0.05 #define T 15 // Evaluation weight -#define LAMBDA1 0.7 +#define LAMBDA1 0.9 //#define LAMBDA2 0.3 // Module class declaration - - class DiveHandler : public DiveHandlerBase { // Learning state - enum LearningState - { + ENUM( LearningState, // Learning disabled notLearning = 1, // Learning paused, expecting reward waitReward, // Learning active learning - }; - - // Dive type - enum Dive - { - // No dive at all - none = 1, - // Long dive on the left - lDive, - // Long dive on the right - rDive, - // Close dive on the left - lcloseDive, - // Close dive on the right - rcloseDive, - // Stop the ball without diving - stopBall - }; + ); // Inner base class modeling the learning agent class CoeffsLearner { protected: // Set of coefficients representing the learning objective - std::vector coeffs; + std::vector coeffs; // Set of fixed parameters defining the cost funcion - std::map params; + std::map params; // Iteration counter int iter_count; @@ -114,45 +97,43 @@ class DiveHandler : public DiveHandlerBase public: // Default constructor - CoeffsLearner(int _nCoeffs, double _initValue, DiveHandler* _dhPtr): + CoeffsLearner(int _nCoeffs, float _initValue, DiveHandler* _dhPtr): coeffs(_nCoeffs, _initValue), iter_count(0), diveHandler_ptr(_dhPtr) { } + virtual ~CoeffsLearner(){} + // Setter/getter for the coefficients - void setCoeffs(const std::vector& _coeffs); - inline std::vector getCoeffs(){ return coeffs; } + void setCoeffs(const std::vector& _coeffs); + inline std::vector getCoeffs(){ return coeffs; } // Setter/getter for the parameters - void setParam(const std::string& _key, double _value); - inline double getParam(std::string _key){ return params[_key]; } + void setParam(const std::string& _key, float _value); + inline 
+        inline float getParam(std::string _key){ return params[_key]; }
 
        // Update coefficients performing a step of the learning algorithm
        virtual bool updateCoeffs() = 0;
 
        // Use the obtained rewards to adjust the algorithm parameters
-        virtual void updateParams(const std::list<double>& rewards) = 0;
+        virtual void updateParams(const std::list<float>& rewards) = 0;
    };
 
    // Inner class modeling a PolicyGradient-based learning agent
    class PGLearner : public CoeffsLearner
    {
-        typedef std::list< std::vector<double> > PGbuffer;
+        typedef std::list< std::vector<float> > PGbuffer;
 
    private:
        // Current estimate for the coefficients gradient
-        std::vector<double> coeffsGradient;
+        std::vector<float> coeffsGradient;
        // Best individual performance achieved so far
-        std::vector<double> coeffsBest;
+        std::vector<float> coeffsBest;
 
        // Current reward score
-        double reward_score;
+        float reward_score;
        // Current reward normalization factor
-        double reward_norm;
-        // Score of the current gradient estimate
-        double rewardGradient;
-        // Best gradient score so far
-        double rewardBest;
+        float reward_norm;
 
        // Memory buffer for the PG algorithm
        PGbuffer coeffsBuffer;
@@ -163,22 +144,22 @@ class DiveHandler : public DiveHandlerBase
        bool converged();
 
        // Recursive perturbation generator
-        void generatePerturbations(std::vector<double>* partial_perturbation, unsigned int index);
+        void generatePerturbations(std::vector<float>* partial_perturbation, unsigned int index);
 
    public:
 
        // Default constructor
-        PGLearner(DiveHandler* _dhPtr, int _nCoeffs, double _epsilon = EPSILON,
-                  int _T = T, double _initValue = 1.0, bool randomize = false);
+        PGLearner(DiveHandler* _dhPtr, int _nCoeffs, float _epsilon = EPSILON,
+                  int _T = T, float _initValue = 1.0, bool randomize = false);
 
        // Generate a set of perturbations for the current policy
        void generatePerturbations();
 
        // Evaluate a single policy perturbation with the cost function
-        double evaluatePerturbation( std::vector<double> R );
+        float evaluatePerturbation( std::vector<float> R );
 
        // Update the PG parameters according to the obtained rewards
-        void updateParams(const std::list<double>& rewards);
+        void updateParams(const std::list<float>& rewards);
 
        // Update coefficients performing a step of the learning algorithm
        virtual bool updateCoeffs();
@@ -197,14 +178,14 @@ class DiveHandler : public DiveHandlerBase
 
private:
    // Dive type currently selected
-    Dive diveType;
+    DiveHandle::Dive diveType;
    // Current learning state
    LearningState state;
    // Learning agent
    CoeffsLearner* learner;
    // Obtained rewards
-    std::list<double> rewardHistory;
+    std::list<float> rewardHistory;
 
    // Current scores
    int opponentScore;
@@ -212,23 +193,54 @@ class DiveHandler : public DiveHandlerBase
 
    // Estimated time the ball needs to reach the goal
    // a.k.a. Tpapo (historical reasons)
-    double tBall2Goal;
+    float tBall2Goal;
    // Estimated time needed for the current dive action to be performed
-    double tDive;
+    float tDive;
    // Estimated time the goalie needs to back up to its original position
-    double tBackInPose;
+    float tBackInPose;
+
+    // Timer
+    class Timer
+    {
+    public:
+        clock_t start;
+        clock_t fallen;
+        bool setTimer;
+
+        Timer():start(0), fallen(0), setTimer(false){}
+        inline void set(clock_t startTime)
+        {
+            if(!setTimer)
+            {
+                start = startTime;
+                setTimer = true;
+//                std::cerr << "\033[33;1m" <<"[DiveHandler] " << "set Timer!" << "\033[0m" << std::endl;
+            }
+        }
+        inline void reset()
+        {
+            if(setTimer)
+            {
+                setTimer = false;
+//                std::cerr << "\033[33;1m" <<"[DiveHandler] " << "reset Timer!" << "\033[0m" << std::endl;
<< "\033[0m" << std::endl; + } + } + }; + + Timer timer; + unsigned int estimatedInterval; // Estimated intersection between the ball projection and the goal line - double ballProjectionIntercept; + float ballProjectionIntercept; // Estimated distance of the ball from the own goal - double distanceBall2Goal; + float distanceBall2Goal; // Computes parameters using the ball estimated position and velocity void estimateDiveTimes(); void estimateBallProjection(); // Compute the overall time the goalie needs to dive and then recover its position - inline double computeDiveAndRecoverTime(double alpha1, double alpha2); + inline float computeDiveAndRecoverTime(float alpha1, float alpha2); public: @@ -238,7 +250,7 @@ class DiveHandler : public DiveHandlerBase ~DiveHandler(); // Setter for the reward list - inline const std::list& getRewardList() const + inline const std::list& getRewardList() const { return rewardHistory; }