Skip to content

Commit

Permalink
TOFIX: Evaluation of hypotheses
Browse files Browse the repository at this point in the history
  • Loading branch information
claudio-db committed Jan 29, 2014
1 parent c01ff3c commit 5dac511
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 19 deletions.
49 changes: 32 additions & 17 deletions machineLearning/DiveHandler/DiveHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
#define DIVEHANDLER_TRAINING
//#define RAND_PERMUTATIONS

#define NEGATIVE_REWARD -1.0
#define POSITIVE_REWARD 1.0
#define NEGATIVE_REWARD -0.5
#define POSITIVE_REWARD 1.5

// Debug messages template
#define SPQR_ERR(x) std::cerr << "\033[22;31;1m" <<"[DiveHandler] " << x << "\033[0m"<< std::endl;
Expand Down Expand Up @@ -68,9 +68,6 @@ void DiveHandler::CoeffsLearner::setCoeffs(const std::vector<float>& _coeffs)
/*
 * Sets (or overwrites) a named tuning parameter of the learner.
 * @param _key    Name of the parameter (e.g. "epsilon").
 * @param _value  New value for the parameter.
 */
void DiveHandler::CoeffsLearner::setParam(const std::string& _key, float _value)
{
    // std::map::operator[] inserts the key if absent, otherwise updates it —
    // no separate lookup/insert dance is needed.
    params[_key] = _value;
}


Expand All @@ -88,7 +85,9 @@ void DiveHandler::CoeffsLearner::setParam(const std::string& _key, float _value)
*/
DiveHandler::PGLearner::PGLearner( DiveHandler* _dhPtr, int _nCoeffs, float _epsilon, int _T, float _initValue, bool randomize ):
// Initialize the base class
CoeffsLearner(_nCoeffs, _initValue, _dhPtr)
CoeffsLearner(_nCoeffs, _initValue, _dhPtr),
// Initialize the gradient estimate
coeffsGradient(_nCoeffs, 0.0), positivesWeight(0.0)
{
// Initializing coefficients
if(randomize)
Expand Down Expand Up @@ -238,7 +237,8 @@ float DiveHandler::PGLearner::evaluatePerturbation( std::vector<float> R )
// Dimensions check
assert(R.size() == coeffs.size());
// Generate perturbated policy and call the DiveHandler object for evaluation
return diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1));
float tDiveAndRecover = diveHandler_ptr->computeDiveAndRecoverTime(coeffs.at(0) + R.at(0), coeffs.at(1) + R.at(1));
return LAMBDA*fabs(tDiveAndRecover) + (1-LAMBDA)*fabs(diveHandler_ptr->tBall2Goal - tDiveAndRecover);
}


Expand All @@ -247,24 +247,24 @@ void DiveHandler::PGLearner::updateParams(const std::list<float>& rewards)
{
float reward_score = 0.0;
int discount_exp = 0;
#ifdef DIVEHANDLER_TRAINING_DEBUG
int positives = 0, negatives = 0;
#endif
int positives = 0;

std::list<float>::const_iterator i = rewards.begin();
while (i != rewards.end())
{
#ifdef DIVEHANDLER_TRAINING_DEBUG
if (*i == POSITIVE_REWARD) ++positives;
else ++ negatives;
#endif
// Counting positives
if (*i == POSITIVE_REWARD)
++positives;

// Computing discounted rewards
reward_score += (*i) * pow(GAMMA, discount_exp);
++i; ++discount_exp;
}
positivesWeight = static_cast<float>(positives)/rewards.size();

#ifdef DIVEHANDLER_TRAINING_DEBUG
SPQR_INFO("Positive rewards: " << positives << " out of " << rewards.size());
SPQR_INFO("Negative rewards: " << negatives << " out of " << rewards.size());
SPQR_INFO("Negative rewards: " << (rewards.size() - positives) << " out of " << rewards.size());
SPQR_INFO("Reward total score: " << reward_score);
#endif

Expand Down Expand Up @@ -375,10 +375,22 @@ bool DiveHandler::PGLearner::updateCoeffs()
if (magnitude(coeffs_avgGradient) != 0)
normalization = magnitude(coeffs_avgGradient);


#ifdef DIVEHANDLER_TRAINING
SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization
<< ", " << coeffs_avgGradient.at(1)/normalization << " ]");
#endif
// Weight new gradient estimate and previous one according to the reward score
std::vector<float> newGradient (coeffsGradient.size());
for( unsigned int j=0; j<newGradient.size(); ++j )
newGradient.at(j) = (positivesWeight)*coeffsGradient.at(j) +
(1.0 - positivesWeight)*(coeffs_avgGradient.at(j)/normalization);

#ifdef DIVEHANDLER_TRAINING
SPQR_INFO("Weight of the current estimate: " << positivesWeight);
SPQR_INFO("New policy gradient: [ " << newGradient.at(0)
<< ", " << newGradient.at(1) << " ]");
#endif

// Update coefficients history
coeffsBuffer.push_front(coeffs);
Expand All @@ -389,7 +401,10 @@ bool DiveHandler::PGLearner::updateCoeffs()
// Update the coefficients following the gradient direction
for( unsigned int i=0; i<coeffs_avgGradient.size(); ++i )
{
coeffs.at(i) += - (coeffs_avgGradient.at(i)/normalization) * ETA;
// Coefficients
coeffs.at(i) += - newGradient.at(i) * getParam("epsilon");
// Gradient estimate
coeffsGradient.at(i) = newGradient.at(i);

// Crop negative coefficients
if (coeffs.at(i) < 0) coeffs.at(i) = 0.0;
Expand Down Expand Up @@ -541,7 +556,7 @@ void DiveHandler::estimateDiveTimes()
/*
 * Computes the estimated time to perform a dive and recover the initial pose,
 * given the (possibly perturbed) policy coefficients.
 * @param alpha1  Coefficient scaling the ball-to-goal time estimate.
 * @param alpha2  Coefficient scaling the dive component of the motion.
 * @return Signed time estimate alpha2*(alpha1*tBall2Goal - tDive) + tBackInPose.
 *         The value is deliberately NOT wrapped in fabs() here: the caller
 *         (evaluatePerturbation) applies fabs() itself when combining this
 *         with tBall2Goal in the LAMBDA-weighted objective.
 */
inline float DiveHandler::computeDiveAndRecoverTime(float alpha1, float alpha2)
{
    return alpha2*( alpha1*tBall2Goal - tDive ) + tBackInPose;
}

/* TOTEST&COMMENT */
Expand Down
9 changes: 7 additions & 2 deletions machineLearning/DiveHandler/DiveHandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,14 @@ END_MODULE
// Termination conditions
#define MAX_ITER 300
#define CONVERGENCE_THRESHOLD 0.05

// PG parameters
#define GAMMA 0.5
#define BUFFER_DIM 10
#define REWARDS_HISTORY_SIZE 15
#define ETA 0.4
#define EPSILON 0.15
#define T 15
// Evaluation weight
#define LAMBDA 0.15


// Module class declaration
Expand Down Expand Up @@ -139,6 +139,11 @@ class DiveHandler : public DiveHandlerBase

private:

// Current estimate for the coefficients gradient
std::vector<float> coeffsGradient;
// Weight of the current gradient estimate
float positivesWeight;

// Memory buffer for the PG algorithm
PGbuffer coeffsBuffer;
// Set of perturbations to be performed
Expand Down

0 comments on commit 5dac511

Please sign in to comment.