Skip to content

Commit

Permalink
TOFIX: negative coeffs + rewards
Browse files Browse the repository at this point in the history
  • Loading branch information
claudio-db committed Jan 28, 2014
1 parent 9c8ae17 commit 36a3532
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 10 deletions.
23 changes: 14 additions & 9 deletions machineLearning/DiveHandler/DiveHandler.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* @file DiveHandler.cpp
*
* This header file contains the implementation of a module working as a dive handler for the goalie.
* This source file contains the implementation of a module working as a dive handler for the goalie.
* Such handler is activated when the ball gets in the own field side, and it computes an estimate of its projection toward the goal
* with respect to the goalie reference frame. It also provides estimates for the amount of time needed to dive, save the ball and
* then get back to the goalie position. This measure is compared against the estimated time the ball needs to reach the goal.
Expand Down Expand Up @@ -230,7 +230,6 @@ void DiveHandler::PGLearner::generatePerturbations(std::vector<float>* partial_p
}
}


/* TOCOMMENT */
float DiveHandler::PGLearner::evaluatePerturbation( std::vector<float> R )
{
Expand Down Expand Up @@ -356,10 +355,14 @@ bool DiveHandler::PGLearner::updateCoeffs()
coeffs_avgGradient.at(coeffs.size() - (n +1)) = avg_plus - avg_minus;
}
#endif
// Avoid 'nan' when the gradient is zeroed
float normalization = 1.0;
if (magnitude(coeffs_avgGradient) != 0)
normalization = magnitude(coeffs_avgGradient);

#ifdef DIVEHANDLER_TRAINING
SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/magnitude(coeffs_avgGradient)
<< ", " << coeffs_avgGradient.at(1)/magnitude(coeffs_avgGradient) << " ]");
SPQR_INFO("Computed policy gradient: [ " << coeffs_avgGradient.at(0)/normalization
<< ", " << coeffs_avgGradient.at(1)/normalization << " ]");
#endif

// Update coefficients history
Expand All @@ -370,7 +373,7 @@ bool DiveHandler::PGLearner::updateCoeffs()

// Update the coefficients following the gradient direction
for( unsigned int i=0; i<coeffs_avgGradient.size(); ++i )
coeffs.at(i) += - (coeffs_avgGradient.at(i)/magnitude(coeffs_avgGradient)) * ETA;
coeffs.at(i) += - (coeffs_avgGradient.at(i)/normalization) * ETA;

#ifdef DIVEHANDLER_TRAINING
SPQR_INFO("New coefficients: [ " << coeffs.at(0) << ", " << coeffs.at(1) << " ]");
Expand Down Expand Up @@ -518,7 +521,7 @@ void DiveHandler::estimateDiveTimes()
/**
 * Computes the overall time the goalie needs to dive, save the ball and
 * recover back into its home position.
 *
 * @param alpha1  Learned coefficient scaling the ball-to-goal time estimate.
 * @param alpha2  Learned coefficient scaling the resulting dive-time margin.
 * @return Non-negative time estimate (same unit as tBall2Goal/tDive/tBackInPose).
 *
 * NOTE(review): tBall2Goal, tDive and tBackInPose are DiveHandler members;
 * per the file header they are, presumably, the estimated ball-travel time,
 * the dive duration and the get-back-in-pose duration — confirm in the class
 * declaration.
 */
inline float DiveHandler::computeDiveAndRecoverTime(float alpha1, float alpha2)
{
    // fabs() guards against negative learned coefficients producing a
    // negative (meaningless) time estimate. The scraped diff had left the
    // old, unguarded return statement above this one, making the fix
    // unreachable dead code — only the corrected statement is kept.
    return fabs(alpha2*( alpha1*tBall2Goal - tDive ) + tBackInPose);
}

/* TOTEST&COMMENT */
Expand Down Expand Up @@ -578,7 +581,7 @@ void DiveHandler::update(DiveHandle& diveHandle)
// The module is in the learning state, waiting for the next reward
else if( state == waitReward )
{
// First case: game controller active and the opponent team scores
// The opponent team scores: the goalie failed and gets a negative reward
if(opponentScore != (int)theOpponentTeamInfo.score)
{
// The learner obtains a negative reward
Expand All @@ -599,7 +602,7 @@ void DiveHandler::update(DiveHandle& diveHandle)
if(!diveHandle.rewardAck)
diveHandle.rewardAck = true;
}
// The goalie has performed a dive and yet the outcome is unknown
// The own team scores: user-guided move to provide the goalie a positive reward
else if(ownScore != (int)theOwnTeamInfo.score)
{
// The learner obtains a positive reward
Expand All @@ -608,6 +611,9 @@ void DiveHandler::update(DiveHandle& diveHandle)
// Crop the buffer
if (rewardHistory.size() > REWARDS_HISTORY_SIZE)
rewardHistory.resize(REWARDS_HISTORY_SIZE);
// Update own score
ownScore = (int)theOwnTeamInfo.score;

#ifdef DIVEHANDLER_TRAINING
SPQR_SUCCESS("The goalie has succeeded! Positive reward for the learner. ");
#endif
Expand All @@ -616,7 +622,6 @@ void DiveHandler::update(DiveHandle& diveHandle)
// Clear the pending reward
if(!diveHandle.rewardAck)
diveHandle.rewardAck = true;

}
}

Expand Down
2 changes: 1 addition & 1 deletion machineLearning/DiveHandler/DiveHandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ class DiveHandler : public DiveHandlerBase
// Obtained rewards
std::list<float> rewardHistory;

// Opponent team current score
// Current scores
int opponentScore;
int ownScore;

Expand Down

0 comments on commit 36a3532

Please sign in to comment.