Skip to content

Commit

Permalink
add timestamped debug logs for retries
Browse files Browse the repository at this point in the history
  • Loading branch information
cosmicexplorer committed Apr 11, 2019
1 parent be8d5e9 commit 200299d
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 13 deletions.
27 changes: 27 additions & 0 deletions src/rust/engine/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 8 additions & 9 deletions src/rust/engine/fs/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1996,15 +1996,14 @@ mod remote {
status: grpcio::RpcStatusCode::NotFound,
..
}) => Ok(None),
// TODO: This may be a race condition that deserves respect, but right now it's
// not clear it's an error. See #6344 for a case where we override this behavior
// by forking gprcio.
grpcio::Error::RpcFinished(None)
if std::env::var("_ALLOW_RPCFINISHED").unwrap_or("NO".to_string())
== "YES".to_string() =>
{
Ok(None)
}
// NB: This can sometimes occur when pants sends many process execution requests
// which depend on the same large file at the same time. It appears that the
// check_alive() method in grpc-rs which returns RpcFinished(None) is used as a
// way to exit early out of methods which return a Result, if the future's
// contents are already available. This may be a race condition that deserves
// respect, but right now it's not clear it's an error. See #6344 for a case
// where we override this type of behavior by forking gprcio.
grpcio::Error::RpcFinished(None) => Ok(None),
_ => Err(format!(
"Error from server in response to CAS read request: {:?}",
e
Expand Down
1 change: 1 addition & 0 deletions src/rust/engine/serverset/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ publish = false

[dependencies]
boxfuture = { path = "../boxfuture" }
chrono = "0.4"
futures = "^0.1.16"
# TODO: Switch to a release once https://github.com/alexcrichton/futures-timer/pull/11 and https://github.com/alexcrichton/futures-timer/pull/12 merge
futures-timer = { git = "https://github.com/pantsbuild/futures-timer", rev = "0b747e565309a58537807ab43c674d8951f9e5a0" }
Expand Down
26 changes: 22 additions & 4 deletions src/rust/engine/serverset/src/retry.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
use crate::{Health, Serverset};
use chrono::prelude::DateTime;
use chrono::Utc;
use futures::{self, Future, IntoFuture};
use log::debug;
use std::time::SystemTime;

pub struct Retry<T>(pub Serverset<T>);

fn now() -> String {
let sys_time = SystemTime::now();
let date_time = DateTime::<Utc>::from(sys_time);
date_time
.format("%Y-%m-%d %H:%M:%S.%f +0000 UTC")
.to_string()
}

pub struct RetryParameters {
times: usize,
description: String,
Expand Down Expand Up @@ -47,22 +58,29 @@ impl<T: Clone + Send + Sync + 'static> Retry<T> {
&Ok(_) => Health::Healthy,
Err(err) => {
debug!(
"Attempt {} for {} failed with error {:?}",
i, description, err
"({}) Attempt {} for {} failed with error {:?}",
now(),
i,
description,
err
);
Health::Unhealthy
}
};
serverset.report_health(token, health);
debug!("({}) Attempt {} for {} succeeded!", now(), i, description);
result
})
})
.map(futures::future::Loop::Break)
.or_else(move |err| {
if i >= times {
Err(format!(
"Failed after {} retries for {}; last failure: {}",
i, description2, err
"({}) Failed after {} retries for {}; last failure: {}",
now(),
i,
description2,
err
))
} else {
Ok(futures::future::Loop::Continue(i + 1))
Expand Down

0 comments on commit 200299d

Please sign in to comment.