Skip to content

Commit

Permalink
feat: add timeout to health check
Browse files Browse the repository at this point in the history
  • Loading branch information
TroyKomodo committed May 1, 2024
1 parent 07dc140 commit e734d8c
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 12 deletions.
6 changes: 5 additions & 1 deletion foundations/src/runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ pub enum Runtime {
}

impl Runtime {
pub fn new_steal(thread_count: usize, name: &str) -> std::io::Result<Self> {
pub fn new_steal(mut thread_count: usize, name: &str) -> std::io::Result<Self> {
if thread_count == 0 {
thread_count = num_cpus::get();
}

Ok(Self::Steal(
tokio::runtime::Builder::new_multi_thread()
.worker_threads(thread_count)
Expand Down
31 changes: 26 additions & 5 deletions foundations/src/telementry/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::net::SocketAddr;

use anyhow::Context;

#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct ServerSettings {
pub bind: SocketAddr,
#[cfg(feature = "pprof-cpu")]
Expand All @@ -13,6 +13,8 @@ pub struct ServerSettings {
pub metrics_path: Option<String>,
#[cfg(feature = "health-check")]
pub health_path: Option<String>,
#[cfg(feature = "health-check")]
pub health_timeout: Option<std::time::Duration>,
#[cfg(feature = "context")]
pub context: Option<crate::context::Context>,
}
Expand All @@ -29,6 +31,8 @@ impl Default for ServerSettings {
metrics_path: Some("/metrics".into()),
#[cfg(feature = "health-check")]
health_path: Some("/health".into()),
#[cfg(feature = "health-check")]
health_timeout: Some(std::time::Duration::from_secs(5)),
#[cfg(feature = "context")]
context: Some(crate::context::Context::global()),
}
Expand Down Expand Up @@ -160,7 +164,9 @@ async fn metrics(
}

#[cfg(feature = "health-check")]
pub use health_check::{register as register_health_check, unregister as unregister_health_check, HealthCheck, HealthCheckFn};
pub use health_check::{
register as register_health_check, unregister as unregister_health_check, HealthCheck, HealthCheckFn,
};

#[cfg(feature = "health-check")]
mod health_check {
Expand Down Expand Up @@ -237,8 +243,21 @@ mod health_check {
}

#[cfg(feature = "health-check")]
async fn health() -> axum::response::Response<axum::body::Body> {
if health_check::is_healthy().await {
async fn health(
axum::Extension(timeout): axum::Extension<Option<std::time::Duration>>,
) -> axum::response::Response<axum::body::Body> {
let healthy = if let Some(timeout) = timeout {
tokio::time::timeout(timeout, health_check::is_healthy())
.await
.map_err(|err| {
tracing::error!(%err, "failed to check health, timed out");
})
.unwrap_or(false)
} else {
health_check::is_healthy().await
};

if healthy {
axum::response::Response::builder()
.status(axum::http::StatusCode::OK)
.body("ok".into())
Expand Down Expand Up @@ -275,7 +294,9 @@ pub async fn init(settings: ServerSettings) -> anyhow::Result<()> {

#[cfg(feature = "health-check")]
if let Some(path) = &settings.health_path {
router = router.route(path, axum::routing::get(health));
// Registers the health endpoint and passes the configured timeout to the
// `health` handler, which extracts it as `axum::Extension<Option<Duration>>`.
// NOTE(review): the `Extension` layer is attached *before* the health route
// is registered. axum documents that `Router::layer` only applies to routes
// that already exist, so the handler added afterwards presumably never sees
// the extension and the extractor would reject the request. Confirm, and
// consider calling `.route(...)` first and `.layer(...)` after it (or
// layering the method router returned by `axum::routing::get(health)`).
router = router
.layer(axum::Extension(settings.health_timeout))
.route(path, axum::routing::get(health));
}

router = router.fallback(axum::routing::any(not_found));
Expand Down
16 changes: 10 additions & 6 deletions foundations/src/telementry/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,8 @@ pub struct TelementrySettings {
pub opentelemetry: OpentelemetrySettings,
/// Settings for logging.
pub logging: LoggingSettings,
#[cfg(all(
any(feature = "pprof-cpu", feature = "pprof-heap", feature = "metrics",),
feature = "telemetry-server"
))]
/// Settings for the http server.
#[cfg(feature = "telemetry-server")]
pub server: ServerSettings,
}

Expand All @@ -43,7 +40,7 @@ pub struct MetricsSettings {
#[serde(default)]
pub struct OpentelemetrySettings {
/// Whether to enable opentelemetry span exporting.
#[settings(default = true)]
#[settings(default = false)]
pub enabled: bool,
/// A map of additional labels to add to opentelemetry spans.
pub labels: HashMap<String, String>,
Expand Down Expand Up @@ -247,7 +244,7 @@ pub struct ServerSettings {
#[settings(default = true)]
pub enabled: bool,
/// The address to bind the server to.
#[settings(default = SocketAddr::from(([127, 0, 0, 1], 9090)))]
#[settings(default = SocketAddr::from(([127, 0, 0, 1], 9000)))]
pub bind: SocketAddr,
/// The path to the pprof heap endpoint. If `None`, the endpoint is
/// disabled.
Expand All @@ -267,6 +264,11 @@ pub struct ServerSettings {
#[cfg(feature = "health-check")]
#[settings(default = Some("/health".into()))]
pub health_path: Option<String>,
/// The maximum time to wait for the health check to complete. If `None`,
/// no timeout is applied.
#[cfg(feature = "health-check")]
#[settings(default = Some(std::time::Duration::from_secs(5)))]
#[serde(with = "humantime_serde")]
pub health_timeout: Option<std::time::Duration>,
}

pub async fn init(info: crate::ServiceInfo, settings: TelementrySettings) {
Expand Down Expand Up @@ -463,6 +465,8 @@ pub async fn init(info: crate::ServiceInfo, settings: TelementrySettings) {
pprof_heap_path: settings.server.pprof_heap_path,
#[cfg(feature = "health-check")]
health_path: settings.server.health_path,
#[cfg(feature = "health-check")]
health_timeout: settings.server.health_timeout,
#[cfg(feature = "context")]
context: Some(crate::context::Context::global()),
})
Expand Down

0 comments on commit e734d8c

Please sign in to comment.