Patch notes (text before the first `diff --git` header is skipped by `git apply`).

Summary of the change set:
  * runtime.rs: `Runtime::new_steal` replaces a `thread_count` of 0 with `num_cpus::get()`
    before handing it to `worker_threads`.
  * telementry/server.rs: `ServerSettings` derives `Clone` and gains `health_timeout`
    (default 5 seconds). The `health` handler wraps `health_check::is_healthy()` in
    `tokio::time::timeout`; on timeout it logs an error and `healthy` becomes `false`.
  * telementry/settings.rs: the `server` field is gated on `telemetry-server` alone, the
    opentelemetry `enabled` default becomes `false`, the default bind port changes from
    9090 to 9000, and a `health_timeout` setting (default 5 seconds, `humantime_serde`)
    is forwarded to the server settings in `init`.

Review notes:
  * In `init` (server.rs) the `axum::Extension` layer is applied AFTER the health route is
    registered. axum's `Router::layer` only wraps routes that already exist, so layering
    first would leave the `health` handler's `Extension` extractor without a value.
  * Generic arguments had been stripped from this patch text and are restored here.
    `Result<Self>` and `Option<std::time::Duration>` follow from the visible code.
    `Option<String>` (metrics_path / health_path), `Option<crate::context::Context>` and
    `HashMap<String, String>` sit on context lines and are inferred from the default
    values -- TODO confirm against the target files.
  * Indentation was reconstructed; the `index` hashes are carried over unchanged, so the
    post-image hash for server.rs no longer describes the edited hunk.

diff --git a/foundations/src/runtime.rs b/foundations/src/runtime.rs
index 805fdbb2..5ac0300d 100644
--- a/foundations/src/runtime.rs
+++ b/foundations/src/runtime.rs
@@ -11,7 +11,11 @@ pub enum Runtime {
 }
 
 impl Runtime {
-    pub fn new_steal(thread_count: usize, name: &str) -> std::io::Result<Self> {
+    pub fn new_steal(mut thread_count: usize, name: &str) -> std::io::Result<Self> {
+        if thread_count == 0 {
+            thread_count = num_cpus::get();
+        }
+
         Ok(Self::Steal(
             tokio::runtime::Builder::new_multi_thread()
                 .worker_threads(thread_count)
diff --git a/foundations/src/telementry/server.rs b/foundations/src/telementry/server.rs
index 77cc710f..d1d88b75 100644
--- a/foundations/src/telementry/server.rs
+++ b/foundations/src/telementry/server.rs
@@ -2,7 +2,7 @@ use std::net::SocketAddr;
 
 use anyhow::Context;
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct ServerSettings {
     pub bind: SocketAddr,
     #[cfg(feature = "pprof-cpu")]
@@ -13,6 +13,8 @@ pub struct ServerSettings {
     pub metrics_path: Option<String>,
     #[cfg(feature = "health-check")]
     pub health_path: Option<String>,
+    #[cfg(feature = "health-check")]
+    pub health_timeout: Option<std::time::Duration>,
     #[cfg(feature = "context")]
     pub context: Option<crate::context::Context>,
 }
@@ -29,6 +31,8 @@ impl Default for ServerSettings {
             metrics_path: Some("/metrics".into()),
             #[cfg(feature = "health-check")]
             health_path: Some("/health".into()),
+            #[cfg(feature = "health-check")]
+            health_timeout: Some(std::time::Duration::from_secs(5)),
             #[cfg(feature = "context")]
             context: Some(crate::context::Context::global()),
         }
@@ -160,7 +164,9 @@ async fn metrics(
 }
 
 #[cfg(feature = "health-check")]
-pub use health_check::{register as register_health_check, unregister as unregister_health_check, HealthCheck, HealthCheckFn};
+pub use health_check::{
+    register as register_health_check, unregister as unregister_health_check, HealthCheck, HealthCheckFn,
+};
 
 #[cfg(feature = "health-check")]
 mod health_check {
@@ -237,8 +243,21 @@ mod health_check {
 }
 
 #[cfg(feature = "health-check")]
-async fn health() -> axum::response::Response {
-    if health_check::is_healthy().await {
+async fn health(
+    axum::Extension(timeout): axum::Extension<Option<std::time::Duration>>,
+) -> axum::response::Response {
+    let healthy = if let Some(timeout) = timeout {
+        tokio::time::timeout(timeout, health_check::is_healthy())
+            .await
+            .map_err(|err| {
+                tracing::error!(%err, "failed to check health, timed out");
+            })
+            .unwrap_or(false)
+    } else {
+        health_check::is_healthy().await
+    };
+
+    if healthy {
         axum::response::Response::builder()
             .status(axum::http::StatusCode::OK)
             .body("ok".into())
@@ -275,7 +294,9 @@ pub async fn init(settings: ServerSettings) -> anyhow::Result<()> {
 
     #[cfg(feature = "health-check")]
     if let Some(path) = &settings.health_path {
-        router = router.route(path, axum::routing::get(health));
+        router = router
+            .route(path, axum::routing::get(health))
+            .layer(axum::Extension(settings.health_timeout));
     }
 
     router = router.fallback(axum::routing::any(not_found));
diff --git a/foundations/src/telementry/settings.rs b/foundations/src/telementry/settings.rs
index 644d2bf2..c2e61a9d 100644
--- a/foundations/src/telementry/settings.rs
+++ b/foundations/src/telementry/settings.rs
@@ -20,11 +20,8 @@ pub struct TelementrySettings {
     pub opentelemetry: OpentelemetrySettings,
     /// Settings for logging.
     pub logging: LoggingSettings,
-    #[cfg(all(
-        any(feature = "pprof-cpu", feature = "pprof-heap", feature = "metrics",),
-        feature = "telemetry-server"
-    ))]
     /// Settings for the http server.
+    #[cfg(feature = "telemetry-server")]
     pub server: ServerSettings,
 }
 
@@ -43,7 +40,7 @@ pub struct MetricsSettings {
 #[serde(default)]
 pub struct OpentelemetrySettings {
     /// Whether to enable opentelemetry span exporting.
-    #[settings(default = true)]
+    #[settings(default = false)]
     pub enabled: bool,
     /// A map of additional labels to add to opentelemetry spans.
     pub labels: HashMap<String, String>,
@@ -247,7 +244,7 @@ pub struct ServerSettings {
     #[settings(default = true)]
     pub enabled: bool,
     /// The address to bind the server to.
-    #[settings(default = SocketAddr::from(([127, 0, 0, 1], 9090)))]
+    #[settings(default = SocketAddr::from(([127, 0, 0, 1], 9000)))]
     pub bind: SocketAddr,
     /// The path to the pprof heap endpoint. If `None`, the endpoint is
     /// disabled.
@@ -267,6 +264,11 @@ pub struct ServerSettings {
     #[cfg(feature = "health-check")]
     #[settings(default = Some("/health".into()))]
     pub health_path: Option<String>,
+    /// Health check timeout.
+    #[cfg(feature = "health-check")]
+    #[settings(default = Some(std::time::Duration::from_secs(5)))]
+    #[serde(with = "humantime_serde")]
+    pub health_timeout: Option<std::time::Duration>,
 }
 
 pub async fn init(info: crate::ServiceInfo, settings: TelementrySettings) {
@@ -463,6 +465,8 @@ pub async fn init(info: crate::ServiceInfo, settings: TelementrySettings) {
             pprof_heap_path: settings.server.pprof_heap_path,
             #[cfg(feature = "health-check")]
             health_path: settings.server.health_path,
+            #[cfg(feature = "health-check")]
+            health_timeout: settings.server.health_timeout,
             #[cfg(feature = "context")]
             context: Some(crate::context::Context::global()),
         })