-
Notifications
You must be signed in to change notification settings - Fork 38
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore(bors): merge pull request #904
904: Cherry pick health probes r=niladrih a=niladrih Co-authored-by: Niladri Halder <[email protected]>
- Loading branch information
Showing
11 changed files
with
296 additions
and
30 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
use crate::v0::core_grpc; | ||
use grpc::operations::node::traits::NodeOperations; | ||
use std::time::{Duration, Instant}; | ||
use tokio::sync::Mutex; | ||
|
||
/// This is a type to cache the liveness of the agent-core service. | ||
/// This is meant to be wrapped inside an Arc and used across threads. | ||
pub struct CachedCoreState { | ||
state: Mutex<ServerState>, | ||
cache_duration: Duration, | ||
} | ||
|
||
/// Remembers a liveness state, and when this data was refreshed.
struct ServerState {
    /// Liveness value recorded at the last refresh.
    is_live: bool,
    /// Moment at which `is_live` was last refreshed.
    last_updated: Instant,
}
|
||
impl ServerState { | ||
/// Update the state of the agent-core service, or assume it's unavailable if something | ||
/// went wrong. | ||
async fn update_or_assume_unavailable(&mut self) { | ||
let new_value = core_grpc().node().probe(None).await.unwrap_or(false); | ||
self.is_live = new_value; | ||
self.last_updated = Instant::now(); | ||
} | ||
} | ||
|
||
impl CachedCoreState { | ||
/// Create a new cache for serving readiness health checks based on agent-core health. | ||
pub async fn new(cache_duration: Duration) -> Self { | ||
let agent_core_is_live = core_grpc().node().probe(None).await.unwrap_or(false); | ||
|
||
CachedCoreState { | ||
state: Mutex::new(ServerState { | ||
is_live: agent_core_is_live, | ||
last_updated: Instant::now(), | ||
}), | ||
cache_duration, | ||
} | ||
} | ||
|
||
/// Get the cached state of the agent-core service, or assume it's unavailable if something | ||
/// went wrong. | ||
pub async fn get_or_assume_unavailable(&self) -> bool { | ||
let mut state = self.state.lock().await; | ||
|
||
if state.last_updated.elapsed() >= self.cache_duration { | ||
state.update_or_assume_unavailable().await; | ||
} | ||
|
||
state.is_live | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
use crate::CachedCoreState; | ||
use actix_web::{get, web::Data, HttpResponse, Responder}; | ||
|
||
/// Liveness probe check. Failure will result in Pod restart. 200 on success. | ||
#[get("/live")] | ||
async fn liveness(_cached_core_state: Data<CachedCoreState>) -> impl Responder { | ||
HttpResponse::Ok() | ||
.content_type("text/plain; charset=utf-8") | ||
.insert_header(("X-Content-Type-Options", "nosniff")) | ||
.body("live") | ||
} | ||
|
||
/// Readiness probe check. Failure will result in removal of Container from Kubernetes service | ||
/// target pool. 200 on success, 503 on failure. | ||
#[get("/ready")] | ||
async fn readiness(cached_core_state: Data<CachedCoreState>) -> HttpResponse { | ||
if cached_core_state.get_or_assume_unavailable().await { | ||
return HttpResponse::Ok() | ||
.content_type("text/plain; charset=utf-8") | ||
.insert_header(("X-Content-Type-Options", "nosniff")) | ||
.body("ready"); | ||
} | ||
|
||
HttpResponse::ServiceUnavailable() | ||
.content_type("text/plain; charset=utf-8") | ||
.insert_header(("X-Content-Type-Options", "nosniff")) | ||
.body("not ready") | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
/// Has tools to collect the liveness state of the agent-core service. | ||
pub mod core_state; | ||
/// Actix request handlers for health checks. | ||
pub mod handlers; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
Feature: Readiness Probe | ||
|
||
Background: | ||
Given a running agent-core service | ||
And a running REST service with the cache refresh period set to "800ms" | ||
|
||
Scenario: The REST API /ready service should not update its readiness status more than once in the cache refresh period | ||
Given agent-core service is available | ||
And the REST service returns a 200 status code for an HTTP GET request to the /ready endpoint | ||
When the agent-core service is brought down forcefully | ||
Then the REST service return changes from 200 to 503 within double of the cache refresh period | ||
And it keeps returning 503 at least for the cache refresh period |
Oops, something went wrong.