Skip to content

Commit

Permalink
Add graceful shutdown to all health check failures
Browse files Browse the repository at this point in the history
  • Loading branch information
danielle-tfh committed Jan 10, 2025
1 parent 7d6de6f commit 4ed28ca
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 1 deletion.
5 changes: 5 additions & 0 deletions iris-mpc-common/src/helpers/shutdown_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ impl ShutdownHandler {
.fetch_sub(1, Ordering::SeqCst);
}

/// Requests a graceful shutdown from code rather than from an external signal.
///
/// Stores `true` into the `shutdown_received` flag and then emits an
/// info-level log line. Calling it more than once simply stores `true` again.
// NOTE(review): presumably the rest of the handler polls `shutdown_received`
// to decide when to stop accepting work — confirm against the flag's readers.
pub fn manually_trigger_graceful_shutdown(&self) {
    // Use `SeqCst`, matching the neighbouring atomic `fetch_sub` in this impl,
    // instead of `Relaxed`: the flag is shared across tasks and nothing here
    // justifies a weaker ordering for a once-per-process store.
    self.shutdown_received.store(true, Ordering::SeqCst);
    tracing::info!("Shutdown signal received.");
}

pub async fn wait_for_pending_batches_completion(&self) {
let check_interval = Duration::from_millis(100);
let start = Instant::now();
Expand Down
3 changes: 2 additions & 1 deletion iris-mpc/src/bin/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -887,7 +887,8 @@ async fn server_main(config: Config) -> eyre::Result<()> {
// If the UUID response is different, the node has restarted without us
// noticing. Our main NCCL connections cannot recover from
// this, so we panic.
panic!("Node {} seems to have restarted, killing server...", host);
tracing::error!("Node {} has restarted, starting graceful shutdown", host);
shutdown_handler.shutdown();
} else {
tracing::info!("Heartbeat: Node {} is healthy", host);
}
Expand Down

0 comments on commit 4ed28ca

Please sign in to comment.