diff --git a/iris-mpc-common/src/helpers/shutdown_handler.rs b/iris-mpc-common/src/helpers/shutdown_handler.rs
index e596335d7..11d85c381 100644
--- a/iris-mpc-common/src/helpers/shutdown_handler.rs
+++ b/iris-mpc-common/src/helpers/shutdown_handler.rs
@@ -50,6 +50,13 @@ impl ShutdownHandler {
             .fetch_sub(1, Ordering::SeqCst);
     }
 
+    /// Sets the `shutdown_received` flag so that a graceful shutdown can be
+    /// requested from code (e.g. when a peer node is detected as restarted).
+    pub fn manually_trigger_graceful_shutdown(&self) {
+        self.shutdown_received.store(true, Ordering::Relaxed);
+        tracing::info!("Shutdown signal received.");
+    }
+
     pub async fn wait_for_pending_batches_completion(&self) {
         let check_interval = Duration::from_millis(100);
         let start = Instant::now();
diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs
index 2b1ed4f54..9f14d09d4 100644
--- a/iris-mpc/src/bin/server.rs
+++ b/iris-mpc/src/bin/server.rs
@@ -887,7 +887,8 @@ async fn server_main(config: Config) -> eyre::Result<()> {
                         // If the UUID response is different, the node has restarted without us
                         // noticing. Our main NCCL connections cannot recover from
-                        // this, so we panic.
-                        panic!("Node {} seems to have restarted, killing server...", host);
+                        // this, so we trigger a graceful shutdown.
+                        tracing::error!("Node {} has restarted, starting graceful shutdown", host);
+                        shutdown_handler.manually_trigger_graceful_shutdown();
                     } else {
                         tracing::info!("Heartbeat: Node {} is healthy", host);
                     }