From 789326b83d4e538fddc2e008ab314ddad866eb60 Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Mon, 15 Jan 2024 12:10:02 +0530 Subject: [PATCH] fix: retry if build inventory list is empty --- src/ansible.rs | 93 ++++++++++++++++++++++++++++++------------------- src/deploy.rs | 27 +++++++++----- src/lib.rs | 24 +++++++++---- src/logs.rs | 14 +++++--- src/logstash.rs | 12 ++++--- 5 files changed, 111 insertions(+), 59 deletions(-) diff --git a/src/ansible.rs b/src/ansible.rs index 78c7aace..a64bc338 100644 --- a/src/ansible.rs +++ b/src/ansible.rs @@ -7,9 +7,9 @@ use crate::{ error::{Error, Result}, is_binary_on_path, run_external_command, CloudProvider, }; -use log::debug; +use log::{debug, warn}; use serde::Deserialize; -use std::{collections::HashMap, net::IpAddr, path::PathBuf}; +use std::{collections::HashMap, net::IpAddr, path::PathBuf, time::Duration}; /// Ansible has multiple 'binaries', e.g., `ansible-playbook`, `ansible-inventory` etc. that are /// wrappers around the main `ansible` program. It would be a bit cumbersome to create a different @@ -83,42 +83,65 @@ impl AnsibleRunner { // This function is used to list the inventory of the ansible runner. // It takes a PathBuf as an argument which represents the inventory path. // It returns a Result containing a vector of tuples. Each tuple contains a string representing the name and the ansible host. - pub fn inventory_list(&self, inventory_path: PathBuf) -> Result> { + // + // Set re_attempt to re-run the ansible runner if the inventory list is empty + pub async fn inventory_list( + &self, + inventory_path: PathBuf, + re_attempt: bool, + ) -> Result> { // Run the external command and store the output. - let output = run_external_command( - AnsibleBinary::AnsibleInventory.get_binary_path()?, - self.working_directory_path.clone(), - vec![ - "--inventory".to_string(), - inventory_path.to_string_lossy().to_string(), - "--list".to_string(), - ], - true, - false, - )?; + let retry_count = if re_attempt { 3 } else { 0 }; + let mut count = 0; + let mut inventory = Vec::new(); + + while count <= retry_count { + debug!("Running inventory list. retry attempts {count}/{retry_count}"); + let output = run_external_command( + AnsibleBinary::AnsibleInventory.get_binary_path()?, + self.working_directory_path.clone(), + vec![ + "--inventory".to_string(), + inventory_path.to_string_lossy().to_string(), + "--list".to_string(), + ], + true, + false, + )?; - // Debug the output of the inventory list. - debug!("Inventory list output:"); - debug!("{output:#?}"); - // Convert the output into a string and remove any lines that do not start with '{'. - let mut output_string = output - .into_iter() - .skip_while(|line| !line.starts_with('{')) - .collect::>() - .join("\n"); - // Truncate the string at the last '}' character. - if let Some(end_index) = output_string.rfind('}') { - output_string.truncate(end_index + 1); + // Debug the output of the inventory list. + debug!("Inventory list output:"); + debug!("{output:#?}"); + // Convert the output into a string and remove any lines that do not start with '{'. + let mut output_string = output + .into_iter() + .skip_while(|line| !line.starts_with('{')) + .collect::>() + .join("\n"); + // Truncate the string at the last '}' character. + if let Some(end_index) = output_string.rfind('}') { + output_string.truncate(end_index + 1); + } + // Parse the output string into the Output struct. + let parsed: Output = serde_json::from_str(&output_string)?; + // Convert the parsed output into a vector of tuples containing the name and ansible host. + inventory = parsed + ._meta + .hostvars + .into_iter() + .map(|(name, vars)| (name, vars.ansible_host)) + .collect(); + + count += 1; + if !inventory.is_empty() { + break; + } + debug!("Inventory list is empty, re-running after a few seconds."); + tokio::time::sleep(Duration::from_secs(3)).await; + } + if inventory.is_empty() { + warn!("Inventory list is empty after {retry_count} retries"); } - // Parse the output string into the Output struct. - let parsed: Output = serde_json::from_str(&output_string)?; - // Convert the parsed output into a vector of tuples containing the name and ansible host. - let inventory: Vec<(String, IpAddr)> = parsed - ._meta - .hostvars - .into_iter() - .map(|(name, vars)| (name, vars.ansible_host)) - .collect(); // Return the inventory. Ok(inventory) diff --git a/src/deploy.rs b/src/deploy.rs index b40a796d..b21e388b 100644 --- a/src/deploy.rs +++ b/src/deploy.rs @@ -129,10 +129,15 @@ impl DeployCmd { async fn build_safe_network_binaries(&self) -> Result<()> { let start = Instant::now(); println!("Obtaining IP address for build VM..."); - let build_inventory = self.testnet_deploy.ansible_runner.inventory_list( - PathBuf::from("inventory") - .join(format!(".{}_build_inventory_digital_ocean.yml", self.name)), - )?; + let build_inventory = self + .testnet_deploy + .ansible_runner + .inventory_list( + PathBuf::from("inventory") + .join(format!(".{}_build_inventory_digital_ocean.yml", self.name)), + true, + ) + .await?; let build_ip = build_inventory[0].1; self.testnet_deploy.ssh_client.wait_for_ssh_availability( &build_ip, @@ -154,13 +159,17 @@ impl DeployCmd { pub async fn provision_genesis_node(&self) -> Result<()> { let start = Instant::now(); - let genesis_inventory = - self.testnet_deploy - .ansible_runner - .inventory_list(PathBuf::from("inventory").join(format!( + let genesis_inventory = self + .testnet_deploy + .ansible_runner + .inventory_list( + PathBuf::from("inventory").join(format!( ".{}_genesis_inventory_digital_ocean.yml", self.name - )))?; + )), + false, + ) + .await?; let genesis_ip = genesis_inventory[0].1; self.testnet_deploy.ssh_client.wait_for_ssh_availability( &genesis_ip, diff --git a/src/lib.rs b/src/lib.rs index a3193bdc..ea499d42 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -418,9 +418,14 @@ impl TestnetDeploy { } pub async fn get_genesis_multiaddr(&self, name: &str) -> Result<(String, IpAddr)> { - let genesis_inventory = self.ansible_runner.inventory_list( - PathBuf::from("inventory").join(format!(".{name}_genesis_inventory_digital_ocean.yml")), - )?; + let genesis_inventory = self + .ansible_runner + .inventory_list( + PathBuf::from("inventory") + .join(format!(".{name}_genesis_inventory_digital_ocean.yml")), + false, + ) + .await?; let genesis_ip = genesis_inventory[0].1; let node_info = self .rpc_client @@ -479,11 +484,18 @@ impl TestnetDeploy { return Err(Error::EnvironmentDoesNotExist(name.to_string())); } - let genesis_inventory = self.ansible_runner.inventory_list(genesis_inventory_path)?; - let build_inventory = self.ansible_runner.inventory_list(build_inventory_path)?; + let genesis_inventory = self + .ansible_runner + .inventory_list(genesis_inventory_path, false) + .await?; + let build_inventory = self + .ansible_runner + .inventory_list(build_inventory_path, false) + .await?; let remaining_nodes_inventory = self .ansible_runner - .inventory_list(remaining_nodes_inventory_path)?; + .inventory_list(remaining_nodes_inventory_path, false) + .await?; // It also seems to be possible for a workspace and inventory files to still exist, but // there to be no inventory items returned. Perhaps someone deleted the VMs manually. We diff --git a/src/logs.rs b/src/logs.rs index 5b71c716..89b0d24d 100644 --- a/src/logs.rs +++ b/src/logs.rs @@ -24,7 +24,7 @@ impl TestnetDeploy { pub async fn rsync_logs(&self, name: &str, resources_only: bool) -> Result<()> { // take root_dir at the top as `get_all_node_inventory` changes the working dir. let root_dir = std::env::current_dir()?; - let all_node_inventory = self.get_all_node_inventory(name)?; + let all_node_inventory = self.get_all_node_inventory(name).await?; let log_abs_dest = create_initial_log_dir_setup(&root_dir, name, &all_node_inventory)?; // Rsync args @@ -133,7 +133,7 @@ impl TestnetDeploy { pub async fn ripgrep_logs(&self, name: &str, rg_args: &str) -> Result<()> { // take root_dir at the top as `get_all_node_inventory` changes the working dir. let root_dir = std::env::current_dir()?; - let all_node_inventory = self.get_all_node_inventory(name)?; + let all_node_inventory = self.get_all_node_inventory(name).await?; let log_abs_dest = create_initial_log_dir_setup(&root_dir, name, &all_node_inventory)?; let rg_cmd = format!("rg {rg_args} .local/share/safe/"); @@ -224,7 +224,7 @@ impl TestnetDeploy { } // Return the list of all the node machines. - fn get_all_node_inventory(&self, name: &str) -> Result> { + async fn get_all_node_inventory(&self, name: &str) -> Result> { let environments = self.terraform_runner.workspace_list()?; if !environments.contains(&name.to_string()) { return Err(Error::EnvironmentDoesNotExist(name.to_string())); @@ -246,10 +246,14 @@ impl TestnetDeploy { } // Get the inventory of all the nodes - let mut all_node_inventory = self.ansible_runner.inventory_list(genesis_inventory_path)?; + let mut all_node_inventory = self + .ansible_runner + .inventory_list(genesis_inventory_path, false) + .await?; all_node_inventory.extend( self.ansible_runner - .inventory_list(remaining_nodes_inventory_path)?, + .inventory_list(remaining_nodes_inventory_path, false) + .await?, ); Ok(all_node_inventory) } diff --git a/src/logstash.rs b/src/logstash.rs index 09f8dc1b..a834fdfc 100644 --- a/src/logstash.rs +++ b/src/logstash.rs @@ -221,10 +221,14 @@ impl LogstashDeploy { pub async fn provision(&self, name: &str) -> Result<()> { println!("Obtaining IP address for Logstash VM..."); - let logstash_inventory = self.ansible_runner.inventory_list( - PathBuf::from("inventory") - .join(format!(".{name}_logstash_inventory_digital_ocean.yml")), - )?; + let logstash_inventory = self + .ansible_runner + .inventory_list( + PathBuf::from("inventory") + .join(format!(".{name}_logstash_inventory_digital_ocean.yml")), + false, + ) + .await?; let logstash_ip = logstash_inventory[0].1; self.ssh_client .wait_for_ssh_availability(&logstash_ip, &self.cloud_provider.get_ssh_user())?;