Skip to content

Commit

Permalink
fix: retry if build inventory list is empty
Browse files Browse the repository at this point in the history
  • Loading branch information
RolandSherwin committed Jan 15, 2024
1 parent d4aa755 commit 789326b
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 59 deletions.
93 changes: 58 additions & 35 deletions src/ansible.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ use crate::{
error::{Error, Result},
is_binary_on_path, run_external_command, CloudProvider,
};
use log::debug;
use log::{debug, warn};
use serde::Deserialize;
use std::{collections::HashMap, net::IpAddr, path::PathBuf};
use std::{collections::HashMap, net::IpAddr, path::PathBuf, time::Duration};

/// Ansible has multiple 'binaries', e.g., `ansible-playbook`, `ansible-inventory` etc. that are
/// wrappers around the main `ansible` program. It would be a bit cumbersome to create a different
Expand Down Expand Up @@ -83,42 +83,65 @@ impl AnsibleRunner {
// This function is used to list the inventory of the ansible runner.
// It takes a PathBuf as an argument which represents the inventory path.
// It returns a Result containing a vector of tuples. Each tuple contains a string representing the name and the ansible host.
pub fn inventory_list(&self, inventory_path: PathBuf) -> Result<Vec<(String, IpAddr)>> {
//
// Set re_attempt to re-run the ansible runner if the inventory list is empty
pub async fn inventory_list(
&self,
inventory_path: PathBuf,
re_attempt: bool,
) -> Result<Vec<(String, IpAddr)>> {
// Run the external command and store the output.
let output = run_external_command(
AnsibleBinary::AnsibleInventory.get_binary_path()?,
self.working_directory_path.clone(),
vec![
"--inventory".to_string(),
inventory_path.to_string_lossy().to_string(),
"--list".to_string(),
],
true,
false,
)?;
let retry_count = if re_attempt { 3 } else { 0 };
let mut count = 0;
let mut inventory = Vec::new();

while count <= retry_count {
debug!("Running inventory list. retry attempts {count}/{retry_count}");
let output = run_external_command(
AnsibleBinary::AnsibleInventory.get_binary_path()?,
self.working_directory_path.clone(),
vec![
"--inventory".to_string(),
inventory_path.to_string_lossy().to_string(),
"--list".to_string(),
],
true,
false,
)?;

// Debug the output of the inventory list.
debug!("Inventory list output:");
debug!("{output:#?}");
// Convert the output into a string and remove any lines that do not start with '{'.
let mut output_string = output
.into_iter()
.skip_while(|line| !line.starts_with('{'))
.collect::<Vec<String>>()
.join("\n");
// Truncate the string at the last '}' character.
if let Some(end_index) = output_string.rfind('}') {
output_string.truncate(end_index + 1);
// Debug the output of the inventory list.
debug!("Inventory list output:");
debug!("{output:#?}");
// Convert the output into a string and remove any lines that do not start with '{'.
let mut output_string = output
.into_iter()
.skip_while(|line| !line.starts_with('{'))
.collect::<Vec<String>>()
.join("\n");
// Truncate the string at the last '}' character.
if let Some(end_index) = output_string.rfind('}') {
output_string.truncate(end_index + 1);
}
// Parse the output string into the Output struct.
let parsed: Output = serde_json::from_str(&output_string)?;
// Convert the parsed output into a vector of tuples containing the name and ansible host.
inventory = parsed
._meta
.hostvars
.into_iter()
.map(|(name, vars)| (name, vars.ansible_host))
.collect();

count += 1;
if !inventory.is_empty() {
break;
}
debug!("Inventory list is empty, re-running after a few seconds.");
tokio::time::sleep(Duration::from_secs(3)).await;
}
if inventory.is_empty() {
warn!("Inventory list is empty after {retry_count} retries");
}
// Parse the output string into the Output struct.
let parsed: Output = serde_json::from_str(&output_string)?;
// Convert the parsed output into a vector of tuples containing the name and ansible host.
let inventory: Vec<(String, IpAddr)> = parsed
._meta
.hostvars
.into_iter()
.map(|(name, vars)| (name, vars.ansible_host))
.collect();

// Return the inventory.
Ok(inventory)
Expand Down
27 changes: 18 additions & 9 deletions src/deploy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,15 @@ impl DeployCmd {
async fn build_safe_network_binaries(&self) -> Result<()> {
let start = Instant::now();
println!("Obtaining IP address for build VM...");
let build_inventory = self.testnet_deploy.ansible_runner.inventory_list(
PathBuf::from("inventory")
.join(format!(".{}_build_inventory_digital_ocean.yml", self.name)),
)?;
let build_inventory = self
.testnet_deploy
.ansible_runner
.inventory_list(
PathBuf::from("inventory")
.join(format!(".{}_build_inventory_digital_ocean.yml", self.name)),
true,
)
.await?;
let build_ip = build_inventory[0].1;
self.testnet_deploy.ssh_client.wait_for_ssh_availability(
&build_ip,
Expand All @@ -154,13 +159,17 @@ impl DeployCmd {

pub async fn provision_genesis_node(&self) -> Result<()> {
let start = Instant::now();
let genesis_inventory =
self.testnet_deploy
.ansible_runner
.inventory_list(PathBuf::from("inventory").join(format!(
let genesis_inventory = self
.testnet_deploy
.ansible_runner
.inventory_list(
PathBuf::from("inventory").join(format!(
".{}_genesis_inventory_digital_ocean.yml",
self.name
)))?;
)),
false,
)
.await?;
let genesis_ip = genesis_inventory[0].1;
self.testnet_deploy.ssh_client.wait_for_ssh_availability(
&genesis_ip,
Expand Down
24 changes: 18 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -418,9 +418,14 @@ impl TestnetDeploy {
}

pub async fn get_genesis_multiaddr(&self, name: &str) -> Result<(String, IpAddr)> {
let genesis_inventory = self.ansible_runner.inventory_list(
PathBuf::from("inventory").join(format!(".{name}_genesis_inventory_digital_ocean.yml")),
)?;
let genesis_inventory = self
.ansible_runner
.inventory_list(
PathBuf::from("inventory")
.join(format!(".{name}_genesis_inventory_digital_ocean.yml")),
false,
)
.await?;
let genesis_ip = genesis_inventory[0].1;
let node_info = self
.rpc_client
Expand Down Expand Up @@ -479,11 +484,18 @@ impl TestnetDeploy {
return Err(Error::EnvironmentDoesNotExist(name.to_string()));
}

let genesis_inventory = self.ansible_runner.inventory_list(genesis_inventory_path)?;
let build_inventory = self.ansible_runner.inventory_list(build_inventory_path)?;
let genesis_inventory = self
.ansible_runner
.inventory_list(genesis_inventory_path, false)
.await?;
let build_inventory = self
.ansible_runner
.inventory_list(build_inventory_path, false)
.await?;
let remaining_nodes_inventory = self
.ansible_runner
.inventory_list(remaining_nodes_inventory_path)?;
.inventory_list(remaining_nodes_inventory_path, false)
.await?;

// It also seems to be possible for a workspace and inventory files to still exist, but
// there to be no inventory items returned. Perhaps someone deleted the VMs manually. We
Expand Down
14 changes: 9 additions & 5 deletions src/logs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ impl TestnetDeploy {
pub async fn rsync_logs(&self, name: &str, resources_only: bool) -> Result<()> {
// take root_dir at the top as `get_all_node_inventory` changes the working dir.
let root_dir = std::env::current_dir()?;
let all_node_inventory = self.get_all_node_inventory(name)?;
let all_node_inventory = self.get_all_node_inventory(name).await?;
let log_abs_dest = create_initial_log_dir_setup(&root_dir, name, &all_node_inventory)?;

// Rsync args
Expand Down Expand Up @@ -133,7 +133,7 @@ impl TestnetDeploy {
pub async fn ripgrep_logs(&self, name: &str, rg_args: &str) -> Result<()> {
// take root_dir at the top as `get_all_node_inventory` changes the working dir.
let root_dir = std::env::current_dir()?;
let all_node_inventory = self.get_all_node_inventory(name)?;
let all_node_inventory = self.get_all_node_inventory(name).await?;
let log_abs_dest = create_initial_log_dir_setup(&root_dir, name, &all_node_inventory)?;

let rg_cmd = format!("rg {rg_args} .local/share/safe/");
Expand Down Expand Up @@ -224,7 +224,7 @@ impl TestnetDeploy {
}

// Return the list of all the node machines.
fn get_all_node_inventory(&self, name: &str) -> Result<Vec<(String, IpAddr)>> {
async fn get_all_node_inventory(&self, name: &str) -> Result<Vec<(String, IpAddr)>> {
let environments = self.terraform_runner.workspace_list()?;
if !environments.contains(&name.to_string()) {
return Err(Error::EnvironmentDoesNotExist(name.to_string()));
Expand All @@ -246,10 +246,14 @@ impl TestnetDeploy {
}

// Get the inventory of all the nodes
let mut all_node_inventory = self.ansible_runner.inventory_list(genesis_inventory_path)?;
let mut all_node_inventory = self
.ansible_runner
.inventory_list(genesis_inventory_path, false)
.await?;
all_node_inventory.extend(
self.ansible_runner
.inventory_list(remaining_nodes_inventory_path)?,
.inventory_list(remaining_nodes_inventory_path, false)
.await?,
);
Ok(all_node_inventory)
}
Expand Down
12 changes: 8 additions & 4 deletions src/logstash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,14 @@ impl LogstashDeploy {

pub async fn provision(&self, name: &str) -> Result<()> {
println!("Obtaining IP address for Logstash VM...");
let logstash_inventory = self.ansible_runner.inventory_list(
PathBuf::from("inventory")
.join(format!(".{name}_logstash_inventory_digital_ocean.yml")),
)?;
let logstash_inventory = self
.ansible_runner
.inventory_list(
PathBuf::from("inventory")
.join(format!(".{name}_logstash_inventory_digital_ocean.yml")),
false,
)
.await?;
let logstash_ip = logstash_inventory[0].1;
self.ssh_client
.wait_for_ssh_availability(&logstash_ip, &self.cloud_provider.get_ssh_user())?;
Expand Down

0 comments on commit 789326b

Please sign in to comment.