Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: provide reset-to-n-nodes command #270

Merged
merged 2 commits into from
Dec 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions resources/ansible/reset_to_n_nodes.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# Applies the `reset-to-n-nodes` role, with privilege escalation, to every host in the inventory.
- name: reset to n nodes
  hosts: all
  become: true
  roles:
    - reset-to-n-nodes
15 changes: 15 additions & 0 deletions resources/ansible/roles/reset-to-n-nodes/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# An assumption is being made that an environment for running nodes was already set up.
# The templated script stops the nodes slowly, clears everything using the `reset` command,
# then recreates the node services and starts them again.
---
# Render the script with the playbook's variables (node count, intervals, version, EVM network).
- name: copy script
  ansible.builtin.template:
    src: reset_to_n_nodes.sh.j2
    dest: /usr/local/bin/reset_to_n_nodes.sh
    mode: '0755'
    owner: root
    group: root

# The script relies on bash features (`set -o pipefail`, arithmetic `for` loops), so bash is
# requested explicitly rather than relying on the default shell.
- name: run script
  ansible.builtin.shell: /usr/local/bin/reset_to_n_nodes.sh
  args:
    executable: /bin/bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# An assumption is being made that an environment for running nodes was already setup.
# Nodes will be stopped slowly, then everything will be cleared using the `reset` command.
# The node services will then be created again, using the settings from the previous node registry.
# After which, they will be started, using an interval between each.
# In the production environment, it's advisable for the interval to be quite large, e.g., 5 minutes.

#!/bin/bash

set -euo pipefail

readonly ANTCTL="/usr/local/bin/antctl"
readonly JQ="/usr/bin/jq"
readonly NODE_REGISTRY="/var/antctl/node_registry.json"

node_count={{ node_count }}

if [ "{{ evm_network_type }}" = "evm-custom" ]; then
rpc_url=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].evm_network.Custom.rpc_url_http')
payment_token_address=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].evm_network.Custom.payment_token_address')
data_payments_address=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].evm_network.Custom.data_payments_address')
fi

network_contacts_url=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].peers_args.network_contacts_url[0]')
peer_multiaddr=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].peers_args.addrs[0]')
rewards_address=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].rewards_address')
network_id=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].network_id')
max_archived_log_files=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].max_archived_log_files')
max_log_files=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].max_log_files')

# The delay is useful when there is only one node running.
{% if delay is defined %}
sleep {{ delay | default(0) }}
{% endif %}
antctl stop --interval {{ stop_interval }}

${ANTCTL} reset --force

base_rpc_port=13000
base_metrics_port=14000

for ((i=0; i<node_count; i++)); do
current_rpc_port=$((base_rpc_port + i))
current_metrics_port=$((base_metrics_port + i))

${ANTCTL} add \
--version {{ version }} \
--rpc-port ${current_rpc_port} \
--data-dir-path /mnt/antnode-storage/data \
--log-dir-path /mnt/antnode-storage/log \
--peer ${peer_multiaddr} \
--network-contacts-url ${network_contacts_url} \
--bootstrap-cache-dir /var/antctl/bootstrap_cache \
--network-id ${network_id} \
--log-format json \
--metrics-port ${current_metrics_port} \
--max-archived-log-files ${max_archived_log_files} \
--max-log-files ${max_log_files} \
--rewards-address ${rewards_address} \
{% if environment_name is defined and not environment_name.startswith('PROD') %}
--testnet \
{% endif %}
{% if evm_network_type == 'evm-custom' %}
{{ evm_network_type }} \
{% else %}
{{ evm_network_type }}
{% endif %}
{% if evm_network_type == 'evm-custom' %}
--rpc-url ${rpc_url} \
--payment-token-address ${payment_token_address} \
--data-payments-address ${data_payments_address}
{% endif %}
done

${ANTCTL} start --interval {{ start_interval }}
2 changes: 1 addition & 1 deletion resources/ansible/start_nodes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
interval: "{{ interval }}"
tasks:
- name: start
ansible.builtin.command: "safenode-manager start --interval {{ interval }}"
ansible.builtin.command: "antctl start --interval {{ interval }}"
2 changes: 1 addition & 1 deletion resources/ansible/stop_nodes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@
{% if delay is defined %}
sleep {{ delay | default(0) }}
{% endif %}
safenode-manager stop --interval {{ interval }}
antctl stop --interval {{ interval }}
args:
executable: /bin/bash
5 changes: 5 additions & 0 deletions src/ansible/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ pub enum AnsiblePlaybook {
///
/// Use in combination with `AnsibleInventoryType::PeerCache`.
PeerCacheNodes,
/// The reset to n nodes playbook will reset the nodes to the specified number of nodes.
///
/// See the `reset-to-n-nodes` role for more details.
ResetToNNodes,
/// The rpc client playbook will setup the `safenode_rpc_client` binary on the genesis node.
///
/// Use in combination with `AnsibleInventoryType::Genesis`.
Expand Down Expand Up @@ -197,6 +201,7 @@ impl AnsiblePlaybook {
AnsiblePlaybook::Nodes => "nodes.yml".to_string(),
AnsiblePlaybook::PeerCacheNodes => "peer_cache_node.yml".to_string(),
AnsiblePlaybook::RpcClient => "safenode_rpc_client.yml".to_string(),
AnsiblePlaybook::ResetToNNodes => "reset_to_n_nodes.yml".to_string(),
AnsiblePlaybook::StartFaucet => "start_faucet.yml".to_string(),
AnsiblePlaybook::StartNodes => "start_nodes.yml".to_string(),
AnsiblePlaybook::StartTelegraf => "start_telegraf.yml".to_string(),
Expand Down
128 changes: 127 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1047,6 +1047,51 @@ enum Commands {
#[clap(long, value_parser = parse_provider, verbatim_doc_comment, default_value_t = CloudProvider::DigitalOcean)]
provider: CloudProvider,
},
/// Reset nodes to a specified count.
///
/// This will stop all nodes, clear their data, and start the specified number of nodes.
#[clap(name = "reset-to-n-nodes")]
ResetToNNodes {
/// Provide a list of VM names to use as a custom inventory.
///
/// This will reset nodes on a particular subset of VMs.
#[clap(name = "custom-inventory", long, use_value_delimiter = true)]
custom_inventory: Option<Vec<String>>,
/// The EVM network to use.
///
/// Valid values are "arbitrum-one", "arbitrum-sepolia", or "custom".
#[clap(long, value_parser = parse_evm_network)]
evm_network_type: EvmNetwork,
/// Maximum number of forks Ansible will use to execute tasks on target hosts.
#[clap(long, default_value_t = 50)]
forks: usize,
/// The name of the environment.
#[arg(short = 'n', long)]
name: String,
/// The number of nodes to run after reset.
#[arg(long)]
node_count: u16,
/// Specify the type of node VM to reset the nodes on. If not provided, the nodes on
/// all the node VMs will be reset. This is mutually exclusive with the '--custom-inventory' argument.
///
/// Valid values are "peer-cache", "genesis", "generic" and "private".
#[arg(long, conflicts_with = "custom-inventory")]
node_type: Option<NodeType>,
/// The cloud provider for the environment.
#[clap(long, value_parser = parse_provider, verbatim_doc_comment, default_value_t = CloudProvider::DigitalOcean)]
provider: CloudProvider,
/// The interval between starting each node in milliseconds.
#[clap(long, value_parser = |t: &str| -> Result<Duration> { Ok(t.parse().map(Duration::from_millis)?)}, default_value = "2000")]
start_interval: Duration,
/// The interval between stopping each node in milliseconds.
#[clap(long, value_parser = |t: &str| -> Result<Duration> { Ok(t.parse().map(Duration::from_millis)?)}, default_value = "2000")]
stop_interval: Duration,
/// Supply a version number for the antnode binary.
///
/// If not provided, the latest version will be used.
#[arg(long)]
version: Option<String>,
},
}

#[derive(Subcommand, Debug)]
Expand Down Expand Up @@ -1914,7 +1959,7 @@ async fn main() -> Result<()> {
eyre!("Genesis node not found. Most likely this is a bootstrap deployment."))?,
&inventory.genesis_multiaddr.clone().ok_or_else(||
eyre!("Genesis node not found. Most likely this is a bootstrap deployment."))?,
)?),
)?)
)?;
}

Expand Down Expand Up @@ -3020,6 +3065,87 @@ async fn main() -> Result<()> {

Ok(())
}
// Handler for the `reset-to-n-nodes` subcommand: runs the `ResetToNNodes` playbook against
// a custom inventory, a single node type, or every node inventory type, in that order of
// precedence.
Commands::ResetToNNodes {
custom_inventory,
evm_network_type,
forks,
name,
node_count,
node_type,
provider,
start_interval,
stop_interval,
version,
} => {
// We will use 50 forks for the initial run to retrieve the inventory, then recreate the
// deployer using the custom fork value.
let testnet_deployer = TestnetDeployBuilder::default()
.ansible_forks(50)
.environment_name(&name)
.provider(provider)
.build()?;
let inventory_service = DeploymentInventoryService::from(&testnet_deployer);
let inventory = inventory_service
.generate_or_retrieve_inventory(&name, true, None)
.await?;
// An empty inventory is reported as the environment not existing.
if inventory.is_empty() {
return Err(eyre!("The {name} environment does not exist"));
}

let testnet_deployer = TestnetDeployBuilder::default()
.ansible_forks(forks)
.environment_name(&name)
.provider(provider)
.build()?;
testnet_deployer.init().await?;

// Variables handed to the playbook; both intervals are passed as a number of milliseconds.
let antnode_version = get_version_from_option(version, &ReleaseType::AntNode).await?;
let mut extra_vars = ExtraVarsDocBuilder::default();
extra_vars.add_variable("environment_name", &name);
extra_vars.add_variable("evm_network_type", &evm_network_type.to_string());
extra_vars.add_variable("node_count", &node_count.to_string());
extra_vars.add_variable("start_interval", &start_interval.as_millis().to_string());
extra_vars.add_variable("stop_interval", &stop_interval.as_millis().to_string());
extra_vars.add_variable("version", &antnode_version.to_string());

let ansible_runner = &testnet_deployer.ansible_provisioner.ansible_runner;

// Case 1: a custom inventory was supplied. An inventory is generated for the requested VMs
// and the playbook is run once against it.
if let Some(custom_inventory) = custom_inventory {
println!("Running the playbook with a custom inventory");
let custom_vms = get_custom_inventory(&inventory, &custom_inventory)?;
generate_custom_environment_inventory(
&custom_vms,
&name,
&ansible_runner.working_directory_path.join("inventory"),
)?;
ansible_runner.run_playbook(
AnsiblePlaybook::ResetToNNodes,
AnsibleInventoryType::Custom,
Some(extra_vars.build()),
)?;
return Ok(());
}

// Case 2: a node type was supplied. The playbook is run once against that type's inventory.
if let Some(node_type) = node_type {
println!("Running the playbook for {node_type:?} nodes");
ansible_runner.run_playbook(
AnsiblePlaybook::ResetToNNodes,
node_type.to_ansible_inventory_type(),
Some(extra_vars.build()),
)?;
return Ok(());
}

// Case 3: neither was supplied. The playbook is run once per node inventory type; the first
// failure stops the loop and is returned.
println!("Running the playbook for all node types");
for node_inv_type in AnsibleInventoryType::iter_node_type() {
ansible_runner.run_playbook(
AnsiblePlaybook::ResetToNNodes,
node_inv_type,
Some(extra_vars.build()),
)?;
}
Ok(())
}
}
}

Expand Down
Loading