From f6d8efe931175640f013639b83f7f543fd495db5 Mon Sep 17 00:00:00 2001
From: Chris O'Neil
Date: Mon, 23 Dec 2024 16:25:48 +0000
Subject: [PATCH 1/2] chore: use `antctl` to start/stop nodes

Replaces the old references to `safenode-manager`.
---
 resources/ansible/start_nodes.yml | 2 +-
 resources/ansible/stop_nodes.yml  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/ansible/start_nodes.yml b/resources/ansible/start_nodes.yml
index c1f50783..ebdafe1a 100644
--- a/resources/ansible/start_nodes.yml
+++ b/resources/ansible/start_nodes.yml
@@ -6,4 +6,4 @@
     interval: "{{ interval }}"
   tasks:
     - name: start
-      ansible.builtin.command: "safenode-manager start --interval {{ interval }}"
+      ansible.builtin.command: "antctl start --interval {{ interval }}"
diff --git a/resources/ansible/stop_nodes.yml b/resources/ansible/stop_nodes.yml
index 2c60500d..8c3ec10e 100644
--- a/resources/ansible/stop_nodes.yml
+++ b/resources/ansible/stop_nodes.yml
@@ -10,6 +10,6 @@
         {% if delay is defined %}
         sleep {{ delay | default(0) }}
         {% endif %}
-        safenode-manager stop --interval {{ interval }}
+        antctl stop --interval {{ interval }}
       args:
         executable: /bin/bash
\ No newline at end of file

From 602019fafd8ea180918d2532cbbd17f743ad2f3a Mon Sep 17 00:00:00 2001
From: Chris O'Neil
Date: Mon, 23 Dec 2024 23:33:31 +0000
Subject: [PATCH 2/2] feat: provide `reset-to-n-nodes` command

This command runs a playbook that uses a Bash script to clear out the
existing nodes, then create the specified number of new nodes.

It is intended to be used in the production environment to reset the
nodes for a particular environment, avoiding the need to bootstrap a
new one.

In production, intervals will be applied to the `stop` and `start`
commands so that the existing node services are stopped gradually and
the new services come online slowly, probably with an interval of
about five minutes between each.
---
 resources/ansible/reset_to_n_nodes.yml        |   6 +
 .../roles/reset-to-n-nodes/tasks/main.yml     |  15 ++
 .../templates/reset_to_n_nodes.sh.j2          |  74 ++++++++++
 src/ansible/mod.rs                            |   5 +
 src/main.rs                                   | 128 +++++++++++++++++-
 5 files changed, 227 insertions(+), 1 deletion(-)
 create mode 100644 resources/ansible/reset_to_n_nodes.yml
 create mode 100644 resources/ansible/roles/reset-to-n-nodes/tasks/main.yml
 create mode 100644 resources/ansible/roles/reset-to-n-nodes/templates/reset_to_n_nodes.sh.j2

diff --git a/resources/ansible/reset_to_n_nodes.yml b/resources/ansible/reset_to_n_nodes.yml
new file mode 100644
index 00000000..ca955967
--- /dev/null
+++ b/resources/ansible/reset_to_n_nodes.yml
@@ -0,0 +1,6 @@
+---
+- name: reset to n nodes
+  hosts: all
+  become: True
+  roles:
+    - reset-to-n-nodes
diff --git a/resources/ansible/roles/reset-to-n-nodes/tasks/main.yml b/resources/ansible/roles/reset-to-n-nodes/tasks/main.yml
new file mode 100644
index 00000000..66dcc504
--- /dev/null
+++ b/resources/ansible/roles/reset-to-n-nodes/tasks/main.yml
@@ -0,0 +1,15 @@
+# An assumption is being made that an environment for running nodes was already set up.
+# Nodes will be stopped slowly, then everything will be cleared using the `reset` command.
+---
+- name: copy script
+  template:
+    src: reset_to_n_nodes.sh.j2
+    dest: /usr/local/bin/reset_to_n_nodes.sh
+    mode: '0755'
+    owner: root
+    group: root
+
+- name: run script
+  ansible.builtin.shell: /usr/local/bin/reset_to_n_nodes.sh
+  args:
+    executable: /bin/bash
\ No newline at end of file
diff --git a/resources/ansible/roles/reset-to-n-nodes/templates/reset_to_n_nodes.sh.j2 b/resources/ansible/roles/reset-to-n-nodes/templates/reset_to_n_nodes.sh.j2
new file mode 100644
index 00000000..cfd66dbc
--- /dev/null
+++ b/resources/ansible/roles/reset-to-n-nodes/templates/reset_to_n_nodes.sh.j2
@@ -0,0 +1,74 @@
+#!/bin/bash
+# An assumption is being made that an environment for running nodes was already set up.
+# Nodes will be stopped slowly, then everything will be cleared using the `reset` command.
+# The node services will then be created again, using the settings from the previous node
+# registry. After that, they will be started, with an interval between each.
+# In the production environment, it's advisable for the interval to be quite large,
+# e.g., 5 minutes.
+
+set -euo pipefail
+
+readonly ANTCTL="/usr/local/bin/antctl"
+readonly JQ="/usr/bin/jq"
+readonly NODE_REGISTRY="/var/antctl/node_registry.json"
+
+node_count={{ node_count }}
+
+if [ "{{ evm_network_type }}" = "evm-custom" ]; then
+  rpc_url=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].evm_network.Custom.rpc_url_http')
+  payment_token_address=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].evm_network.Custom.payment_token_address')
+  data_payments_address=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].evm_network.Custom.data_payments_address')
+fi
+
+network_contacts_url=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].peers_args.network_contacts_url[0]')
+peer_multiaddr=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].peers_args.addrs[0]')
+rewards_address=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].rewards_address')
+network_id=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].network_id')
+max_archived_log_files=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].max_archived_log_files')
+max_log_files=$(cat ${NODE_REGISTRY} | ${JQ} -r '.nodes[0].max_log_files')
+
+# The delay is useful when there is only one node running.
+{% if delay is defined %}
+sleep {{ delay | default(0) }}
+{% endif %}
+${ANTCTL} stop --interval {{ stop_interval }}
+
+${ANTCTL} reset --force
+
+base_rpc_port=13000
+base_metrics_port=14000
+
+for ((i=0; i<node_count; i++)); do
[... the remainder of the template, which recreates the node services from the
settings read above and then starts them, was lost in extraction ...]
diff --git a/src/ansible/mod.rs b/src/ansible/mod.rs
@@ ... @@
             AnsiblePlaybook::Nodes => "nodes.yml".to_string(),
             AnsiblePlaybook::PeerCacheNodes => "peer_cache_node.yml".to_string(),
             AnsiblePlaybook::RpcClient => "safenode_rpc_client.yml".to_string(),
+            AnsiblePlaybook::ResetToNNodes => "reset_to_n_nodes.yml".to_string(),
             AnsiblePlaybook::StartFaucet => "start_faucet.yml".to_string(),
             AnsiblePlaybook::StartNodes => "start_nodes.yml".to_string(),
             AnsiblePlaybook::StartTelegraf => "start_telegraf.yml".to_string(),
diff --git a/src/main.rs b/src/main.rs
index 5ce77ba2..f2ceeabe 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1047,6 +1047,51 @@ enum Commands {
         #[clap(long, value_parser = parse_provider, verbatim_doc_comment, default_value_t = CloudProvider::DigitalOcean)]
         provider: CloudProvider,
     },
+    /// Reset nodes to a specified count.
+    ///
+    /// This will stop all nodes, clear their data, and start the specified number of nodes.
+    #[clap(name = "reset-to-n-nodes")]
+    ResetToNNodes {
+        /// Provide a list of VM names to use as a custom inventory.
+        ///
+        /// This will reset nodes on a particular subset of VMs.
+        #[clap(name = "custom-inventory", long, use_value_delimiter = true)]
+        custom_inventory: Option<Vec<String>>,
+        /// The EVM network to use.
+        ///
+        /// Valid values are "arbitrum-one", "arbitrum-sepolia", or "custom".
+        #[clap(long, value_parser = parse_evm_network)]
+        evm_network_type: EvmNetwork,
+        /// Maximum number of forks Ansible will use to execute tasks on target hosts.
+        #[clap(long, default_value_t = 50)]
+        forks: usize,
+        /// The name of the environment.
+        #[arg(short = 'n', long)]
+        name: String,
+        /// The number of nodes to run after the reset.
+        #[arg(long)]
+        node_count: u16,
+        /// Specify the type of node VM to reset the nodes on. If not provided, the nodes on
+        /// all the node VMs will be reset. This is mutually exclusive with the
+        /// '--custom-inventory' argument.
+        ///
+        /// Valid values are "peer-cache", "genesis", "generic" and "private".
+        #[arg(long, conflicts_with = "custom-inventory")]
+        node_type: Option<NodeType>,
+        /// The cloud provider for the environment.
+        #[clap(long, value_parser = parse_provider, verbatim_doc_comment, default_value_t = CloudProvider::DigitalOcean)]
+        provider: CloudProvider,
+        /// The interval between starting each node, in milliseconds.
+        #[clap(long, value_parser = |t: &str| -> Result<Duration> { Ok(t.parse().map(Duration::from_millis)?) }, default_value = "2000")]
+        start_interval: Duration,
+        /// The interval between stopping each node, in milliseconds.
+        #[clap(long, value_parser = |t: &str| -> Result<Duration> { Ok(t.parse().map(Duration::from_millis)?) }, default_value = "2000")]
+        stop_interval: Duration,
+        /// Supply a version number for the antnode binary.
+        ///
+        /// If not provided, the latest version will be used.
+        #[arg(long)]
+        version: Option<String>,
+    },
 }
 
 #[derive(Subcommand, Debug)]
@@ -1914,7 +1959,7 @@ async fn main() -> Result<()> {
                     eyre!("Genesis node not found. Most likely this is a bootstrap deployment."))?,
                 &inventory.genesis_multiaddr.clone().ok_or_else(|| eyre!("Genesis node not found. Most likely this is a bootstrap deployment."))?,
-            )?),
+            )?)
             )?;
         }
@@ -3020,6 +3065,87 @@
             Ok(())
         }
+        Commands::ResetToNNodes {
+            custom_inventory,
+            evm_network_type,
+            forks,
+            name,
+            node_count,
+            node_type,
+            provider,
+            start_interval,
+            stop_interval,
+            version,
+        } => {
+            // We will use 50 forks for the initial run to retrieve the inventory, then
+            // recreate the deployer using the custom fork value.
+            let testnet_deployer = TestnetDeployBuilder::default()
+                .ansible_forks(50)
+                .environment_name(&name)
+                .provider(provider)
+                .build()?;
+            let inventory_service = DeploymentInventoryService::from(&testnet_deployer);
+            let inventory = inventory_service
+                .generate_or_retrieve_inventory(&name, true, None)
+                .await?;
+            if inventory.is_empty() {
+                return Err(eyre!("The {name} environment does not exist"));
+            }
+
+            let testnet_deployer = TestnetDeployBuilder::default()
+                .ansible_forks(forks)
+                .environment_name(&name)
+                .provider(provider)
+                .build()?;
+            testnet_deployer.init().await?;
+
+            let antnode_version = get_version_from_option(version, &ReleaseType::AntNode).await?;
+            let mut extra_vars = ExtraVarsDocBuilder::default();
+            extra_vars.add_variable("environment_name", &name);
+            extra_vars.add_variable("evm_network_type", &evm_network_type.to_string());
+            extra_vars.add_variable("node_count", &node_count.to_string());
+            extra_vars.add_variable("start_interval", &start_interval.as_millis().to_string());
+            extra_vars.add_variable("stop_interval", &stop_interval.as_millis().to_string());
+            extra_vars.add_variable("version", &antnode_version.to_string());
+
+            let ansible_runner = &testnet_deployer.ansible_provisioner.ansible_runner;
+
+            if let Some(custom_inventory) = custom_inventory {
+                println!("Running the playbook with a custom inventory");
+                let custom_vms = get_custom_inventory(&inventory, &custom_inventory)?;
+                generate_custom_environment_inventory(
+                    &custom_vms,
+                    &name,
+                    &ansible_runner.working_directory_path.join("inventory"),
+                )?;
+                ansible_runner.run_playbook(
+                    AnsiblePlaybook::ResetToNNodes,
+                    AnsibleInventoryType::Custom,
+                    Some(extra_vars.build()),
+                )?;
+                return Ok(());
+            }
+
+            if let Some(node_type) = node_type {
+                println!("Running the playbook for {node_type:?} nodes");
+                ansible_runner.run_playbook(
+                    AnsiblePlaybook::ResetToNNodes,
+                    node_type.to_ansible_inventory_type(),
+                    Some(extra_vars.build()),
+                )?;
+                return Ok(());
+            }
+
+            println!("Running the playbook for all node types");
+            for node_inv_type in AnsibleInventoryType::iter_node_type() {
+                ansible_runner.run_playbook(
+                    AnsiblePlaybook::ResetToNNodes,
+                    node_inv_type,
+                    Some(extra_vars.build()),
+                )?;
+            }
+            Ok(())
+        }
     }
 }
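
---

For reviewers who want to try the new subcommand, a hypothetical invocation is
sketched below. It assumes the tool is run via `cargo run --` like the other
commands in this repository; the environment name and node count are
placeholders, and the flag names and defaults come from the clap definitions
in this patch:

    # Reset the (placeholder) "staging-01" environment to 25 nodes per VM.
    # The intervals are in milliseconds; 300000 ms = 5 minutes, the spacing
    # the commit message suggests for production. If --stop-interval or
    # --start-interval is omitted, the default of 2000 ms applies, and
    # omitting --version uses the latest antnode release.
    cargo run -- reset-to-n-nodes \
      --name staging-01 \
      --node-count 25 \
      --evm-network-type arbitrum-one \
      --stop-interval 300000 \
      --start-interval 300000

Without `--node-type` or `--custom-inventory`, this runs the playbook against
every node inventory type in turn, matching the fall-through branch at the end
of the handler.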