Skip to content

Commit

Permalink
Merge branch 'main' into romac/host-integration
Browse files Browse the repository at this point in the history
  • Loading branch information
romac authored Jun 25, 2024
2 parents cac97f1 + 158ec63 commit caabaaa
Show file tree
Hide file tree
Showing 8 changed files with 24,543 additions and 62 deletions.
7 changes: 5 additions & 2 deletions qa/terraform/nodes.tf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ variable "ssh_keys" {

variable "instance_tags" {
type = list(string)
default = ["malachite"]
default = ["Malachite"]
}

resource "digitalocean_droplet" "cc" {
Expand All @@ -17,7 +17,10 @@ resource "digitalocean_droplet" "cc" {
# Build takes about 2.5 minutes on an 8-core Digital Ocean server
#size = "s-8vcpu-16gb"
ssh_keys = var.ssh_keys
user_data = file("user-data/cc-data.txt")
user_data = templatefile("user-data/cc-data.txt", {
malachite_dashboard = filebase64("../viewer/config-grafana/provisioning/dashboards-data/main.json")
node_dashboard = filebase64("../viewer/config-grafana/provisioning/dashboards-data/node-exporter-full.json")
})
}

resource "digitalocean_droplet" "small" {
Expand Down
85 changes: 42 additions & 43 deletions qa/terraform/templates/commands.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
# D_REGION - the Digital Ocean region where the servers are deployed
# PSSH_H - space-separated list of all the node server IP addresses for pssh input
# PSSH_P - the number of parallel processes to run with pssh
# SSH_OPTS - options for ssh run locally (forward agent, disable known_hosts)
# MALACHITE_DIR - the path to the malachite repository directory
# MALACHITE_DIR - the path to the malachite repository directory
# IS_CC - 1 means we are on the CC server, 0 we are not. (Used to determine the docker -H parameter.)
# IS_CC - 1 means we are on the CC server, 0 we are not. (Used to determine the docker -H parameter when run locally.)
##
# Aliases for easy manual access to the servers (don't use these in scripts)
# ssh-cc - ssh into the cc server
Expand All @@ -17,16 +17,17 @@
# xssh - parallel ssh command to all servers. Change PSSH_H and PSSH_P for different behavior.
# get_ip - get the IP address of a node server for programmatic use (example: get_ip 0)
# ok_cc - check if the CC server is ready to be used and update its services (DNS hosts, commands.sh, etc)
# ok_all - check if all servers are ready to be used (scanning the SSH keys could take a while)
# ok_all - check if all servers are ready to be used
# deploy_cc - build the local source code into a docker image on the cc server and push it to the cc registry
# setup_config - create configuration on the cc server
# dnode-pull - pull the node image on all the node servers. Accepts list of IDs or "all". (example: dnode-pull 0 1 2)
# dnode-run - run the application on a node server. Accepts list of IDs or "all". (example: dnode-run 0 1 2)
# dnode-log - get the logs of the application from a node server (example: dnode-log 0 -f)
# dnode-stop - stop the application on a node server. Accepts list of IDs or "all". (example: dnode-stop 0 2)

# get_prometheus_data - create a compressed prometheus data file (and download it from the cc server)
# dnode-rm - remove node container from server. Accepts list of IDs or "all". (example: dnode-rm 0 1 2)
# cheat_sheet - get some help on the order of commands to run
# fetch_log - fetch the logs from all the node servers (example: fetch_log 0 1 2)
# get_prometheus_data - create a compressed prometheus data file (and download it from the cc server)
##

export CANDC="${cc.ip}"
Expand All @@ -37,20 +38,22 @@ export D_N="${length(small)+length(large)}"
export D_REGION="${region}"
export PSSH_H="${join(" ",ips)}"
export PSSH_P="30"
# SSH_OPTS is defined as an array, which is not POSIX-compatible: it requires a shell with array support (e.g. bash or zsh)
export SSH_OPTS=(-A -o LogLevel=ERROR -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o GlobalKnownHostsFile=/dev/null)
export MALACHITE_DIR="$(dirname $(dirname ${path}))"
export IS_CC=0
export _CC_DOCKER_SHIM="-H ssh://root@$CANDC"
if [ $IS_CC -eq 1 ]; then
export _CC_DOCKER_SHIM=""
fi

alias ssh-cc="ssh -A root@${cc.ip}"
alias ssh-cc="ssh $SSH_OPTS root@${cc.ip}"
%{~ for i,n in concat(small, large) }
alias ssh-node${i}="ssh -A root@${n.ip}"
alias ssh-node${i}="ssh $SSH_OPTS root@${n.ip}"
%{~ endfor }

xssh() {
pssh -l root -i -v -p $PSSH_P -H "$PSSH_H" "$@"
pssh -l root -i -v -O LogLevel=ERROR -O StrictHostKeyChecking=no -O UserKnownHostsFile=/dev/null -O GlobalKnownHostsFile=/dev/null -p $PSSH_P -H "$PSSH_H" "$@"
}

get_ip() {
Expand All @@ -62,21 +65,16 @@ get_ip() {
}

ok_cc() {
_keyscan_cc 2> /dev/null
PSSH_P=1 PSSH_H=$CANDC xssh "cat /etc/done" && \
echo "Updating cc server..." && \
scp -q "$${1:-$${MALACHITE_DIR}/qa/terraform/hosts}" root@$${CANDC}:/etc/hosts && \
ssh root@$${CANDC} "systemctl restart dnsmasq" && \
scp -q "$${1:-$${MALACHITE_DIR}/qa/terraform/commands.sh}" root@$${CANDC}:/etc/profile.d/commands.sh && \
ssh root@$${CANDC} \
"sed -i 's,^export MALACHITE_DIR=.*,export MALACHITE_DIR=/root/malachite,' /etc/profile.d/commands.sh && \
sed -i 's,^export IS_CC=.*,export IS_CC=1,' /etc/profile.d/commands.sh && \
source /etc/profile.d/commands.sh && \
_keyscan_all_servers 2> /dev/null"
_keyscan_cc 2> /dev/null # needed for deploy_cc
PSSH_P=1 PSSH_H=$CANDC xssh "cat /etc/done"
sftp -C -q root@$${CANDC} <<EOF
put $${1:-$${MALACHITE_DIR}/qa/terraform/hosts} /etc/hosts
put $${1:-$${MALACHITE_DIR}/qa/terraform/commands.sh} /etc/profile.d/commands.sh
EOF
ssh $SSH_OPTS root@$${CANDC} "sed -i -e 's,^export MALACHITE_DIR=.*,export MALACHITE_DIR=/root/malachite,' -e 's,^export IS_CC=.*,export IS_CC=1,' /etc/profile.d/commands.sh && systemctl reload dnsmasq"
}

ok_all() {
_keyscan_all_servers 2> /dev/null
xssh "cat /etc/done && mount /data" # Mount /data in case a QA node came online earlier than CC
}

Expand All @@ -90,7 +88,7 @@ setup_config() {
if _is_cc; then
_change_config all
else
ssh root@$CANDC "source /etc/profile.d/commands.sh && _change_config all"
ssh $SSH_OPTS root@$CANDC "source /etc/profile.d/commands.sh && _change_config all"
fi
}

Expand All @@ -113,7 +111,7 @@ dnode-log() {
F="-f"
fi
fi
docker -H ssh://root@$IP logs $F node
ssh $SSH_OPTS root@$IP docker logs $F node
}

dnode-stop() {
Expand All @@ -126,17 +124,18 @@ dnode-rm() {

cheat_sheet() {
cat <<EOF
Commands and their dependencies:
(terminal1) | (terminal2 run in parallel)
ok_cc
(ssh-cc)
deploy_cc | ok_all
setup_config
dnode-run all
(wait for data)
dnode-stop all
fetch_log | get_prometheus_data
dnode-rm all
ok_cc
deploy_cc
ssh-cc
ok_all
setup_config
(_chance_one_config_entry)
dnode-run all
(wait for data)
dnode-stop all
fetch_log
get_prometheus_data
dnode-rm all
EOF
}

Expand All @@ -146,22 +145,24 @@ fetch_log() {

get_prometheus_data() {
if _is_cc; then
systemctl stop prometheus && rm prometheus.tgz 2> /dev/null && tar -cvzf prometheus.tgz -C /var/lib/prometheus/metrics2 . ; systemctl start prometheus
rm -f prometheus.tgz
systemctl stop prometheus && tar -cvzf prometheus.tgz -C /var/lib/prometheus/metrics2 .
systemctl start prometheus
else
ssh-cc "systemctl stop prometheus && rm prometheus.tgz 2> /dev/null && tar -cvzf prometheus.tgz -C /var/lib/prometheus/metrics2 . ; systemctl start prometheus"
ssh-cc "rm -f prometheus.tgz; systemctl stop prometheus && tar -cvzf prometheus.tgz -C /var/lib/prometheus/metrics2 . ; systemctl start prometheus"
scp -r root@$CANDC:prometheus.tgz .
fi
}

mem_usage() {
_mem_usage() {
PSSH_H="$(_parse_multiple_hosts "$@")" xssh -o mem_usage_out -e mem_usage_err "ps -e -o pid,user,%mem,cmd --sort=-%mem | head -2 | tail -1"
}

cpu_usage() {
_cpu_usage() {
PSSH_H="$(_parse_multiple_hosts "$@")" xssh -o cpu_usage_out -e cpu_usage_err "ps -e -o pid,user,%cpu,cmd --sort=-%cpu | head -2 | tail -1"
}

reset_prometheus_db() {
_reset_prometheus_db() {
if _is_cc; then
systemctl stop prometheus
rm -rf /var/lib/prometheus/metrics2/*
Expand All @@ -172,15 +173,15 @@ reset_prometheus_db() {
}

_is_cc() {
return $IS_CC
return $((1 - IS_CC))
}

_keyscan_cc() {
ssh-keygen -R $CANDC > /dev/null
ssh-keyscan -t ed25519 $CANDC >> $HOME/.ssh/known_hosts
}

_keyscan_all_servers() {
_keyscan_servers() {
_keyscan_cc 2> /dev/null
%{~ for n in concat(small, large) }
ssh-keygen -R ${n.ip} > /dev/null
Expand Down Expand Up @@ -231,9 +232,7 @@ _change_config() {
"moniker=test-$i" \
"consensus.p2p.listen_addr=/ip4/0.0.0.0/udp/27000/quic-v1" \
"mempool.p2p.listen_addr=/ip4/0.0.0.0/udp/28000/quic-v1" \
"metrics.listen_addr=0.0.0.0:9000" \
"test.time_allowance_factor=0.5" \
"test.exec_time_per_tx=500us" && \
"metrics.listen_addr=0.0.0.0:9000" && \
sconfig "$file" -t stringSlice \
"consensus.p2p.persistent_peers=$(_compose_persistent_peers)" \
"mempool.p2p.persistent_peers=$(_compose_persistent_peers 28000)" &
Expand Down
1 change: 1 addition & 0 deletions qa/terraform/templates/hosts.tmpl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
127.0.0.1 localhost
${cc.ip} g-${cc.name}
%{~ for n in small }
${n.ip} g-${n.name}
Expand Down
Loading

0 comments on commit caabaaa

Please sign in to comment.