From 04919be334cebbd464bf023f7076014dae19174a Mon Sep 17 00:00:00 2001 From: Riku Rouvila Date: Fri, 5 Jan 2024 14:13:53 +0200 Subject: [PATCH] separate backup downloading and restoring to two different scripts, use production server's encryption key on the machine that restores the backup (staging) --- .github/workflows/provision.yml | 1 + infrastructure/{ => backups}/backup.sh | 6 +- infrastructure/backups/download.sh | 117 ++++++++++++++++++ .../{ => backups}/restore-snapshot.sh | 0 infrastructure/{ => backups}/restore.sh | 98 ++------------- infrastructure/server-setup/staging.yml | 1 + .../server-setup/tasks/backups/crontab.yml | 18 ++- package.json | 5 +- 8 files changed, 147 insertions(+), 99 deletions(-) rename infrastructure/{ => backups}/backup.sh (97%) create mode 100644 infrastructure/backups/download.sh rename infrastructure/{ => backups}/restore-snapshot.sh (100%) rename infrastructure/{ => backups}/restore.sh (79%) diff --git a/.github/workflows/provision.yml b/.github/workflows/provision.yml index 19f484ed2..70eb5daf6 100644 --- a/.github/workflows/provision.yml +++ b/.github/workflows/provision.yml @@ -70,6 +70,7 @@ jobs: mongodb_admin_username: ${{ secrets.MONGODB_ADMIN_USER }} mongodb_admin_password: ${{ secrets.MONGODB_ADMIN_PASSWORD }} backup_encryption_passphrase: ${{ secrets.BACKUP_ENCRYPTION_PASSPHRASE }} + restore_backup_encryption_passphrase: ${{ secrets.RESTORE_BACKUP_ENCRYPTION_PASSPHRASE }} elasticsearch_superuser_password: ${{ secrets.ELASTICSEARCH_SUPERUSER_PASSWORD }} external_backup_server_remote_directory: ${{ vars.BACKUP_DIRECTORY }} external_backup_server_user: ${{ secrets.BACKUP_SSH_USER }} diff --git a/infrastructure/backup.sh b/infrastructure/backups/backup.sh similarity index 97% rename from infrastructure/backup.sh rename to infrastructure/backups/backup.sh index 834f369b7..8941a09d4 100755 --- a/infrastructure/backup.sh +++ b/infrastructure/backups/backup.sh @@ -343,9 +343,11 @@ if [[ "$OWN_IP" = "$PRODUCTION_IP" || "$OWN_IP" 
= "$(dig $PRODUCTION_IP +short)" tar -czf /tmp/${LABEL:-$BACKUP_DATE}.tar.gz -C "$BACKUP_RAW_FILES_DIR" . - openssl enc -aes-256-cbc -salt -in /tmp/${LABEL:-$BACKUP_DATE}.tar.gz -out /tmp/${LABEL:-$BACKUP_DATE}.tar.gz.enc -pass pass:$PASSPHRASE + openssl enc -aes-256-cbc -salt -pbkdf2 -in /tmp/${LABEL:-$BACKUP_DATE}.tar.gz -out /tmp/${LABEL:-$BACKUP_DATE}.tar.gz.enc -pass pass:$PASSPHRASE - script -q -c "rsync -a -r --rsync-path='mkdir -p $REMOTE_DIR/ && rsync' --progress --rsh='ssh -o StrictHostKeyChecking=no -p$SSH_PORT' /tmp/${LABEL:-$BACKUP_DATE}.tar.gz.enc $SSH_USER@$SSH_HOST:$REMOTE_DIR/" && echo "Copied backup files to remote server." + rsync -a -r --rsync-path="mkdir -p $REMOTE_DIR/ && rsync" --progress --rsh="ssh -o StrictHostKeyChecking=no -p $SSH_PORT" /tmp/${LABEL:-$BACKUP_DATE}.tar.gz.enc $SSH_USER@$SSH_HOST:$REMOTE_DIR/ + + echo "Copied backup files to remote server." rm /tmp/${LABEL:-$BACKUP_DATE}.tar.gz.enc rm /tmp/${LABEL:-$BACKUP_DATE}.tar.gz diff --git a/infrastructure/backups/download.sh b/infrastructure/backups/download.sh new file mode 100644 index 000000000..84394a64d --- /dev/null +++ b/infrastructure/backups/download.sh @@ -0,0 +1,117 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# +# OpenCRVS is also distributed under the terms of the Civil Registration +# & Healthcare Disclaimer located at http://opencrvs.org/license. +# +# Copyright (C) The OpenCRVS Authors located at https://github.com/opencrvs/opencrvs-core/blob/master/AUTHORS. + +#------------------------------------------------------------------------------------------------------------------ +# By default OpenCRVS saves a backup of all data on a cron job every day in case of an emergency data loss incident +# This script clears all data and restores a specific day's data. It is irreversable, so use with caution. 
+#------------------------------------------------------------------------------------------------------------------ + +set -e + +for i in "$@"; do + case $i in + --ssh_user=*) + SSH_USER="${i#*=}" + shift + ;; + --ssh_host=*) + SSH_HOST="${i#*=}" + shift + ;; + --ssh_port=*) + SSH_PORT="${i#*=}" + shift + ;; + --replicas=*) + REPLICAS="${i#*=}" + shift + ;; + --label=*) + LABEL="${i#*=}" + shift + ;; + --passphrase=*) + PASSPHRASE="${i#*=}" + shift + ;; + --remote_dir=*) + REMOTE_DIR="${i#*=}" + shift + ;; + *) ;; + esac +done + +print_usage_and_exit() { + echo 'Usage: ./download.sh --passphrase=XXX --ssh_user=XXX --ssh_host=XXX --ssh_port=XXX --remote_dir=XXX' + exit 1 +} + +if [ -z "$LABEL" ]; then + LABEL=$(date +%Y-%m-%d) +fi + +if [ -z "$SSH_USER" ] ; then + echo 'Error: Missing environment variable SSH_USER.' + exit 1 +fi + +if [ -z "$SSH_HOST" ] ; then + echo 'Error: Missing environment variable SSH_HOST.' + exit 1 +fi + +if [ -z "$SSH_PORT" ] ; then + echo 'Error: Missing environment variable SSH_PORT.' + exit 1 +fi + +if [ -z "$REMOTE_DIR" ]; then + echo "Error: Argument for the --remote_dir is required." + print_usage_and_exit +fi + +# Copy & decrypt backup files +#------------------------------------------- + +# Create a temporary directory to store the backup files before decrypting +BACKUP_RAW_FILES_DIR=/tmp/backup-$LABEL +REMOTE_DIR_WITH_DATE="$REMOTE_DIR/${LABEL:-$BACKUP_DATE}" + +mkdir -p $BACKUP_RAW_FILES_DIR + +# Copy backup from backup server +rsync -a -r --delete --progress --rsh="ssh -o StrictHostKeyChecking=no -p $SSH_PORT" \ + $SSH_USER@$SSH_HOST:$REMOTE_DIR_WITH_DATE/${LABEL}.tar.gz.enc\ + $BACKUP_RAW_FILES_DIR/${LABEL}.tar.gz.enc + +echo "Copied backup files from server to $BACKUP_RAW_FILES_DIR/${LABEL}.tar.gz.enc." 
+ +# Decrypt +openssl enc -d -aes-256-cbc -salt -pbkdf2 -in $BACKUP_RAW_FILES_DIR/${LABEL}.tar.gz.enc --out $BACKUP_RAW_FILES_DIR/${LABEL}.tar.gz -pass pass:$PASSPHRASE + +# Extract +mkdir -p $BACKUP_RAW_FILES_DIR/extract +tar -xvf $BACKUP_RAW_FILES_DIR/${LABEL}.tar.gz -C $BACKUP_RAW_FILES_DIR/extract + +# Move folders +rm -r /data/backups/elasticsearch +mv $BACKUP_RAW_FILES_DIR/extract/elasticsearch /data/backups/elasticsearch + +mv $BACKUP_RAW_FILES_DIR/extract/influxdb /data/backups/influxdb/${LABEL} +mv $BACKUP_RAW_FILES_DIR/extract/minio/ocrvs-${LABEL}.tar.gz /data/backups/minio/ +mv $BACKUP_RAW_FILES_DIR/extract/metabase/ocrvs-${LABEL}.tar.gz /data/backups/metabase/ +mv $BACKUP_RAW_FILES_DIR/extract/vsexport/ocrvs-${LABEL}.tar.gz /data/backups/vsexport/ +mv $BACKUP_RAW_FILES_DIR/extract/mongo/* /data/backups/mongo/ + +# Clean up +rm $BACKUP_RAW_FILES_DIR/${LABEL}.tar.gz.enc +rm $BACKUP_RAW_FILES_DIR/${LABEL}.tar.gz +rm -r $BACKUP_RAW_FILES_DIR +echo "Done" \ No newline at end of file diff --git a/infrastructure/restore-snapshot.sh b/infrastructure/backups/restore-snapshot.sh similarity index 100% rename from infrastructure/restore-snapshot.sh rename to infrastructure/backups/restore-snapshot.sh diff --git a/infrastructure/restore.sh b/infrastructure/backups/restore.sh similarity index 79% rename from infrastructure/restore.sh rename to infrastructure/backups/restore.sh index 6de0f3bfc..9897df934 100755 --- a/infrastructure/restore.sh +++ b/infrastructure/backups/restore.sh @@ -12,28 +12,17 @@ # This script clears all data and restores a specific day's data. It is irreversable, so use with caution. 
#------------------------------------------------------------------------------------------------------------------ +set -e + if docker service ls > /dev/null 2>&1; then IS_LOCAL=false else IS_LOCAL=true fi - # Reading Named parameters for i in "$@"; do case $i in - --ssh_user=*) - SSH_USER="${i#*=}" - shift - ;; - --ssh_host=*) - SSH_HOST="${i#*=}" - shift - ;; - --ssh_port=*) - SSH_PORT="${i#*=}" - shift - ;; --replicas=*) REPLICAS="${i#*=}" shift @@ -42,20 +31,12 @@ for i in "$@"; do LABEL="${i#*=}" shift ;; - --passphrase=*) - PASSPHRASE="${i#*=}" - shift - ;; - --remote_dir=*) - REMOTE_DIR="${i#*=}" - shift - ;; *) ;; esac done print_usage_and_exit() { - echo 'Usage: ./restore.sh --passphrase=XXX --ssh_user=XXX --ssh_host=XXX --ssh_port=XXX --replicas=XXX --remote_dir=XXX' + echo 'Usage: ./restore.sh --replicas=XXX' echo "This script CLEARS ALL DATA and RESTORES A SPECIFIC DAY'S or label's data. This process is irreversible, so USE WITH CAUTION." echo "Script must receive a label parameter to restore data from that specific day in format +%Y-%m-%d i.e. 2019-01-01 or that label" echo "The Hearth, OpenHIM User and Application-config db backup zips you would like to restore from: hearth-dev-{label}.gz, openhim-dev-{label}.gz, user-mgnt-{label}.gz and application-config-{label}.gz must exist in /data/backups/mongo/ folder" @@ -82,30 +63,7 @@ fi if [ "$IS_LOCAL" = false ]; then ROOT_PATH=${ROOT_PATH:-/data} - if [ -z "$SSH_USER" ] ; then - echo 'Error: Missing environment variable SSH_USER.' - exit 1 - fi - if [ -z "$SSH_HOST" ] ; then - echo 'Error: Missing environment variable SSH_HOST.' - exit 1 - fi - - if [ -z "$SSH_PORT" ] ; then - echo 'Error: Missing environment variable SSH_PORT.' - exit 1 - fi - - if [ -z "$REMOTE_DIR" ]; then - echo "Error: Argument for the --remote_dir is required." - print_usage_and_exit - fi - - if [ -z "$BACKUP_HOST" ] ; then - echo 'Error: Missing environment variable BACKUP_HOST.' 
- exit 1 - fi if [ -z "$REPLICAS" ]; then echo "Error: Argument for the --replicas is required." print_usage_and_exit @@ -146,6 +104,8 @@ else done fi + + mongo_credentials() { if [ ! -z ${MONGODB_ADMIN_USER+x} ] || [ ! -z ${MONGODB_ADMIN_PASSWORD+x} ]; then echo "--username $MONGODB_ADMIN_USER --password $MONGODB_ADMIN_PASSWORD --authenticationDatabase admin" @@ -197,43 +157,9 @@ rm -rf $ROOT_PATH/metabase/* rm -rf $ROOT_PATH/vsexport mkdir -p $ROOT_PATH/vsexport -# Copy & decrypt backup files -#------------------------------------------- - -# Create a temporary directory to store the backup files before decrypting -BACKUP_RAW_FILES_DIR=/tmp/backup-$LABEL/ -mkdir -p $BACKUP_RAW_FILES_DIR - -# Copy backup from backup server -rsync -a -r --delete --progress --rsh="ssh -o StrictHostKeyChecking=no -p $SSH_PORT" \ - $SSH_USER@$SSH_HOST:$REMOTE_DIR/${LABEL}.tar.gz.enc\ - $BACKUP_RAW_FILES_DIR/${LABEL}.tar.gz.enc - -echo "Copied backup files to server." - -# Decrypt -openssl enc -d -aes-256-cbc -salt -in $BACKUP_RAW_FILES_DIR/${LABEL}.tar.gz.enc --out $BACKUP_RAW_FILES_DIR/${LABEL}.tar.gz -pass pass:$PASSPHRASE - -# Extract -tar -xvf $BACKUP_RAW_FILES_DIR/${LABEL}.tar.gz - -# Move folders -cp -r $BACKUP_RAW_FILES_DIR/elasticsearch /data/backups/ +echo "Waiting for elasticsearch to restart so that the restore script can find the updated volume." docker service update --force --update-parallelism 1 --update-delay 30s opencrvs_elasticsearch -echo "Waiting 2 mins for elasticsearch to restart so that the restore script can find the updated volume." 
-echo -sleep 120 -cp -r $BACKUP_RAW_FILES_DIR/influxdb /data/backups/ -mv $BACKUP_RAW_FILES_DIR/minio/ocrvs-${LABEL}.tar.gz /data/backups/minio/ -mv $BACKUP_RAW_FILES_DIR/metabase/ocrvs-${LABEL}.tar.gz /data/backups/metabase/ -mv $BACKUP_RAW_FILES_DIR/vsexport/ocrvs-${LABEL}.tar.gz /data/backups/vsexport/ -mv $BACKUP_RAW_FILES_DIR/mongo/hearth-dev-${LABEL}.tar.gz /data/backups/mongo/ -mv $BACKUP_RAW_FILES_DIR/mongo/user-mgnt-${LABEL}.tar.gz /data/backups/mongo/ -mv $BACKUP_RAW_FILES_DIR/mongo/openhim-dev-${LABEL}.tar.gz /data/backups/mongo/ -mv $BACKUP_RAW_FILES_DIR/mongo/application-config-${LABEL}.tar.gz /data/backups/mongo/ -mv $BACKUP_RAW_FILES_DIR/mongo/metrics-${LABEL}.tar.gz /data/backups/mongo/ -mv $BACKUP_RAW_FILES_DIR/mongo/webhooks-${LABEL}.tar.gz /data/backups/mongo/ -mv $BACKUP_RAW_FILES_DIR/mongo/performance-${LABEL}.tar.gz /data/backups/mongo/ +docker run --rm --network=$NETWORK toschneck/wait-for-it -t 120 elasticsearch:9200 -- echo "Elasticsearch is up" # Restore all data from a backup into Hearth, OpenHIM, User, Application-config and any other service related Mongo databases #-------------------------------------------------------------------------------------------------- @@ -255,13 +181,10 @@ docker run --rm -v $ROOT_PATH/backups/mongo:/data/backups/mongo --network=$NETWO # Register backup folder as an Elasticsearch repository for restoring the search data #------------------------------------------------------------------------------------- docker run --rm --network=$NETWORK appropriate/curl curl -X PUT -H "Content-Type: application/json;charset=UTF-8" "http://$(elasticsearch_host)/_snapshot/ocrvs" -d '{ "type": "fs", "settings": { "location": "/data/backups/elasticsearch", "compress": true }}' - sleep 10 # Restore all data from a backup into search #------------------------------------------- -script -q -c "rsync -a -r --rsync-path='mkdir -p $REMOTE_DIR/ && rsync' --progress --rsh='ssh -o StrictHostKeyChecking=no -p$SSH_PORT' 
/tmp/${LABEL:-$BACKUP_DATE}.tar.gz.enc $SSH_USER@$SSH_HOST:$REMOTE_DIR/" && echo "Copied backup files to remote server." - docker run --rm --network=$NETWORK appropriate/curl curl -X POST -H "Content-Type: application/json;charset=UTF-8" "http://$(elasticsearch_host)/_snapshot/ocrvs/snapshot_$LABEL/_restore?pretty" -d '{ "indices": "ocrvs" }' sleep 10 echo "Waiting 1 minute to rotate elasticsearch passwords" @@ -313,10 +236,3 @@ tar -xzvf $ROOT_PATH/backups/vsexport/ocrvs-$LABEL.tar.gz -C $ROOT_PATH/vsexport if [ "$IS_LOCAL" = false ]; then docker service update --force --update-parallelism 1 --update-delay 30s opencrvs_migration fi - -# Clean up -#------------------------------------------- -rm $BACKUP_RAW_FILES_DIR/${LABEL}.tar.gz.enc -rm $BACKUP_RAW_FILES_DIR/${LABEL}.tar.gz -rm -r $BACKUP_RAW_FILES_DIR -echo "Done" \ No newline at end of file diff --git a/infrastructure/server-setup/staging.yml b/infrastructure/server-setup/staging.yml index 5f7890aba..5030c7456 100644 --- a/infrastructure/server-setup/staging.yml +++ b/infrastructure/server-setup/staging.yml @@ -15,6 +15,7 @@ all: - 165.22.110.53 enable_backups: false periodic_restore_from_backup: true + # restore_backup_encryption_passphrase: Defined in --extra-vars by the provisioning pipeline # external_backup_server_remote_directory: Defined in --extra-vars by the provisioning pipeline # external_backup_server_user: Defined in --extra-vars by the provisioning pipeline # external_backup_server_ssh_port: Defined in --extra-vars by the provisioning pipeline diff --git a/infrastructure/server-setup/tasks/backups/crontab.yml b/infrastructure/server-setup/tasks/backups/crontab.yml index 96b2b0f35..b793d1633 100644 --- a/infrastructure/server-setup/tasks/backups/crontab.yml +++ b/infrastructure/server-setup/tasks/backups/crontab.yml @@ -4,7 +4,7 @@ name: 'backup opencrvs' minute: '0' hour: '0' - job: 'cd / && bash /opt/opencrvs/infrastructure/backup.sh --passphrase={{ backup_encryption_passphrase }} --ssh_user={{ 
external_backup_server_user }} --ssh_host={{ external_backup_server_ip }} --ssh_port={{ external_backup_server_ssh_port }} --production_ip={{ manager_production_server_ip }} --remote_dir={{ external_backup_server_remote_directory }} --replicas=1 >> /var/log/opencrvs-backup.log 2>&1' + job: 'cd / && bash /opt/opencrvs/infrastructure/backups/backup.sh --passphrase={{ backup_encryption_passphrase }} --ssh_user={{ external_backup_server_user }} --ssh_host={{ external_backup_server_ip }} --ssh_port={{ external_backup_server_ssh_port }} --production_ip={{ manager_production_server_ip }} --remote_dir={{ external_backup_server_remote_directory }} --replicas=1 >> /var/log/opencrvs-backup.log 2>&1' state: "{{ 'present' if (external_backup_server_ip is defined and backup_encryption_passphrase and enable_backups) else 'absent' }}" - name: Set default value for periodic_restore_from_backup @@ -12,11 +12,23 @@ periodic_restore_from_backup: false when: periodic_restore_from_backup is not defined +## +# For machines that periodically restore from backup (staging) +## +- name: 'Setup crontab to download a backup periodically the opencrvs data' + cron: + user: '{{ crontab_user }}' + name: 'download opencrvs backup' + minute: '30' + hour: '0' + job: 'cd / && bash /opt/opencrvs/infrastructure/backups/download.sh --passphrase={{ restore_backup_encryption_passphrase }} --ssh_user={{ external_backup_server_user }} --ssh_host={{ external_backup_server_ip }} --ssh_port={{ external_backup_server_ssh_port }} --remote_dir={{ external_backup_server_remote_directory }} >> /var/log/opencrvs-restore.log 2>&1' + state: "{{ 'present' if (external_backup_server_ip is defined and restore_backup_encryption_passphrase and periodic_restore_from_backup) else 'absent' }}" + - name: 'Setup crontab to restore the opencrvs data' cron: user: '{{ crontab_user }}' name: 'restore opencrvs' minute: '0' hour: '1' - job: 'cd / && bash /opt/opencrvs/infrastructure/restore.sh --passphrase={{ 
backup_encryption_passphrase }} --ssh_user={{ external_backup_server_user }} --ssh_host={{ external_backup_server_ip }} --ssh_port={{ external_backup_server_ssh_port }} --remote_dir={{ external_backup_server_remote_directory }} --replicas=1 >> /var/log/opencrvs-restore.log 2>&1' - state: "{{ 'present' if (external_backup_server_ip is defined and backup_encryption_passphrase and periodic_restore_from_backup) else 'absent' }}" + job: 'cd / && bash /opt/opencrvs/infrastructure/backups/restore.sh --replicas=1 >> /var/log/opencrvs-restore.log 2>&1' + state: "{{ 'present' if (external_backup_server_ip is defined and restore_backup_encryption_passphrase and periodic_restore_from_backup) else 'absent' }}" diff --git a/package.json b/package.json index d6d4423e8..ad4f2e258 100644 --- a/package.json +++ b/package.json @@ -24,9 +24,8 @@ "data-generator": "ts-node -r tsconfig-paths/register -T src/data-generator/index.ts", "data-generator:generate-types": "graphql-codegen --config codegen.yml && yarn prettier --write src/data-generator/gateway.ts", "deploy": "bash infrastructure/deployment/deploy.sh", - "restore-snapshot": "bash infrastructure/restore-snapshot.sh", - "snapshot": "bash infrastructure/backup.sh", - "backup:check": "bash infrastructure/backup-check.sh", + "restore-snapshot": "bash infrastructure/backups/restore-snapshot.sh", + "snapshot": "bash infrastructure/backups/backup.sh", "port-forward": "bash infrastructure/port-forward.sh", "validate-translations": "ts-node src/validate-translations.ts" },