From f2dd75f5fd0f5526c333ca7d82ba8679c44c79b1 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Mon, 25 Mar 2024 20:56:01 -0700 Subject: [PATCH 01/30] Adding WIP --- app/zip2cloud | 38 ++++++++++++++++++--------- app/zip2cloud-test | 65 ++++++++++++++++++++++++++++++++++++++++++++++ app/zip2cloud.orig | 65 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 156 insertions(+), 12 deletions(-) create mode 100755 app/zip2cloud-test create mode 100755 app/zip2cloud.orig diff --git a/app/zip2cloud b/app/zip2cloud index 06df524..0e90760 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -9,17 +9,19 @@ # 5/21/2021 # Directory containing db dumps to be archived/compressed/copied -DUMP_BASE=/dump/ +#DUMP_BASE=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-test +DUMP_BASE=/dump/full_backup # Directory to put the zipped backups -ZIP_DIR=/zip/ +#ZIP_DIR=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-zip +ZIP_DIR=/zip NOW=$(/bin/date +"%Y%m%d%H%M") # Name of the zip'ed db backup. 
The .7z extension wil be added by the 7zip program -ZIP_BASE=backup_full_ -ZIP_NAME=${ZIP_BASE}${NOW} +ZIP_BASE=backup_full +#ZIP_NAME=${ZIP_BASE}${NOW} [ -r /run/secrets/encryption_key ] || { echo "Encryption key not readable in /run/secrets/encryption_key" ; exit 1; } [ -r /run/secrets/gcp_backup_creds ] || { echo "Google cloud service credentials not found in /run/secrets/gcp_back_creds" ; exit 1; } @@ -27,11 +29,12 @@ ZIP_NAME=${ZIP_BASE}${NOW} [ -z "${BUCKETPATH}" ] && { echo "Path within S3 bucket not set in BUCKETPATH environment variable" ; exit 1; } [ -z "${DELETE_DUMP}" ] || echo "DELETE_DUMP set, will delete files/directories under /dump/ when done compressing" -# This is the password used to generate the AES256 encryption key +## This is the password used to generate the AES256 encryption key +#SECRET=tempsecret SECRET=`cat /run/secrets/encryption_key` - -# This is the Google Cloud Storage path, note that it depends on rclone being preconfigured -# for "remote" using the runtime creds, check rclone config in /root/.config/rclone/rclone.conf +# +## This is the Google Cloud Storage path, note that it depends on rclone being preconfigured +## for "remote" using the runtime creds, check rclone config in /root/.config/rclone/rclone.conf REMOTE=remote:${BUCKET}/${BUCKETPATH} # Delete any files older than 30 days in the zip directory @@ -39,10 +42,21 @@ echo "Deleting database archives older than 30 days" /usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; echo "Zipping ${DUMP_BASE}/${DUMP_DIR} to ${ZIP_DIR}/${ZIP_NAME}" -cd / -/usr/bin/7za a -p${SECRET} ${ZIP_DIR}/${ZIP_NAME} -mx=7 -mhe -t7z ${DUMP_BASE} || { echo "Could not zip ${DUMP_BASE} into ${ZIP_DIR}/${ZIP_NAME}" ; exit 1; } -[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } + +# Get all directories in DUMP_BASE +for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do + # Remove trailing slash and get the base name of the directory + 
DIR_NAME=$(basename ${DUMP_DIR%/}) + ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} + + echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}" + /usr/bin/7za a -p${SECRET} ${ZIP_NAME} -mx=0 -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } +done + echo "RClone-ing ${ZIP_DIR} to GCP ${GCP_DEST}" -/bin/rclone sync ${ZIP_DIR}/ ${REMOTE} +/bin/rclone -v sync ${ZIP_DIR}/ ${REMOTE} +## Create a block that, upon success of rclone above, delete _only_ files that were uploaded +## For each $FILE.7z in $ZIP_DIR, do a "rm -rf $DUMP_BASE/$FILE" to remove the original dump +#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } diff --git a/app/zip2cloud-test b/app/zip2cloud-test new file mode 100755 index 0000000..dc15ae1 --- /dev/null +++ b/app/zip2cloud-test @@ -0,0 +1,65 @@ +#!/bin/sh + +# Script to compress and encrypt mongodb backup directories and then sync them against a +# cloud S3 bucket +# +# Depends on 7zip and rclone +# +# sychan@lbl.gov +# 5/21/2021 + +# Directory containing db dumps to be archived/compressed/copied +#DUMP_BASE=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-test +DUMP_BASE=/dump/full_backup + +# Directory to put the zipped backups +#ZIP_DIR=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-zip +ZIP_DIR=/zip + +NOW=$(/bin/date +"%Y%m%d%H%M") + +# Name of the zip'ed db backup. 
The .7z extension wil be added by the 7zip program + +ZIP_BASE=backup_full +#ZIP_NAME=${ZIP_BASE}${NOW} + +[ -r /run/secrets/encryption_key ] || { echo "Encryption key not readable in /run/secrets/encryption_key" ; exit 1; } +[ -r /run/secrets/gcp_backup_creds ] || { echo "Google cloud service credentials not found in /run/secrets/gcp_back_creds" ; exit 1; } +[ -z "${BUCKET}" ] && { echo "S3 bucketname not set in BUCKET environment variable" ; exit 1; } +[ -z "${BUCKETPATH}" ] && { echo "Path within S3 bucket not set in BUCKETPATH environment variable" ; exit 1; } +[ -z "${DELETE_DUMP}" ] || echo "DELETE_DUMP set, will delete files/directories under /dump/ when done compressing" + +## This is the password used to generate the AES256 encryption key +#SECRET=tempsecret +SECRET=`cat /run/secrets/encryption_key` +# +## This is the Google Cloud Storage path, note that it depends on rclone being preconfigured +## for "remote" using the runtime creds, check rclone config in /root/.config/rclone/rclone.conf +REMOTE=remote:${BUCKET}/${BUCKETPATH} + +# Delete any files older than 30 days in the zip directory +echo "Deleting database archives older than 30 days" +/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; + +echo "Zipping ${DUMP_BASE}/${DUMP_DIR} to ${ZIP_DIR}/${ZIP_NAME}" + +# Get all directories in DUMP_BASE +for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do + # Remove trailing slash and get the base name of the directory + DIR_NAME=$(basename ${DUMP_DIR%/}) + ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} + + echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}" + /usr/bin/7za a -p${SECRET} ${ZIP_NAME} -mx=7 -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } + [ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of ${DUMP_DIR}"; rm -rf ${DUMP_DIR}*; } +done + +## Old 7zip Command +#cd / +##/usr/bin/7za +#/opt/homebrew/bin/7zz a -p${SECRET} ${ZIP_DIR}/${ZIP_NAME} -mx=7 -mhe -x!*.gz -t7z ${DUMP_BASE} || { echo 
"Could not zip ${DUMP_BASE} into ${ZIP_DIR}/${ZIP_NAME}" ; exit 1; } +#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } + +#echo "RClone-ing ${ZIP_DIR} to GCP ${GCP_DEST}" +#/bin/rclone sync ${ZIP_DIR}/ ${REMOTE} + diff --git a/app/zip2cloud.orig b/app/zip2cloud.orig new file mode 100755 index 0000000..dc15ae1 --- /dev/null +++ b/app/zip2cloud.orig @@ -0,0 +1,65 @@ +#!/bin/sh + +# Script to compress and encrypt mongodb backup directories and then sync them against a +# cloud S3 bucket +# +# Depends on 7zip and rclone +# +# sychan@lbl.gov +# 5/21/2021 + +# Directory containing db dumps to be archived/compressed/copied +#DUMP_BASE=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-test +DUMP_BASE=/dump/full_backup + +# Directory to put the zipped backups +#ZIP_DIR=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-zip +ZIP_DIR=/zip + +NOW=$(/bin/date +"%Y%m%d%H%M") + +# Name of the zip'ed db backup. The .7z extension wil be added by the 7zip program + +ZIP_BASE=backup_full +#ZIP_NAME=${ZIP_BASE}${NOW} + +[ -r /run/secrets/encryption_key ] || { echo "Encryption key not readable in /run/secrets/encryption_key" ; exit 1; } +[ -r /run/secrets/gcp_backup_creds ] || { echo "Google cloud service credentials not found in /run/secrets/gcp_back_creds" ; exit 1; } +[ -z "${BUCKET}" ] && { echo "S3 bucketname not set in BUCKET environment variable" ; exit 1; } +[ -z "${BUCKETPATH}" ] && { echo "Path within S3 bucket not set in BUCKETPATH environment variable" ; exit 1; } +[ -z "${DELETE_DUMP}" ] || echo "DELETE_DUMP set, will delete files/directories under /dump/ when done compressing" + +## This is the password used to generate the AES256 encryption key +#SECRET=tempsecret +SECRET=`cat /run/secrets/encryption_key` +# +## This is the Google Cloud Storage path, note that it depends on rclone being preconfigured +## for "remote" using the runtime creds, check rclone config in /root/.config/rclone/rclone.conf 
+REMOTE=remote:${BUCKET}/${BUCKETPATH} + +# Delete any files older than 30 days in the zip directory +echo "Deleting database archives older than 30 days" +/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; + +echo "Zipping ${DUMP_BASE}/${DUMP_DIR} to ${ZIP_DIR}/${ZIP_NAME}" + +# Get all directories in DUMP_BASE +for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do + # Remove trailing slash and get the base name of the directory + DIR_NAME=$(basename ${DUMP_DIR%/}) + ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} + + echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}" + /usr/bin/7za a -p${SECRET} ${ZIP_NAME} -mx=7 -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } + [ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of ${DUMP_DIR}"; rm -rf ${DUMP_DIR}*; } +done + +## Old 7zip Command +#cd / +##/usr/bin/7za +#/opt/homebrew/bin/7zz a -p${SECRET} ${ZIP_DIR}/${ZIP_NAME} -mx=7 -mhe -x!*.gz -t7z ${DUMP_BASE} || { echo "Could not zip ${DUMP_BASE} into ${ZIP_DIR}/${ZIP_NAME}" ; exit 1; } +#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } + +#echo "RClone-ing ${ZIP_DIR} to GCP ${GCP_DEST}" +#/bin/rclone sync ${ZIP_DIR}/ ${REMOTE} + From 3e3f88cd3dce17fb3bcbeb61ded3b598f95695a6 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Mon, 25 Mar 2024 21:10:29 -0700 Subject: [PATCH 02/30] Adding WIP --- app/zip2cloud | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/app/zip2cloud b/app/zip2cloud index 0e90760..d274555 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -55,7 +55,14 @@ done echo "RClone-ing ${ZIP_DIR} to GCP ${GCP_DEST}" -/bin/rclone -v sync ${ZIP_DIR}/ ${REMOTE} +/bin/rclone sync ${ZIP_DIR}/ ${REMOTE} + +## Sync All Resulting Files +cd ${ZIP_DIR} +for file in ${ZIP_DIR}/*; do + echo "RClone-ing ${file} to GCP ${GCP_DEST}" + /bin/rclone sync -v "$file" ${REMOTE} +done ## Create a block that, upon success of rclone above, delete _only_ files that were 
uploaded ## For each $FILE.7z in $ZIP_DIR, do a "rm -rf $DUMP_BASE/$FILE" to remove the original dump From 10113b915945c8ee487306504626a2e817001be1 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Mon, 25 Mar 2024 21:13:57 -0700 Subject: [PATCH 03/30] Adding WIP --- app/zip2cloud | 4 ---- 1 file changed, 4 deletions(-) diff --git a/app/zip2cloud b/app/zip2cloud index d274555..5e26ce1 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -53,10 +53,6 @@ for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do /usr/bin/7za a -p${SECRET} ${ZIP_NAME} -mx=0 -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } done - -echo "RClone-ing ${ZIP_DIR} to GCP ${GCP_DEST}" -/bin/rclone sync ${ZIP_DIR}/ ${REMOTE} - ## Sync All Resulting Files cd ${ZIP_DIR} for file in ${ZIP_DIR}/*; do From 2aaa08c9d5a824cecae618e743c1c0617367817c Mon Sep 17 00:00:00 2001 From: Jason S Fillman <6155956+jsfillman@users.noreply.github.com> Date: Mon, 25 Mar 2024 21:32:49 -0700 Subject: [PATCH 04/30] Update zip2cloud script to match test fork --- app/zip2cloud | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/app/zip2cloud b/app/zip2cloud index 06df524..5e26ce1 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -9,17 +9,19 @@ # 5/21/2021 # Directory containing db dumps to be archived/compressed/copied -DUMP_BASE=/dump/ +#DUMP_BASE=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-test +DUMP_BASE=/dump/full_backup # Directory to put the zipped backups -ZIP_DIR=/zip/ +#ZIP_DIR=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-zip +ZIP_DIR=/zip NOW=$(/bin/date +"%Y%m%d%H%M") # Name of the zip'ed db backup. 
The .7z extension wil be added by the 7zip program -ZIP_BASE=backup_full_ -ZIP_NAME=${ZIP_BASE}${NOW} +ZIP_BASE=backup_full +#ZIP_NAME=${ZIP_BASE}${NOW} [ -r /run/secrets/encryption_key ] || { echo "Encryption key not readable in /run/secrets/encryption_key" ; exit 1; } [ -r /run/secrets/gcp_backup_creds ] || { echo "Google cloud service credentials not found in /run/secrets/gcp_back_creds" ; exit 1; } @@ -27,11 +29,12 @@ ZIP_NAME=${ZIP_BASE}${NOW} [ -z "${BUCKETPATH}" ] && { echo "Path within S3 bucket not set in BUCKETPATH environment variable" ; exit 1; } [ -z "${DELETE_DUMP}" ] || echo "DELETE_DUMP set, will delete files/directories under /dump/ when done compressing" -# This is the password used to generate the AES256 encryption key +## This is the password used to generate the AES256 encryption key +#SECRET=tempsecret SECRET=`cat /run/secrets/encryption_key` - -# This is the Google Cloud Storage path, note that it depends on rclone being preconfigured -# for "remote" using the runtime creds, check rclone config in /root/.config/rclone/rclone.conf +# +## This is the Google Cloud Storage path, note that it depends on rclone being preconfigured +## for "remote" using the runtime creds, check rclone config in /root/.config/rclone/rclone.conf REMOTE=remote:${BUCKET}/${BUCKETPATH} # Delete any files older than 30 days in the zip directory @@ -39,10 +42,24 @@ echo "Deleting database archives older than 30 days" /usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; echo "Zipping ${DUMP_BASE}/${DUMP_DIR} to ${ZIP_DIR}/${ZIP_NAME}" -cd / -/usr/bin/7za a -p${SECRET} ${ZIP_DIR}/${ZIP_NAME} -mx=7 -mhe -t7z ${DUMP_BASE} || { echo "Could not zip ${DUMP_BASE} into ${ZIP_DIR}/${ZIP_NAME}" ; exit 1; } -[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } -echo "RClone-ing ${ZIP_DIR} to GCP ${GCP_DEST}" -/bin/rclone sync ${ZIP_DIR}/ ${REMOTE} +# Get all directories in DUMP_BASE +for DUMP_DIR in $(ls -d 
${DUMP_BASE}/*/); do + # Remove trailing slash and get the base name of the directory + DIR_NAME=$(basename ${DUMP_DIR%/}) + ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} + + echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}" + /usr/bin/7za a -p${SECRET} ${ZIP_NAME} -mx=0 -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } +done + +## Sync All Resulting Files +cd ${ZIP_DIR} +for file in ${ZIP_DIR}/*; do + echo "RClone-ing ${file} to GCP ${GCP_DEST}" + /bin/rclone sync -v "$file" ${REMOTE} +done +## Create a block that, upon success of rclone above, delete _only_ files that were uploaded +## For each $FILE.7z in $ZIP_DIR, do a "rm -rf $DUMP_BASE/$FILE" to remove the original dump +#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } From bf106f936bb200fddb60114b6ffde2a5af868f5a Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Fri, 29 Mar 2024 17:21:41 -0700 Subject: [PATCH 05/30] Adding `/` to rclone put path --- app/zip2cloud | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/zip2cloud b/app/zip2cloud index 5e26ce1..5ce5fc2 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -57,7 +57,7 @@ done cd ${ZIP_DIR} for file in ${ZIP_DIR}/*; do echo "RClone-ing ${file} to GCP ${GCP_DEST}" - /bin/rclone sync -v "$file" ${REMOTE} + /bin/rclone sync -v "$file" ${REMOTE}/ done ## Create a block that, upon success of rclone above, delete _only_ files that were uploaded From 59f6b9f9b2a9bb51d4a6829be5e7a774db646e24 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Fri, 29 Mar 2024 17:40:02 -0700 Subject: [PATCH 06/30] Testing minimal image --- Dockerfile.orig | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 Dockerfile.orig diff --git a/Dockerfile.orig b/Dockerfile.orig new file mode 100644 index 0000000..5ad3780 --- /dev/null +++ b/Dockerfile.orig @@ -0,0 +1,32 @@ +FROM arangodb:3.5.3 + +# Build arguments passed into the docker command for image metadata +ARG 
BUILD_DATE +ARG COMMIT +ARG BRANCH + +# RUN pip install requests docker python-json-logger structlog && \ +RUN apk update && \ + apk add p7zip && \ + cd /tmp && \ + wget https://downloads.rclone.org/rclone-current-linux-amd64.zip && \ + unzip rclone-current-linux-amd64.zip && \ + mv rclone-v*-linux-amd64/rclone /bin/rclone && \ + mkdir -p /root/.config/rclone/ + +COPY rclone.conf /root/.config/rclone/rclone.conf +COPY app/ /app/ + +LABEL org.label-schema.build-date=$BUILD_DATE \ + org.label-schema.vcs-url="https://github.com/kbase/db_zip2cloud.git" \ + org.label-schema.vcs-ref=$COMMIT \ + org.label-schema.schema-version="1.0.0-rc1" \ + us.kbase.vcs-branch=$BRANCH \ + maintainer="Steve Chan sychan@lbl.gov" \ + org.opencontainers.image.source="https://github.com/kbase/db_zip2cloud" + +WORKDIR /app + +ENTRYPOINT /app/zip2cloud + + From 2a76dfd7b8a81d65dd2c0480cbb7f90d6faf6443 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Fri, 29 Mar 2024 17:40:58 -0700 Subject: [PATCH 07/30] Testing minimal image --- Dockerfile | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5ad3780..241aeac 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,32 +1,14 @@ -FROM arangodb:3.5.3 +FROM alpine:latest -# Build arguments passed into the docker command for image metadata -ARG BUILD_DATE -ARG COMMIT -ARG BRANCH - -# RUN pip install requests docker python-json-logger structlog && \ +# Update and install necessary packages RUN apk update && \ - apk add p7zip && \ - cd /tmp && \ - wget https://downloads.rclone.org/rclone-current-linux-amd64.zip && \ - unzip rclone-current-linux-amd64.zip && \ - mv rclone-v*-linux-amd64/rclone /bin/rclone && \ - mkdir -p /root/.config/rclone/ + apk add p7zip rclone +# Copy necessary files +mkdir -p /root/.config/rclone/ COPY rclone.conf /root/.config/rclone/rclone.conf COPY app/ /app/ -LABEL org.label-schema.build-date=$BUILD_DATE \ - 
org.label-schema.vcs-url="https://github.com/kbase/db_zip2cloud.git" \ - org.label-schema.vcs-ref=$COMMIT \ - org.label-schema.schema-version="1.0.0-rc1" \ - us.kbase.vcs-branch=$BRANCH \ - maintainer="Steve Chan sychan@lbl.gov" \ - org.opencontainers.image.source="https://github.com/kbase/db_zip2cloud" - WORKDIR /app -ENTRYPOINT /app/zip2cloud - - +ENTRYPOINT /app/zip2cloud \ No newline at end of file From 6e071bab63470ecb701b5e5e5186641dc0e5f34b Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Fri, 29 Mar 2024 17:45:42 -0700 Subject: [PATCH 08/30] Testing minimal image --- app/zip2cloud-test | 65 ---------------------------------------------- 1 file changed, 65 deletions(-) delete mode 100755 app/zip2cloud-test diff --git a/app/zip2cloud-test b/app/zip2cloud-test deleted file mode 100755 index dc15ae1..0000000 --- a/app/zip2cloud-test +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/sh - -# Script to compress and encrypt mongodb backup directories and then sync them against a -# cloud S3 bucket -# -# Depends on 7zip and rclone -# -# sychan@lbl.gov -# 5/21/2021 - -# Directory containing db dumps to be archived/compressed/copied -#DUMP_BASE=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-test -DUMP_BASE=/dump/full_backup - -# Directory to put the zipped backups -#ZIP_DIR=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-zip -ZIP_DIR=/zip - -NOW=$(/bin/date +"%Y%m%d%H%M") - -# Name of the zip'ed db backup. 
The .7z extension wil be added by the 7zip program - -ZIP_BASE=backup_full -#ZIP_NAME=${ZIP_BASE}${NOW} - -[ -r /run/secrets/encryption_key ] || { echo "Encryption key not readable in /run/secrets/encryption_key" ; exit 1; } -[ -r /run/secrets/gcp_backup_creds ] || { echo "Google cloud service credentials not found in /run/secrets/gcp_back_creds" ; exit 1; } -[ -z "${BUCKET}" ] && { echo "S3 bucketname not set in BUCKET environment variable" ; exit 1; } -[ -z "${BUCKETPATH}" ] && { echo "Path within S3 bucket not set in BUCKETPATH environment variable" ; exit 1; } -[ -z "${DELETE_DUMP}" ] || echo "DELETE_DUMP set, will delete files/directories under /dump/ when done compressing" - -## This is the password used to generate the AES256 encryption key -#SECRET=tempsecret -SECRET=`cat /run/secrets/encryption_key` -# -## This is the Google Cloud Storage path, note that it depends on rclone being preconfigured -## for "remote" using the runtime creds, check rclone config in /root/.config/rclone/rclone.conf -REMOTE=remote:${BUCKET}/${BUCKETPATH} - -# Delete any files older than 30 days in the zip directory -echo "Deleting database archives older than 30 days" -/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; - -echo "Zipping ${DUMP_BASE}/${DUMP_DIR} to ${ZIP_DIR}/${ZIP_NAME}" - -# Get all directories in DUMP_BASE -for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do - # Remove trailing slash and get the base name of the directory - DIR_NAME=$(basename ${DUMP_DIR%/}) - ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} - - echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}" - /usr/bin/7za a -p${SECRET} ${ZIP_NAME} -mx=7 -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } - [ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of ${DUMP_DIR}"; rm -rf ${DUMP_DIR}*; } -done - -## Old 7zip Command -#cd / -##/usr/bin/7za -#/opt/homebrew/bin/7zz a -p${SECRET} ${ZIP_DIR}/${ZIP_NAME} -mx=7 -mhe -x!*.gz -t7z ${DUMP_BASE} || { echo 
"Could not zip ${DUMP_BASE} into ${ZIP_DIR}/${ZIP_NAME}" ; exit 1; } -#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } - -#echo "RClone-ing ${ZIP_DIR} to GCP ${GCP_DEST}" -#/bin/rclone sync ${ZIP_DIR}/ ${REMOTE} - From 650fa23ef4f8b4ed390a4996e98d9199e55ccb26 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Fri, 29 Mar 2024 17:48:05 -0700 Subject: [PATCH 09/30] Testing minimal image --- Dockerfile | 4 ++- Dockerfile.orig | 32 ----------------------- app/zip2cloud.orig | 65 ---------------------------------------------- 3 files changed, 3 insertions(+), 98 deletions(-) delete mode 100644 Dockerfile.orig delete mode 100755 app/zip2cloud.orig diff --git a/Dockerfile b/Dockerfile index 241aeac..7ec1da0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,8 +4,10 @@ FROM alpine:latest RUN apk update && \ apk add p7zip rclone +# Create config directory +RUN mkdir -p /root/.config/rclone/ + # Copy necessary files -mkdir -p /root/.config/rclone/ COPY rclone.conf /root/.config/rclone/rclone.conf COPY app/ /app/ diff --git a/Dockerfile.orig b/Dockerfile.orig deleted file mode 100644 index 5ad3780..0000000 --- a/Dockerfile.orig +++ /dev/null @@ -1,32 +0,0 @@ -FROM arangodb:3.5.3 - -# Build arguments passed into the docker command for image metadata -ARG BUILD_DATE -ARG COMMIT -ARG BRANCH - -# RUN pip install requests docker python-json-logger structlog && \ -RUN apk update && \ - apk add p7zip && \ - cd /tmp && \ - wget https://downloads.rclone.org/rclone-current-linux-amd64.zip && \ - unzip rclone-current-linux-amd64.zip && \ - mv rclone-v*-linux-amd64/rclone /bin/rclone && \ - mkdir -p /root/.config/rclone/ - -COPY rclone.conf /root/.config/rclone/rclone.conf -COPY app/ /app/ - -LABEL org.label-schema.build-date=$BUILD_DATE \ - org.label-schema.vcs-url="https://github.com/kbase/db_zip2cloud.git" \ - org.label-schema.vcs-ref=$COMMIT \ - org.label-schema.schema-version="1.0.0-rc1" \ - us.kbase.vcs-branch=$BRANCH \ - maintainer="Steve Chan 
sychan@lbl.gov" \ - org.opencontainers.image.source="https://github.com/kbase/db_zip2cloud" - -WORKDIR /app - -ENTRYPOINT /app/zip2cloud - - diff --git a/app/zip2cloud.orig b/app/zip2cloud.orig deleted file mode 100755 index dc15ae1..0000000 --- a/app/zip2cloud.orig +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/sh - -# Script to compress and encrypt mongodb backup directories and then sync them against a -# cloud S3 bucket -# -# Depends on 7zip and rclone -# -# sychan@lbl.gov -# 5/21/2021 - -# Directory containing db dumps to be archived/compressed/copied -#DUMP_BASE=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-test -DUMP_BASE=/dump/full_backup - -# Directory to put the zipped backups -#ZIP_DIR=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-zip -ZIP_DIR=/zip - -NOW=$(/bin/date +"%Y%m%d%H%M") - -# Name of the zip'ed db backup. The .7z extension wil be added by the 7zip program - -ZIP_BASE=backup_full -#ZIP_NAME=${ZIP_BASE}${NOW} - -[ -r /run/secrets/encryption_key ] || { echo "Encryption key not readable in /run/secrets/encryption_key" ; exit 1; } -[ -r /run/secrets/gcp_backup_creds ] || { echo "Google cloud service credentials not found in /run/secrets/gcp_back_creds" ; exit 1; } -[ -z "${BUCKET}" ] && { echo "S3 bucketname not set in BUCKET environment variable" ; exit 1; } -[ -z "${BUCKETPATH}" ] && { echo "Path within S3 bucket not set in BUCKETPATH environment variable" ; exit 1; } -[ -z "${DELETE_DUMP}" ] || echo "DELETE_DUMP set, will delete files/directories under /dump/ when done compressing" - -## This is the password used to generate the AES256 encryption key -#SECRET=tempsecret -SECRET=`cat /run/secrets/encryption_key` -# -## This is the Google Cloud Storage path, note that it depends on rclone being preconfigured -## for "remote" using the runtime creds, check rclone config in /root/.config/rclone/rclone.conf -REMOTE=remote:${BUCKET}/${BUCKETPATH} - -# Delete any files older than 30 days in the zip directory -echo "Deleting 
database archives older than 30 days" -/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; - -echo "Zipping ${DUMP_BASE}/${DUMP_DIR} to ${ZIP_DIR}/${ZIP_NAME}" - -# Get all directories in DUMP_BASE -for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do - # Remove trailing slash and get the base name of the directory - DIR_NAME=$(basename ${DUMP_DIR%/}) - ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} - - echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}" - /usr/bin/7za a -p${SECRET} ${ZIP_NAME} -mx=7 -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } - [ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of ${DUMP_DIR}"; rm -rf ${DUMP_DIR}*; } -done - -## Old 7zip Command -#cd / -##/usr/bin/7za -#/opt/homebrew/bin/7zz a -p${SECRET} ${ZIP_DIR}/${ZIP_NAME} -mx=7 -mhe -x!*.gz -t7z ${DUMP_BASE} || { echo "Could not zip ${DUMP_BASE} into ${ZIP_DIR}/${ZIP_NAME}" ; exit 1; } -#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } - -#echo "RClone-ing ${ZIP_DIR} to GCP ${GCP_DEST}" -#/bin/rclone sync ${ZIP_DIR}/ ${REMOTE} - From b4d7aa4eb2b0f10bb0a4eda065aaa37baaf8eea3 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Mon, 1 Apr 2024 16:08:12 -0700 Subject: [PATCH 10/30] Testing minimal image --- Dockerfile | 17 +++++++++++++---- app/zip2cloud | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7ec1da0..6770b3f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ -FROM alpine:latest +# Builder stage +FROM alpine:latest as builder -# Update and install necessary packages RUN apk update && \ - apk add p7zip rclone + apk add --no-cache p7zip rclone # Create config directory RUN mkdir -p /root/.config/rclone/ @@ -11,6 +11,15 @@ RUN mkdir -p /root/.config/rclone/ COPY rclone.conf /root/.config/rclone/rclone.conf COPY app/ /app/ +# Final stage +FROM alpine:latest + +# Copy necessary binaries and files from builder stage +COPY --from=builder 
/usr/bin/rclone /usr/bin/rclone +COPY --from=builder /usr/bin/7z /usr/bin/7z +COPY --from=builder /root/.config/rclone/rclone.conf /root/.config/rclone/rclone.conf +COPY --from=builder /app/ /app/ + WORKDIR /app -ENTRYPOINT /app/zip2cloud \ No newline at end of file +ENTRYPOINT ["/app/zip2cloud"] \ No newline at end of file diff --git a/app/zip2cloud b/app/zip2cloud index 5ce5fc2..e7fe87b 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -62,4 +62,4 @@ done ## Create a block that, upon success of rclone above, delete _only_ files that were uploaded ## For each $FILE.7z in $ZIP_DIR, do a "rm -rf $DUMP_BASE/$FILE" to remove the original dump -#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } +#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } \ No newline at end of file From 81959b75625fe1dacce5a0c0723a3556599daf3d Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 2 Apr 2024 20:00:25 -0700 Subject: [PATCH 11/30] Adding curl and md5 loops --- Dockerfile | 5 +- app/README.md | 44 ++++++++++++++++++ app/zip2cloud | 123 ++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 145 insertions(+), 27 deletions(-) create mode 100644 app/README.md diff --git a/Dockerfile b/Dockerfile index 6770b3f..d548cd3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM alpine:latest as builder RUN apk update && \ - apk add --no-cache p7zip rclone + apk add --no-cache curl p7zip rclone # Create config directory RUN mkdir -p /root/.config/rclone/ @@ -15,8 +15,9 @@ COPY app/ /app/ FROM alpine:latest # Copy necessary binaries and files from builder stage -COPY --from=builder /usr/bin/rclone /usr/bin/rclone COPY --from=builder /usr/bin/7z /usr/bin/7z +COPY --from=builder /usr/bin/curl /usr/bin/curl +COPY --from=builder /usr/bin/rclone /usr/bin/rclone COPY --from=builder /root/.config/rclone/rclone.conf /root/.config/rclone/rclone.conf COPY --from=builder /app/ /app/ diff --git a/app/README.md 
b/app/README.md new file mode 100644 index 0000000..6d03cae --- /dev/null +++ b/app/README.md @@ -0,0 +1,44 @@ + +## Zip2Cloud + +A robust zip & upload utility for sending archives to a remote location. + +### Features + +- Intelligently compares local & remote files with md5 sums +- Only uploads _completed_ archives +- Only deletes local files once they have been successfully uploaded +- Allows keeping an arbitrary amount of zipped & unzipped backups locally for faster restore + - Script only zips & uploads files that are missing from the remote location +- Allows mixing backup files with other data + - Only zips folders under the `$DUMP_BASE` directory with a date-based name e.g. `2024-04-01` +- Notifies on completion or error via Slack + +### Operation of `zip2cloud` + +- Uses `rclone` to create a list of `.7z` & `.md5` files from the remote location defined with the `REMOTE` environment variable +- For each file in the list + +- Compares file names & md5 sums between local & remote locations prior to read/write operations + - Uploads any `.7z` files that are missing from the remote location + - Files with mismatched md5 sums are uploaded with alternate filenames + - Only deletes files locally once they have been successfully uploaded & md5 sums confirmed +- Allows multiple unzipped local backups to remain, without re-zipping & uploading + - This allows for faster restores, as we can avoid downloading the most recent archives +- + +1. Creates 7zip archives of any directories under the `$DUMP_BASE` with a date-based name + - For example, if `$DUMP_BASE` is `/dump/full_backup`, the directory `2024-04-01` will +2. 
Syncs the archives to a remote location using rclone + +### Variables + +- `DUMP_BASE` - The base directory for backup dumps (default `/dump`) +- `DUMP_RETENTION` - The number of days to keep uncompressed backups locally +- `REMOTE` - The remote location to sync backups to +- `SECRET` - The encryption key for 7zip +- `SLACK_CHANNEL` - The slack channel to send notifications to +- `SLACK_WEBHOOK` - The webhook URL for slack notifications +- `ZIP_BASE` - The base filename, minus date, for the compressed backups +- `ZIP_DIR` - The directory to store all compressed backups (default `/zip`) +- `ZIP_RETENTION` - The number of days to keep compressed backups locally diff --git a/app/zip2cloud b/app/zip2cloud index e7fe87b..a74419d 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -8,20 +8,17 @@ # sychan@lbl.gov # 5/21/2021 -# Directory containing db dumps to be archived/compressed/copied -#DUMP_BASE=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-test +## Variables +COMPRESSION_LEVEL=0 # Set to 0 if the db dumps are already compressed DUMP_BASE=/dump/full_backup - -# Directory to put the zipped backups -#ZIP_DIR=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-zip -ZIP_DIR=/zip - -NOW=$(/bin/date +"%Y%m%d%H%M") - -# Name of the zip'ed db backup. 
The .7z extension wil be added by the 7zip program - +DUMP_RETENTION=3 +REMOTE=remote:${BUCKET}/${BUCKETPATH} +SECRET=`cat /run/secrets/encryption_key` +SLACK_CHANNEL='' +SLACK_WEBHOOK='' ZIP_BASE=backup_full -#ZIP_NAME=${ZIP_BASE}${NOW} +ZIP_DIR=/zip +ZIP_RETENTION=4 [ -r /run/secrets/encryption_key ] || { echo "Encryption key not readable in /run/secrets/encryption_key" ; exit 1; } [ -r /run/secrets/gcp_backup_creds ] || { echo "Google cloud service credentials not found in /run/secrets/gcp_back_creds" ; exit 1; } @@ -29,17 +26,61 @@ ZIP_BASE=backup_full [ -z "${BUCKETPATH}" ] && { echo "Path within S3 bucket not set in BUCKETPATH environment variable" ; exit 1; } [ -z "${DELETE_DUMP}" ] || echo "DELETE_DUMP set, will delete files/directories under /dump/ when done compressing" -## This is the password used to generate the AES256 encryption key -#SECRET=tempsecret -SECRET=`cat /run/secrets/encryption_key` -# -## This is the Google Cloud Storage path, note that it depends on rclone being preconfigured -## for "remote" using the runtime creds, check rclone config in /root/.config/rclone/rclone.conf -REMOTE=remote:${BUCKET}/${BUCKETPATH} - # Delete any files older than 30 days in the zip directory -echo "Deleting database archives older than 30 days" -/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; +#echo "Deleting database archives older than 30 days" +#/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; + +# Delete all old backups, except the last #, as defined by $ZIP_RETENTION +ls -t ${ZIP_DIR}/${ZIP_BASE}*.{7z,md5} | tail -n +$((${ZIP_RETENTION} + 1)) | xargs rm -f + +# Get list of remote backups +remote_files=$(rclone ls remote:${BUCKET}/${BUCKETPATH} | grep 7z | awk '{print $2}' | rev | cut -d. 
-f2- | rev) +# Pull remote md5 sums for each remote backup into `tmp_md5` directory +mkdir -p ${ZIP_DIR}/${ZIP_BASE}/tmp_md5 && cd $_ +for file in $remote_files; do + rclone md5sum remote:${BUCKET}/${BUCKETPATH}/$file.7z | awk '{print $1}' > ${ZIP_DIR}/${ZIP_BASE}/tmp_md5/$file.md5 +done + +# Create empty list of files to upload +uploads="" + +# Create md5 sums for local backups, if they don't exist +cd ${ZIP_DIR}/${ZIP_BASE} +for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do + # Get the base name of the file without extension + base_name=$(basename "$file" .7z) + # If a local .md5 file does not exist, create it + if [ ! -f "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" ]; then + echo "Local md5 file does not exist for $file, generating, and adding $file to uploads list" + uploads="$uploads $file" + local_md5=$(md5sum "$file" | awk '{print $1}') + echo $local_md5 > "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" + fi +done + + +# Verify & update list of files to upload +cd ${ZIP_DIR}/${ZIP_BASE} +for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do + # Get the base name of the file without extension + base_name=$(basename "$file" .7z) + # Check if the remote md5 file exists + if [ ! 
-f "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5" ]; then + # If the remote md5 file does not exist, add the file to the uploads list + echo "Remote does not exist for $file, adding $file to uploads list" + uploads="$uploads $file" + else + # Compare local and remote md5 + remote_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5") + local_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5") + if [ "$local_md5" != "$remote_md5" ]; then + echo "MD5 mismatch for file $file, adding to uploads list" + uploads="$uploads $file" + fi + fi + echo "Uploads: $uploads" +done + echo "Zipping ${DUMP_BASE}/${DUMP_DIR} to ${ZIP_DIR}/${ZIP_NAME}" @@ -50,10 +91,11 @@ for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}" - /usr/bin/7za a -p${SECRET} ${ZIP_NAME} -mx=0 -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } + /usr/bin/7za a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } +# Add to list done -## Sync All Resulting Files +## Sync All Resulting Files (in list!) cd ${ZIP_DIR} for file in ${ZIP_DIR}/*; do echo "RClone-ing ${file} to GCP ${GCP_DEST}" @@ -62,4 +104,35 @@ done ## Create a block that, upon success of rclone above, delete _only_ files that were uploaded ## For each $FILE.7z in $ZIP_DIR, do a "rm -rf $DUMP_BASE/$FILE" to remove the original dump -#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } \ No newline at end of file +#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } + + +## -- Cruft -- +#cd ${ZIP_DIR}/${ZIP_BASE} +#uploads="" +#for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do +# # Get the base name of the file without extension +# base_name=$(basename "$file" .7z) +# # Check if the remote md5 file exists +# if [ ! 
-f "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5" ]; then +# # If the remote md5 file does not exist, add the file to the uploads list +# uploads="$uploads $file" +# else +# # Compare local and remote md5 +# remote_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5") +# local_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5") +# +# if [ "$local_md5" != "$remote_md5" ]; then +# echo "MD5 mismatch for file $file" +# fi +#done + +# Loop over all .7z files in ZIP_DIR +#for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do +# # Get the base name of the file without extension +# base_name=$(basename "$file" .7z) +# # If the corresponding .md5 file does not exist, create it +# if [ ! -f "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" ]; then +# md5sum "$file" | awk '{print $1}' > "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" +# fi +#done \ No newline at end of file From aa925d033f453e98f6e5eda85f81aed3749fe3f4 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 2 Apr 2024 20:16:57 -0700 Subject: [PATCH 12/30] Fixing curl install --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d548cd3..d2f54d2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,9 +14,11 @@ COPY app/ /app/ # Final stage FROM alpine:latest +RUN apk update && \ + apk add --no-cache curl + # Copy necessary binaries and files from builder stage COPY --from=builder /usr/bin/7z /usr/bin/7z -COPY --from=builder /usr/bin/curl /usr/bin/curl COPY --from=builder /usr/bin/rclone /usr/bin/rclone COPY --from=builder /root/.config/rclone/rclone.conf /root/.config/rclone/rclone.conf COPY --from=builder /app/ /app/ From 916b1d8b72cbcc3142d10df31e552cfabd19a3b0 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 2 Apr 2024 20:25:44 -0700 Subject: [PATCH 13/30] Testing with curl --- .github/workflows/manual-build.yml | 2 +- app/README.md | 22 +++++++++++++--------- app/zip2cloud | 7 ++++++- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git 
a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml index 944f903..0696a50 100644 --- a/.github/workflows/manual-build.yml +++ b/.github/workflows/manual-build.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: jobs: build-push: - uses: kbase/.github/.github/workflows/reusable_build-push.yml@main + uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop with: name: '${{ github.event.repository.name }}-develop' tags: br-${{ github.ref_name }} diff --git a/app/README.md b/app/README.md index 6d03cae..4002b0f 100644 --- a/app/README.md +++ b/app/README.md @@ -33,12 +33,16 @@ A robust zip & upload utility for sending archives to a remote location. ### Variables -- `DUMP_BASE` - The base directory for backup dumps (default `/dump`) -- `DUMP_RETENTION` - The number of days to keep uncompressed backups locally -- `REMOTE` - The remote location to sync backups to -- `SECRET` - The encryption key for 7zip -- `SLACK_CHANNEL` - The slack channel to send notifications to -- `SLACK_WEBHOOK` - The webhook URL for slack notifications -- `ZIP_BASE` - The base filename, minus date, for the compressed backups -- `ZIP_DIR` - The directory to store all compressed backups (default `/zip`) -- `ZIP_RETENTION` - The number of days to keep compressed backups locally +| Variable | Description | Default | +|-----------------|---------------------------------------------------|---------| +| `BUCKET` | The bucket to store the backups | | +| `BUCKET_PATH` | The path within the bucket to store the backups | | +| `DUMP_BASE` | The base directory for backup dumps | `/dump` | +| `DUMP_RETENTION`| The number of days to keep uncompressed backups locally | | +| `REMOTE` | The remote location to sync backups to | | +| `SECRET` | The encryption key for 7zip | | +| `SLACK_CHANNEL` | The slack channel to send notifications to | | +| `SLACK_WEBHOOK` | The webhook URL for slack notifications | | +| `ZIP_BASE` | The base filename, minus date, for the compressed backups | | +| 
`ZIP_DIR` | The directory to store all compressed backups | `/zip` | +| `ZIP_RETENTION` | The number of days to keep compressed backups locally | | \ No newline at end of file diff --git a/app/zip2cloud b/app/zip2cloud index a74419d..119f5b8 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -30,9 +30,14 @@ ZIP_RETENTION=4 #echo "Deleting database archives older than 30 days" #/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; -# Delete all old backups, except the last #, as defined by $ZIP_RETENTION +# Delete all old zip files, except the last N+1, as defined by $ZIP_RETENTION ls -t ${ZIP_DIR}/${ZIP_BASE}*.{7z,md5} | tail -n +$((${ZIP_RETENTION} + 1)) | xargs rm -f +# Delete all old backup dumps, except the last N+1, as defined by $DUMP_RETENTION +find ${DUMP_BASE} -type d -regextype posix-extended -regex ".*/[0-9]{4}-[0-9]{2}-[0-9]{2}$" -print0 | xargs -0 ls -td | tail -n +$((${DUMP_RETENTION} + 1)) | xargs -I {} rm -rf {} +# ls -t ${DUMP_BASE}/* | tail -n +$((${DUMP_RETENTION} + 1)) | xargs rm -f + + # Get list of remote backups remote_files=$(rclone ls remote:${BUCKET}/${BUCKETPATH} | grep 7z | awk '{print $2}' | rev | cut -d. 
-f2- | rev) # Pull remote md5 sums for each remote backup into `tmp_md5` directory From 00298b7f8eebd001fe63c6b8412eb7717b1e9a3b Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 2 Apr 2024 20:53:15 -0700 Subject: [PATCH 14/30] Fixing 7zip install --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d2f54d2..d81cfa9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ COPY app/ /app/ FROM alpine:latest RUN apk update && \ - apk add --no-cache curl + apk add --no-cache curl p7zip # Copy necessary binaries and files from builder stage COPY --from=builder /usr/bin/7z /usr/bin/7z From 6fbd7a26cce7b5254dbe82e2fce681e6327a1bb6 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 2 Apr 2024 23:32:41 -0700 Subject: [PATCH 15/30] Rough alpha flow --- Dockerfile | 1 - app/zip2cloud | 31 ++++++++++++++++--------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Dockerfile b/Dockerfile index d81cfa9..8a11b48 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,6 @@ RUN apk update && \ apk add --no-cache curl p7zip # Copy necessary binaries and files from builder stage -COPY --from=builder /usr/bin/7z /usr/bin/7z COPY --from=builder /usr/bin/rclone /usr/bin/rclone COPY --from=builder /root/.config/rclone/rclone.conf /root/.config/rclone/rclone.conf COPY --from=builder /app/ /app/ diff --git a/app/zip2cloud b/app/zip2cloud index 119f5b8..57238bb 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -9,16 +9,17 @@ # 5/21/2021 ## Variables -COMPRESSION_LEVEL=0 # Set to 0 if the db dumps are already compressed -DUMP_BASE=/dump/full_backup -DUMP_RETENTION=3 -REMOTE=remote:${BUCKET}/${BUCKETPATH} -SECRET=`cat /run/secrets/encryption_key` -SLACK_CHANNEL='' -SLACK_WEBHOOK='' -ZIP_BASE=backup_full -ZIP_DIR=/zip -ZIP_RETENTION=4 +COMPRESSION_LEVEL=${COMPRESSION_LEVEL:-0} # Set to 0 if the db dumps are already compressed +DELETE_DUMP=${DELETE_DUMP:-0} +DUMP_BASE=${DUMP_BASE:-/dump/full_backup} 
+DUMP_RETENTION=${DUMP_RETENTION:-3} +REMOTE=${REMOTE:-remote:${BUCKET}/${BUCKETPATH}} +SECRET=${SECRET:-`cat /run/secrets/encryption_key`} +SLACK_CHANNEL=${SLACK_CHANNEL:-''} +SLACK_WEBHOOK=${SLACK_WEBHOOK:-''} +ZIP_BASE=${ZIP_BASE:-backup_full} +ZIP_DIR=${ZIP_DIR:-/zip} +ZIP_RETENTION=${ZIP_RETENTION:-4} [ -r /run/secrets/encryption_key ] || { echo "Encryption key not readable in /run/secrets/encryption_key" ; exit 1; } [ -r /run/secrets/gcp_backup_creds ] || { echo "Google cloud service credentials not found in /run/secrets/gcp_back_creds" ; exit 1; } @@ -31,11 +32,11 @@ ZIP_RETENTION=4 #/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; # Delete all old zip files, except the last N+1, as defined by $ZIP_RETENTION -ls -t ${ZIP_DIR}/${ZIP_BASE}*.{7z,md5} | tail -n +$((${ZIP_RETENTION} + 1)) | xargs rm -f +ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +$((${ZIP_RETENTION} + 1)) | xargs -r rm -f +ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +$((${ZIP_RETENTION} + 1)) | xargs -r rm -f # Delete all old backup dumps, except the last N+1, as defined by $DUMP_RETENTION -find ${DUMP_BASE} -type d -regextype posix-extended -regex ".*/[0-9]{4}-[0-9]{2}-[0-9]{2}$" -print0 | xargs -0 ls -td | tail -n +$((${DUMP_RETENTION} + 1)) | xargs -I {} rm -rf {} -# ls -t ${DUMP_BASE}/* | tail -n +$((${DUMP_RETENTION} + 1)) | xargs rm -f +find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | head -n -$((${DUMP_RETENTION})) | xargs -I {} rm -rf {}# ls -t ${DUMP_BASE}/* | tail -n +$((${DUMP_RETENTION} + 1)) | xargs rm -f # Get list of remote backups @@ -97,12 +98,12 @@ for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}" /usr/bin/7za a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } -# Add to list + uploads="$uploads ${ZIP_NAME}.7z" done ## Sync All Resulting Files (in list!) 
cd ${ZIP_DIR} -for file in ${ZIP_DIR}/*; do +for file in ${uploads}; do echo "RClone-ing ${file} to GCP ${GCP_DEST}" /bin/rclone sync -v "$file" ${REMOTE}/ done From 92cee8bcd32d2144f3c0b1ac874b83d5e8f3878e Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 2 Apr 2024 23:54:18 -0700 Subject: [PATCH 16/30] Rough alpha flow --- app/zip2cloud | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/app/zip2cloud b/app/zip2cloud index 57238bb..5eee4d9 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -36,8 +36,7 @@ ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +$((${ZIP_RETENTION} + 1)) | xargs - ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +$((${ZIP_RETENTION} + 1)) | xargs -r rm -f # Delete all old backup dumps, except the last N+1, as defined by $DUMP_RETENTION -find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | head -n -$((${DUMP_RETENTION})) | xargs -I {} rm -rf {}# ls -t ${DUMP_BASE}/* | tail -n +$((${DUMP_RETENTION} + 1)) | xargs rm -f - +find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | tail -n +$((${DUMP_RETENTION} + 1)) | xargs -I {} rm -rf {} # Get list of remote backups remote_files=$(rclone ls remote:${BUCKET}/${BUCKETPATH} | grep 7z | awk '{print $2}' | rev | cut -d. 
-f2- | rev) @@ -97,7 +96,7 @@ for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}" - /usr/bin/7za a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } + /usr/bin/7z a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } uploads="$uploads ${ZIP_NAME}.7z" done From d863b864011e45d96098cdc8a014c63a55074a8d Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Thu, 4 Apr 2024 19:25:35 -0700 Subject: [PATCH 17/30] Testing multi-target build --- .github/workflows/manual-build.yml | 8 +- app/zip2cloud | 152 ++++++++++++++++------------- 2 files changed, 92 insertions(+), 68 deletions(-) diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml index 0696a50..22964a0 100644 --- a/.github/workflows/manual-build.yml +++ b/.github/workflows/manual-build.yml @@ -1,11 +1,17 @@ --- name: Manual Build & Push on: - workflow_dispatch: + workflow_dispatch: + inputs: + platforms: + description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.' 
+ required: false + default: 'linux/amd64,linux/arm64,linux/riscv64,linux/ppc64le,linux/s390x' jobs: build-push: uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop with: name: '${{ github.event.repository.name }}-develop' tags: br-${{ github.ref_name }} + platforms: ${{ github.event.inputs.platforms }} secrets: inherit diff --git a/app/zip2cloud b/app/zip2cloud index 5eee4d9..23c8aec 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -1,16 +1,8 @@ #!/bin/sh -# Script to compress and encrypt mongodb backup directories and then sync them against a -# cloud S3 bucket -# -# Depends on 7zip and rclone -# -# sychan@lbl.gov -# 5/21/2021 - ## Variables COMPRESSION_LEVEL=${COMPRESSION_LEVEL:-0} # Set to 0 if the db dumps are already compressed -DELETE_DUMP=${DELETE_DUMP:-0} +DELETE_DUMP=${DELETE_DUMP:-''} DUMP_BASE=${DUMP_BASE:-/dump/full_backup} DUMP_RETENTION=${DUMP_RETENTION:-3} REMOTE=${REMOTE:-remote:${BUCKET}/${BUCKETPATH}} @@ -32,112 +24,138 @@ ZIP_RETENTION=${ZIP_RETENTION:-4} #/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; # Delete all old zip files, except the last N+1, as defined by $ZIP_RETENTION -ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +$((${ZIP_RETENTION} + 1)) | xargs -r rm -f -ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +$((${ZIP_RETENTION} + 1)) | xargs -r rm -f +ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +$((${ZIP_RETENTION} - 1)) | xargs -r rm -f +ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +$((${ZIP_RETENTION} - 1)) | xargs -r rm -f # Delete all old backup dumps, except the last N+1, as defined by $DUMP_RETENTION -find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | tail -n +$((${DUMP_RETENTION} + 1)) | xargs -I {} rm -rf {} +find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | tail -n +$((${DUMP_RETENTION} - 1)) | xargs -I {} rm -rf {} # Get list of remote backups remote_files=$(rclone 
ls remote:${BUCKET}/${BUCKETPATH} | grep 7z | awk '{print $2}' | rev | cut -d. -f2- | rev) # Pull remote md5 sums for each remote backup into `tmp_md5` directory -mkdir -p ${ZIP_DIR}/${ZIP_BASE}/tmp_md5 && cd $_ +mkdir -p ${ZIP_DIR}/tmp_md5 && cd $_ for file in $remote_files; do - rclone md5sum remote:${BUCKET}/${BUCKETPATH}/$file.7z | awk '{print $1}' > ${ZIP_DIR}/${ZIP_BASE}/tmp_md5/$file.md5 + rclone md5sum remote:${BUCKET}/${BUCKETPATH}/$file.7z | awk '{print $1}' > ${ZIP_DIR}/tmp_md5/$file.md5 done # Create empty list of files to upload uploads="" +echo "Zipping ${DUMP_BASE}/${DUMP_DIR} to ${ZIP_DIR}/${ZIP_NAME}" + +# Get all directories in DUMP_BASE +for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do + # Remove trailing slash and get the base name of the directory + DIR_NAME=$(basename ${DUMP_DIR%/}) + ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} + echo $DIR_NAME + + # Check if the corresponding md5 file exists + if [ -f "${ZIP_DIR}/tmp_md5/${ZIP_BASE}_${DIR_NAME}.md5" ]; then + echo "MD5 file exists for ${DIR_NAME}, skipping" + continue + fi + + echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}".7z + /usr/bin/7z a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } +done + # Create md5 sums for local backups, if they don't exist -cd ${ZIP_DIR}/${ZIP_BASE} -for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do +cd ${ZIP_DIR} +for file in ${ZIP_DIR}/*.7z; do # Get the base name of the file without extension base_name=$(basename "$file" .7z) + echo $base_name # If a local .md5 file does not exist, create it - if [ ! -f "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" ]; then + if [ ! 
-f "${ZIP_DIR}/${base_name}.md5" ]; then echo "Local md5 file does not exist for $file, generating, and adding $file to uploads list" uploads="$uploads $file" local_md5=$(md5sum "$file" | awk '{print $1}') - echo $local_md5 > "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" + echo $local_md5 > "${ZIP_DIR}/${base_name}.md5" fi done - # Verify & update list of files to upload cd ${ZIP_DIR}/${ZIP_BASE} -for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do +for file in ${ZIP_DIR}/*.7z; do # Get the base name of the file without extension base_name=$(basename "$file" .7z) # Check if the remote md5 file exists - if [ ! -f "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5" ]; then + if [ ! -f "${ZIP_DIR}/tmp_md5/${base_name}.md5" ]; then # If the remote md5 file does not exist, add the file to the uploads list echo "Remote does not exist for $file, adding $file to uploads list" uploads="$uploads $file" else # Compare local and remote md5 - remote_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5") - local_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5") + remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") + local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") if [ "$local_md5" != "$remote_md5" ]; then - echo "MD5 mismatch for file $file, adding to uploads list" - uploads="$uploads $file" + echo "MD5 mismatch for file $file" + # Extract the last character of the base name + last_char=${base_name: -1} + # Check if the last character is a letter + if [[ $last_char =~ [a-z] ]]; then + # If it's a letter, increment it + next_char=$(echo "$last_char" | tr "a-y" "b-z") + new_base_name=${base_name%?}$next_char + else + # If it's not a letter, append 'a' + new_base_name=${base_name}a + fi + # Rename the file + mv "$file" "${ZIP_DIR}/${new_base_name}.7z" + # Add the renamed file to the uploads list + uploads="$uploads ${ZIP_DIR}/${new_base_name}.7z" fi fi echo "Uploads: $uploads" done - -echo "Zipping ${DUMP_BASE}/${DUMP_DIR} to ${ZIP_DIR}/${ZIP_NAME}" - -# Get all directories in 
DUMP_BASE -for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do - # Remove trailing slash and get the base name of the directory - DIR_NAME=$(basename ${DUMP_DIR%/}) - ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} - - echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}" - /usr/bin/7z a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } - uploads="$uploads ${ZIP_NAME}.7z" -done - -## Sync All Resulting Files (in list!) -cd ${ZIP_DIR} -for file in ${uploads}; do - echo "RClone-ing ${file} to GCP ${GCP_DEST}" - /bin/rclone sync -v "$file" ${REMOTE}/ -done - -## Create a block that, upon success of rclone above, delete _only_ files that were uploaded -## For each $FILE.7z in $ZIP_DIR, do a "rm -rf $DUMP_BASE/$FILE" to remove the original dump -#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } - - -## -- Cruft -- +## Verify & update list of files to upload #cd ${ZIP_DIR}/${ZIP_BASE} -#uploads="" #for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do # # Get the base name of the file without extension # base_name=$(basename "$file" .7z) # # Check if the remote md5 file exists # if [ ! 
-f "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5" ]; then # # If the remote md5 file does not exist, add the file to the uploads list +# echo "Remote does not exist for $file, adding $file to uploads list" # uploads="$uploads $file" # else -# # Compare local and remote md5 -# remote_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5") -# local_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5") -# -# if [ "$local_md5" != "$remote_md5" ]; then -# echo "MD5 mismatch for file $file" +# # Compare local and remote md5 +# remote_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5") +# local_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5") +# if [ "$local_md5" != "$remote_md5" ]; then +# echo "MD5 mismatch for file $file, adding to uploads list" +# uploads="$uploads $file" +# fi # fi +# echo "Uploads: $uploads" #done -# Loop over all .7z files in ZIP_DIR -#for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do -# # Get the base name of the file without extension -# base_name=$(basename "$file" .7z) -# # If the corresponding .md5 file does not exist, create it -# if [ ! -f "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" ]; then -# md5sum "$file" | awk '{print $1}' > "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" + +# Before running rclone +for file in "${uploads[@]}"; do + ls $file + if [ ! -f "$file" ]; then + echo "File does not exist: $file" + fi +done + +## Before running rclone +#for file in ${uploads}; do +# if [ ! -f "$file" ]; then +# echo "File does not exist: $file" # fi -#done \ No newline at end of file +#done + +## Sync All Resulting Files (in list!) 
+#cd ${ZIP_DIR} +#for file in "${uploads[@]}"; do +# echo "RClone-ing ${file} to GCP ${REMOTE}" +# /usr/bin/rclone sync -v "$file" ${REMOTE}/ +#done +#for file in ${uploads}; do +# echo "RClone-ing ${file} to GCP ${GCP_DEST}" +# /usr/bin/rclone sync -v "$file" ${REMOTE}/ +#done From be40ac92ea4e04cecac9bc104068394add11c64e Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Thu, 4 Apr 2024 19:28:12 -0700 Subject: [PATCH 18/30] Testing multi-target build --- .github/workflows/manual-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml index 22964a0..1de5ebb 100644 --- a/.github/workflows/manual-build.yml +++ b/.github/workflows/manual-build.yml @@ -6,7 +6,7 @@ on: platforms: description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.' required: false - default: 'linux/amd64,linux/arm64,linux/riscv64,linux/ppc64le,linux/s390x' + default: 'linux/amd64,linux/arm64,linux/riscv64' jobs: build-push: uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop From f630bbe88b4a83659c9acfa353da0510a758630f Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Thu, 4 Apr 2024 19:31:26 -0700 Subject: [PATCH 19/30] Testing multi-target build --- .github/workflows/manual-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml index 1de5ebb..d8e9abb 100644 --- a/.github/workflows/manual-build.yml +++ b/.github/workflows/manual-build.yml @@ -6,7 +6,7 @@ on: platforms: description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.' 
required: false - default: 'linux/amd64,linux/arm64,linux/riscv64' + default: 'linux/amd64,linux/arm64/v8,linux/riscv64' jobs: build-push: uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop From dced65b467e31845441e7386c8b8b97fa58efe1a Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Thu, 4 Apr 2024 19:48:15 -0700 Subject: [PATCH 20/30] Testing multi-target build --- .github/workflows/manual-build.yml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml index d8e9abb..361a795 100644 --- a/.github/workflows/manual-build.yml +++ b/.github/workflows/manual-build.yml @@ -1,3 +1,20 @@ +#--- +#name: Manual Build & Push +#on: +# workflow_dispatch: +# inputs: +# platforms: +# description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.' +# required: false +# default: 'linux/amd64,linux/arm64/v8,linux/riscv64' +#jobs: +# build-push: +# uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop +# with: +# name: '${{ github.event.repository.name }}-develop' +# tags: br-${{ github.ref_name }} +# platforms: ${{ github.event.inputs.platforms }} +# secrets: inherit --- name: Manual Build & Push on: @@ -14,4 +31,4 @@ jobs: name: '${{ github.event.repository.name }}-develop' tags: br-${{ github.ref_name }} platforms: ${{ github.event.inputs.platforms }} - secrets: inherit + secrets: inherit \ No newline at end of file From dc8f23a07570bb6a57767b36e7c40019c6e85e25 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Thu, 4 Apr 2024 19:50:27 -0700 Subject: [PATCH 21/30] Testing multi-target build --- .github/workflows/manual-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml index 361a795..24125d7 100644 --- a/.github/workflows/manual-build.yml +++ b/.github/workflows/manual-build.yml @@ -23,7 +23,7 @@ on: 
platforms: description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.' required: false - default: 'linux/amd64,linux/arm64/v8,linux/riscv64' + default: 'linux/amd64,linux/arm64/v8' jobs: build-push: uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop From ad0aa9811f794e0550820a1c545539ee8f9890e2 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Mon, 8 Apr 2024 19:12:37 -0700 Subject: [PATCH 22/30] Cleanup & test. --- app/zip2cloud | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/zip2cloud b/app/zip2cloud index 23c8aec..79eb4a7 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -76,7 +76,7 @@ for file in ${ZIP_DIR}/*.7z; do done # Verify & update list of files to upload -cd ${ZIP_DIR}/${ZIP_BASE} +cd ${ZIP_DIR}/ for file in ${ZIP_DIR}/*.7z; do # Get the base name of the file without extension base_name=$(basename "$file" .7z) @@ -135,7 +135,8 @@ done # Before running rclone -for file in "${uploads[@]}"; do +#for file in "${uploads[@]}"; do +for file in '${uploads}'; do ls $file if [ ! -f "$file" ]; then echo "File does not exist: $file" From 274bcf8cca86c06b1a3d11f886c4708805d0a410 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Mon, 8 Apr 2024 19:34:59 -0700 Subject: [PATCH 23/30] Adding with working file add & rclone loops --- app/zip2cloud | 45 +++++++-------------------------------------- 1 file changed, 7 insertions(+), 38 deletions(-) diff --git a/app/zip2cloud b/app/zip2cloud index 79eb4a7..feff7fa 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -111,52 +111,21 @@ for file in ${ZIP_DIR}/*.7z; do echo "Uploads: $uploads" done -## Verify & update list of files to upload -#cd ${ZIP_DIR}/${ZIP_BASE} -#for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do -# # Get the base name of the file without extension -# base_name=$(basename "$file" .7z) -# # Check if the remote md5 file exists -# if [ ! 
-f "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5" ]; then -# # If the remote md5 file does not exist, add the file to the uploads list -# echo "Remote does not exist for $file, adding $file to uploads list" -# uploads="$uploads $file" -# else -# # Compare local and remote md5 -# remote_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5") -# local_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5") -# if [ "$local_md5" != "$remote_md5" ]; then -# echo "MD5 mismatch for file $file, adding to uploads list" -# uploads="$uploads $file" -# fi -# fi -# echo "Uploads: $uploads" -#done - # Before running rclone #for file in "${uploads[@]}"; do -for file in '${uploads}'; do +for file in ${uploads}; do ls $file if [ ! -f "$file" ]; then echo "File does not exist: $file" fi done -## Before running rclone -#for file in ${uploads}; do -# if [ ! -f "$file" ]; then -# echo "File does not exist: $file" -# fi -#done + ## Sync All Resulting Files (in list!) -#cd ${ZIP_DIR} -#for file in "${uploads[@]}"; do -# echo "RClone-ing ${file} to GCP ${REMOTE}" -# /usr/bin/rclone sync -v "$file" ${REMOTE}/ -#done -#for file in ${uploads}; do -# echo "RClone-ing ${file} to GCP ${GCP_DEST}" -# /usr/bin/rclone sync -v "$file" ${REMOTE}/ -#done +cd ${ZIP_DIR} +for file in ${uploads}; do + echo "RClone-ing ${file} to GCP ${REMOTE}" + /usr/bin/rclone sync -v "$file" ${REMOTE}/ +done From 41fc18c37a3cb6695d3c74e1bb4052cb512880aa Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Mon, 8 Apr 2024 21:49:22 -0700 Subject: [PATCH 24/30] Fixing rm cleanup vars --- app/zip2cloud | 73 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 24 deletions(-) diff --git a/app/zip2cloud b/app/zip2cloud index feff7fa..0adfc8d 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -24,11 +24,12 @@ ZIP_RETENTION=${ZIP_RETENTION:-4} #/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; # Delete all old zip files, except the last N+1, as defined by 
$ZIP_RETENTION -ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +$((${ZIP_RETENTION} - 1)) | xargs -r rm -f -ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +$((${ZIP_RETENTION} - 1)) | xargs -r rm -f +rm -rf ${ZIP_DIR}/tmp_md5 +ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +${ZIP_RETENTION} | xargs -r rm -f +ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +${ZIP_RETENTION} | xargs -r rm -f # Delete all old backup dumps, except the last N+1, as defined by $DUMP_RETENTION -find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | tail -n +$((${DUMP_RETENTION} - 1)) | xargs -I {} rm -rf {} +find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | tail -n +${DUMP_RETENTION} | xargs -I {} rm -rf {} # Get list of remote backups remote_files=$(rclone ls remote:${BUCKET}/${BUCKETPATH} | grep 7z | awk '{print $2}' | rev | cut -d. -f2- | rev) @@ -86,27 +87,51 @@ for file in ${ZIP_DIR}/*.7z; do echo "Remote does not exist for $file, adding $file to uploads list" uploads="$uploads $file" else - # Compare local and remote md5 - remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") - local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") - if [ "$local_md5" != "$remote_md5" ]; then - echo "MD5 mismatch for file $file" - # Extract the last character of the base name - last_char=${base_name: -1} - # Check if the last character is a letter - if [[ $last_char =~ [a-z] ]]; then - # If it's a letter, increment it - next_char=$(echo "$last_char" | tr "a-y" "b-z") - new_base_name=${base_name%?}$next_char - else - # If it's not a letter, append 'a' - new_base_name=${base_name}a - fi - # Rename the file - mv "$file" "${ZIP_DIR}/${new_base_name}.7z" - # Add the renamed file to the uploads list - uploads="$uploads ${ZIP_DIR}/${new_base_name}.7z" - fi + # Compare local and remote md5 + remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") + local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") + if [ "$local_md5" != 
"$remote_md5" ]; then + echo "MD5 mismatch for file $file" + # Extract the last character of the base name + last_char=${base_name: -1} + # Check if the last character is a letter + if [[ $last_char =~ [a-y] ]]; then + # If it's a letter, increment it + next_char=$(echo "$last_char" | tr "a-y" "b-z") + new_base_name=${base_name%?}$next_char + elif [[ $last_char == 'z' ]]; then + # If it's 'z', replace it with 'a' and append 'a' + new_base_name=${base_name%?}aa + else + # If it's not a letter, append 'a' + new_base_name=${base_name}a + fi + # Rename the file + mv "$file" "${ZIP_DIR}/${new_base_name}.7z" + # Add the renamed file to the uploads list + uploads="$uploads ${ZIP_DIR}/${new_base_name}.7z" + fi +# # Compare local and remote md5 +# remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") +# local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") +# if [ "$local_md5" != "$remote_md5" ]; then +# echo "MD5 mismatch for file $file" +# # Extract the last character of the base name +# last_char=${base_name: -1} +# # Check if the last character is a letter +# if [[ $last_char =~ [a-z] ]]; then +# # If it's a letter, increment it +# next_char=$(echo "$last_char" | tr "a-y" "b-z") +# new_base_name=${base_name%?}$next_char +# else +# # If it's not a letter, append 'a' +# new_base_name=${base_name}a +# fi +# # Rename the file +# mv "$file" "${ZIP_DIR}/${new_base_name}.7z" +# # Add the renamed file to the uploads list +# uploads="$uploads ${ZIP_DIR}/${new_base_name}.7z" +# fi fi echo "Uploads: $uploads" done From ef6b6c4ccb19a09fdd2fe5800fdda6b081d06b87 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Mon, 8 Apr 2024 23:15:27 -0700 Subject: [PATCH 25/30] Adding working test candidate --- app/zip2cloud | 111 ++++++++++++++++++++++---------------------------- 1 file changed, 48 insertions(+), 63 deletions(-) diff --git a/app/zip2cloud b/app/zip2cloud index 0adfc8d..5a03425 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -13,24 +13,26 @@ ZIP_BASE=${ZIP_BASE:-backup_full} 
ZIP_DIR=${ZIP_DIR:-/zip} ZIP_RETENTION=${ZIP_RETENTION:-4} +### Cleanup + [ -r /run/secrets/encryption_key ] || { echo "Encryption key not readable in /run/secrets/encryption_key" ; exit 1; } [ -r /run/secrets/gcp_backup_creds ] || { echo "Google cloud service credentials not found in /run/secrets/gcp_back_creds" ; exit 1; } [ -z "${BUCKET}" ] && { echo "S3 bucketname not set in BUCKET environment variable" ; exit 1; } [ -z "${BUCKETPATH}" ] && { echo "Path within S3 bucket not set in BUCKETPATH environment variable" ; exit 1; } [ -z "${DELETE_DUMP}" ] || echo "DELETE_DUMP set, will delete files/directories under /dump/ when done compressing" -# Delete any files older than 30 days in the zip directory -#echo "Deleting database archives older than 30 days" -#/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; -# Delete all old zip files, except the last N+1, as defined by $ZIP_RETENTION +# Delete all old zip files, except the last N, as defined by $ZIP_RETENTION rm -rf ${ZIP_DIR}/tmp_md5 ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +${ZIP_RETENTION} | xargs -r rm -f ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +${ZIP_RETENTION} | xargs -r rm -f -# Delete all old backup dumps, except the last N+1, as defined by $DUMP_RETENTION +# Delete all old backup dumps, except the last N, as defined by $DUMP_RETENTION find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | tail -n +${DUMP_RETENTION} | xargs -I {} rm -rf {} +### End Cleanup + + # Get list of remote backups remote_files=$(rclone ls remote:${BUCKET}/${BUCKETPATH} | grep 7z | awk '{print $2}' | rev | cut -d. 
-f2- | rev) # Pull remote md5 sums for each remote backup into `tmp_md5` directory @@ -39,59 +41,63 @@ for file in $remote_files; do rclone md5sum remote:${BUCKET}/${BUCKETPATH}/$file.7z | awk '{print $1}' > ${ZIP_DIR}/tmp_md5/$file.md5 done -# Create empty list of files to upload -uploads="" - -echo "Zipping ${DUMP_BASE}/${DUMP_DIR} to ${ZIP_DIR}/${ZIP_NAME}" -# Get all directories in DUMP_BASE +# Get all exports from DUMP_BASE for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do # Remove trailing slash and get the base name of the directory DIR_NAME=$(basename ${DUMP_DIR%/}) ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} echo $DIR_NAME - # Check if the corresponding md5 file exists - if [ -f "${ZIP_DIR}/tmp_md5/${ZIP_BASE}_${DIR_NAME}.md5" ]; then - echo "MD5 file exists for ${DIR_NAME}, skipping" - continue + # Check if the corresponding md5 file exists, if not, zip it + if [ ! -f "${ZIP_DIR}/tmp_md5/${ZIP_BASE}_${DIR_NAME}.md5" ]; then + echo "No remote exists for ${DIR_NAME}, zipping" + /usr/bin/7z a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } fi - - echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}".7z - /usr/bin/7z a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } done -# Create md5 sums for local backups, if they don't exist -cd ${ZIP_DIR} +# Compare checksums of local 7z files against all remotes' md5's. Add to upload list if not found +uploads="" +cd ${ZIP_DIR} || exit for file in ${ZIP_DIR}/*.7z; do # Get the base name of the file without extension base_name=$(basename "$file" .7z) - echo $base_name - # If a local .md5 file does not exist, create it - if [ ! 
-f "${ZIP_DIR}/${base_name}.md5" ]; then - echo "Local md5 file does not exist for $file, generating, and adding $file to uploads list" + local_md5=$(md5sum "$file" | awk '{print $1}') + echo $local_md5 > "${ZIP_DIR}/${base_name}.md5" + # Now compare this file with the remote md5s + match_found=0 + for remote_md5_file in ${ZIP_DIR}/tmp_md5/*.md5; do + remote_md5=$(cat "$remote_md5_file") + if [ "$local_md5" = "$remote_md5" ]; then + match_found=1 + break + fi + done + if [ $match_found -eq 0 ]; then + echo "Adding $file to uploads list" uploads="$uploads $file" - local_md5=$(md5sum "$file" | awk '{print $1}') - echo $local_md5 > "${ZIP_DIR}/${base_name}.md5" fi done -# Verify & update list of files to upload -cd ${ZIP_DIR}/ -for file in ${ZIP_DIR}/*.7z; do +echo "Current uploads candidates are: $uploads" + +## Verify & update list of files to upload +final_uploads="" +cd ${ZIP_DIR} || exit +for file in ${uploads}; do # Get the base name of the file without extension base_name=$(basename "$file" .7z) - # Check if the remote md5 file exists - if [ ! -f "${ZIP_DIR}/tmp_md5/${base_name}.md5" ]; then - # If the remote md5 file does not exist, add the file to the uploads list - echo "Remote does not exist for $file, adding $file to uploads list" - uploads="$uploads $file" - else +# # Check if the remote md5 file exists +# if [ ! -f "${ZIP_DIR}/tmp_md5/${base_name}.md5" ]; then +# # If the remote md5 file does not exist, add the file to the uploads list +# echo "Remote does not exist for $file, adding $file to uploads list" +# final_uploads="$final_uploads $file" +# else # Compare local and remote md5 remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") if [ "$local_md5" != "$remote_md5" ]; then - echo "MD5 mismatch for file $file" + echo "MD5 mismatch for file $file. Incrementing filename and adding to uploads list." 
# Extract the last character of the base name last_char=${base_name: -1} # Check if the last character is a letter @@ -109,37 +115,16 @@ for file in ${ZIP_DIR}/*.7z; do # Rename the file mv "$file" "${ZIP_DIR}/${new_base_name}.7z" # Add the renamed file to the uploads list - uploads="$uploads ${ZIP_DIR}/${new_base_name}.7z" - fi -# # Compare local and remote md5 -# remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") -# local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") -# if [ "$local_md5" != "$remote_md5" ]; then -# echo "MD5 mismatch for file $file" -# # Extract the last character of the base name -# last_char=${base_name: -1} -# # Check if the last character is a letter -# if [[ $last_char =~ [a-z] ]]; then -# # If it's a letter, increment it -# next_char=$(echo "$last_char" | tr "a-y" "b-z") -# new_base_name=${base_name%?}$next_char -# else -# # If it's not a letter, append 'a' -# new_base_name=${base_name}a -# fi -# # Rename the file -# mv "$file" "${ZIP_DIR}/${new_base_name}.7z" -# # Add the renamed file to the uploads list -# uploads="$uploads ${ZIP_DIR}/${new_base_name}.7z" -# fi - fi - echo "Uploads: $uploads" + final_uploads="$final_uploads ${ZIP_DIR}/${new_base_name}.7z" + fi done +echo "Final uploads: $final_uploads" + # Before running rclone #for file in "${uploads[@]}"; do -for file in ${uploads}; do +for file in ${final_uploads}; do ls $file if [ ! -f "$file" ]; then echo "File does not exist: $file" @@ -149,8 +134,8 @@ done ## Sync All Resulting Files (in list!) 
-cd ${ZIP_DIR} -for file in ${uploads}; do +cd ${ZIP_DIR} || exit +for file in ${final_uploads}; do echo "RClone-ing ${file} to GCP ${REMOTE}" /usr/bin/rclone sync -v "$file" ${REMOTE}/ done From 0d9933f95cfe04d7a2d4018b9383852c63760923 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Mon, 8 Apr 2024 23:37:57 -0700 Subject: [PATCH 26/30] Adding working test candidate --- app/zip2cloud | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/app/zip2cloud b/app/zip2cloud index 5a03425..591739f 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -44,15 +44,22 @@ done # Get all exports from DUMP_BASE for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do + # Check if the dump is complete + echo "Checking export for ${DUMP_DIR}" + if [ ! -f "${DUMP_DIR}/dump_complete.txt" ]; then + echo "dump_complete.txt not found in ${DUMP_DIR}, skipping" + continue + fi # Remove trailing slash and get the base name of the directory DIR_NAME=$(basename ${DUMP_DIR%/}) ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} echo $DIR_NAME - # Check if the corresponding md5 file exists, if not, zip it if [ ! -f "${ZIP_DIR}/tmp_md5/${ZIP_BASE}_${DIR_NAME}.md5" ]; then echo "No remote exists for ${DIR_NAME}, zipping" /usr/bin/7z a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } + else + echo "Remote exists for ${DIR_NAME}, skipping" fi done From 2a2aef395ad7751f173f6bfc1d1b002093417ebc Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 9 Apr 2024 00:04:16 -0700 Subject: [PATCH 27/30] Adding working test candidate --- app/README.md | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/app/README.md b/app/README.md index 4002b0f..6b2372a 100644 --- a/app/README.md +++ b/app/README.md @@ -10,26 +10,23 @@ A robust zip & upload utility for sending archives to a remote location. 
- Only deletes local files once they have been successfully uploaded - Allows keeping an arbitrary amount of zipped & unzipped backups locally for faster restore - Script only zips & uploads files that are missing from the remote location -- Allows mixing backup files with other data - - Only zips folders under the `$DUMP_BASE` directory with a date-based name e.g. `2024-04-01` -- Notifies on completion or error via Slack -### Operation of `zip2cloud` +[//]: # (- Allows mixing backup files with other data) + +[//]: # ( - Only zips folders under the `$DUMP_BASE` directory with a date-based name e.g. `2024-04-01`) -- Uses `rclone` to create a list of `.7z` & `.md5` files from the remote location defined with the `REMOTE` environment variable -- For each file in the list +[//]: # (- Notifies on completion or error via Slack) -- Compares file names & md5 sums between local & remote locations prior to read/write operations - - Uploads any `.7z` files that are missing from the remote location - - Files with mismatched md5 sums are uploaded with alternate filenames - - Only deletes files locally once they have been successfully uploaded & md5 sums confirmed -- Allows multiple unzipped local backups to remain, without re-zipping & uploading - - This allows for faster restores, as we can avoid downloading the most recent archives -- +### Operation of `zip2cloud` -1. Creates 7zip archives of any directories under the `$DUMP_BASE` with a date-based name - - For example, if `$DUMP_BASE` is `/dump/full_backup`, the directory `2024-04-01` will -2. Syncs the archives to a remote location using rclone +1. Cleans up old zip files and backup dumps based on the retention period set in the environment variables. +2. Retrieves the list of remote backups and their MD5 checksums from the remote S3 bucket. +3. Checks database dumps for completion by looking for a `dump_complete.txt` file in the dump's top-level directory. +4. 
Compresses new database dumps that do not have a corresponding MD5 file in the remote S3 bucket. +5. Compares the MD5 checksums of local and remote files. + 1. If a local file does not have a matching MD5 checksum in the remote S3 bucket, it is added to the upload list. +6. If there's an MD5 mismatch between a local and a remote file, the script increments the filename of the local file and adds it to the upload list. +7. Finally, it syncs all the files in the upload list to the remote S3 bucket using rclone. ### Variables From 9713a02a875fbbb2e5a8f9a14527fc76499791dc Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 9 Apr 2024 00:06:00 -0700 Subject: [PATCH 28/30] Adding working test candidate --- app/zip2cloud | 6 ------ 1 file changed, 6 deletions(-) diff --git a/app/zip2cloud b/app/zip2cloud index 591739f..ad8c40c 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -94,12 +94,6 @@ cd ${ZIP_DIR} || exit for file in ${uploads}; do # Get the base name of the file without extension base_name=$(basename "$file" .7z) -# # Check if the remote md5 file exists -# if [ ! 
-f "${ZIP_DIR}/tmp_md5/${base_name}.md5" ]; then -# # If the remote md5 file does not exist, add the file to the uploads list -# echo "Remote does not exist for $file, adding $file to uploads list" -# final_uploads="$final_uploads $file" -# else # Compare local and remote md5 remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") From 2ddf0e5f3497157f4709afd3bdc957b5e2d4e4e7 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 9 Apr 2024 14:26:05 -0700 Subject: [PATCH 29/30] Updating workflows --- .github/workflows/manual-build.yml | 23 +++-------------------- .github/workflows/pr_build.yml | 18 +++++++++++++----- .github/workflows/release-main.yml | 10 +++++++--- 3 files changed, 23 insertions(+), 28 deletions(-) diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml index 24125d7..8997303 100644 --- a/.github/workflows/manual-build.yml +++ b/.github/workflows/manual-build.yml @@ -1,20 +1,3 @@ -#--- -#name: Manual Build & Push -#on: -# workflow_dispatch: -# inputs: -# platforms: -# description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.' -# required: false -# default: 'linux/amd64,linux/arm64/v8,linux/riscv64' -#jobs: -# build-push: -# uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop -# with: -# name: '${{ github.event.repository.name }}-develop' -# tags: br-${{ github.ref_name }} -# platforms: ${{ github.event.inputs.platforms }} -# secrets: inherit --- name: Manual Build & Push on: @@ -23,12 +6,12 @@ on: platforms: description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.' 
required: false - default: 'linux/amd64,linux/arm64/v8' + default: 'linux/amd64' jobs: build-push: - uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop + uses: kbase/.github/.github/workflows/reusable_build-push.yml@multi-target with: name: '${{ github.event.repository.name }}-develop' tags: br-${{ github.ref_name }} platforms: ${{ github.event.inputs.platforms }} - secrets: inherit \ No newline at end of file + secrets: inherit diff --git a/.github/workflows/pr_build.yml b/.github/workflows/pr_build.yml index 0fa1c46..d3ecfbd 100644 --- a/.github/workflows/pr_build.yml +++ b/.github/workflows/pr_build.yml @@ -11,33 +11,41 @@ on: - reopened - synchronize - closed +# Defaults to building for linux/amd64. Can be modified by updating the variable, e.g. PLATFORMS: 'linux/amd64,linux/arm64/v8' +env: + PLATFORMS: 'linux/amd64,linux/arm64/v8' jobs: build-develop-open: if: github.base_ref == 'develop' && github.event.pull_request.merged == false - uses: kbase/.github/.github/workflows/reusable_build.yml@main + uses: kbase/.github/.github/workflows/reusable_build.yml@multi-target + with: + platforms: ${{ env.PLATFORMS }} secrets: inherit build-develop-merge: if: github.base_ref == 'develop' && github.event.pull_request.merged == true - uses: kbase/.github/.github/workflows/reusable_build-push.yml@main + uses: kbase/.github/.github/workflows/reusable_build-push.yml@multi-target with: name: '${{ github.event.repository.name }}-develop' tags: pr-${{ github.event.number }},latest + platforms: ${{ env.PLATFORMS }} secrets: inherit build-main-open: if: (github.base_ref == 'main' || github.base_ref == 'master') && github.event.pull_request.merged == false - uses: kbase/.github/.github/workflows/reusable_build-push.yml@main + uses: kbase/.github/.github/workflows/reusable_build-push.yml@multi-target with: name: '${{ github.event.repository.name }}' tags: pr-${{ github.event.number }} + platforms: ${{ env.PLATFORMS }} secrets: inherit build-main-merge: if: (github.base_ref
== 'main' || github.base_ref == 'master') && github.event.pull_request.merged == true - uses: kbase/.github/.github/workflows/reusable_build-push.yml@main + uses: kbase/.github/.github/workflows/reusable_build-push.yml@multi-target with: name: '${{ github.event.repository.name }}' tags: pr-${{ github.event.number }},latest-rc + platforms: ${{ env.PLATFORMS }} secrets: inherit trivy-scans: if: (github.base_ref == 'develop' || github.base_ref == 'main' || github.base_ref == 'master' ) && github.event.pull_request.merged == false - uses: kbase/.github/.github/workflows/reusable_trivy-scans.yml@main + uses: kbase/.github/.github/workflows/reusable_trivy-scans.yml@multi-target secrets: inherit diff --git a/.github/workflows/release-main.yml b/.github/workflows/release-main.yml index a254678..04a3701 100644 --- a/.github/workflows/release-main.yml +++ b/.github/workflows/release-main.yml @@ -6,20 +6,24 @@ on: - main - master types: [ published ] +# Defaults to building for linux/amd64. Can modify be updating variable, e.g. 
PLATFORMS: 'linux/amd64,linux/arm64/v8' +env: + PLATFORMS: 'linux/amd64,linux/arm64/v8' jobs: check-source-branch: - uses: kbase/.github/.github/workflows/reusable_validate-branch.yml@main + uses: kbase/.github/.github/workflows/reusable_validate-branch.yml@multi-target with: build_branch: '${{ github.event.release.target_commitish }}' validate-release-tag: needs: check-source-branch - uses: kbase/.github/.github/workflows/reusable_validate-release-tag.yml@main + uses: kbase/.github/.github/workflows/reusable_validate-release-tag.yml@multi-target with: release_tag: '${{ github.event.release.tag_name }}' build-push: needs: validate-release-tag - uses: kbase/.github/.github/workflows/reusable_build-push.yml@main + uses: kbase/.github/.github/workflows/reusable_build-push.yml@multi-target with: name: '${{ github.event.repository.name }}' tags: '${{ github.event.release.tag_name }},latest' + platforms: ${{ env.PLATFORMS }} secrets: inherit From 15b6138d345934577d640decec86a71bb44bb89e Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 9 Apr 2024 14:40:28 -0700 Subject: [PATCH 30/30] Updating workflows --- .github/workflows/manual-build.yml | 2 +- .github/workflows/pr_build.yml | 16 ++++++++++++---- .github/workflows/release-main.yml | 13 +++++++++---- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml index 8997303..5eb59c2 100644 --- a/.github/workflows/manual-build.yml +++ b/.github/workflows/manual-build.yml @@ -6,7 +6,7 @@ on: platforms: description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.' 
required: false - default: 'linux/amd64' + default: 'linux/amd64,linux/arm64/v8' jobs: build-push: uses: kbase/.github/.github/workflows/reusable_build-push.yml@multi-target diff --git a/.github/workflows/pr_build.yml b/.github/workflows/pr_build.yml index d3ecfbd..5d07745 100644 --- a/.github/workflows/pr_build.yml +++ b/.github/workflows/pr_build.yml @@ -15,11 +15,19 @@ on: env: PLATFORMS: 'linux/amd64,linux/arm64/v8' jobs: + set-platforms: + runs-on: ubuntu-latest + outputs: + platforms: ${{ steps.set-platforms.outputs.platforms }} + steps: + - name: Set platforms + id: set-platforms + run: echo "::set-output name=platforms::linux/amd64,linux/arm64/v8" build-develop-open: if: github.base_ref == 'develop' && github.event.pull_request.merged == false uses: kbase/.github/.github/workflows/reusable_build.yml@multi-target with: - platforms: ${{ env.PLATFORMS }} + platforms: ${{ needs.set-platforms.outputs.platforms }} secrets: inherit build-develop-merge: if: github.base_ref == 'develop' && github.event.pull_request.merged == true @@ -27,7 +35,7 @@ jobs: with: name: '${{ github.event.repository.name }}-develop' tags: pr-${{ github.event.number }},latest - platforms: ${{ env.PLATFORMS }} + platforms: ${{ needs.set-platforms.outputs.platforms }} secrets: inherit build-main-open: if: (github.base_ref == 'main' || github.base_ref == 'master') && github.event.pull_request.merged == false @@ -35,7 +43,7 @@ jobs: with: name: '${{ github.event.repository.name }}' tags: pr-${{ github.event.number }} - platforms: ${{ env.PLATFORMS }} + platforms: ${{ needs.set-platforms.outputs.platforms }} secrets: inherit build-main-merge: if: (github.base_ref == 'main' || github.base_ref == 'master') && github.event.pull_request.merged == true @@ -43,7 +51,7 @@ jobs: with: name: '${{ github.event.repository.name }}' tags: pr-${{ github.event.number }},latest-rc - platforms: ${{ env.PLATFORMS }} + platforms: ${{ needs.set-platforms.outputs.platforms }} secrets: inherit trivy-scans: if: 
(github.base_ref == 'develop' || github.base_ref == 'main' || github.base_ref == 'master' ) && github.event.pull_request.merged == false diff --git a/.github/workflows/release-main.yml b/.github/workflows/release-main.yml index 04a3701..52f4517 100644 --- a/.github/workflows/release-main.yml +++ b/.github/workflows/release-main.yml @@ -6,10 +6,15 @@ on: - main - master types: [ published ] -# Defaults to building for linux/amd64. Can modify be updating variable, e.g. PLATFORMS: 'linux/amd64,linux/arm64/v8' -env: - PLATFORMS: 'linux/amd64,linux/arm64/v8' jobs: + set-platforms: + runs-on: ubuntu-latest + outputs: + platforms: ${{ steps.set-platforms.outputs.platforms }} + steps: + - name: Set platforms + id: set-platforms + run: echo "::set-output name=platforms::linux/amd64,linux/arm64/v8" check-source-branch: uses: kbase/.github/.github/workflows/reusable_validate-branch.yml@multi-target with: @@ -25,5 +30,5 @@ jobs: with: name: '${{ github.event.repository.name }}' tags: '${{ github.event.release.tag_name }},latest' - platforms: ${{ env.PLATFORMS }} + platforms: ${{ needs.set-platforms.outputs.platforms }} secrets: inherit