From b4d7aa4eb2b0f10bb0a4eda065aaa37baaf8eea3 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Mon, 1 Apr 2024 16:08:12 -0700 Subject: [PATCH 01/17] Testing minimal image --- Dockerfile | 17 +++++++++++++---- app/zip2cloud | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7ec1da0..6770b3f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ -FROM alpine:latest +# Builder stage +FROM alpine:latest as builder -# Update and install necessary packages RUN apk update && \ - apk add p7zip rclone + apk add --no-cache p7zip rclone # Create config directory RUN mkdir -p /root/.config/rclone/ @@ -11,6 +11,15 @@ RUN mkdir -p /root/.config/rclone/ COPY rclone.conf /root/.config/rclone/rclone.conf COPY app/ /app/ +# Final stage +FROM alpine:latest + +# Copy necessary binaries and files from builder stage +COPY --from=builder /usr/bin/rclone /usr/bin/rclone +COPY --from=builder /usr/bin/7z /usr/bin/7z +COPY --from=builder /root/.config/rclone/rclone.conf /root/.config/rclone/rclone.conf +COPY --from=builder /app/ /app/ + WORKDIR /app -ENTRYPOINT /app/zip2cloud \ No newline at end of file +ENTRYPOINT ["/app/zip2cloud"] \ No newline at end of file diff --git a/app/zip2cloud b/app/zip2cloud index 5ce5fc2..e7fe87b 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -62,4 +62,4 @@ done ## Create a block that, upon success of rclone above, delete _only_ files that were uploaded ## For each $FILE.7z in $ZIP_DIR, do a "rm -rf $DUMP_BASE/$FILE" to remove the original dump -#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } +#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } \ No newline at end of file From 81959b75625fe1dacce5a0c0723a3556599daf3d Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 2 Apr 2024 20:00:25 -0700 Subject: [PATCH 02/17] Adding curl and md5 loops --- Dockerfile | 5 +- app/README.md | 44 ++++++++++++++++++ app/zip2cloud | 123 ++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 145 insertions(+), 27 deletions(-) create mode 100644 app/README.md diff --git a/Dockerfile b/Dockerfile index 6770b3f..d548cd3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM alpine:latest as builder RUN apk update && \ - apk add --no-cache p7zip rclone + apk add --no-cache curl p7zip rclone # Create config directory RUN mkdir -p /root/.config/rclone/ @@ -15,8 +15,9 @@ COPY app/ /app/ FROM alpine:latest # Copy necessary binaries and files from builder stage -COPY --from=builder /usr/bin/rclone /usr/bin/rclone COPY --from=builder /usr/bin/7z /usr/bin/7z +COPY --from=builder /usr/bin/curl /usr/bin/curl +COPY --from=builder /usr/bin/rclone /usr/bin/rclone COPY --from=builder /root/.config/rclone/rclone.conf /root/.config/rclone/rclone.conf COPY --from=builder /app/ /app/ diff --git a/app/README.md b/app/README.md new file mode 100644 index 0000000..6d03cae --- /dev/null +++ b/app/README.md @@ -0,0 +1,44 @@ + +## Zip2Cloud + +A robust zip & upload utility for sending archives to a remote location. + +### Features + +- Intelligently compares local & remote files with md5 sums +- Only uploads _completed_ archives +- Only deletes local files once they have been successfully uploaded +- Allows keeping an arbitrary amount of zipped & unzipped backups locally for faster restore + - Script only zips & uploads files that are missing from the remote location +- Allows mixing backup files with other data + - Only zips folders under the `$DUMP_BASE` directory with a date-based name e.g. `2024-04-01` +- Notifies on completion or error via Slack + +### Operation of `zip2cloud` + +- Uses `rclone` to create a list of `.7z` & `.md5` files from the remote location defined with the `REMOTE` environment variable +- For each file in the list + +- Compares file names & md5 sums between local & remote locations prior to read/write operations + - Uploads any `.7z` files that are missing from the remote location + - Files with mismatched md5 sums are uploaded with alternate filenames + - Only deletes files locally once they have been successfully uploaded & md5 sums confirmed +- Allows multiple unzipped local backups to remain, without re-zipping & uploading + - This allows for faster restores, as we can avoid downloading the most recent archives +- + +1. Creates 7zip archives of any directories under the `$DUMP_BASE` with a date-based name + - For example, if `$DUMP_BASE` is `/dump/full_backup`, the directory `2024-04-01` will +2. Syncs the archives to a remote location using rclone + +### Variables + +- `DUMP_BASE` - The base directory for backup dumps (default `/dump`) +- `DUMP_RETENTION` - The number of days to keep uncompressed backups locally +- `REMOTE` - The remote location to sync backups to +- `SECRET` - The encryption key for 7zip +- `SLACK_CHANNEL` - The slack channel to send notifications to +- `SLACK_WEBHOOK` - The webhook URL for slack notifications +- `ZIP_BASE` - The base filename, minus date, for the compressed backups +- `ZIP_DIR` - The directory to store all compressed backups (default `/zip`) +- `ZIP_RETENTION` - The number of days to keep compressed backups locally diff --git a/app/zip2cloud b/app/zip2cloud index e7fe87b..a74419d 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -8,20 +8,17 @@ # sychan@lbl.gov # 5/21/2021 -# Directory containing db dumps to be archived/compressed/copied -#DUMP_BASE=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-test +## Variables +COMPRESSION_LEVEL=0 # Set to 0 if the db dumps are already compressed DUMP_BASE=/dump/full_backup - -# Directory to put the zipped backups -#ZIP_DIR=/Users/jsfillman/Documents/repos/jsfillman-github/tmp-backup-zip -ZIP_DIR=/zip - -NOW=$(/bin/date +"%Y%m%d%H%M") - -# Name of the zip'ed db backup. The .7z extension wil be added by the 7zip program - +DUMP_RETENTION=3 +REMOTE=remote:${BUCKET}/${BUCKETPATH} +SECRET=`cat /run/secrets/encryption_key` +SLACK_CHANNEL='' +SLACK_WEBHOOK='' ZIP_BASE=backup_full -#ZIP_NAME=${ZIP_BASE}${NOW} +ZIP_DIR=/zip +ZIP_RETENTION=4 [ -r /run/secrets/encryption_key ] || { echo "Encryption key not readable in /run/secrets/encryption_key" ; exit 1; } [ -r /run/secrets/gcp_backup_creds ] || { echo "Google cloud service credentials not found in /run/secrets/gcp_back_creds" ; exit 1; } @@ -29,17 +26,61 @@ ZIP_BASE=backup_full [ -z "${BUCKETPATH}" ] && { echo "Path within S3 bucket not set in BUCKETPATH environment variable" ; exit 1; } [ -z "${DELETE_DUMP}" ] || echo "DELETE_DUMP set, will delete files/directories under /dump/ when done compressing" -## This is the password used to generate the AES256 encryption key -#SECRET=tempsecret -SECRET=`cat /run/secrets/encryption_key` -# -## This is the Google Cloud Storage path, note that it depends on rclone being preconfigured -## for "remote" using the runtime creds, check rclone config in /root/.config/rclone/rclone.conf -REMOTE=remote:${BUCKET}/${BUCKETPATH} - # Delete any files older than 30 days in the zip directory -echo "Deleting database archives older than 30 days" -/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; +#echo "Deleting database archives older than 30 days" +#/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; + +# Delete all old backups, except the last #, as defined by $ZIP_RETENTION +ls -t ${ZIP_DIR}/${ZIP_BASE}*.{7z,md5} | tail -n +$((${ZIP_RETENTION} + 1)) | xargs rm -f + +# Get list of remote backups +remote_files=$(rclone ls remote:${BUCKET}/${BUCKETPATH} | grep 7z | awk '{print $2}' | rev | cut -d. -f2- | rev) +# Pull remote md5 sums for each remote backup into `tmp_md5` directory +mkdir -p ${ZIP_DIR}/${ZIP_BASE}/tmp_md5 && cd $_ +for file in $remote_files; do + rclone md5sum remote:${BUCKET}/${BUCKETPATH}/$file.7z | awk '{print $1}' > ${ZIP_DIR}/${ZIP_BASE}/tmp_md5/$file.md5 +done + +# Create empty list of files to upload +uploads="" + +# Create md5 sums for local backups, if they don't exist +cd ${ZIP_DIR}/${ZIP_BASE} +for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do + # Get the base name of the file without extension + base_name=$(basename "$file" .7z) + # If a local .md5 file does not exist, create it + if [ ! -f "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" ]; then + echo "Local md5 file does not exist for $file, generating, and adding $file to uploads list" + uploads="$uploads $file" + local_md5=$(md5sum "$file" | awk '{print $1}') + echo $local_md5 > "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" + fi +done + + +# Verify & update list of files to upload +cd ${ZIP_DIR}/${ZIP_BASE} +for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do + # Get the base name of the file without extension + base_name=$(basename "$file" .7z) + # Check if the remote md5 file exists + if [ ! -f "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5" ]; then + # If the remote md5 file does not exist, add the file to the uploads list + echo "Remote does not exist for $file, adding $file to uploads list" + uploads="$uploads $file" + else + # Compare local and remote md5 + remote_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5") + local_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5") + if [ "$local_md5" != "$remote_md5" ]; then + echo "MD5 mismatch for file $file, adding to uploads list" + uploads="$uploads $file" + fi + fi + echo "Uploads: $uploads" +done + echo "Zipping ${DUMP_BASE}/${DUMP_DIR} to ${ZIP_DIR}/${ZIP_NAME}" @@ -50,10 +91,11 @@ for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}" - /usr/bin/7za a -p${SECRET} ${ZIP_NAME} -mx=0 -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } + /usr/bin/7za a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } +# Add to list done -## Sync All Resulting Files +## Sync All Resulting Files (in list!) cd ${ZIP_DIR} for file in ${ZIP_DIR}/*; do echo "RClone-ing ${file} to GCP ${GCP_DEST}" @@ -62,4 +104,35 @@ done ## Create a block that, upon success of rclone above, delete _only_ files that were uploaded ## For each $FILE.7z in $ZIP_DIR, do a "rm -rf $DUMP_BASE/$FILE" to remove the original dump -#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } \ No newline at end of file +#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } + + +## -- Cruft -- +#cd ${ZIP_DIR}/${ZIP_BASE} +#uploads="" +#for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do +# # Get the base name of the file without extension +# base_name=$(basename "$file" .7z) +# # Check if the remote md5 file exists +# if [ ! -f "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5" ]; then +# # If the remote md5 file does not exist, add the file to the uploads list +# uploads="$uploads $file" +# else +# # Compare local and remote md5 +# remote_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5") +# local_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5") +# +# if [ "$local_md5" != "$remote_md5" ]; then +# echo "MD5 mismatch for file $file" +# fi +#done + +# Loop over all .7z files in ZIP_DIR +#for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do +# # Get the base name of the file without extension +# base_name=$(basename "$file" .7z) +# # If the corresponding .md5 file does not exist, create it +# if [ ! -f "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" ]; then +# md5sum "$file" | awk '{print $1}' > "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" +# fi +#done \ No newline at end of file From aa925d033f453e98f6e5eda85f81aed3749fe3f4 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 2 Apr 2024 20:16:57 -0700 Subject: [PATCH 03/17] Fixing curl install --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d548cd3..d2f54d2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,9 +14,11 @@ COPY app/ /app/ # Final stage FROM alpine:latest +RUN apk update && \ + apk add --no-cache curl + # Copy necessary binaries and files from builder stage COPY --from=builder /usr/bin/7z /usr/bin/7z -COPY --from=builder /usr/bin/curl /usr/bin/curl COPY --from=builder /usr/bin/rclone /usr/bin/rclone COPY --from=builder /root/.config/rclone/rclone.conf /root/.config/rclone/rclone.conf COPY --from=builder /app/ /app/ From 916b1d8b72cbcc3142d10df31e552cfabd19a3b0 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 2 Apr 2024 20:25:44 -0700 Subject: [PATCH 04/17] Testing with curl --- .github/workflows/manual-build.yml | 2 +- app/README.md | 22 +++++++++++++--------- app/zip2cloud | 7 ++++++- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml index 944f903..0696a50 100644 --- a/.github/workflows/manual-build.yml +++ b/.github/workflows/manual-build.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: jobs: build-push: - uses: kbase/.github/.github/workflows/reusable_build-push.yml@main + uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop with: name: '${{ github.event.repository.name }}-develop' tags: br-${{ github.ref_name }} diff --git a/app/README.md b/app/README.md index 6d03cae..4002b0f 100644 --- a/app/README.md +++ b/app/README.md @@ -33,12 +33,16 @@ A robust zip & upload utility for sending archives to a remote location. ### Variables -- `DUMP_BASE` - The base directory for backup dumps (default `/dump`) -- `DUMP_RETENTION` - The number of days to keep uncompressed backups locally -- `REMOTE` - The remote location to sync backups to -- `SECRET` - The encryption key for 7zip -- `SLACK_CHANNEL` - The slack channel to send notifications to -- `SLACK_WEBHOOK` - The webhook URL for slack notifications -- `ZIP_BASE` - The base filename, minus date, for the compressed backups -- `ZIP_DIR` - The directory to store all compressed backups (default `/zip`) -- `ZIP_RETENTION` - The number of days to keep compressed backups locally +| Variable | Description | Default | +|-----------------|---------------------------------------------------|---------| +| `BUCKET` | The bucket to store the backups | | +| `BUCKET_PATH` | The path within the bucket to store the backups | | +| `DUMP_BASE` | The base directory for backup dumps | `/dump` | +| `DUMP_RETENTION`| The number of days to keep uncompressed backups locally | | +| `REMOTE` | The remote location to sync backups to | | +| `SECRET` | The encryption key for 7zip | | +| `SLACK_CHANNEL` | The slack channel to send notifications to | | +| `SLACK_WEBHOOK` | The webhook URL for slack notifications | | +| `ZIP_BASE` | The base filename, minus date, for the compressed backups | | +| `ZIP_DIR` | The directory to store all compressed backups | `/zip` | +| `ZIP_RETENTION` | The number of days to keep compressed backups locally | | \ No newline at end of file diff --git a/app/zip2cloud b/app/zip2cloud index a74419d..119f5b8 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -30,9 +30,14 @@ ZIP_RETENTION=4 #echo "Deleting database archives older than 30 days" #/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; -# Delete all old backups, except the last #, as defined by $ZIP_RETENTION +# Delete all old zip files, except the last N+1, as defined by $ZIP_RETENTION ls -t ${ZIP_DIR}/${ZIP_BASE}*.{7z,md5} | tail -n +$((${ZIP_RETENTION} + 1)) | xargs rm -f +# Delete all old backup dumps, except the last N+1, as defined by $DUMP_RETENTION +find ${DUMP_BASE} -type d -regextype posix-extended -regex ".*/[0-9]{4}-[0-9]{2}-[0-9]{2}$" -print0 | xargs -0 ls -td | tail -n +$((${DUMP_RETENTION} + 1)) | xargs -I {} rm -rf {} +# ls -t ${DUMP_BASE}/* | tail -n +$((${DUMP_RETENTION} + 1)) | xargs rm -f + + # Get list of remote backups remote_files=$(rclone ls remote:${BUCKET}/${BUCKETPATH} | grep 7z | awk '{print $2}' | rev | cut -d. -f2- | rev) # Pull remote md5 sums for each remote backup into `tmp_md5` directory From 00298b7f8eebd001fe63c6b8412eb7717b1e9a3b Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 2 Apr 2024 20:53:15 -0700 Subject: [PATCH 05/17] Fixing 7zip install --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index d2f54d2..d81cfa9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ COPY app/ /app/ FROM alpine:latest RUN apk update && \ - apk add --no-cache curl + apk add --no-cache curl p7zip # Copy necessary binaries and files from builder stage COPY --from=builder /usr/bin/7z /usr/bin/7z From 6fbd7a26cce7b5254dbe82e2fce681e6327a1bb6 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 2 Apr 2024 23:32:41 -0700 Subject: [PATCH 06/17] Rough alpha flow --- Dockerfile | 1 - app/zip2cloud | 31 ++++++++++++++++--------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Dockerfile b/Dockerfile index d81cfa9..8a11b48 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,6 @@ RUN apk update && \ apk add --no-cache curl p7zip # Copy necessary binaries and files from builder stage -COPY --from=builder /usr/bin/7z /usr/bin/7z COPY --from=builder /usr/bin/rclone /usr/bin/rclone COPY --from=builder /root/.config/rclone/rclone.conf /root/.config/rclone/rclone.conf COPY --from=builder /app/ /app/ diff --git a/app/zip2cloud b/app/zip2cloud index 119f5b8..57238bb 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -9,16 +9,17 @@ # 5/21/2021 ## Variables -COMPRESSION_LEVEL=0 # Set to 0 if the db dumps are already compressed -DUMP_BASE=/dump/full_backup -DUMP_RETENTION=3 -REMOTE=remote:${BUCKET}/${BUCKETPATH} -SECRET=`cat /run/secrets/encryption_key` -SLACK_CHANNEL='' -SLACK_WEBHOOK='' -ZIP_BASE=backup_full -ZIP_DIR=/zip -ZIP_RETENTION=4 +COMPRESSION_LEVEL=${COMPRESSION_LEVEL:-0} # Set to 0 if the db dumps are already compressed +DELETE_DUMP=${DELETE_DUMP:-0} +DUMP_BASE=${DUMP_BASE:-/dump/full_backup} +DUMP_RETENTION=${DUMP_RETENTION:-3} +REMOTE=${REMOTE:-remote:${BUCKET}/${BUCKETPATH}} +SECRET=${SECRET:-`cat /run/secrets/encryption_key`} +SLACK_CHANNEL=${SLACK_CHANNEL:-''} +SLACK_WEBHOOK=${SLACK_WEBHOOK:-''} +ZIP_BASE=${ZIP_BASE:-backup_full} +ZIP_DIR=${ZIP_DIR:-/zip} +ZIP_RETENTION=${ZIP_RETENTION:-4} [ -r /run/secrets/encryption_key ] || { echo "Encryption key not readable in /run/secrets/encryption_key" ; exit 1; } [ -r /run/secrets/gcp_backup_creds ] || { echo "Google cloud service credentials not found in /run/secrets/gcp_back_creds" ; exit 1; } @@ -31,11 +32,11 @@ ZIP_RETENTION=4 #/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; # Delete all old zip files, except the last N+1, as defined by $ZIP_RETENTION -ls -t ${ZIP_DIR}/${ZIP_BASE}*.{7z,md5} | tail -n +$((${ZIP_RETENTION} + 1)) | xargs rm -f +ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +$((${ZIP_RETENTION} + 1)) | xargs -r rm -f +ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +$((${ZIP_RETENTION} + 1)) | xargs -r rm -f # Delete all old backup dumps, except the last N+1, as defined by $DUMP_RETENTION -find ${DUMP_BASE} -type d -regextype posix-extended -regex ".*/[0-9]{4}-[0-9]{2}-[0-9]{2}$" -print0 | xargs -0 ls -td | tail -n +$((${DUMP_RETENTION} + 1)) | xargs -I {} rm -rf {} -# ls -t ${DUMP_BASE}/* | tail -n +$((${DUMP_RETENTION} + 1)) | xargs rm -f +find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | head -n -$((${DUMP_RETENTION})) | xargs -I {} rm -rf {}# ls -t ${DUMP_BASE}/* | tail -n +$((${DUMP_RETENTION} + 1)) | xargs rm -f # Get list of remote backups @@ -97,12 +98,12 @@ for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}" /usr/bin/7za a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } -# Add to list + uploads="$uploads ${ZIP_NAME}.7z" done ## Sync All Resulting Files (in list!) cd ${ZIP_DIR} -for file in ${ZIP_DIR}/*; do +for file in ${uploads}; do echo "RClone-ing ${file} to GCP ${GCP_DEST}" /bin/rclone sync -v "$file" ${REMOTE}/ done From 92cee8bcd32d2144f3c0b1ac874b83d5e8f3878e Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Tue, 2 Apr 2024 23:54:18 -0700 Subject: [PATCH 07/17] Rough alpha flow --- app/zip2cloud | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/app/zip2cloud b/app/zip2cloud index 57238bb..5eee4d9 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -36,8 +36,7 @@ ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +$((${ZIP_RETENTION} + 1)) | xargs - ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +$((${ZIP_RETENTION} + 1)) | xargs -r rm -f # Delete all old backup dumps, except the last N+1, as defined by $DUMP_RETENTION -find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | head -n -$((${DUMP_RETENTION})) | xargs -I {} rm -rf {}# ls -t ${DUMP_BASE}/* | tail -n +$((${DUMP_RETENTION} + 1)) | xargs rm -f - +find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | tail -n +$((${DUMP_RETENTION} + 1)) | xargs -I {} rm -rf {} # Get list of remote backups remote_files=$(rclone ls remote:${BUCKET}/${BUCKETPATH} | grep 7z | awk '{print $2}' | rev | cut -d. -f2- | rev) @@ -97,7 +96,7 @@ for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}" - /usr/bin/7za a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } + /usr/bin/7z a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } uploads="$uploads ${ZIP_NAME}.7z" done From d863b864011e45d96098cdc8a014c63a55074a8d Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Thu, 4 Apr 2024 19:25:35 -0700 Subject: [PATCH 08/17] Testing multi-target build --- .github/workflows/manual-build.yml | 8 +- app/zip2cloud | 152 ++++++++++++++++------------- 2 files changed, 92 insertions(+), 68 deletions(-) diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml index 0696a50..22964a0 100644 --- a/.github/workflows/manual-build.yml +++ b/.github/workflows/manual-build.yml @@ -1,11 +1,17 @@ --- name: Manual Build & Push on: - workflow_dispatch: + workflow_dispatch: + inputs: + platforms: + description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.' + required: false + default: 'linux/amd64,linux/arm64,linux/riscv64,linux/ppc64le,linux/s390x' jobs: build-push: uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop with: name: '${{ github.event.repository.name }}-develop' tags: br-${{ github.ref_name }} + platforms: ${{ github.event.inputs.platforms }} secrets: inherit diff --git a/app/zip2cloud b/app/zip2cloud index 5eee4d9..23c8aec 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -1,16 +1,8 @@ #!/bin/sh -# Script to compress and encrypt mongodb backup directories and then sync them against a -# cloud S3 bucket -# -# Depends on 7zip and rclone -# -# sychan@lbl.gov -# 5/21/2021 - ## Variables COMPRESSION_LEVEL=${COMPRESSION_LEVEL:-0} # Set to 0 if the db dumps are already compressed -DELETE_DUMP=${DELETE_DUMP:-0} +DELETE_DUMP=${DELETE_DUMP:-''} DUMP_BASE=${DUMP_BASE:-/dump/full_backup} DUMP_RETENTION=${DUMP_RETENTION:-3} REMOTE=${REMOTE:-remote:${BUCKET}/${BUCKETPATH}} @@ -32,112 +24,138 @@ ZIP_RETENTION=${ZIP_RETENTION:-4} #/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; # Delete all old zip files, except the last N+1, as defined by $ZIP_RETENTION -ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +$((${ZIP_RETENTION} + 1)) | xargs -r rm -f -ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +$((${ZIP_RETENTION} + 1)) | xargs -r rm -f +ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +$((${ZIP_RETENTION} - 1)) | xargs -r rm -f +ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +$((${ZIP_RETENTION} - 1)) | xargs -r rm -f # Delete all old backup dumps, except the last N+1, as defined by $DUMP_RETENTION -find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | tail -n +$((${DUMP_RETENTION} + 1)) | xargs -I {} rm -rf {} +find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | tail -n +$((${DUMP_RETENTION} - 1)) | xargs -I {} rm -rf {} # Get list of remote backups remote_files=$(rclone ls remote:${BUCKET}/${BUCKETPATH} | grep 7z | awk '{print $2}' | rev | cut -d. -f2- | rev) # Pull remote md5 sums for each remote backup into `tmp_md5` directory -mkdir -p ${ZIP_DIR}/${ZIP_BASE}/tmp_md5 && cd $_ +mkdir -p ${ZIP_DIR}/tmp_md5 && cd $_ for file in $remote_files; do - rclone md5sum remote:${BUCKET}/${BUCKETPATH}/$file.7z | awk '{print $1}' > ${ZIP_DIR}/${ZIP_BASE}/tmp_md5/$file.md5 + rclone md5sum remote:${BUCKET}/${BUCKETPATH}/$file.7z | awk '{print $1}' > ${ZIP_DIR}/tmp_md5/$file.md5 done # Create empty list of files to upload uploads="" +echo "Zipping ${DUMP_BASE}/${DUMP_DIR} to ${ZIP_DIR}/${ZIP_NAME}" + +# Get all directories in DUMP_BASE +for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do + # Remove trailing slash and get the base name of the directory + DIR_NAME=$(basename ${DUMP_DIR%/}) + ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} + echo $DIR_NAME + + # Check if the corresponding md5 file exists + if [ -f "${ZIP_DIR}/tmp_md5/${ZIP_BASE}_${DIR_NAME}.md5" ]; then + echo "MD5 file exists for ${DIR_NAME}, skipping" + continue + fi + + echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}".7z + /usr/bin/7z a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } +done + # Create md5 sums for local backups, if they don't exist -cd ${ZIP_DIR}/${ZIP_BASE} -for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do +cd ${ZIP_DIR} +for file in ${ZIP_DIR}/*.7z; do # Get the base name of the file without extension base_name=$(basename "$file" .7z) + echo $base_name # If a local .md5 file does not exist, create it - if [ ! -f "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" ]; then + if [ ! -f "${ZIP_DIR}/${base_name}.md5" ]; then echo "Local md5 file does not exist for $file, generating, and adding $file to uploads list" uploads="$uploads $file" local_md5=$(md5sum "$file" | awk '{print $1}') - echo $local_md5 > "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" + echo $local_md5 > "${ZIP_DIR}/${base_name}.md5" fi done - # Verify & update list of files to upload cd ${ZIP_DIR}/${ZIP_BASE} -for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do +for file in ${ZIP_DIR}/*.7z; do # Get the base name of the file without extension base_name=$(basename "$file" .7z) # Check if the remote md5 file exists - if [ ! -f "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5" ]; then + if [ ! -f "${ZIP_DIR}/tmp_md5/${base_name}.md5" ]; then # If the remote md5 file does not exist, add the file to the uploads list echo "Remote does not exist for $file, adding $file to uploads list" uploads="$uploads $file" else # Compare local and remote md5 - remote_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5") - local_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5") + remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") + local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") if [ "$local_md5" != "$remote_md5" ]; then - echo "MD5 mismatch for file $file, adding to uploads list" - uploads="$uploads $file" + echo "MD5 mismatch for file $file" + # Extract the last character of the base name + last_char=${base_name: -1} + # Check if the last character is a letter + if [[ $last_char =~ [a-z] ]]; then + # If it's a letter, increment it + next_char=$(echo "$last_char" | tr "a-y" "b-z") + new_base_name=${base_name%?}$next_char + else + # If it's not a letter, append 'a' + new_base_name=${base_name}a + fi + # Rename the file + mv "$file" "${ZIP_DIR}/${new_base_name}.7z" + # Add the renamed file to the uploads list + uploads="$uploads ${ZIP_DIR}/${new_base_name}.7z" fi fi echo "Uploads: $uploads" done - -echo "Zipping ${DUMP_BASE}/${DUMP_DIR} to ${ZIP_DIR}/${ZIP_NAME}" - -# Get all directories in DUMP_BASE -for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do - # Remove trailing slash and get the base name of the directory - DIR_NAME=$(basename ${DUMP_DIR%/}) - ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} - - echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}" - /usr/bin/7z a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } - uploads="$uploads ${ZIP_NAME}.7z" -done - -## Sync All Resulting Files (in list!) -cd ${ZIP_DIR} -for file in ${uploads}; do - echo "RClone-ing ${file} to GCP ${GCP_DEST}" - /bin/rclone sync -v "$file" ${REMOTE}/ -done - -## Create a block that, upon success of rclone above, delete _only_ files that were uploaded -## For each $FILE.7z in $ZIP_DIR, do a "rm -rf $DUMP_BASE/$FILE" to remove the original dump -#[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } - - -## -- Cruft -- +## Verify & update list of files to upload #cd ${ZIP_DIR}/${ZIP_BASE} -#uploads="" #for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do # # Get the base name of the file without extension # base_name=$(basename "$file" .7z) # # Check if the remote md5 file exists # if [ ! -f "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5" ]; then # # If the remote md5 file does not exist, add the file to the uploads list +# echo "Remote does not exist for $file, adding $file to uploads list" # uploads="$uploads $file" # else -# # Compare local and remote md5 -# remote_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5") -# local_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5") -# -# if [ "$local_md5" != "$remote_md5" ]; then -# echo "MD5 mismatch for file $file" +# # Compare local and remote md5 +# remote_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5") +# local_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5") +# if [ "$local_md5" != "$remote_md5" ]; then +# echo "MD5 mismatch for file $file, adding to uploads list" +# uploads="$uploads $file" +# fi # fi +# echo "Uploads: $uploads" #done -# Loop over all .7z files in ZIP_DIR -#for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do -# # Get the base name of the file without extension -# base_name=$(basename "$file" .7z) -# # If the corresponding .md5 file does not exist, create it -# if [ ! -f "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" ]; then -# md5sum "$file" | awk '{print $1}' > "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5" + +# Before running rclone +for file in "${uploads[@]}"; do + ls $file + if [ ! -f "$file" ]; then + echo "File does not exist: $file" + fi +done + +## Before running rclone +#for file in ${uploads}; do +# if [ ! -f "$file" ]; then +# echo "File does not exist: $file" # fi -#done \ No newline at end of file +#done + +## Sync All Resulting Files (in list!) +#cd ${ZIP_DIR} +#for file in "${uploads[@]}"; do +# echo "RClone-ing ${file} to GCP ${REMOTE}" +# /usr/bin/rclone sync -v "$file" ${REMOTE}/ +#done +#for file in ${uploads}; do +# echo "RClone-ing ${file} to GCP ${GCP_DEST}" +# /usr/bin/rclone sync -v "$file" ${REMOTE}/ +#done From be40ac92ea4e04cecac9bc104068394add11c64e Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Thu, 4 Apr 2024 19:28:12 -0700 Subject: [PATCH 09/17] Testing multi-target build --- .github/workflows/manual-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml index 22964a0..1de5ebb 100644 --- a/.github/workflows/manual-build.yml +++ b/.github/workflows/manual-build.yml @@ -6,7 +6,7 @@ on: platforms: description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.' required: false - default: 'linux/amd64,linux/arm64,linux/riscv64,linux/ppc64le,linux/s390x' + default: 'linux/amd64,linux/arm64,linux/riscv64' jobs: build-push: uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop From f630bbe88b4a83659c9acfa353da0510a758630f Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Thu, 4 Apr 2024 19:31:26 -0700 Subject: [PATCH 10/17] Testing multi-target build --- .github/workflows/manual-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml index 1de5ebb..d8e9abb 100644 --- a/.github/workflows/manual-build.yml +++ b/.github/workflows/manual-build.yml @@ -6,7 +6,7 @@ on: platforms: description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.' required: false - default: 'linux/amd64,linux/arm64,linux/riscv64' + default: 'linux/amd64,linux/arm64/v8,linux/riscv64' jobs: build-push: uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop From dced65b467e31845441e7386c8b8b97fa58efe1a Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Thu, 4 Apr 2024 19:48:15 -0700 Subject: [PATCH 11/17] Testing multi-target build --- .github/workflows/manual-build.yml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml index d8e9abb..361a795 100644 --- a/.github/workflows/manual-build.yml +++ b/.github/workflows/manual-build.yml @@ -1,3 +1,20 @@ +#--- +#name: Manual Build & Push +#on: +# workflow_dispatch: +# inputs: +# platforms: +# description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.' +# required: false +# default: 'linux/amd64,linux/arm64/v8,linux/riscv64' +#jobs: +# build-push: +# uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop +# with: +# name: '${{ github.event.repository.name }}-develop' +# tags: br-${{ github.ref_name }} +# platforms: ${{ github.event.inputs.platforms }} +# secrets: inherit --- name: Manual Build & Push on: @@ -14,4 +31,4 @@ jobs: name: '${{ github.event.repository.name }}-develop' tags: br-${{ github.ref_name }} platforms: ${{ github.event.inputs.platforms }} - secrets: inherit + secrets: inherit \ No newline at end of file From dc8f23a07570bb6a57767b36e7c40019c6e85e25 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Thu, 4 Apr 2024 19:50:27 -0700 Subject: [PATCH 12/17] Testing multi-target build --- .github/workflows/manual-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/manual-build.yml b/.github/workflows/manual-build.yml index 361a795..24125d7 100644 --- a/.github/workflows/manual-build.yml +++ b/.github/workflows/manual-build.yml @@ -23,7 +23,7 @@ on: platforms: description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.' required: false - default: 'linux/amd64,linux/arm64/v8,linux/riscv64' + default: 'linux/amd64,linux/arm64/v8' jobs: build-push: uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop From ad0aa9811f794e0550820a1c545539ee8f9890e2 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Mon, 8 Apr 2024 19:12:37 -0700 Subject: [PATCH 13/17] Cleanup & test. --- app/zip2cloud | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/zip2cloud b/app/zip2cloud index 23c8aec..79eb4a7 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -76,7 +76,7 @@ for file in ${ZIP_DIR}/*.7z; do done # Verify & update list of files to upload -cd ${ZIP_DIR}/${ZIP_BASE} +cd ${ZIP_DIR}/ for file in ${ZIP_DIR}/*.7z; do # Get the base name of the file without extension base_name=$(basename "$file" .7z) @@ -135,7 +135,8 @@ done # Before running rclone -for file in "${uploads[@]}"; do +#for file in "${uploads[@]}"; do +for file in '${uploads}'; do ls $file if [ ! -f "$file" ]; then echo "File does not exist: $file" From 274bcf8cca86c06b1a3d11f886c4708805d0a410 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Mon, 8 Apr 2024 19:34:59 -0700 Subject: [PATCH 14/17] Adding with working file add & rclone loops --- app/zip2cloud | 45 +++++++-------------------------------------- 1 file changed, 7 insertions(+), 38 deletions(-) diff --git a/app/zip2cloud b/app/zip2cloud index 79eb4a7..feff7fa 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -111,52 +111,21 @@ for file in ${ZIP_DIR}/*.7z; do echo "Uploads: $uploads" done -## Verify & update list of files to upload -#cd ${ZIP_DIR}/${ZIP_BASE} -#for file in ${ZIP_DIR}/${ZIP_BASE}/*.7z; do -# # Get the base name of the file without extension -# base_name=$(basename "$file" .7z) -# # Check if the remote md5 file exists -# if [ ! -f "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5" ]; then -# # If the remote md5 file does not exist, add the file to the uploads list -# echo "Remote does not exist for $file, adding $file to uploads list" -# uploads="$uploads $file" -# else -# # Compare local and remote md5 -# remote_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/tmp_md5/${base_name}.md5") -# local_md5=$(cat "${ZIP_DIR}/${ZIP_BASE}/${base_name}.md5") -# if [ "$local_md5" != "$remote_md5" ]; then -# echo "MD5 mismatch for file $file, adding to uploads list" -# uploads="$uploads $file" -# fi -# fi -# echo "Uploads: $uploads" -#done - # Before running rclone #for file in "${uploads[@]}"; do -for file in '${uploads}'; do +for file in ${uploads}; do ls $file if [ ! -f "$file" ]; then echo "File does not exist: $file" fi done -## Before running rclone -#for file in ${uploads}; do -# if [ ! -f "$file" ]; then -# echo "File does not exist: $file" -# fi -#done + ## Sync All Resulting Files (in list!) -#cd ${ZIP_DIR} -#for file in "${uploads[@]}"; do -# echo "RClone-ing ${file} to GCP ${REMOTE}" -# /usr/bin/rclone sync -v "$file" ${REMOTE}/ -#done -#for file in ${uploads}; do -# echo "RClone-ing ${file} to GCP ${GCP_DEST}" -# /usr/bin/rclone sync -v "$file" ${REMOTE}/ -#done +cd ${ZIP_DIR} +for file in ${uploads}; do + echo "RClone-ing ${file} to GCP ${REMOTE}" + /usr/bin/rclone sync -v "$file" ${REMOTE}/ +done From 41fc18c37a3cb6695d3c74e1bb4052cb512880aa Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Mon, 8 Apr 2024 21:49:22 -0700 Subject: [PATCH 15/17] Fixing rm cleanup vars --- app/zip2cloud | 73 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 24 deletions(-) diff --git a/app/zip2cloud b/app/zip2cloud index feff7fa..0adfc8d 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -24,11 +24,12 @@ ZIP_RETENTION=${ZIP_RETENTION:-4} #/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; # Delete all old zip files, except the last N+1, as defined by $ZIP_RETENTION -ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +$((${ZIP_RETENTION} - 1)) | xargs -r rm -f -ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +$((${ZIP_RETENTION} - 1)) | xargs -r rm -f +rm -rf ${ZIP_DIR}/tmp_md5 +ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +${ZIP_RETENTION} | xargs -r rm -f +ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +${ZIP_RETENTION} | xargs -r rm -f # Delete all old backup dumps, except the last N+1, as defined by $DUMP_RETENTION -find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | tail -n +$((${DUMP_RETENTION} - 1)) | xargs -I {} rm -rf {} +find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | tail -n +${DUMP_RETENTION} | xargs -I {} rm -rf {} # Get list of remote backups remote_files=$(rclone ls remote:${BUCKET}/${BUCKETPATH} | grep 7z | awk '{print $2}' | rev | cut -d. -f2- | rev) @@ -86,27 +87,51 @@ for file in ${ZIP_DIR}/*.7z; do echo "Remote does not exist for $file, adding $file to uploads list" uploads="$uploads $file" else - # Compare local and remote md5 - remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") - local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") - if [ "$local_md5" != "$remote_md5" ]; then - echo "MD5 mismatch for file $file" - # Extract the last character of the base name - last_char=${base_name: -1} - # Check if the last character is a letter - if [[ $last_char =~ [a-z] ]]; then - # If it's a letter, increment it - next_char=$(echo "$last_char" | tr "a-y" "b-z") - new_base_name=${base_name%?}$next_char - else - # If it's not a letter, append 'a' - new_base_name=${base_name}a - fi - # Rename the file - mv "$file" "${ZIP_DIR}/${new_base_name}.7z" - # Add the renamed file to the uploads list - uploads="$uploads ${ZIP_DIR}/${new_base_name}.7z" - fi + # Compare local and remote md5 + remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") + local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") + if [ "$local_md5" != "$remote_md5" ]; then + echo "MD5 mismatch for file $file" + # Extract the last character of the base name + last_char=${base_name: -1} + # Check if the last character is a letter + if [[ $last_char =~ [a-y] ]]; then + # If it's a letter, increment it + next_char=$(echo "$last_char" | tr "a-y" "b-z") + new_base_name=${base_name%?}$next_char + elif [[ $last_char == 'z' ]]; then + # If it's 'z', replace it with 'a' and append 'a' + new_base_name=${base_name%?}aa + else + # If it's not a letter, append 'a' + new_base_name=${base_name}a + fi + # Rename the file + mv "$file" "${ZIP_DIR}/${new_base_name}.7z" + # Add the renamed file to the uploads list + uploads="$uploads ${ZIP_DIR}/${new_base_name}.7z" + fi +# # Compare local and remote md5 +# remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") +# local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") +# if [ "$local_md5" != "$remote_md5" ]; then +# echo "MD5 mismatch for file $file" +# # Extract the last character of the base name +# last_char=${base_name: -1} +# # Check if the last character is a letter +# if [[ $last_char =~ [a-z] ]]; then +# # If it's a letter, increment it +# next_char=$(echo "$last_char" | tr "a-y" "b-z") +# new_base_name=${base_name%?}$next_char +# else +# # If it's not a letter, append 'a' +# new_base_name=${base_name}a +# fi +# # Rename the file +# mv "$file" "${ZIP_DIR}/${new_base_name}.7z" +# # Add the renamed file to the uploads list +# uploads="$uploads ${ZIP_DIR}/${new_base_name}.7z" +# fi fi echo "Uploads: $uploads" done From ef6b6c4ccb19a09fdd2fe5800fdda6b081d06b87 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Mon, 8 Apr 2024 23:15:27 -0700 Subject: [PATCH 16/17] Adding working test candidate --- app/zip2cloud | 111 ++++++++++++++++++++++---------------------------- 1 file changed, 48 insertions(+), 63 deletions(-) diff --git a/app/zip2cloud b/app/zip2cloud index 0adfc8d..5a03425 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -13,24 +13,26 @@ ZIP_BASE=${ZIP_BASE:-backup_full} ZIP_DIR=${ZIP_DIR:-/zip} ZIP_RETENTION=${ZIP_RETENTION:-4} +### Cleanup + [ -r /run/secrets/encryption_key ] || { echo "Encryption key not readable in /run/secrets/encryption_key" ; exit 1; } [ -r /run/secrets/gcp_backup_creds ] || { echo "Google cloud service credentials not found in /run/secrets/gcp_back_creds" ; exit 1; } [ -z "${BUCKET}" ] && { echo "S3 bucketname not set in BUCKET environment variable" ; exit 1; } [ -z "${BUCKETPATH}" ] && { echo "Path within S3 bucket not set in BUCKETPATH environment variable" ; exit 1; } [ -z "${DELETE_DUMP}" ] || echo "DELETE_DUMP set, will delete files/directories under /dump/ when done compressing" -# Delete any files older than 30 days in the zip directory -#echo "Deleting database archives older than 30 days" -#/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; -# Delete all old zip files, except the last N+1, as defined by $ZIP_RETENTION +# Delete all old zip files, except the last N, as defined by $ZIP_RETENTION rm -rf ${ZIP_DIR}/tmp_md5 ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +${ZIP_RETENTION} | xargs -r rm -f ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +${ZIP_RETENTION} | xargs -r rm -f -# Delete all old backup dumps, except the last N+1, as defined by $DUMP_RETENTION +# Delete all old backup dumps, except the last N, as defined by $DUMP_RETENTION find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | tail -n +${DUMP_RETENTION} | xargs -I {} rm -rf {} +### End Cleanup + + # Get list of remote backups remote_files=$(rclone ls remote:${BUCKET}/${BUCKETPATH} | grep 7z | awk '{print $2}' | rev | cut -d. -f2- | rev) # Pull remote md5 sums for each remote backup into `tmp_md5` directory @@ -39,59 +41,63 @@ for file in $remote_files; do rclone md5sum remote:${BUCKET}/${BUCKETPATH}/$file.7z | awk '{print $1}' > ${ZIP_DIR}/tmp_md5/$file.md5 done -# Create empty list of files to upload -uploads="" - -echo "Zipping ${DUMP_BASE}/${DUMP_DIR} to ${ZIP_DIR}/${ZIP_NAME}" -# Get all directories in DUMP_BASE +# Get all exports from DUMP_BASE for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do # Remove trailing slash and get the base name of the directory DIR_NAME=$(basename ${DUMP_DIR%/}) ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} echo $DIR_NAME - # Check if the corresponding md5 file exists - if [ -f "${ZIP_DIR}/tmp_md5/${ZIP_BASE}_${DIR_NAME}.md5" ]; then - echo "MD5 file exists for ${DIR_NAME}, skipping" - continue + # Check if the corresponding md5 file exists, if not, zip it + if [ ! -f "${ZIP_DIR}/tmp_md5/${ZIP_BASE}_${DIR_NAME}.md5" ]; then + echo "No remote exists for ${DIR_NAME}, zipping" + /usr/bin/7z a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } fi - - echo "Zipping ${DUMP_DIR} to ${ZIP_NAME}".7z - /usr/bin/7z a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } done -# Create md5 sums for local backups, if they don't exist -cd ${ZIP_DIR} +# Compare checksums of local 7z files against all remotes' md5's. Add to upload list if not found +uploads="" +cd ${ZIP_DIR} || exit for file in ${ZIP_DIR}/*.7z; do # Get the base name of the file without extension base_name=$(basename "$file" .7z) - echo $base_name - # If a local .md5 file does not exist, create it - if [ ! -f "${ZIP_DIR}/${base_name}.md5" ]; then - echo "Local md5 file does not exist for $file, generating, and adding $file to uploads list" + local_md5=$(md5sum "$file" | awk '{print $1}') + echo $local_md5 > "${ZIP_DIR}/${base_name}.md5" + # Now compare this file with the remote md5s + match_found=0 + for remote_md5_file in ${ZIP_DIR}/tmp_md5/*.md5; do + remote_md5=$(cat "$remote_md5_file") + if [ "$local_md5" = "$remote_md5" ]; then + match_found=1 + break + fi + done + if [ $match_found -eq 0 ]; then + echo "Adding $file to uploads list" uploads="$uploads $file" - local_md5=$(md5sum "$file" | awk '{print $1}') - echo $local_md5 > "${ZIP_DIR}/${base_name}.md5" fi done -# Verify & update list of files to upload -cd ${ZIP_DIR}/ -for file in ${ZIP_DIR}/*.7z; do +echo "Current uploads candidates are: $uploads" + +## Verify & update list of files to upload +final_uploads="" +cd ${ZIP_DIR} || exit +for file in ${uploads}; do # Get the base name of the file without extension base_name=$(basename "$file" .7z) - # Check if the remote md5 file exists - if [ ! -f "${ZIP_DIR}/tmp_md5/${base_name}.md5" ]; then - # If the remote md5 file does not exist, add the file to the uploads list - echo "Remote does not exist for $file, adding $file to uploads list" - uploads="$uploads $file" - else +# # Check if the remote md5 file exists +# if [ ! -f "${ZIP_DIR}/tmp_md5/${base_name}.md5" ]; then +# # If the remote md5 file does not exist, add the file to the uploads list +# echo "Remote does not exist for $file, adding $file to uploads list" +# final_uploads="$final_uploads $file" +# else # Compare local and remote md5 remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") if [ "$local_md5" != "$remote_md5" ]; then - echo "MD5 mismatch for file $file" + echo "MD5 mismatch for file $file. Incrementing filename and adding to uploads list." # Extract the last character of the base name last_char=${base_name: -1} # Check if the last character is a letter @@ -109,37 +115,16 @@ for file in ${ZIP_DIR}/*.7z; do # Rename the file mv "$file" "${ZIP_DIR}/${new_base_name}.7z" # Add the renamed file to the uploads list - uploads="$uploads ${ZIP_DIR}/${new_base_name}.7z" - fi -# # Compare local and remote md5 -# remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") -# local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") -# if [ "$local_md5" != "$remote_md5" ]; then -# echo "MD5 mismatch for file $file" -# # Extract the last character of the base name -# last_char=${base_name: -1} -# # Check if the last character is a letter -# if [[ $last_char =~ [a-z] ]]; then -# # If it's a letter, increment it -# next_char=$(echo "$last_char" | tr "a-y" "b-z") -# new_base_name=${base_name%?}$next_char -# else -# # If it's not a letter, append 'a' -# new_base_name=${base_name}a -# fi -# # Rename the file -# mv "$file" "${ZIP_DIR}/${new_base_name}.7z" -# # Add the renamed file to the uploads list -# uploads="$uploads ${ZIP_DIR}/${new_base_name}.7z" -# fi - fi - echo "Uploads: $uploads" + final_uploads="$final_uploads ${ZIP_DIR}/${new_base_name}.7z" + fi done +echo "Final uploads: $final_uploads" + # Before running rclone #for file in "${uploads[@]}"; do -for file in ${uploads}; do +for file in ${final_uploads}; do ls $file if [ ! -f "$file" ]; then echo "File does not exist: $file" @@ -149,8 +134,8 @@ done ## Sync All Resulting Files (in list!) -cd ${ZIP_DIR} -for file in ${uploads}; do +cd ${ZIP_DIR} || exit +for file in ${final_uploads}; do echo "RClone-ing ${file} to GCP ${REMOTE}" /usr/bin/rclone sync -v "$file" ${REMOTE}/ done From 0d9933f95cfe04d7a2d4018b9383852c63760923 Mon Sep 17 00:00:00 2001 From: JS Fillman Date: Mon, 8 Apr 2024 23:37:57 -0700 Subject: [PATCH 17/17] Adding working test candidate --- app/zip2cloud | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/app/zip2cloud b/app/zip2cloud index 5a03425..591739f 100755 --- a/app/zip2cloud +++ b/app/zip2cloud @@ -44,15 +44,22 @@ done # Get all exports from DUMP_BASE for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do + # Check if the dump is complete + echo "Checking export for ${DUMP_DIR}" + if [ ! -f "${DUMP_DIR}/dump_complete.txt" ]; then + echo "dump_complete.txt not found in ${DUMP_DIR}, skipping" + continue + fi # Remove trailing slash and get the base name of the directory DIR_NAME=$(basename ${DUMP_DIR%/}) ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} echo $DIR_NAME - # Check if the corresponding md5 file exists, if not, zip it if [ ! -f "${ZIP_DIR}/tmp_md5/${ZIP_BASE}_${DIR_NAME}.md5" ]; then echo "No remote exists for ${DIR_NAME}, zipping" /usr/bin/7z a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } + else + echo "Remote exists for ${DIR_NAME}, skipping" fi done