Skip to content

Commit

Permalink
Merge pull request #27 from kbase/DEVOPS-1770-Tiny
Browse files Browse the repository at this point in the history
Merge working test candidate into ticket's PR branch.
  • Loading branch information
jsfillman authored Apr 9, 2024
2 parents 1373122 + 0d9933f commit d8e203a
Show file tree
Hide file tree
Showing 4 changed files with 215 additions and 50 deletions.
29 changes: 26 additions & 3 deletions .github/workflows/manual-build.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,34 @@
#---
#name: Manual Build & Push
#on:
# workflow_dispatch:
# inputs:
# platforms:
# description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.'
# required: false
# default: 'linux/amd64,linux/arm64/v8,linux/riscv64'
#jobs:
# build-push:
# uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop
# with:
# name: '${{ github.event.repository.name }}-develop'
# tags: br-${{ github.ref_name }}
# platforms: ${{ github.event.inputs.platforms }}
# secrets: inherit
---
# Manual Build & Push: manually-triggered workflow that builds this repo's
# Docker image (for the requested platforms) via the shared kbase reusable
# workflow and pushes it tagged with the current branch name.
name: Manual Build & Push
on:
  workflow_dispatch:
    inputs:
      platforms:
        description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64.'
        required: false
        default: 'linux/amd64,linux/arm64/v8'
jobs:
  build-push:
    uses: kbase/.github/.github/workflows/reusable_build-push.yml@develop
    with:
      name: '${{ github.event.repository.name }}-develop'
      tags: br-${{ github.ref_name }}
      platforms: ${{ github.event.inputs.platforms }}
    # secrets must sit at the job level (sibling of `with:`), not inside it
    secrets: inherit
19 changes: 15 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Builder stage: install rclone (and helpers) and stage the app files.
FROM alpine:latest as builder

# Update and install necessary packages
RUN apk update && \
    apk add --no-cache curl p7zip rclone

# Create config directory
RUN mkdir -p /root/.config/rclone/

# Copy the rclone config and application into the builder image
COPY rclone.conf /root/.config/rclone/rclone.conf
COPY app/ /app/

# Final stage: minimal runtime image; rclone binary is copied from the
# builder rather than installed, keeping only curl + p7zip from apk.
FROM alpine:latest

RUN apk update && \
    apk add --no-cache curl p7zip

# Copy necessary binaries and files from builder stage
COPY --from=builder /usr/bin/rclone /usr/bin/rclone
COPY --from=builder /root/.config/rclone/rclone.conf /root/.config/rclone/rclone.conf
COPY --from=builder /app/ /app/

WORKDIR /app

# Exec form so the process runs as PID 1 and receives signals directly
ENTRYPOINT ["/app/zip2cloud"]
48 changes: 48 additions & 0 deletions app/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@

## Zip2Cloud

A robust zip & upload utility for sending archives to a remote location.

### Features

- Intelligently compares local & remote files with md5 sums
- Only uploads _completed_ archives
- Only deletes local files once they have been successfully uploaded
- Allows keeping an arbitrary amount of zipped & unzipped backups locally for faster restore
- Script only zips & uploads files that are missing from the remote location
- Allows mixing backup files with other data
- Only zips folders under the `$DUMP_BASE` directory with a date-based name e.g. `2024-04-01`
- Notifies on completion or error via Slack

### Operation of `zip2cloud`

- Uses `rclone` to create a list of `.7z` & `.md5` files from the remote location defined with the `REMOTE` environment variable
- For each file in the list

- Compares file names & md5 sums between local & remote locations prior to read/write operations
- Uploads any `.7z` files that are missing from the remote location
- Files with mismatched md5 sums are uploaded with alternate filenames
- Only deletes files locally once they have been successfully uploaded & md5 sums confirmed
- Allows multiple unzipped local backups to remain, without re-zipping & uploading
- This allows for faster restores, as we can avoid downloading the most recent archives

1. Creates 7zip archives of any directories under the `$DUMP_BASE` with a date-based name
- For example, if `$DUMP_BASE` is `/dump/full_backup`, the directory `/dump/full_backup/2024-04-01` will be archived as `${ZIP_BASE}_2024-04-01.7z`
2. Syncs the archives to a remote location using rclone

### Variables

| Variable | Description | Default |
|-----------------|---------------------------------------------------|---------|
| `BUCKET` | The bucket to store the backups | |
| `BUCKET_PATH` | The path within the bucket to store the backups | |
| `DUMP_BASE` | The base directory for backup dumps | `/dump` |
| `DUMP_RETENTION`| The number of days to keep uncompressed backups locally | |
| `REMOTE` | The remote location to sync backups to | |
| `SECRET` | The encryption key for 7zip | |
| `SLACK_CHANNEL` | The slack channel to send notifications to | |
| `SLACK_WEBHOOK` | The webhook URL for slack notifications | |
| `ZIP_BASE` | The base filename, minus date, for the compressed backups | |
| `ZIP_DIR` | The directory to store all compressed backups | `/zip` |
| `ZIP_RETENTION` | The number of days to keep compressed backups locally | |
169 changes: 126 additions & 43 deletions app/zip2cloud
Original file line number Diff line number Diff line change
@@ -1,65 +1,148 @@
#!/bin/bash
# (bash, not sh: the rename logic below uses ${var: -1} and [[ =~ ]])

# Script to compress and encrypt mongodb backup directories and then sync them
# against a cloud S3 bucket.
#
# Depends on 7zip and rclone
#
# [email protected]

## Variables — every setting is overridable from the environment.
COMPRESSION_LEVEL=${COMPRESSION_LEVEL:-0} # Set to 0 if the db dumps are already compressed
DELETE_DUMP=${DELETE_DUMP:-''}            # non-empty => clear /dump/ after compressing
DUMP_BASE=${DUMP_BASE:-/dump/full_backup} # parent dir of date-named dump directories
DUMP_RETENTION=${DUMP_RETENTION:-3}       # number of uncompressed dumps to keep locally
REMOTE=${REMOTE:-remote:${BUCKET}/${BUCKETPATH}} # rclone destination (see rclone.conf)
SECRET=${SECRET:-$(cat /run/secrets/encryption_key)} # 7zip AES256 passphrase
SLACK_CHANNEL=${SLACK_CHANNEL:-''}
SLACK_WEBHOOK=${SLACK_WEBHOOK:-''}
ZIP_BASE=${ZIP_BASE:-backup_full}         # archive filename prefix (date is appended)
ZIP_DIR=${ZIP_DIR:-/zip}                  # where compressed archives are written
ZIP_RETENTION=${ZIP_RETENTION:-4}         # number of local archives to keep

# Sanity checks: refuse to run without credentials and a destination.
[ -r /run/secrets/encryption_key ] || { echo "Encryption key not readable in /run/secrets/encryption_key" ; exit 1; }
[ -r /run/secrets/gcp_backup_creds ] || { echo "Google cloud service credentials not found in /run/secrets/gcp_backup_creds" ; exit 1; }
[ -z "${BUCKET}" ] && { echo "S3 bucketname not set in BUCKET environment variable" ; exit 1; }
[ -z "${BUCKETPATH}" ] && { echo "Path within S3 bucket not set in BUCKETPATH environment variable" ; exit 1; }
[ -z "${DELETE_DUMP}" ] || echo "DELETE_DUMP set, will delete files/directories under /dump/ when done compressing"

### Cleanup

# Drop the cached remote md5 sums from any previous run, then keep only the
# newest local archives and their md5 files.
# NOTE(review): tail -n +N starts printing at line N, so this keeps N-1 files,
# not N — confirm whether ZIP_RETENTION is meant to be inclusive.
rm -rf "${ZIP_DIR}/tmp_md5"
ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +${ZIP_RETENTION} | xargs -r rm -f
ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +${ZIP_RETENTION} | xargs -r rm -f

# Keep only the newest date-named (YYYY-MM-DD) dump directories; same
# tail -n +N caveat as above applies to DUMP_RETENTION.
find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | tail -n +${DUMP_RETENTION} | xargs -I {} rm -rf {}

### End Cleanup

# Cache the md5 sum of every remote .7z archive into ${ZIP_DIR}/tmp_md5/<name>.md5
# so all later comparisons are local and need no further remote round-trips.
remote_files=$(rclone ls remote:${BUCKET}/${BUCKETPATH} | grep 7z | awk '{print $2}' | rev | cut -d. -f2- | rev)
mkdir -p ${ZIP_DIR}/tmp_md5 && cd $_
for file in $remote_files; do
    rclone md5sum remote:${BUCKET}/${BUCKETPATH}/$file.7z | awk '{print $1}' > ${ZIP_DIR}/tmp_md5/$file.md5
done

# Zip every *completed* dump under DUMP_BASE that has no archive on the remote.
for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do
    echo "Checking export for ${DUMP_DIR}"
    # dump_complete.txt is the dump job's "finished" sentinel; never zip a
    # dump that is still being written.
    if [ ! -f "${DUMP_DIR}/dump_complete.txt" ]; then
        echo "dump_complete.txt not found in ${DUMP_DIR}, skipping"
        continue
    fi
    # Remove trailing slash and get the base name of the directory
    DIR_NAME=$(basename ${DUMP_DIR%/})
    ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME}
    echo $DIR_NAME
    # A cached remote md5 file means the remote already has this archive.
    if [ ! -f "${ZIP_DIR}/tmp_md5/${ZIP_BASE}_${DIR_NAME}.md5" ]; then
        echo "No remote exists for ${DIR_NAME}, zipping"
        /usr/bin/7z a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; }
    else
        echo "Remote exists for ${DIR_NAME}, skipping"
    fi
done

# Any local archive whose md5 matches no remote md5 is an upload candidate.
uploads=""
cd ${ZIP_DIR} || exit
for file in ${ZIP_DIR}/*.7z; do
    base_name=$(basename "$file" .7z)
    local_md5=$(md5sum "$file" | awk '{print $1}')
    echo $local_md5 > "${ZIP_DIR}/${base_name}.md5"
    match_found=0
    for remote_md5_file in ${ZIP_DIR}/tmp_md5/*.md5; do
        remote_md5=$(cat "$remote_md5_file")
        if [ "$local_md5" = "$remote_md5" ]; then
            match_found=1
            break
        fi
    done
    if [ $match_found -eq 0 ]; then
        echo "Adding $file to uploads list"
        uploads="$uploads $file"
    fi
done

echo "Current uploads candidates are: $uploads"

## Verify the candidates: upload brand-new archives under their own name;
## rename name-collisions whose content differs so the remote copy is never
## overwritten in place.
final_uploads=""
cd ${ZIP_DIR} || exit
for file in ${uploads}; do
    base_name=$(basename "$file" .7z)
    if [ ! -f "${ZIP_DIR}/tmp_md5/${base_name}.md5" ]; then
        # No remote counterpart at all: upload as-is. (Bug fix: previously a
        # missing remote md5 yielded an empty string, which always
        # "mismatched" and forced a spurious rename on every run.)
        echo "Remote does not exist for $file, adding $file to uploads list"
        final_uploads="$final_uploads $file"
        continue
    fi
    # Same name exists remotely — compare content.
    remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5")
    local_md5=$(cat "${ZIP_DIR}/${base_name}.md5")
    if [ "$local_md5" != "$remote_md5" ]; then
        echo "MD5 mismatch for file $file. Incrementing filename and adding to uploads list."
        # Build a fresh name by "incrementing" the last character:
        # a-y -> next letter, z -> aa, anything else (e.g. a digit) -> append 'a'.
        last_char=${base_name: -1}
        if [[ $last_char =~ [a-y] ]]; then
            next_char=$(echo "$last_char" | tr "a-y" "b-z")
            new_base_name=${base_name%?}$next_char
        elif [[ $last_char == 'z' ]]; then
            new_base_name=${base_name%?}aa
        else
            new_base_name=${base_name}a
        fi
        mv "$file" "${ZIP_DIR}/${new_base_name}.7z"
        final_uploads="$final_uploads ${ZIP_DIR}/${new_base_name}.7z"
    fi
done

echo "Final uploads: $final_uploads"

# Sanity-check that every candidate still exists before handing it to rclone.
for file in ${final_uploads}; do
    if [ ! -f "$file" ]; then
        echo "File does not exist: $file"
    fi
done

## Sync the verified archives to the remote, one at a time.
cd ${ZIP_DIR} || exit
for file in ${final_uploads}; do
    echo "RClone-ing ${file} to GCP ${REMOTE}"
    /usr/bin/rclone sync -v "$file" ${REMOTE}/
done

0 comments on commit d8e203a

Please sign in to comment.