-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #25 from kbase/DEVOPS-1770-GCloudUpdates
Draft: DEVOPS-1770 - GCloud Updates
- Loading branch information
Showing
6 changed files
with
230 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,17 @@ | ||
--- | ||
name: Manual Build & Push | ||
on: | ||
workflow_dispatch: | ||
workflow_dispatch: | ||
inputs: | ||
platforms: | ||
description: 'The platforms for which the Docker image should be built. If not specified, defaults to linux/amd64,linux/arm64/v8.' | ||
required: false | ||
default: 'linux/amd64,linux/arm64/v8' | ||
jobs: | ||
build-push: | ||
uses: kbase/.github/.github/workflows/reusable_build-push.yml@main | ||
uses: kbase/.github/.github/workflows/reusable_build-push.yml@multi-target | ||
with: | ||
name: '${{ github.event.repository.name }}-develop' | ||
tags: br-${{ github.ref_name }} | ||
platforms: ${{ github.event.inputs.platforms }} | ||
secrets: inherit |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,27 @@ | ||
FROM arangodb:3.5.3 | ||
# Builder stage | ||
FROM alpine:latest as builder | ||
|
||
# Build arguments passed into the docker command for image metadata | ||
ARG BUILD_DATE | ||
ARG COMMIT | ||
ARG BRANCH | ||
|
||
# RUN pip install requests docker python-json-logger structlog && \ | ||
RUN apk update && \ | ||
apk add p7zip && \ | ||
cd /tmp && \ | ||
wget https://downloads.rclone.org/rclone-current-linux-amd64.zip && \ | ||
unzip rclone-current-linux-amd64.zip && \ | ||
mv rclone-v*-linux-amd64/rclone /bin/rclone && \ | ||
mkdir -p /root/.config/rclone/ | ||
apk add --no-cache curl p7zip rclone | ||
|
||
# Create config directory | ||
RUN mkdir -p /root/.config/rclone/ | ||
|
||
# Copy necessary files | ||
COPY rclone.conf /root/.config/rclone/rclone.conf | ||
COPY app/ /app/ | ||
|
||
LABEL org.label-schema.build-date=$BUILD_DATE \ | ||
org.label-schema.vcs-url="https://github.com/kbase/db_zip2cloud.git" \ | ||
org.label-schema.vcs-ref=$COMMIT \ | ||
org.label-schema.schema-version="1.0.0-rc1" \ | ||
us.kbase.vcs-branch=$BRANCH \ | ||
maintainer="Steve Chan [email protected]" \ | ||
org.opencontainers.image.source="https://github.com/kbase/db_zip2cloud" | ||
# Final stage | ||
FROM alpine:latest | ||
|
||
WORKDIR /app | ||
RUN apk update && \ | ||
apk add --no-cache curl p7zip | ||
|
||
ENTRYPOINT /app/zip2cloud | ||
# Copy necessary binaries and files from builder stage | ||
COPY --from=builder /usr/bin/rclone /usr/bin/rclone | ||
COPY --from=builder /root/.config/rclone/rclone.conf /root/.config/rclone/rclone.conf | ||
COPY --from=builder /app/ /app/ | ||
|
||
WORKDIR /app | ||
|
||
ENTRYPOINT ["/app/zip2cloud"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
|
||
## Zip2Cloud | ||
|
||
A robust zip & upload utility for sending archives to a remote location. | ||
|
||
### Features | ||
|
||
- Intelligently compares local & remote files with md5 sums | ||
- Only uploads _completed_ archives | ||
- Only deletes local files once they have been successfully uploaded | ||
- Allows keeping an arbitrary number of zipped & unzipped backups locally for faster restore | ||
- Script only zips & uploads files that are missing from the remote location | ||
|
||
[//]: # (- Allows mixing backup files with other data) | ||
|
||
[//]: # ( - Only zips folders under the `$DUMP_BASE` directory with a date-based name e.g. `2024-04-01`) | ||
|
||
[//]: # (- Notifies on completion or error via Slack) | ||
|
||
### Operation of `zip2cloud` | ||
|
||
1. Cleans up old zip files and backup dumps, keeping only the most recent ones as configured by the `ZIP_RETENTION` and `DUMP_RETENTION` environment variables. | ||
2. Retrieves the list of remote backups and their MD5 checksums from the remote S3 bucket. | ||
3. Checks database dumps for completion by looking for a `dump_complete.txt` file in the dump's top-level directory. | ||
4. Compresses new database dumps that do not have a corresponding MD5 file in the remote S3 bucket. | ||
5. Compares the MD5 checksums of local and remote files. | ||
1. If a local file does not have a matching MD5 checksum in the remote S3 bucket, it is added to the upload list. | ||
6. If there's an MD5 mismatch between a local and a remote file, the script increments the filename of the local file and adds it to the upload list. | ||
7. Finally, it syncs all the files in the upload list to the remote S3 bucket using rclone. | ||
|
||
### Variables | ||
|
||
| Variable | Description | Default | | ||
|-----------------|---------------------------------------------------|---------| | ||
| `BUCKET` | The bucket to store the backups | | | ||
| `BUCKETPATH` | The path within the bucket to store the backups | | | ||
| `DUMP_BASE` | The base directory for backup dumps | `/dump/full_backup` | | ||
| `DUMP_RETENTION`| Retention count (not days) for uncompressed backups kept locally; older dumps are deleted | `3` | | ||
| `REMOTE` | The remote location to sync backups to | `remote:${BUCKET}/${BUCKETPATH}` | | ||
| `SECRET` | The encryption key for 7zip | contents of `/run/secrets/encryption_key` | | ||
| `SLACK_CHANNEL` | The slack channel to send notifications to | | | ||
| `SLACK_WEBHOOK` | The webhook URL for slack notifications | | | ||
| `ZIP_BASE` | The base filename, minus date, for the compressed backups | `backup_full` | | ||
| `ZIP_DIR` | The directory to store all compressed backups | `/zip` | | ||
| `ZIP_RETENTION` | Retention count (not days) for compressed backups kept locally; older archives are deleted | `4` | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,48 +1,142 @@ | ||
#!/bin/sh | ||
|
||
# Script to compress and encrypt mongodb backup directories and then sync them against a | ||
# cloud S3 bucket | ||
# | ||
# Depends on 7zip and rclone | ||
# | ||
# [email protected] | ||
# 5/21/2021 | ||
## Variables | ||
COMPRESSION_LEVEL=${COMPRESSION_LEVEL:-0} # Set to 0 if the db dumps are already compressed | ||
DELETE_DUMP=${DELETE_DUMP:-''} | ||
DUMP_BASE=${DUMP_BASE:-/dump/full_backup} | ||
DUMP_RETENTION=${DUMP_RETENTION:-3} | ||
REMOTE=${REMOTE:-remote:${BUCKET}/${BUCKETPATH}} | ||
SECRET=${SECRET:-`cat /run/secrets/encryption_key`} | ||
SLACK_CHANNEL=${SLACK_CHANNEL:-''} | ||
SLACK_WEBHOOK=${SLACK_WEBHOOK:-''} | ||
ZIP_BASE=${ZIP_BASE:-backup_full} | ||
ZIP_DIR=${ZIP_DIR:-/zip} | ||
ZIP_RETENTION=${ZIP_RETENTION:-4} | ||
|
||
# Directory containing db dumps to be archived/compressed/copied | ||
DUMP_BASE=/dump/ | ||
|
||
# Directory to put the zipped backups | ||
ZIP_DIR=/zip/ | ||
|
||
NOW=$(/bin/date +"%Y%m%d%H%M") | ||
|
||
# Name of the zip'ed db backup. The .7z extension will be added by the 7zip program | ||
|
||
ZIP_BASE=backup_full_ | ||
ZIP_NAME=${ZIP_BASE}${NOW} | ||
### Cleanup | ||
|
||
[ -r /run/secrets/encryption_key ] || { echo "Encryption key not readable in /run/secrets/encryption_key" ; exit 1; } | ||
[ -r /run/secrets/gcp_backup_creds ] || { echo "Google cloud service credentials not found in /run/secrets/gcp_back_creds" ; exit 1; } | ||
[ -z "${BUCKET}" ] && { echo "S3 bucketname not set in BUCKET environment variable" ; exit 1; } | ||
[ -z "${BUCKETPATH}" ] && { echo "Path within S3 bucket not set in BUCKETPATH environment variable" ; exit 1; } | ||
[ -z "${DELETE_DUMP}" ] || echo "DELETE_DUMP set, will delete files/directories under /dump/ when done compressing" | ||
|
||
# This is the password used to generate the AES256 encryption key | ||
SECRET=`cat /run/secrets/encryption_key` | ||
|
||
# This is the Google Cloud Storage path, note that it depends on rclone being preconfigured | ||
# for "remote" using the runtime creds, check rclone config in /root/.config/rclone/rclone.conf | ||
REMOTE=remote:${BUCKET}/${BUCKETPATH} | ||
# Delete all old zip files, except the last N, as defined by $ZIP_RETENTION | ||
rm -rf ${ZIP_DIR}/tmp_md5 | ||
ls -t ${ZIP_DIR}/*.7z 2>/dev/null | tail -n +${ZIP_RETENTION} | xargs -r rm -f | ||
ls -t ${ZIP_DIR}/*.md5 2>/dev/null | tail -n +${ZIP_RETENTION} | xargs -r rm -f | ||
|
||
# Delete all old backup dumps, except the last N, as defined by $DUMP_RETENTION | ||
find ${DUMP_BASE} -type d -name "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" -print0 | xargs -0 ls -td | tail -n +${DUMP_RETENTION} | xargs -I {} rm -rf {} | ||
|
||
### End Cleanup | ||
|
||
|
||
# Get list of remote backups | ||
remote_files=$(rclone ls remote:${BUCKET}/${BUCKETPATH} | grep 7z | awk '{print $2}' | rev | cut -d. -f2- | rev) | ||
# Pull remote md5 sums for each remote backup into `tmp_md5` directory | ||
mkdir -p ${ZIP_DIR}/tmp_md5 && cd $_ | ||
for file in $remote_files; do | ||
rclone md5sum remote:${BUCKET}/${BUCKETPATH}/$file.7z | awk '{print $1}' > ${ZIP_DIR}/tmp_md5/$file.md5 | ||
done | ||
|
||
|
||
# Get all exports from DUMP_BASE | ||
for DUMP_DIR in $(ls -d ${DUMP_BASE}/*/); do | ||
# Check if the dump is complete | ||
echo "Checking export for ${DUMP_DIR}" | ||
if [ ! -f "${DUMP_DIR}/dump_complete.txt" ]; then | ||
echo "dump_complete.txt not found in ${DUMP_DIR}, skipping" | ||
continue | ||
fi | ||
# Remove trailing slash and get the base name of the directory | ||
DIR_NAME=$(basename ${DUMP_DIR%/}) | ||
ZIP_NAME=${ZIP_DIR}/${ZIP_BASE}_${DIR_NAME} | ||
echo $DIR_NAME | ||
# Check if the corresponding md5 file exists, if not, zip it | ||
if [ ! -f "${ZIP_DIR}/tmp_md5/${ZIP_BASE}_${DIR_NAME}.md5" ]; then | ||
echo "No remote exists for ${DIR_NAME}, zipping" | ||
/usr/bin/7z a -p${SECRET} ${ZIP_NAME} -mx=${COMPRESSION_LEVEL} -mhe -t7z ${DUMP_DIR} || { echo "Could not zip ${DUMP_DIR} into ${ZIP_NAME}" ; exit 1; } | ||
else | ||
echo "Remote exists for ${DIR_NAME}, skipping" | ||
fi | ||
done | ||
|
||
# Compare checksums of local 7z files against all remotes' md5's. Add to upload list if not found | ||
uploads="" | ||
cd ${ZIP_DIR} || exit | ||
for file in ${ZIP_DIR}/*.7z; do | ||
# Get the base name of the file without extension | ||
base_name=$(basename "$file" .7z) | ||
local_md5=$(md5sum "$file" | awk '{print $1}') | ||
echo $local_md5 > "${ZIP_DIR}/${base_name}.md5" | ||
# Now compare this file with the remote md5s | ||
match_found=0 | ||
for remote_md5_file in ${ZIP_DIR}/tmp_md5/*.md5; do | ||
remote_md5=$(cat "$remote_md5_file") | ||
if [ "$local_md5" = "$remote_md5" ]; then | ||
match_found=1 | ||
break | ||
fi | ||
done | ||
if [ $match_found -eq 0 ]; then | ||
echo "Adding $file to uploads list" | ||
uploads="$uploads $file" | ||
fi | ||
done | ||
|
||
echo "Current uploads candidates are: $uploads" | ||
|
||
## Verify & update list of files to upload | ||
final_uploads="" | ||
cd ${ZIP_DIR} || exit | ||
for file in ${uploads}; do | ||
# Get the base name of the file without extension | ||
base_name=$(basename "$file" .7z) | ||
# Compare local and remote md5 | ||
remote_md5=$(cat "${ZIP_DIR}/tmp_md5/${base_name}.md5") | ||
local_md5=$(cat "${ZIP_DIR}/${base_name}.md5") | ||
if [ "$local_md5" != "$remote_md5" ]; then | ||
echo "MD5 mismatch for file $file. Incrementing filename and adding to uploads list." | ||
# Extract the last character of the base name | ||
last_char=${base_name: -1} | ||
# Check if the last character is a letter | ||
if [[ $last_char =~ [a-y] ]]; then | ||
# If it's a letter, increment it | ||
next_char=$(echo "$last_char" | tr "a-y" "b-z") | ||
new_base_name=${base_name%?}$next_char | ||
elif [[ $last_char == 'z' ]]; then | ||
# If it's 'z', replace it with 'a' and append 'a' | ||
new_base_name=${base_name%?}aa | ||
else | ||
# If it's not a letter, append 'a' | ||
new_base_name=${base_name}a | ||
fi | ||
# Rename the file | ||
mv "$file" "${ZIP_DIR}/${new_base_name}.7z" | ||
# Add the renamed file to the uploads list | ||
final_uploads="$final_uploads ${ZIP_DIR}/${new_base_name}.7z" | ||
fi | ||
done | ||
|
||
echo "Final uploads: $final_uploads" | ||
|
||
|
||
# Delete any files older than 30 days in the zip directory | ||
echo "Deleting database archives older than 30 days" | ||
/usr/bin/find ${ZIP_DIR} -mtime +30 -type f -name "${ZIP_BASE}*" -print -exec rm {} \; | ||
# Before running rclone | ||
#for file in "${uploads[@]}"; do | ||
for file in ${final_uploads}; do | ||
ls $file | ||
if [ ! -f "$file" ]; then | ||
echo "File does not exist: $file" | ||
fi | ||
done | ||
|
||
echo "Zipping ${DUMP_BASE}/${DUMP_DIR} to ${ZIP_DIR}/${ZIP_NAME}" | ||
cd / | ||
/usr/bin/7za a -p${SECRET} ${ZIP_DIR}/${ZIP_NAME} -mx=7 -mhe -t7z ${DUMP_BASE} || { echo "Could not zip ${DUMP_BASE} into ${ZIP_DIR}/${ZIP_NAME}" ; exit 1; } | ||
[ -z "${DELETE_DUMP}" ] || { echo "Clearing contents of /dump/"; cd /dump/; rm -rf *; } | ||
|
||
echo "RClone-ing ${ZIP_DIR} to GCP ${GCP_DEST}" | ||
/bin/rclone sync ${ZIP_DIR}/ ${REMOTE} | ||
|
||
## Sync All Resulting Files (in list!) | ||
cd ${ZIP_DIR} || exit | ||
for file in ${final_uploads}; do | ||
echo "RClone-ing ${file} to GCP ${REMOTE}" | ||
/usr/bin/rclone sync -v "$file" ${REMOTE}/ | ||
done |