Skip to content

Commit a743f6c

Browse files
committed
ci: Refactor sync-test-datasets workflow to use s5cmd for S3 sync
- Replace AWS CLI with s5cmd for improved performance and efficiency in syncing branches to S3.
- Add caching for s5cmd installation to speed up workflow execution.
- Update branch checkout process to use actions/checkout for better handling of specific branches.
1 parent 2537813 commit a743f6c

File tree

1 file changed

+38
-14
lines changed

1 file changed

+38
-14
lines changed

.github/workflows/sync-test-datasets.yml

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -55,29 +55,53 @@ jobs:
5555
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
5656
aws-region: eu-west-1
5757

58-
- name: Sync branch ${{ matrix.branch }} to S3
58+
- name: Checkout specific branch
59+
uses: actions/checkout@v4
60+
with:
61+
repository: nf-core/test-datasets
62+
ref: ${{ matrix.branch }}
63+
path: test-datasets-branch
64+
65+
- name: Cache s5cmd
66+
id: cache-s5cmd
67+
uses: actions/cache@v4
68+
with:
69+
path: ~/.local/bin/s5cmd
70+
key: s5cmd-${{ runner.os }}-latest
71+
72+
- name: Install s5cmd
73+
if: steps.cache-s5cmd.outputs.cache-hit != 'true'
5974
run: |
60-
echo "Syncing branch: ${{ matrix.branch }}"
75+
mkdir -p ~/.local/bin
76+
curl -L https://github.com/peak/s5cmd/releases/latest/download/s5cmd_$(uname)_amd64.tar.gz | tar -xz
77+
mv s5cmd ~/.local/bin/
78+
chmod +x ~/.local/bin/s5cmd
6179
62-
# Clone only the specific branch
63-
git clone --single-branch --branch "${{ matrix.branch }}" \
64-
https://github.com/nf-core/test-datasets.git test-datasets-branch
80+
- name: Add s5cmd to PATH
81+
run: echo "$HOME/.local/bin" >> $GITHUB_PATH
6582

83+
- name: Sync branch ${{ matrix.branch }} to S3
84+
run: |
85+
echo "Syncing branch: ${{ matrix.branch }} using s5cmd"
6686
cd test-datasets-branch
67-
68-
# Sync to S3 with branch prefix
69-
aws s3 sync ./ "s3://nf-core-test-datasets/${{ matrix.branch }}/" \
87+
88+
# Use s5cmd for faster, more efficient uploads
89+
# --delete flag removes files in S3 that don't exist locally
90+
# --exclude patterns exclude git files
91+
# --storage-class sets the S3 storage class
92+
# --numworkers increases parallelism for faster uploads
93+
s5cmd \
94+
--numworkers 10 \
95+
--retry-count 3 \
96+
sync \
7097
--delete \
7198
--exclude ".git/*" \
7299
--exclude ".github/*" \
73-
--storage-class STANDARD_IA
74-
100+
--storage-class STANDARD_IA \
101+
./ "s3://nf-core-test-datasets/${{ matrix.branch }}/"
102+
75103
echo "Completed sync for branch: ${{ matrix.branch }}"
76104
77-
# Clean up
78-
cd ..
79-
rm -rf test-datasets-branch
80-
81105
update-metadata:
82106
needs: [discover-branches, sync-branches]
83107
runs-on: ubuntu-latest

0 commit comments

Comments
 (0)