Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Accelerate linux build workflow #28

Merged
merged 6 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions .github/workflows/cuda/Linux.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
#!/bin/bash

# CUDA major and minor version
CUDA_VER_FULL=${1}
CUDA_VER_ARR=($(echo ${CUDA_VER_FULL} | tr "." " "))
CUDA_VER="${CUDA_VER_ARR[0]}.${CUDA_VER_ARR[1]}"
CUDA_VER_ID="${CUDA_VER_ARR[0]}_${CUDA_VER_ARR[1]}"
CUDA_VER_SHORT="cu${CUDA_VER_ARR[0]}${CUDA_VER_ARR[1]}"

# Taken from https://github.com/pyg-team/pyg-lib/

OS=ubuntu2004

case ${1} in
case ${CUDA_VER_SHORT} in
cu121)
CUDA=12.1
APT_KEY=${OS}-${CUDA/./-}-local
Expand Down Expand Up @@ -48,7 +55,7 @@ case ${1} in
URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}/Prod/local_installers
;;
*)
echo "Unrecognized CUDA_VERSION=${1}"
echo "Unrecognized CUDA_VERSION=${CUDA_VER_SHORT}"
exit 1
;;
esac
Expand All @@ -58,7 +65,7 @@ sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv ${URL}/${FILENAME}
sudo dpkg -i ${FILENAME}

if [ "${1}" = "cu117" ] || [ "${1}" = "cu118" ] || [ "${1}" = "cu121" ]; then
if [ "${CUDA_VER_SHORT}" = "cu117" ] || [ "${CUDA_VER_SHORT}" = "cu118" ] || [ "${CUDA_VER_SHORT}" = "cu121" ]; then
sudo cp /var/cuda-repo-${APT_KEY}/cuda-*-keyring.gpg /usr/share/keyrings/
else
sudo apt-key add /var/cuda-repo-${APT_KEY}/7fa2af80.pub
Expand Down
68 changes: 51 additions & 17 deletions .github/workflows/ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,23 @@ on:

jobs:
build:
name: ${{ matrix.os }}-${{ matrix.cuda-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }}
name: ${{ matrix.os }}-cuda-${{ matrix.cuda-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-22.04, ubuntu-20.04] # [ubuntu-22.04, ubuntu-20.04, ubuntu-18.04]
arch: [x64] # [x64, x86]
torch-version: [2.1.2, 2.2.1] # [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1]
cuda-version: ['cu118', 'cu121'] # ['cpu', 'cu113', 'cu116', 'cu117']
cuda-version: [11.8.0, 12.1.1] # [12.3.1, 12.1.1, 11.8.0, 11.7.1, 11.6.2, 11.5.2,11.4.4, 11.3.1, 11.2.2, 11.1.1, 11.0.3, cpu]
cmake-build-type: [Release] # [Debug, ClangTidy]

env:
CCACHE_DIR: ${{ github.workspace }}/ccache
CCACHE_BASEDIR: ${{ github.workspace }}

CL: /MP
CMAKE_GENERATOR: Ninja
CMAKE_GENERATOR_PLATFORM: ${{matrix.arch}}
TORCH_CUDA_ARCH_LIST: '7.0;7.5'
steps:
- name: Checkout Repository
uses: actions/checkout@v4
Expand All @@ -40,49 +43,80 @@ jobs:
ninja-build \
libopencv-dev \
wget

- name: Install ccache
run: |
wget -nv https://github.com/ccache/ccache/releases/download/v4.9.1/ccache-4.9.1-linux-x86_64.tar.xz
sudo tar xf ccache-4.9.1-linux-x86_64.tar.xz -C /usr/bin --strip-components=1 --no-same-owner ccache-4.9.1-linux-x86_64/ccache
rm -f ccache-*-linux-x86_64.tar.xz
ccache --version

- name: Install CUDA ${{ matrix.cuda-version }}
if: ${{ matrix.cuda-version != 'cpu' }}
- name: Sudo Tar Wrapper
run: |
# Workaround: https://github.com/containers/podman/discussions/17868
sudo mv -fv /usr/bin/tar /usr/bin/tar.orig
echo -e '#!/bin/sh\n\nsudo /usr/bin/tar.orig "$@"' | sudo tee -a /usr/bin/tar
sudo chmod +x /usr/bin/tar

- name: Restore CUDA Cache
uses: actions/cache@v4
id: cuda-cache
with:
key: cuda-${{matrix.CUDA-VERSION}}-ubuntu
path: |
/usr/local/cuda*

- name: Install CUDA
if: ${{ steps.cuda-cache.outputs.cache-hit != 'true' }}
run: |
bash .github/workflows/cuda/${{ runner.os }}.sh ${{ matrix.cuda-version }}

- name: Setup CUDA
shell: bash
run: |
CUDA_VER_FULL=${{ matrix.cuda-version }}
CUDA_VER_ARR=($(echo ${CUDA_VER_FULL} | tr "." " "))
CUDA_VER="${CUDA_VER_ARR[0]}.${CUDA_VER_ARR[1]}"
CUDA_VER_ID="${CUDA_VER_ARR[0]}_${CUDA_VER_ARR[1]}"
CUDA_VER_SHORT="cu${CUDA_VER_ARR[0]}${CUDA_VER_ARR[1]}"
echo "CUDA_VER_SHORT=${CUDA_VER_SHORT}" >> ${GITHUB_ENV}

- name: Free disk space
if: ${{ matrix.os == 'ubuntu-20.04' }}
run: |
df -h
sudo rm -rf /usr/share/dotnet /usr/local/lib/android /usr/lib/php* /opt/ghc || true
df -h

- name: Install libtorch ${{ matrix.torch-version }}+${{ matrix.cuda-version }}
- name: Restore LibTorch Cache
uses: actions/cache@v4
id: libtorch-cache
with:
key: libtorch-${{ matrix.torch-version }}-${{env.CUDA_VER_SHORT}}-ubuntu
path: |
${{ github.workspace }}/libtorch

- name: Install LibTorch
if: ${{ steps.libtorch-cache.outputs.cache-hit != 'true' }}
run: |
wget --no-check-certificate -nv https://download.pytorch.org/libtorch/${{ matrix.cuda-version }}/libtorch-cxx11-abi-shared-with-deps-${{ matrix.torch-version }}%2B${{ matrix.cuda-version }}.zip -O libtorch.zip
wget --no-check-certificate -nv https://download.pytorch.org/libtorch/${CUDA_VER_SHORT}/libtorch-cxx11-abi-shared-with-deps-${{ matrix.torch-version }}%2B${CUDA_VER_SHORT}.zip -O libtorch.zip
unzip -q ${{ github.workspace }}/libtorch.zip -d ${{ github.workspace }}/
rm ${{ github.workspace }}/libtorch.zip

- name: Cache Build
uses: actions/cache@v4
id: cache-builds
with:
key: ${{ matrix.os }}-${{ matrix.cuda-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }}-ccache-${{ github.run_id }}
restore-keys: ${{ matrix.os }}-${{ matrix.cuda-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }}-ccache-
key: ${{ matrix.os }}-cuda-${{ matrix.cuda-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }}-ccache-${{ github.run_id }}
restore-keys: ${{ matrix.os }}-cuda-${{ matrix.cuda-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }}-ccache-
path: ${{ env.CCACHE_DIR }}

- name: Configure and build
- name: Configure And Build
run: |
set -x
source .github/workflows/cuda/${{ runner.os }}-env.sh ${{ matrix.cuda-version }}
source .github/workflows/cuda/${{ runner.os }}-env.sh ${CUDA_VER_SHORT}
cmake --version
mkdir build
cd build
cmake .. \
-GNinja \
-G${CMAKE_GENERATOR} \
-DCMAKE_BUILD_TYPE=${{ matrix.cmake-build-type }} \
-DCMAKE_C_COMPILER_LAUNCHER=$(which ccache) \
-DCMAKE_CXX_COMPILER_LAUNCHER=$(which ccache) \
Expand All @@ -91,7 +125,7 @@ jobs:
-DCUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME
ninja

- name: Clean compiler cache
- name: Clean Compiler Cache
run: |
set -x
ccache --show-stats
Expand Down