From d2d197a9c81ccbb4e4e2af76929c8d9c4c7fbe5f Mon Sep 17 00:00:00 2001 From: Pengfei Xuan Date: Thu, 29 Feb 2024 12:22:59 -0500 Subject: [PATCH 1/6] Refactor cuda-version (ubuntu workflow) --- .github/workflows/cuda/Linux.sh | 13 ++++++++++--- .github/workflows/ubuntu.yml | 33 +++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/.github/workflows/cuda/Linux.sh b/.github/workflows/cuda/Linux.sh index 8777f41..798b469 100644 --- a/.github/workflows/cuda/Linux.sh +++ b/.github/workflows/cuda/Linux.sh @@ -1,10 +1,17 @@ #!/bin/bash +# CUDA major and minor version +CUDA_VER_FULL=${1} +CUDA_VER_ARR=($(echo ${CUDA_VER_FULL} | tr "." " ")) +CUDA_VER="${CUDA_VER_ARR[0]}.${CUDA_VER_ARR[1]}" +CUDA_VER_ID="${CUDA_VER_ARR[0]}_${CUDA_VER_ARR[1]}" +CUDA_VER_SHORT="cu${CUDA_VER_ARR[0]}${CUDA_VER_ARR[1]}" + # Took from https://github.com/pyg-team/pyg-lib/ OS=ubuntu2004 -case ${1} in +case ${CUDA_VER_SHORT} in cu121) CUDA=12.1 APT_KEY=${OS}-${CUDA/./-}-local @@ -48,7 +55,7 @@ case ${1} in URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}/Prod/local_installers ;; *) - echo "Unrecognized CUDA_VERSION=${1}" + echo "Unrecognized CUDA_VERSION=${CUDA_VER_SHORT}" exit 1 ;; esac @@ -58,7 +65,7 @@ sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv ${URL}/${FILENAME} sudo dpkg -i ${FILENAME} -if [ "${1}" = "cu117" ] || [ "${1}" = "cu118" ] || [ "${1}" = "cu121" ]; then +if [ "${CUDA_VER_SHORT}" = "cu117" ] || [ "${CUDA_VER_SHORT}" = "cu118" ] || [ "${CUDA_VER_SHORT}" = "cu121" ]; then sudo cp /var/cuda-repo-${APT_KEY}/cuda-*-keyring.gpg /usr/share/keyrings/ else sudo apt-key add /var/cuda-repo-${APT_KEY}/7fa2af80.pub diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 5668b5a..0b573e6 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -3,7 +3,7 @@ name: OpenSplat (Ubuntu) on: push: branches: - - main + - ci-linux-cache pull_request: types: [ assigned, 
opened, synchronize, reopened ] release: @@ -11,16 +11,15 @@ on: jobs: build: - name: ${{ matrix.os }}-${{ matrix.cuda-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }} + name: ${{ matrix.os }}-cuda-${{ matrix.cuda-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - os: [ubuntu-22.04, ubuntu-20.04] # [ubuntu-22.04, ubuntu-20.04, ubuntu-18.04] - torch-version: [2.1.2, 2.2.1] # [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1] - cuda-version: ['cu118', 'cu121'] # ['cpu', 'cu113', 'cu116', 'cu117'] + os: [ubuntu-22.04] # [ubuntu-22.04, ubuntu-20.04, ubuntu-18.04] + torch-version: [2.2.1] # [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1] + cuda-version: [12.1.1] # [12.3.1, 12.1.1, 11.8.0, 11.7.1, 11.6.2, 11.5.2,11.4.4, 11.3.1, 11.2.2, 11.1.1, 11.0.3, cpu] cmake-build-type: [Release] # [Debug, ClangTidy] - env: CCACHE_DIR: ${{ github.workspace }}/ccache CCACHE_BASEDIR: ${{ github.workspace }} @@ -48,11 +47,21 @@ jobs: rm -f ccache-*-linux-x86_64.tar.xz ccache --version - - name: Install CUDA ${{ matrix.cuda-version }} + - name: Install CUDA if: ${{ matrix.cuda-version != 'cpu' }} run: | bash .github/workflows/cuda/${{ runner.os }}.sh ${{ matrix.cuda-version }} + - name: Setup CUDA + shell: bash + run: | + CUDA_VER_FULL=${{ matrix.cuda-version }} + CUDA_VER_ARR=($(echo ${CUDA_VER_FULL} | tr "." 
" ")) + CUDA_VER="${CUDA_VER_ARR[0]}.${CUDA_VER_ARR[1]}" + CUDA_VER_ID="${CUDA_VER_ARR[0]}_${CUDA_VER_ARR[1]}" + CUDA_VER_SHORT="cu${CUDA_VER_ARR[0]}${CUDA_VER_ARR[1]}" + echo "CUDA_VER_SHORT=${CUDA_VER_SHORT}" >> ${GITHUB_ENV} + - name: Free disk space if: ${{ matrix.os == 'ubuntu-20.04' }} run: | @@ -60,9 +69,9 @@ jobs: sudo rm -rf /usr/share/dotnet /usr/local/lib/android /usr/lib/php* /opt/ghc || true df -h - - name: Install libtorch ${{ matrix.torch-version }}+${{ matrix.cuda-version }} + - name: Install LibTorch run: | - wget --no-check-certificate -nv https://download.pytorch.org/libtorch/${{ matrix.cuda-version }}/libtorch-cxx11-abi-shared-with-deps-${{ matrix.torch-version }}%2B${{ matrix.cuda-version }}.zip -O libtorch.zip + wget --no-check-certificate -nv https://download.pytorch.org/libtorch/${CUDA_VER_SHORT}/libtorch-cxx11-abi-shared-with-deps-${{ matrix.torch-version }}%2B${CUDA_VER_SHORT}.zip -O libtorch.zip unzip -q ${{ github.workspace }}/libtorch.zip -d ${{ github.workspace }}/ rm ${{ github.workspace }}/libtorch.zip @@ -70,14 +79,14 @@ jobs: uses: actions/cache@v4 id: cache-builds with: - key: ${{ matrix.os }}-${{ matrix.cuda-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }}-ccache-${{ github.run_id }} - restore-keys: ${{ matrix.os }}-${{ matrix.cuda-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }}-ccache- + key: ${{ matrix.os }}-cuda-${{ matrix.cuda-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }}-ccache-${{ github.run_id }} + restore-keys: ${{ matrix.os }}-cuda-${{ matrix.cuda-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }}-ccache- path: ${{ env.CCACHE_DIR }} - name: Configure and build run: | set -x - source .github/workflows/cuda/${{ runner.os }}-env.sh ${{ matrix.cuda-version }} + source .github/workflows/cuda/${{ runner.os }}-env.sh ${CUDA_VER_SHORT} cmake --version mkdir build cd build From 6bab04eeff38027eeca8682d6784f2f3161203cc Mon 
Sep 17 00:00:00 2001 From: Pengfei Xuan Date: Thu, 29 Feb 2024 13:00:06 -0500 Subject: [PATCH 2/6] Add cuda and libtorch cache to speedup ubuntu setup task --- .github/workflows/ubuntu.yml | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 0b573e6..ac27c85 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -17,13 +17,17 @@ jobs: fail-fast: false matrix: os: [ubuntu-22.04] # [ubuntu-22.04, ubuntu-20.04, ubuntu-18.04] + arch: [x64] # [x64, x86] torch-version: [2.2.1] # [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1] cuda-version: [12.1.1] # [12.3.1, 12.1.1, 11.8.0, 11.7.1, 11.6.2, 11.5.2,11.4.4, 11.3.1, 11.2.2, 11.1.1, 11.0.3, cpu] cmake-build-type: [Release] # [Debug, ClangTidy] env: CCACHE_DIR: ${{ github.workspace }}/ccache CCACHE_BASEDIR: ${{ github.workspace }} - + CL: /MP + CMAKE_GENERATOR: Ninja + CMAKE_GENERATOR_PLATFORM: ${{matrix.arch}} + TORCH_CUDA_ARCH_LIST: '7.0;7.5' steps: - name: Checkout Repository uses: actions/checkout@v4 @@ -39,16 +43,21 @@ jobs: ninja-build \ libopencv-dev \ wget - - - name: Install ccache - run: | wget -nv https://github.com/ccache/ccache/releases/download/v4.9.1/ccache-4.9.1-linux-x86_64.tar.xz sudo tar xf ccache-4.9.1-linux-x86_64.tar.xz -C /usr/bin --strip-components=1 --no-same-owner ccache-4.9.1-linux-x86_64/ccache rm -f ccache-*-linux-x86_64.tar.xz ccache --version + - name: Restore CUDA Cache + uses: actions/cache@v4 + id: cuda-cache + with: + key: cuda-${{matrix.CUDA-VERSION}}-ubuntu + path: | + /usr/local/cuda* + - name: Install CUDA - if: ${{ matrix.cuda-version != 'cpu' }} + if: ${{ steps.cuda-cache.outputs.cache-hit != 'true' }} run: | bash .github/workflows/cuda/${{ runner.os }}.sh ${{ matrix.cuda-version }} @@ -69,7 +78,16 @@ jobs: sudo rm -rf /usr/share/dotnet /usr/local/lib/android /usr/lib/php* /opt/ghc || true df -h + - name: Restore LibTorch Cache + uses: 
actions/cache@v4 + id: libtorch-cache + with: + key: libtorch-${{ matrix.torch-version }}-${{env.CUDA_VER_SHORT}}-ubuntu + path: | + ${{ github.workspace }}/libtorch + - name: Install LibTorch + if: ${{ steps.libtorch-cache.outputs.cache-hit != 'true' }} run: | wget --no-check-certificate -nv https://download.pytorch.org/libtorch/${CUDA_VER_SHORT}/libtorch-cxx11-abi-shared-with-deps-${{ matrix.torch-version }}%2B${CUDA_VER_SHORT}.zip -O libtorch.zip unzip -q ${{ github.workspace }}/libtorch.zip -d ${{ github.workspace }}/ @@ -83,7 +101,7 @@ jobs: restore-keys: ${{ matrix.os }}-cuda-${{ matrix.cuda-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }}-ccache- path: ${{ env.CCACHE_DIR }} - - name: Configure and build + - name: Configure And Build run: | set -x source .github/workflows/cuda/${{ runner.os }}-env.sh ${CUDA_VER_SHORT} @@ -91,7 +109,7 @@ jobs: mkdir build cd build cmake .. \ - -GNinja \ + -G${CMAKE_GENERATOR} \ -DCMAKE_BUILD_TYPE=${{ matrix.cmake-build-type }} \ -DCMAKE_C_COMPILER_LAUNCHER=$(which ccache) \ -DCMAKE_CXX_COMPILER_LAUNCHER=$(which ccache) \ @@ -100,7 +118,7 @@ jobs: -DCUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME ninja - - name: Clean compiler cache + - name: Clean Compiler Cache run: | set -x ccache --show-stats From 6e161760fd5fc20e7ddf97fca4c1f710f2b1c1d7 Mon Sep 17 00:00:00 2001 From: Pengfei Xuan Date: Thu, 29 Feb 2024 13:11:43 -0500 Subject: [PATCH 3/6] Test build speed --- .github/workflows/ubuntu.yml | 2 +- Dockerfile | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 Dockerfile diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index ac27c85..0b01988 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-22.04] # [ubuntu-22.04, ubuntu-20.04, ubuntu-18.04] + os: [ubuntu-22.04, ubuntu-20.04] # [ubuntu-22.04, ubuntu-20.04, ubuntu-18.04] arch: [x64] # [x64, x86] 
torch-version: [2.2.1] # [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1] cuda-version: [12.1.1] # [12.3.1, 12.1.1, 11.8.0, 11.7.1, 11.6.2, 11.5.2,11.4.4, 11.3.1, 11.2.2, 11.1.1, 11.0.3, cpu] diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a486ff0 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,27 @@ +ARG UBUNTU_VERSION=22.04 +ARG TORCH-VERSION=2.2.1 +ARG CUDA_VERSION=12.1 +ARG CMAKE-BUILD-TYPE=Release + +FROM ubuntu:${UBUNTU_VERSION} + +# Env variables +ENV DEBIAN_FRONTEND noninteractive + +# Prepare directories +WORKDIR /code + +# Copy everything +COPY . ./ + +# Install build dependencies +RUN apt-get update && \ + apt-get install -y \ + build-essential \ + cmake \ + ninja-build \ + libopencv-dev \ + wget + +# Install CUDA +RUN bash .github/workflows/cuda/Linux.sh ${{ matrix.cuda-version }} From dc434ecf81b9fc9c1c4de21c747413a725ff200a Mon Sep 17 00:00:00 2001 From: Pengfei Xuan Date: Thu, 29 Feb 2024 13:53:03 -0500 Subject: [PATCH 4/6] Enable sudo tar from cache restore --- .github/workflows/ubuntu.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 0b01988..b99088f 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -48,6 +48,12 @@ jobs: rm -f ccache-*-linux-x86_64.tar.xz ccache --version + - name: Sudo Tar Wrapper + run: | + sudo mv -fv /usr/bin/tar /usr/bin/tar.orig + echo -e '#!/bin/sh\n\nsudo /usr/bin/tar.orig "$@"' | sudo tee -a /usr/bin/tar + sudo chmod +x /usr/bin/tar + - name: Restore CUDA Cache uses: actions/cache@v4 id: cuda-cache From 5e40bcba507fd0ee5c3da24a5f5b322b6724855f Mon Sep 17 00:00:00 2001 From: Pengfei Xuan Date: Thu, 29 Feb 2024 14:00:55 -0500 Subject: [PATCH 5/6] Perform full test --- .github/workflows/ubuntu.yml | 5 +++-- Dockerfile | 27 --------------------------- 2 files changed, 3 insertions(+), 29 deletions(-) delete mode 100644 Dockerfile diff --git a/.github/workflows/ubuntu.yml 
b/.github/workflows/ubuntu.yml index b99088f..b4e0030 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -18,8 +18,8 @@ jobs: matrix: os: [ubuntu-22.04, ubuntu-20.04] # [ubuntu-22.04, ubuntu-20.04, ubuntu-18.04] arch: [x64] # [x64, x86] - torch-version: [2.2.1] # [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1] - cuda-version: [12.1.1] # [12.3.1, 12.1.1, 11.8.0, 11.7.1, 11.6.2, 11.5.2,11.4.4, 11.3.1, 11.2.2, 11.1.1, 11.0.3, cpu] + torch-version: [2.1.2, 2.2.1] # [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1] + cuda-version: [11.8.0, 12.1.1] # [12.3.1, 12.1.1, 11.8.0, 11.7.1, 11.6.2, 11.5.2,11.4.4, 11.3.1, 11.2.2, 11.1.1, 11.0.3, cpu] cmake-build-type: [Release] # [Debug, ClangTidy] env: CCACHE_DIR: ${{ github.workspace }}/ccache @@ -50,6 +50,7 @@ jobs: - name: Sudo Tar Wrapper run: | + # Workaround: https://github.com/containers/podman/discussions/17868 sudo mv -fv /usr/bin/tar /usr/bin/tar.orig echo -e '#!/bin/sh\n\nsudo /usr/bin/tar.orig "$@"' | sudo tee -a /usr/bin/tar sudo chmod +x /usr/bin/tar diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index a486ff0..0000000 --- a/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -ARG UBUNTU_VERSION=22.04 -ARG TORCH-VERSION=2.2.1 -ARG CUDA_VERSION=12.1 -ARG CMAKE-BUILD-TYPE=Release - -FROM ubuntu:${UBUNTU_VERSION} - -# Env variables -ENV DEBIAN_FRONTEND noninteractive - -# Prepare directories -WORKDIR /code - -# Copy everything -COPY . 
./ - -# Install build dependencies -RUN apt-get update && \ - apt-get install -y \ - build-essential \ - cmake \ - ninja-build \ - libopencv-dev \ - wget - -# Install CUDA -RUN bash .github/workflows/cuda/Linux.sh ${{ matrix.cuda-version }} From 7656b505ef5c983b27ae66b3be458173ac3e5f7b Mon Sep 17 00:00:00 2001 From: Pengfei Xuan Date: Thu, 29 Feb 2024 14:18:09 -0500 Subject: [PATCH 6/6] Switch ci trigger back to the main branch --- .github/workflows/ubuntu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index b4e0030..23d02f3 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -3,7 +3,7 @@ name: OpenSplat (Ubuntu) on: push: branches: - - ci-linux-cache + - main pull_request: types: [ assigned, opened, synchronize, reopened ] release: