From bd8e9891f2447c5f57fc2708150b260dfc590ae7 Mon Sep 17 00:00:00 2001
From: Changming Sun
Date: Tue, 22 Oct 2024 12:47:29 -0700
Subject: [PATCH] Move ORT Training pipeline to github actions

---
 .github/codeql/codeql-config.yml              |  8 ++
 .github/workflows/linux_training.yml          | 67 +++++++++++++
 .../orttraining-linux-ci-pipeline.yml         | 95 -------------------
 .../orttraining-linux-gpu-ci-pipeline.yml     | 55 -----------
 .../github/linux/build_training_ci.sh         |  4 -
 .../docker/Dockerfile.ubuntu_gpu_training     | 60 ------------
 6 files changed, 75 insertions(+), 214 deletions(-)
 create mode 100644 .github/codeql/codeql-config.yml
 create mode 100644 .github/workflows/linux_training.yml
 delete mode 100644 tools/ci_build/github/azure-pipelines/orttraining-linux-ci-pipeline.yml
 delete mode 100644 tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml
 delete mode 100755 tools/ci_build/github/linux/build_training_ci.sh
 delete mode 100644 tools/ci_build/github/linux/docker/Dockerfile.ubuntu_gpu_training

diff --git a/.github/codeql/codeql-config.yml b/.github/codeql/codeql-config.yml
new file mode 100644
index 0000000000000..6a76f7bcdbcb0
--- /dev/null
+++ b/.github/codeql/codeql-config.yml
@@ -0,0 +1,8 @@
+# CodeQL configuration referenced by .github/workflows/linux_training.yml.
+name: "CodeQL config"
+queries:
+  - uses: security-extended
+  - uses: security-and-quality
+paths-ignore:
+  - tests
+  - build
diff --git a/.github/workflows/linux_training.yml b/.github/workflows/linux_training.yml
new file mode 100644
index 0000000000000..51af6cd20de7d
--- /dev/null
+++ b/.github/workflows/linux_training.yml
@@ -0,0 +1,67 @@
+# Builds ONNX Runtime with --enable_training on ubuntu-24.04 and runs CodeQL
+# C/C++ analysis over that build.
+name: orttraining-linux-ci-pipeline
+on:
+  push:
+    branches:
+      - main
+      - rel-*
+  pull_request:
+
+concurrency:
+  # Pull requests share one group per ref, so a newer push cancels the stale
+  # run. Other events key on the commit SHA so that runs for different commits
+  # on main/rel-* never cancel each other.
+  group: ${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  orttraining-linux-ci-pipeline:
+    runs-on: ubuntu-24.04
+    permissions:
+      actions: read
+      contents: read
+      # Required by the SARIF upload step below.
+      security-events: write
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Python build requirements
+        run: |
+          python3 -m pip install -r tools/ci_build/github/linux/python/requirements.txt
+      # CodeQL is initialized before the build so the compilation is observed.
+      - name: Initialize CodeQL
+        uses: github/codeql-action/init@v3
+        with:
+          config-file: ./.github/codeql/codeql-config.yml
+          languages: 'cpp'
+      - name: Build with training enabled
+        run: |
+          set -e -x
+          rm -rf build
+          python3 tools/ci_build/build.py --build_dir build --config Release --enable_training --skip_submodule_sync --parallel --update --build
+
+      # `upload: failure-only` defers the normal upload to the "Upload SARIF"
+      # step so the results in sarif-results/ can be filtered first.
+      - name: Perform CodeQL Analysis
+        uses: github/codeql-action/analyze@v3
+        with:
+          category: "/language:cpp"
+          output: sarif-results
+          upload: failure-only
+
+      # Keep findings in .cc/.h files, dropping anything under tests/ or build/.
+      - name: filter-sarif
+        uses: advanced-security/filter-sarif@v1
+        with:
+          patterns: |
+            +**/*.cc
+            +**/*.h
+            -tests/**/*.*
+            -build/**/*.*
+          input: sarif-results/cpp.sarif
+          output: sarif-results/cpp.sarif
+
+      - name: Upload SARIF
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: sarif-results/cpp.sarif
diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-ci-pipeline.yml
deleted file mode 100644
index 5c3273f79bd30..0000000000000
--- a/tools/ci_build/github/azure-pipelines/orttraining-linux-ci-pipeline.yml
+++ /dev/null
@@ -1,95 +0,0 @@
-##### start trigger Don't edit it manually, Please do edit set-trigger-rules.py ####
-### please do rerun set-trigger-rules.py ###
-trigger:
-  branches:
-    include:
-    - main
-    - rel-*
-  paths:
-    exclude:
-    - docs/**
-    - README.md
-    - CONTRIBUTING.md
-    - BUILD.md
-    - 'js/web'
-    - 'onnxruntime/core/providers/js'
-pr:
-  branches:
-    include:
-    - main
-    - rel-*
-  paths:
-    exclude:
-    - docs/**
-    - README.md
-    - CONTRIBUTING.md
-    - BUILD.md
-    - 'js/web'
-    - 'onnxruntime/core/providers/js'
-#### end trigger ####
-
-jobs:
-- job: Linux_Build
-  timeoutInMinutes: 180
-  workspace:
-    clean: all
-  variables:
-    skipComponentGovernanceDetection: true
-    CCACHE_DIR: $(Pipeline.Workspace)/ccache
-    TODAY: $[format('{0:dd}{0:MM}{0:yyyy}', pipeline.startTime)]
-  pool: onnxruntime-Ubuntu-2204-Training-CPU
-  steps:
-  - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
-    displayName: 'Clean Agent Directories'
-    condition: always()
-
-  - checkout: self
-    clean: true
-    submodules: none
-
-  - template: templates/get-docker-image-steps.yml
-    parameters:
-      Dockerfile: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu/Dockerfile
-      Context: tools/ci_build/github/linux/docker/inference/x86_64/default/cpu
-      DockerBuildArgs: "--build-arg BUILD_UID=$( id -u ) --build-arg BASEIMAGE=registry.access.redhat.com/ubi8/ubi"
-      Repository: onnxruntimecpubuildcentos8x64_packaging
-
-  - task: Cache@2
-    inputs:
-      key: '"$(TODAY)" | "$(Build.SourceBranch)" | "$(Build.SourceVersion)"'
-      path: $(CCACHE_DIR)
-      cacheHitVar: CACHE_RESTORED
-      restoreKeys: |
-        "$(TODAY)" | "$(Build.SourceBranch)"
-        "$(TODAY)" |
-    displayName: Cach Task
-
-  - task: CmdLine@2
-    displayName: 'build'
-    inputs:
-      script: |
-        set -e -x
-        mkdir -p $HOME/.onnx
-        mkdir -p $(Pipeline.Workspace)/ccache
-        docker run --rm \
-          --volume /data/onnx:/data/onnx:ro \
-          --volume /data/models:/build/models:ro \
-          --volume $(Build.SourcesDirectory):/onnxruntime_src \
-          --volume $(Build.BinariesDirectory):/build \
-          --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
-          --volume $(Pipeline.Workspace)/ccache:/cache \
-          -e ALLOW_RELEASED_ONNX_OPSET_ONLY=0 \
-          -e NIGHTLY_BUILD \
-          -e BUILD_BUILDNUMBER \
-          -e CCACHE_DIR=/cache \
-          onnxruntimecpubuildcentos8x64_packaging \
-            /onnxruntime_src/tools/ci_build/github/linux/build_training_ci.sh
-      workingDirectory: $(Build.SourcesDirectory)
-
-  - task: PublishTestResults@2
-    displayName: 'Publish unit test results'
-    inputs:
-      testResultsFiles: '**/*.results.xml'
-      searchFolder: '$(Build.BinariesDirectory)'
-      testRunTitle: 'Unit Test Run'
-    condition: succeededOrFailed()
diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml
deleted file mode 100644
index 494035637a79d..0000000000000
--- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-ci-pipeline.yml
+++ /dev/null
@@ -1,55 +0,0 @@
-##### start trigger Don't edit it manually, Please do edit set-trigger-rules.py ####
-### please do rerun set-trigger-rules.py ###
-trigger:
-  branches:
-    include:
-    - main
-    - rel-*
-  paths:
-    exclude:
-    - docs/**
-    - README.md
-    - CONTRIBUTING.md
-    - BUILD.md
-    - 'js/web'
-    - 'onnxruntime/core/providers/js'
-pr:
-  branches:
-    include:
-    - main
-    - rel-*
-  paths:
-    exclude:
-    - docs/**
-    - README.md
-    - CONTRIBUTING.md
-    - BUILD.md
-    - 'js/web'
-    - 'onnxruntime/core/providers/js'
-#### end trigger ####
-
-jobs:
-- template: templates/linux-ci.yml
-  parameters:
-    AgentPool : 'Onnxruntime-Linux-GPU-NC6sv3'
-    JobName: 'Onnxruntime_Linux_GPU_Training'
-    RunDockerBuildArgs: >
-      -o ubuntu20.04 -d gpu
-      -t onnxruntime_orttraining_ortmodule_tests_image
-      -u
-      -e
-      -x "
-      --enable_training
-      --config Release
-      --use_cuda --cuda_version=11.8 --cuda_home=/usr/local/cuda-11.8 --cudnn_home=/usr/local/cuda-11.8
-      --build_wheel
-      --enable_nvtx_profile
-      --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=70
-      "
-    RunInjectedPipeline: 'true'
-    InjectedPipeline: 'orttraining-linux-gpu-test-ci-pipeline.yml'
-    DockerImageTag: 'onnxruntime_orttraining_ortmodule_tests_image'
-    TimeoutInMinutes: 190
-    # Enable unreleased onnx opsets in CI builds
-    # This facilitates testing the implementation for the new opsets
-    AllowReleasedOpsetOnly: '0'
diff --git a/tools/ci_build/github/linux/build_training_ci.sh b/tools/ci_build/github/linux/build_training_ci.sh
deleted file mode 100755
index 82f75a5cbbc50..0000000000000
--- a/tools/ci_build/github/linux/build_training_ci.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-set -e -x
-python3.12 -m pip install -r /onnxruntime_src/tools/ci_build/github/linux/python/requirements.txt
-python3.12 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Release --enable_training --skip_submodule_sync --parallel
diff --git a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_gpu_training b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_gpu_training
deleted file mode 100644
index 4d11cbbde3354..0000000000000
--- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_gpu_training
+++ /dev/null
@@ -1,60 +0,0 @@
-ARG BASEIMAGE=nvcr.io/nvidia/cuda:11.8.0-cudnn8-devel-ubuntu18.04
-
-FROM $BASEIMAGE
-
-ARG PYTHON_VERSION=3.9
-ARG INSTALL_DEPS_EXTRA_ARGS
-ARG USE_CONDA=false
-
-ADD scripts /tmp/scripts
-RUN /tmp/scripts/install_ubuntu.sh -p $PYTHON_VERSION && \
-    /tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS
-
-# If USE_CONDA is false, use root to install python dependencies.
-RUN if [ "$USE_CONDA" = false ] ; \
-    then /tmp/scripts/install_python_deps.sh -p $PYTHON_VERSION -d gpu $INSTALL_DEPS_EXTRA_ARGS ; \
-    fi
-
-WORKDIR /root
-
-# Allow configure to pick up GDK and CuDNN where it expects it.
-# (Note: $CUDNN_VERSION is defined by NVidia's base image)
-RUN _CUDNN_VERSION=$(echo $CUDNN_VERSION | cut -d. -f1-2) && \
-    mkdir -p /usr/local/cudnn-$_CUDNN_VERSION/cuda/include && \
-    ln -s /usr/include/cudnn.h /usr/local/cudnn-$_CUDNN_VERSION/cuda/include/cudnn.h && \
-    mkdir -p /usr/local/cudnn-$_CUDNN_VERSION/cuda/lib64 && \
-    ln -s /etc/alternatives/libcudnn_so /usr/local/cudnn-$_CUDNN_VERSION/cuda/lib64/libcudnn.so && \
-    ln -s /usr/local/cudnn{-$_CUDNN_VERSION,}
-
-ENV LD_LIBRARY_PATH /usr/local/openblas/lib:$LD_LIBRARY_PATH
-
-ARG BUILD_USER=onnxruntimedev
-ARG BUILD_UID=1000
-RUN adduser --gecos 'onnxruntime Build User' --disabled-password $BUILD_USER --uid $BUILD_UID
-WORKDIR /home/$BUILD_USER
-USER $BUILD_USER
-
-ARG MINICONDA_PREFIX=/home/$BUILD_USER/miniconda3
-RUN if [ "$USE_CONDA" = true ] ; \
-    then MINICONDA=miniconda.sh && \
-    wget --no-verbose https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh -O $MINICONDA && \
-    chmod a+x $MINICONDA && \
-    ./$MINICONDA -b -p $MINICONDA_PREFIX && \
-    rm ./$MINICONDA && \
-    $MINICONDA_PREFIX/bin/conda clean --yes --all && \
-    $MINICONDA_PREFIX/bin/conda install -y python=$PYTHON_VERSION ; \
-    fi
-
-ENV PATH /home/$BUILD_USER/miniconda3/bin:$PATH
-
-# If USE_CONDA is true, use onnxruntimedev user to install python dependencies
-RUN if [ "$USE_CONDA" = true ] ; \
-    then /tmp/scripts/install_python_deps.sh -p $PYTHON_VERSION -d gpu $INSTALL_DEPS_EXTRA_ARGS -c ; \
-    fi
-
-WORKDIR /root
-USER root
-RUN rm -rf /tmp/scripts
-
-WORKDIR /home/$BUILD_USER
-USER $BUILD_USER