Commit

Merge branch 'dev_int8_conv' of https://github.com/Oneflow-Inc/oneflow into dev_int8_conv

hjchen2 committed Sep 3, 2023
2 parents 9031d5c + 418598e commit f31f2b1
Showing 9 changed files with 54 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/canary.yml
@@ -72,8 +72,8 @@ jobs:
clean-ccache: true
compute-platform: ${{ env.COMPUTE_PLATFORM }}
python-versions: |
3.7
3.8
3.10
- name: Upload wheelhouse
uses: ./.github/actions/upload_oss
with:
28 changes: 26 additions & 2 deletions .github/workflows/release.yml
@@ -48,7 +48,6 @@ jobs:
entries: |
cu118
cu117
cu116
cpu
- name: Get current date
id: date
@@ -80,7 +79,32 @@ jobs:
- uses: actions/checkout@v2
- uses: Oneflow-Inc/get-oneflow@add-nightly-date
name: Build ${{ matrix.entry }}
if: ${{ matrix.entry !='cpu' }}
if: ${{ matrix.entry =='cu118' }}
with:
cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/release/${{ matrix.entry }}.cmake
build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc9.sh
oneflow-src: ${{ env.ONEFLOW_SRC }}
oneflow-build-env: manylinux
wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }}
clear-wheelhouse-dir: true
self-hosted: true
compute-platform: ${{ matrix.entry }}
manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }}
docker-run-use-system-http-proxy: false
docker-run-use-lld: false
retry-failed-build: true
clean-ccache: true
nightly: ${{ github.event_name == 'schedule' || github.ref == 'refs/heads/release/add_nightly_date_index'}}
nightly-date: ${{ needs.generate-build-matrix.outputs.formatted_date }}
use-nvidia-wheels: ${{ matrix.entry !='cu112' }}
python-versions: |
3.7
3.8
3.9
3.10
- uses: Oneflow-Inc/get-oneflow@add-nightly-date
name: Build ${{ matrix.entry }}
if: ${{ matrix.entry !='cpu' && matrix.entry !='cu118' }}
with:
cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/release/cuda.cmake
build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc9.sh
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -934,7 +934,7 @@ jobs:
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: "View latest API docs preview at: https://staging.oneflow.info/${{ env.DOCS_PATH }}/"
body: "View latest API docs preview at: https://oneflow-staging.oss-cn-beijing.aliyuncs.com/${{ env.DOCS_PATH }}/"
})
- name: Doctest
timeout-minutes: 45
6 changes: 3 additions & 3 deletions README.md
@@ -1,6 +1,7 @@
# OneFlow

OneFlow is a deep learning framework designed to be **user-friendly, scalable and efficient**. With OneFlow, it is easy to:

- program a model with [**PyTorch-like API**](https://oneflow.readthedocs.io/en/master/)
- scale a model to n-dimensional-parallel execution with the [**Global Tensor**](https://docs.oneflow.org/en/master/cookies/global_tensor.html)
- accelerate/deploy a model with the [**Graph Compiler**](https://oneflow.readthedocs.io/en/master/graph.html).
@@ -60,7 +61,7 @@ OneFlow is a deep learning framework designed to be **user-friendly, scalable an
- To install nightly release of OneFlow with CUDA support:

```bash
python3 -m pip install --pre oneflow -f https://staging.oneflow.info/branch/master/cu117
python3 -m pip install --pre oneflow -f https://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/master/cu117
```

- To install other available builds for different variants:
@@ -71,13 +72,12 @@ OneFlow is a deep learning framework designed to be **user-friendly, scalable an
```
- Nightly
```
python3 -m pip install --pre oneflow -f https://staging.oneflow.info/branch/master/[PLATFORM]
python3 -m pip install --pre oneflow -f https://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/master/[PLATFORM]
```
- All available `[PLATFORM]`:
| Platform |CUDA Driver Version| Supported GPUs |
|---|---|---|
| cu117 | >= 450.80.02 | GTX 10xx, RTX 20xx, A100, RTX 30xx |
| cu102 | >= 440.33 | GTX 10xx, RTX 20xx |
| cpu | N/A | N/A |
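
  For illustration only (not part of the diff): substituting one of the table's `[PLATFORM]` values into the nightly command above gives, for the CPU build, the same find-links URL that docs/requirements.txt uses later in this commit.
  ```bash
  # Example: install the nightly CPU-only wheel, i.e. [PLATFORM] replaced by cpu
  python3 -m pip install --pre oneflow \
      -f https://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/master/cpu
  ```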

- If you are in China, you could run this to have pip download packages from domestic mirror of pypi:
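
  The command itself is collapsed in this diff view; as a hedged sketch only (an assumption, not necessarily what the README shows), pip can be pointed at the Tsinghua mirror that also appears as `PIP_INDEX_MIRROR` in the new cu118.cmake below.
  ```bash
  # Hypothetical example: switch pip's default index to a domestic mirror.
  # The mirror URL matches PIP_INDEX_MIRROR in cmake/caches/ci/release/cu118.cmake;
  # the command the README actually shows is not included in this diff excerpt.
  python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
  ```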
17 changes: 17 additions & 0 deletions cmake/caches/ci/release/cu118.cmake
@@ -0,0 +1,17 @@
set(BUILD_CUDA YES CACHE BOOL "")
set(BUILD_GIT_VERSION YES CACHE BOOL "")
set(BUILD_TESTING OFF CACHE BOOL "")
set(BUILD_RDMA YES CACHE BOOL "")
set(TREAT_WARNINGS_AS_ERRORS YES CACHE BOOL "")
set(THIRD_PARTY_MIRROR aliyun CACHE STRING "")
set(PIP_INDEX_MIRROR "https://pypi.tuna.tsinghua.edu.cn/simple" CACHE STRING "")
set(CMAKE_BUILD_TYPE Release CACHE STRING "")
set(CMAKE_GENERATOR Ninja CACHE STRING "")
set(CMAKE_CUDA_ARCHITECTURES "70-real;80-real;86-real;89-real;90-real" CACHE STRING "")
set(CUDNN_STATIC OFF CACHE BOOL "")
set(WITH_MLIR ON CACHE BOOL "")
set(BUILD_CPP_API OFF CACHE BOOL "")
set(CUDA_NVCC_THREADS_NUMBER 2 CACHE STRING "")
set(CMAKE_C_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_CXX_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_CUDA_COMPILER_LAUNCHER ccache CACHE STRING "")
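
For orientation (not part of the diff): a cache file like this is consumed through CMake's `-C` initial-cache flag. A minimal sketch, assuming a local checkout and an out-of-source `build` directory; in CI the file is actually driven through the `cmake-init-cache` input and `ci/manylinux/build-gcc9.sh` shown above, so the real invocation may differ.
```bash
# Minimal sketch: seed the CMake cache from the new cu118 release cache file,
# then build. The real release build goes through ci/manylinux/build-gcc9.sh.
cmake -S . -B build -C cmake/caches/ci/release/cu118.cmake
cmake --build build
```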
6 changes: 3 additions & 3 deletions cmake/third_party/cutlass-extension.cmake
@@ -15,7 +15,8 @@ if(WITH_CUTLASS_EXTENSION)
set(CUTLASS_EXTENSION_INCLUDE_DIR ${CUTLASS_EXTENSION_INSTALL_DIR}/include CACHE PATH "" FORCE)
set(CUTLASS_EXTENSION_LIBRARY_DIR ${CUTLASS_EXTENSION_INSTALL_DIR}/lib CACHE PATH "" FORCE)
set(CUTLASS_EXTENSION_LIBRARIES ${CUTLASS_EXTENSION_LIBRARY_DIR}/libcutlass_extension.so)
set(CUTLASS_EXTENSION_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cutlass-extension/src/cutlass-extension/)
set(CUTLASS_EXTENSION_SOURCE_DIR
${CMAKE_CURRENT_BINARY_DIR}/cutlass-extension/src/cutlass-extension/)
set(CUTLASS_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cutlass/src/cutlass)

foreach(arch ${CUDA_REAL_ARCHS_LIST})
@@ -53,8 +54,7 @@ if(WITH_CUTLASS_EXTENSION)
-DCUTLASS_LIBRARY_DEBUG_POSTFIX:STRING=
-DCUTLASS_NVCC_EMBED_PTX:BOOL=OFF
-DCUTLASS_DIR:STRING=${CUTLASS_SOURCE_DIR}
DEPENDS cutlass
)
DEPENDS cutlass)

endif(THIRD_PARTY)
endif(WITH_CUTLASS_EXTENSION)
2 changes: 1 addition & 1 deletion docs/requirements.txt
@@ -5,5 +5,5 @@ furo==2021.4.11b34
sphinx-copybutton==0.5.0
# above are dev dependencies
--pre
--find-links https://staging.oneflow.info/branch/master/cpu
--find-links https://oneflow-staging.oss-cn-beijing.aliyuncs.com/branch/master/cpu
oneflow
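
As a usage note (an illustration, not part of the diff): these docs dependencies, including the pre-release CPU wheel resolved through the new find-links URL, would typically be installed by pointing pip at this file.
```bash
# Install the docs build dependencies; the --pre/--find-links lines inside
# docs/requirements.txt pull a nightly CPU build of oneflow from the new OSS bucket.
python3 -m pip install -r docs/requirements.txt
```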
3 changes: 2 additions & 1 deletion oneflow/core/functional/impl/nn_functor.cpp
@@ -5328,7 +5328,8 @@ class GroupedMatmulFunctor {
Maybe<TensorTuple> operator()(const TensorTuple& xs, const TensorTuple& weights) const {
const int64_t input_size = xs.size();
const int64_t weight_size = weights.size();
CHECK_LT_OR_RETURN(input_size, kMaxInputCount);
CHECK_LT_OR_RETURN(input_size, kMaxInputCount)
<< Error::RuntimeError() << "input_size should not be greater than 128";
CHECK_GE_OR_RETURN(input_size, 1)
<< Error::RuntimeError() << "The number of xs should be greater equal than 1.";
CHECK_EQ_OR_RETURN(weight_size, input_size)
1 change: 0 additions & 1 deletion oneflow/ir/lib/OneFlow/Passes.cpp
@@ -944,7 +944,6 @@ struct KernelLaunchPattern : public mlir::OpRewritePattern<oneflow::Job> {

int name_index = 0;
std::vector<Operation*> current_wrap_ops;
op->dump();
for (auto op_it = ops.begin(); op_it != ops.end(); ++op_it) {
auto current_op = &(*op_it);
if (!IsPackagable(current_op)) {
