diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 89156f8..099b592 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -50,8 +50,40 @@ jobs:
       matrix:
         os: ['ubuntu-20.04']
         python-version: ['3.8', '3.9', '3.10', '3.11']
-        pytorch-version: ['2.3.0'] # Must be the most recent version that meets requirements-cuda.txt.
-        cuda-version: ['11.8', '12.1']
+        pytorch-version: ['2.2.2', '2.3.0']
+        cuda-version: ['12.2.2']
+        exclude:
+          # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
+          # Pytorch < 2.2 does not support Python 3.12
+          - pytorch-version: '1.12.1'
+            python-version: '3.12'
+          - pytorch-version: '1.13.1'
+            python-version: '3.12'
+          - pytorch-version: '2.0.1'
+            python-version: '3.12'
+          - pytorch-version: '2.1.2'
+            python-version: '3.12'
+          # Pytorch <= 1.12 does not support Python 3.11
+          - pytorch-version: '1.12.1'
+            python-version: '3.11'
+          # Pytorch >= 2.0 only supports Python >= 3.8
+          - pytorch-version: '2.0.1'
+            python-version: '3.7'
+          - pytorch-version: '2.1.2'
+            python-version: '3.7'
+          - pytorch-version: '2.2.2'
+            python-version: '3.7'
+          - pytorch-version: '2.3.0'
+            python-version: '3.7'
+          - pytorch-version: '2.4.0.dev20240407'
+            python-version: '3.7'
+          # Pytorch <= 2.0 only supports CUDA <= 11.8
+          - pytorch-version: '1.12.1'
+            cuda-version: '12.2.2'
+          - pytorch-version: '1.13.1'
+            cuda-version: '12.2.2'
+          - pytorch-version: '2.0.1'
+            cuda-version: '12.2.2'
 
     steps:
       - name: Checkout
@@ -80,8 +112,18 @@ jobs:
          echo "MATRIX_PYTHON_VERSION=$(echo ${{ matrix.python-version }} | awk -F \. 
{'print $1 $2'})" >> $GITHUB_ENV - name: Install CUDA ${{ matrix.cuda-version }} - run: | - bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }} + if: ${{ matrix.cuda-version != 'cpu' }} + uses: Jimver/cuda-toolkit@v0.2.14 + id: cuda-toolkit + with: + cuda: ${{ matrix.cuda-version }} + linux-local-args: '["--toolkit"]' + # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1 + # method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }} + method: 'network' + # We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions, + # not just nvcc + # sub-packages: '["nvcc"]' - name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }} run: | @@ -94,7 +136,7 @@ jobs: run: | bash -x .github/workflows/scripts/build.sh ${{ matrix.python-version }} ${{ matrix.cuda-version }} bdist_wheel wheel_name=$(ls dist/*whl | xargs -n 1 basename) - asset_name=${wheel_name//"linux"/"manylinux1"} + asset_name=${wheel_name} echo "wheel_name=${wheel_name}" >> $GITHUB_ENV echo "asset_name=${asset_name}" >> $GITHUB_ENV @@ -114,7 +156,7 @@ jobs: path: ./dist/${{ env.wheel_name }} publish_package: name: Publish Python 🐍 distribution 📦 to PyPI - needs: [release] + needs: [release, wheel] runs-on: ${{ matrix.os }} environment: name: pypi @@ -128,7 +170,7 @@ jobs: os: ['ubuntu-20.04'] python-version: ['3.10'] pytorch-version: ['2.3.0'] # Must be the most recent version that meets requirements-cuda.txt. - cuda-version: ['12.1'] + cuda-version: ['12.2.2'] steps: - name: Checkout @@ -150,9 +192,25 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Install CUDA ${{ matrix.cuda-version }} + - name: Set CUDA and PyTorch versions run: | - bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }} + echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. 
{'print $1 $2'})" >> $GITHUB_ENV + echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.pytorch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV + echo "MATRIX_PYTHON_VERSION=$(echo ${{ matrix.python-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV + + - name: Install CUDA ${{ matrix.cuda-version }} + if: ${{ matrix.cuda-version != 'cpu' }} + uses: Jimver/cuda-toolkit@v0.2.14 + id: cuda-toolkit + with: + cuda: ${{ matrix.cuda-version }} + linux-local-args: '["--toolkit"]' + # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1 + # method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }} + method: 'network' + # We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions, + # not just nvcc + # sub-packages: '["nvcc"]' - name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }} run: | diff --git a/.github/workflows/scripts/build.sh b/.github/workflows/scripts/build.sh index c491b92..f830091 100644 --- a/.github/workflows/scripts/build.sh +++ b/.github/workflows/scripts/build.sh @@ -20,5 +20,8 @@ if [ "$3" = sdist ]; then MINFERENCE_SKIP_CUDA_BUILD="TRUE" $python_executable setup.py $3 --dist-dir=dist else -MINFERENCE_LOCAL_VERSION=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION} MINFERENCE_FORCE_BUILD="TRUE" $python_executable setup.py $3 --dist-dir=dist +MINFERENCE_FORCE_BUILD="TRUE" $python_executable setup.py $3 --dist-dir=dist +tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION} +wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2") +ls dist/*whl |xargs -I {} mv {} dist/${wheel_name} fi diff --git a/.github/workflows/scripts/pytorch-install.sh b/.github/workflows/scripts/pytorch-install.sh index dfc1851..6e61ba8 100644 --- a/.github/workflows/scripts/pytorch-install.sh +++ b/.github/workflows/scripts/pytorch-install.sh @@ -4,11 +4,29 @@ python_executable=python$1 
pytorch_version=$2 cuda_version=$3 -# Install torch -$python_executable -m pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses setuptools && conda clean -ya -$python_executable -m pip install torch==${pytorch_version}+cu${cuda_version//./} --extra-index-url https://download.pytorch.org/whl/cu${cuda_version//./} - -# Print version information +pip install --upgrade pip +# If we don't install before installing Pytorch, we get error for torch 2.0.1 +# ERROR: Could not find a version that satisfies the requirement setuptools>=40.8.0 (from versions: none) +pip install lit +# For some reason torch 2.2.0 on python 3.12 errors saying no setuptools +pip install setuptools +# We want to figure out the CUDA version to download pytorch +# e.g. we can have system CUDA version being 11.7 but if torch==1.12 then we need to download the wheel from cu116 +# see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix +# This code is ugly, maybe there's a better way to do this. 
+echo $MATRIX_CUDA_VERSION +echo $MATRIX_TORCH_VERSION +export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \ +minv = {'1.12': 113, '1.13': 116, '2.0': 117, '2.1': 118, '2.2': 118, '2.3': 118, '2.4': 118}[env['MATRIX_TORCH_VERSION']]; \ +maxv = {'1.12': 116, '1.13': 117, '2.0': 118, '2.1': 121, '2.2': 121, '2.3': 121, '2.4': 121}[env['MATRIX_TORCH_VERSION']]; \ +print(max(min(int(env['MATRIX_CUDA_VERSION']), maxv), minv))" \ +) +if [[ ${pytorch_version} == *"dev"* ]]; then +pip install --no-cache-dir --pre torch==${pytorch_version} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION} +else +pip install --no-cache-dir torch==${pytorch_version} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION} +fi +nvcc --version $python_executable --version $python_executable -c "import torch; print('PyTorch:', torch.__version__)" $python_executable -c "import torch; print('CUDA:', torch.version.cuda)" diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..eb5ecfc --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +recursive-include csrc *.cu +recursive-include csrc *.cpp diff --git a/README.md b/README.md index ed3233b..bf6c99d 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@
| Project Page | - Paper | + Paper | HF Demo |
@@ -103,7 +103,7 @@ attn_output = block_sparse_attention(q, k, v, topk) attn_output = streaming_forward(q, k, v, init_num, local_window_num) ``` -For more details, please refer to our [Examples](https://github.com/microsoft/MInference/tree/main/examples) and [Experiments](https://github.com/microsoft/MInference/tree/main/experiments). +For more details, please refer to our [Examples](https://github.com/microsoft/MInference/tree/main/examples) and [Experiments](https://github.com/microsoft/MInference/tree/main/experiments). You can find more information about the dynamic compiler PIT in this [paper](https://dl.acm.org/doi/10.1145/3600006.3613139) and on [GitHub](https://github.com/microsoft/SparTA/tree/pit_artifact). ## FAQ diff --git a/minference/version.py b/minference/version.py index d8a5922..d7adf2a 100644 --- a/minference/version.py +++ b/minference/version.py @@ -5,7 +5,7 @@ _MINOR = "1" # On master and in a nightly release the patch should be one ahead of the last # released build. -_PATCH = "1" +_PATCH = "2" # This is mainly for nightly builds which have the suffix ".dev$DATE". See # https://semver.org/#is-v123-a-semantic-version for the semantics. 
_SUFFIX = "" diff --git a/setup.py b/setup.py index a312a21..2ee4f73 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,10 @@ # Licensed under The MIT License [see LICENSE for details] import os +import platform import subprocess +import sys +import urllib import torch from packaging.version import Version, parse @@ -61,8 +64,6 @@ # SKIP_CUDA_BUILD: Intended to allow CI to use a simple `python setup.py sdist` run to copy over raw files, without any cuda compilation FORCE_BUILD = os.getenv("MINFERENCE_FORCE_BUILD", "FALSE") == "TRUE" SKIP_CUDA_BUILD = os.getenv("MINFERENCE_SKIP_CUDA_BUILD", "FALSE") == "TRUE" -# For CI, we want the option to build with C++11 ABI since the nvcr images use C++11 ABI -FORCE_CXX11_ABI = os.getenv("MINFERENCE_FORCE_CXX11_ABI", "FALSE") == "TRUE" def check_if_cuda_home_none(global_option: str) -> None: @@ -96,11 +97,6 @@ def check_if_cuda_home_none(global_option: str) -> None: check_if_cuda_home_none("minference") - # HACK: The compiler flag -D_GLIBCXX_USE_CXX11_ABI is set to be the same as - # torch._C._GLIBCXX_USE_CXX11_ABI - # https://github.com/pytorch/pytorch/blob/8472c24e3b5b60150096486616d98b7bea01500b/torch/utils/cpp_extension.py#L920 - if FORCE_CXX11_ABI: - torch._C._GLIBCXX_USE_CXX11_ABI = True ext_modules.append( CUDAExtension( name="minference.cuda", @@ -123,6 +119,47 @@ def get_minference_version() -> str: return str(version) +def get_platform(): + """ + Returns the platform name as used in wheel filenames. 
+    """
+    if sys.platform.startswith("linux"):
+        return f"linux_{platform.uname().machine}"
+    elif sys.platform == "darwin":
+        mac_version = ".".join(platform.mac_ver()[0].split(".")[:2])
+        return f"macosx_{mac_version}_x86_64"
+    elif sys.platform == "win32":
+        return "win_amd64"
+    else:
+        raise ValueError("Unsupported platform: {}".format(sys.platform))
+
+
+def get_wheel_url():
+    # Determine the version numbers that will be used to determine the correct wheel
+    # We're using the CUDA version used to build torch, not the one currently installed
+    # _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME)
+    torch_cuda_version = parse(torch.version.cuda)
+    torch_version_raw = parse(torch.__version__)
+    # For CUDA 11, we only compile for CUDA 11.8, and for CUDA 12 we only compile for CUDA 12.2
+    # to save CI time. Minor versions should be compatible.
+    torch_cuda_version = (
+        parse("11.8") if torch_cuda_version.major == 11 else parse("12.2")
+    )
+    python_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
+    platform_name = get_platform()
+    minference_version = get_minference_version()
+    # cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}"
+    cuda_version = f"{torch_cuda_version.major}{torch_cuda_version.minor}"
+    torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}"
+
+    # Determine wheel URL based on CUDA version, torch version, python version and OS
+    wheel_filename = f"{PACKAGE_NAME}-{minference_version}+cu{cuda_version}torch{torch_version}-{python_version}-{python_version}-{platform_name}.whl"
+    wheel_url = BASE_WHEEL_URL.format(
+        tag_name=f"v{minference_version}", wheel_name=wheel_filename
+    )
+    return wheel_url, wheel_filename
+
+
 class CachedWheelsCommand(_bdist_wheel):
     """
     The CachedWheelsCommand plugs into the default bdist wheel, which is ran by pip when it cannot
@@ -132,7 +169,29 @@ class CachedWheelsCommand(_bdist_wheel):
     """
 
     def run(self):
-        return super().run()
+        if FORCE_BUILD:
+            return super().run()
+        wheel_url, wheel_filename = get_wheel_url()
+        print("Guessing wheel URL: ", wheel_url)
+        try:
+            urllib.request.urlretrieve(wheel_url, wheel_filename)
+
+            # Make the archive
+            # Lifted from the root wheel processing command
+            # https://github.com/pypa/wheel/blob/cf71108ff9f6ffc36978069acb28824b44ae028e/src/wheel/bdist_wheel.py#LL381C9-L381C85
+            if not os.path.exists(self.dist_dir):
+                os.makedirs(self.dist_dir)
+
+            impl_tag, abi_tag, plat_tag = self.get_tag()
+            archive_basename = f"{self.wheel_dist_name}-{impl_tag}-{abi_tag}-{plat_tag}"
+
+            wheel_path = os.path.join(self.dist_dir, archive_basename + ".whl")
+            print("Raw wheel path", wheel_path)
+            os.rename(wheel_filename, wheel_path)
+        except (urllib.error.HTTPError, urllib.error.URLError):
+            print("Precompiled wheel not found. Building from source...")
+            # If the wheel could not be downloaded, build from source
+            super().run()
 
 
 class NinjaBuildExtension(BuildExtension):