-
Notifications
You must be signed in to change notification settings - Fork 38
/
setup.py
76 lines (64 loc) · 2.04 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import warnings
from pathlib import Path

from setuptools import find_packages, setup
# PyTorch supplies the CUDA extension build helpers used below, so it must be
# importable before this script can do anything useful.
try:
    import torch
    from torch.utils.cpp_extension import BuildExtension, CUDAExtension
except ModuleNotFoundError as e:
    # Report the module that is actually missing (it may be something other
    # than `torch` itself) and keep the `name` attribute on the re-raised
    # error. When `torch` is the missing module the message is unchanged.
    missing = e.name or "torch"
    raise ModuleNotFoundError(
        f"No module named '{missing}'. `torch` is required to install `grouped_gemm`.",
        name=e.name,
    ) from e
# Decide which GPU architecture (if any) to pin explicitly for nvcc.
# `device_capability` is "" when no architecture is pinned, otherwise the
# concatenated major/minor compute capability of the current device.
if os.environ.get("TORCH_CUDA_ARCH_LIST"):
    # Let the PyTorch builder choose the device(s) to target.
    device_capability = ""
elif torch.cuda.is_available():
    # Target the current CUDA device, e.g. capability (8, 0) becomes "80".
    _major, _minor = torch.cuda.get_device_capability()
    device_capability = f"{_major}{_minor}"
else:
    # Querying the capability without a usable GPU raises, which would make
    # even metadata-only commands fail. Pin nothing and tell the user how to
    # select a target explicitly.
    warnings.warn(
        "No CUDA device is available and TORCH_CUDA_ARCH_LIST is not set; "
        "no GPU architecture will be pinned. Set TORCH_CUDA_ARCH_LIST to "
        "build `grouped_gemm` on a machine without a GPU."
    )
    device_capability = ""
# Absolute path of the directory that contains this script.
cwd = Path(os.path.abspath(__file__)).parent
# Flags passed to nvcc for the CUDA sources.
nvcc_flags = ["-std=c++17"]  # NOTE: CUTLASS requires c++17

# Only pin an architecture when one was detected; an empty string means the
# target selection is left to the PyTorch builder.
if device_capability:
    nvcc_flags.append(
        f"--generate-code=arch=compute_{device_capability},code=sm_{device_capability}"
    )

# Opt-in switch: GROUPED_GEMM_CUTLASS=1 defines the GROUPED_GEMM_CUTLASS macro
# for the CUDA sources.
if os.environ.get("GROUPED_GEMM_CUTLASS", "0") == "1":
    nvcc_flags.append("-DGROUPED_GEMM_CUTLASS")
# The single CUDA extension built by this package: the `grouped_gemm_backend`
# module compiled from the sources under csrc/, with the vendored CUTLASS
# headers and csrc/ itself on the include path.
ext_modules = [
    CUDAExtension(
        name="grouped_gemm_backend",
        sources=["csrc/ops.cu", "csrc/grouped_gemm.cu"],
        include_dirs=[f"{cwd}/third_party/cutlass/include/", f"{cwd}/csrc"],
        extra_compile_args={
            # Separate flag lists for the C++ compiler and for nvcc.
            "cxx": ["-fopenmp", "-fPIC", "-Wno-strict-aliasing"],
            "nvcc": nvcc_flags,
        },
    ),
]
# Optional dependency groups, keyed by extra name.
extra_deps = {
    "dev": [
        "absl-py",
    ],
}
# "all" is the union of every group defined above. It is computed before the
# key is inserted, so it never includes itself. It is stored as a sorted list
# (not a set) so the generated metadata is deterministic and matches the
# list-of-strings form expected for `extras_require`.
extra_deps["all"] = sorted({dep for deps in extra_deps.values() for dep in deps})
# Package metadata and build configuration for `grouped_gemm`.
setup(
    name="grouped_gemm",
    version="0.1.6",
    author="Trevor Gale",
    # NOTE(review): this value looks like a redaction placeholder rather than
    # a real address - confirm against the upstream repository.
    author_email="[email protected]",
    description="Grouped GEMM",
    # Read the README relative to this script (not the process working
    # directory), with an explicit encoding, and without leaking the handle.
    long_description=(cwd / "README.md").read_text(encoding="utf-8"),
    long_description_content_type='text/markdown',
    url="https://github.com/tgale06/grouped_gemm",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: BSD License",
        "Operating System :: Unix",
    ],
    packages=find_packages(),
    ext_modules=ext_modules,
    # Use PyTorch's build_ext command so the CUDA extension is compiled.
    cmdclass={"build_ext": BuildExtension},
    extras_require=extra_deps,
)