-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Dev] Add nccl and nccl-test build example
- Loading branch information
Showing
13 changed files
with
380 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
package(default_visibility = ["//visibility:public"]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
## bazel-skylib | ||
See: https://github.com/bazelbuild/bazel-skylib |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
"""Loads the bazel_skylib library""" | ||
|
||
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") | ||
|
||
def repo():
    """Registers the `bazel_skylib` external repository from a pinned GitHub archive."""
    archive_urls = ["https://github.com/bazelbuild/bazel-skylib/archive/de3035d605b4c89a62d6da060188e4ab0c5034b9.tar.gz"]

    http_archive(
        name = "bazel_skylib",
        urls = archive_urls,
        # The prefix carries the same commit hash as the archive URL above.
        strip_prefix = "bazel-skylib-de3035d605b4c89a62d6da060188e4ab0c5034b9",
        # Integrity pin for the downloaded tarball.
        sha256 = "2e6fa9a61db799266072df115a719a14a9af0e8a630b1f770ef0bd757e68cd71",
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
package(default_visibility = ["//visibility:public"]) | ||
|
||
# Re-exports the `nccl_shared` target of the external @nccl repository under this package.
filegroup(
    name = "nccl_shared",
    srcs = ["@nccl//:nccl_shared"],
)
|
||
# Groups every `*_perf` benchmark binary exported by the external @nccl-tests repository,
# so that all of them can be requested through a single label.
filegroup(
    name = "perf_binaries",
    srcs = [
        "@nccl-tests//:all_gather_perf",
        "@nccl-tests//:all_reduce_perf",
        "@nccl-tests//:alltoall_perf",
        "@nccl-tests//:broadcast_perf",
        "@nccl-tests//:gather_perf",
        "@nccl-tests//:hypercube_perf",
        "@nccl-tests//:reduce_perf",
        "@nccl-tests//:reduce_scatter_perf",
        "@nccl-tests//:scatter_perf",
        "@nccl-tests//:sendrecv_perf",
    ],
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
## NCCL | ||
See: https://github.com/bazel-contrib/rules_cuda/blob/main/examples/WORKSPACE.bazel |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
load("@rules_cuda//cuda:defs.bzl", "cuda_library") | ||
load("@ai_playground//third_party/nccl:nccl-tests.bzl", "nccl_tests_binary") | ||
|
||
# NOTE: all paths in this file relative to @nccl-tests repo root. | ||
|
||
# Header-only target: exposes the headers directly under `src/` and puts `src`
# on the include path of every dependent target.
cc_library(
    name = "nccl_tests_include",
    includes = ["src"],
    hdrs = glob(["src/*.h"]),
)
|
||
# CUDA sources shared by every benchmark; all headers in the repository are
# listed as private sources so they are available during compilation.
cuda_library(
    name = "common_cuda",
    srcs = [
        "src/common.cu",
        "verifiable/verifiable.cu",
    ] + glob(["**/*.h"]),
    deps = [
        ":nccl_tests_include",
        "@nccl",
    ],
)
|
||
# Host-side (plain C++) timer helper shared by every benchmark binary.
cc_library(
    name = "common_cc",
    hdrs = ["src/timer.h"],
    srcs = ["src/timer.cc"],
    # Keep every object file of this library in the final link.
    alwayslink = 1,
)
|
||
# :common_cuda, :common_cc and @nccl//:nccl_shared are implicitly hardcoded in `nccl_tests_binary`.
# Each entry below instantiates the macro once, in the listed order.
[
    nccl_tests_binary(name = collective)
    for collective in [
        "all_reduce",
        "all_gather",
        "broadcast",
        "reduce_scatter",
        "reduce",
        "alltoall",
        "scatter",
        "gather",
        "sendrecv",
        "hypercube",
    ]
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
load("@rules_cuda//cuda:defs.bzl", "cuda_library") | ||
|
||
# NOTE: all paths in this file relative to @nccl-tests repo root. | ||
|
||
def nccl_tests_binary(name, cc_deps = [], cuda_deps = []):
    """Declares one nccl-tests benchmark: a CUDA library plus the `<name>_perf` binary.

    Two targets are created in the calling package:
      * `<name>`      - a `cuda_library` built from `src/<name>.cu`
      * `<name>_perf` - a publicly visible `cc_binary` linking that library

    Args:
        name: benchmark name; selects `src/<name>.cu` and names both targets.
        cc_deps: extra dependencies appended to the `cc_binary`
            (after `:common_cc` and `:<name>`).
        cuda_deps: extra dependencies appended to the `cuda_library`
            (after `@nccl//:nccl_shared` and `:common_cuda`).
    """
    cuda_library(
        name = name,
        srcs = ["src/{}.cu".format(name)],
        # Previously `cuda_deps` was accepted but silently dropped; honor it here.
        deps = [
            "@nccl//:nccl_shared",
            ":common_cuda",
        ] + cuda_deps,
        # Keep every object file of this library in the final link.
        alwayslink = 1,
    )

    bin_name = name + "_perf"
    native.cc_binary(
        name = bin_name,
        # Previously `cc_deps` was accepted but silently dropped; honor it here.
        deps = [":common_cc", ":" + name] + cc_deps,
        visibility = ["//visibility:public"],
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
load("@bazel_skylib//rules:expand_template.bzl", "expand_template") | ||
load("@rules_cuda//cuda:defs.bzl", "cuda_library", "cuda_objects") | ||
load("@ai_playground//third_party/nccl:nccl.bzl", "if_cuda_clang", "if_cuda_nvcc", "nccl_primitive") | ||
|
||
# NOTE: all paths in this file relative to @nccl repo root. | ||
|
||
# Generates `src/include/nccl.h` from the upstream template by filling in the
# version placeholders.
expand_template(
    name = "nccl_h",
    template = "src/nccl.h.in",
    out = "src/include/nccl.h",
    substitutions = {
        "${nccl:Major}": "2",
        "${nccl:Minor}": "18",
        "${nccl:Patch}": "3",
        "${nccl:Suffix}": "",
        # NCCL_VERSION(X,Y,Z) ((X) * 10000 + (Y) * 100 + (Z))
        # i.e. 2 * 10000 + 18 * 100 + 3
        "${nccl:Version}": "21803",
    },
)
|
||
# Public and internal NCCL headers, including the generated `nccl.h`.
cc_library(
    name = "nccl_include",
    hdrs = [":nccl_h"] + glob([
        "src/include/**/*.h",
        "src/include/**/*.hpp",
    ]),
    includes = [
        # This adds both nccl/src/include (in the repo) and
        # bazel-out/<compilation_mode>/bin/nccl/src/include to the include paths.
        # The latter is exactly where the expand_template above writes nccl.h.
        "src/include",
    ],
)
|
||
# Device code shared by all collective primitives.
cuda_objects(
    name = "nccl_device_common",
    srcs = [
        "src/collectives/device/functions.cu",
        "src/collectives/device/onerank_reduce.cu",
    ] + glob(["src/collectives/device/**/*.h"]),
    # The flag is only passed when nvcc is the CUDA compiler.
    copts = if_cuda_nvcc(["--extended-lambda"]),
    # Cap register usage per thread at 96.
    ptxasopts = ["-maxrregcount=96"],
    deps = [":nccl_include"],
)
|
||
# Must be set to False manually when building with a CUDA version lower than 11.
USE_BF16 = True
|
||
# Files handed to every `nccl_primitive` through its `hdrs` attribute.
# NOTE(review): `gen_rules.sh` is a script rather than a header - presumably
# needed by `nccl_primitive`; confirm against nccl.bzl.
filegroup(
    name = "collective_dev_hdrs",
    srcs = [
        "src/collectives/device/all_gather.h",
        "src/collectives/device/all_reduce.h",
        "src/collectives/device/broadcast.h",
        "src/collectives/device/common.h",
        "src/collectives/device/common_kernel.h",
        "src/collectives/device/gen_rules.sh",
        "src/collectives/device/op128.h",
        "src/collectives/device/primitives.h",
        "src/collectives/device/prims_ll.h",
        "src/collectives/device/prims_ll128.h",
        "src/collectives/device/prims_simple.h",
        "src/collectives/device/reduce.h",
        "src/collectives/device/reduce_kernel.h",
        "src/collectives/device/reduce_scatter.h",
        "src/collectives/device/sendrecv.h",
    ],
)
|
||
# cuda_objects for each type of primitive.
# Every primitive shares the same headers, bf16 switch and common device code,
# so only the name varies between instantiations.
[
    nccl_primitive(
        name = primitive,
        hdrs = ["collective_dev_hdrs"],
        use_bf16 = USE_BF16,
        deps = [":nccl_device_common"],
    )
    for primitive in [
        "all_gather",
        "all_reduce",
        "broadcast",
        "reduce",
        "reduce_scatter",
        "sendrecv",
    ]
]
|
||
# Device link: combines the per-primitive objects into one library with
# relocatable device code enabled (`rdc = 1`).
cuda_library(
    name = "collectives",
    deps = [
        ":all_gather",
        ":all_reduce",
        ":broadcast",
        ":reduce",
        ":reduce_scatter",
        ":sendrecv",
    ],
    rdc = 1,
    # Keep every object file of this library in the final link.
    alwayslink = 1,
)
|
||
# The NCCL library itself, built as a shared object (`linkshared = 1`) from the
# host-side sources plus the device-linked collectives.
cc_binary(
    name = "nccl",
    srcs = glob(
        include = [
            "src/*.cc",
            "src/collectives/*.cc",
            "src/graph/*.cc",
            "src/graph/*.h",
            "src/misc/*.cc",
            "src/transport/*.cc",
        ],
        exclude = [
            # https://github.com/NVIDIA/nccl/issues/658
            "src/enhcompat.cc",
        ],
    ),
    # The flag is only passed when clang is the CUDA compiler.
    copts = if_cuda_clang(["-xcu"]),
    deps = [
        ":collectives",
        ":nccl_include",
        "@rules_cuda//cuda:runtime",
    ],
    linkshared = 1,
    linkstatic = 1,
    visibility = ["//visibility:public"],
)
|
||
# A `cc_binary` cannot be depended on as a library, so the shared object built
# by `:nccl` is wrapped in a `cc_import` that downstream targets can link against.
# See https://groups.google.com/g/bazel-discuss/c/RtbidPdVFyU/m/TsUDOVHIAwAJ
cc_import(
    name = "nccl_shared",
    visibility = ["//visibility:public"],
    shared_library = ":nccl",
)
Oops, something went wrong.