Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

build: update build.py to pass vllm versions as input parameter and convert version map to dictionary #7500

Open
wants to merge 43 commits into
base: main
Choose a base branch
from
Open
Changes from 42 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
a1d5ce5
Update build.py
nvda-mesharma Jul 23, 2024
0b9698f
Update build.py
nvda-mesharma Jul 24, 2024
93d54ff
Update build.py
nvda-mesharma Jul 24, 2024
dac4dbd
Update build.py
nvda-mesharma Jul 24, 2024
89289b8
Update build.py
nvda-mesharma Jul 24, 2024
b2f9179
Update TRITON_VERSION
nvda-mesharma Jul 24, 2024
57cd37d
Update build.py
nvda-mesharma Jul 24, 2024
3ce9080
Update build.py
nvda-mesharma Jul 24, 2024
f74b8cb
Update build.py
nvda-mesharma Jul 24, 2024
3750c05
Update build.py
nvda-mesharma Jul 24, 2024
575b1fc
Update build.py
nvda-mesharma Jul 24, 2024
8bd4d7d
Update build.py
nvda-mesharma Jul 24, 2024
e70b17c
Update build.py
nvda-mesharma Jul 24, 2024
e8d5943
Update build.py
nvda-mesharma Jul 24, 2024
1f80660
Update build.py
nvda-mesharma Jul 24, 2024
afa9ad1
Update build.py
nvda-mesharma Jul 24, 2024
4258ae8
Update build.py
nvda-mesharma Jul 24, 2024
4df6b47
Update build.py
nvda-mesharma Jul 24, 2024
1efd283
Update build.py
nvda-mesharma Jul 24, 2024
c9b7f11
Update build.py
nvda-mesharma Jul 24, 2024
9f201f0
Update build.py
nvda-mesharma Jul 24, 2024
07d1f93
Update build.py
nvda-mesharma Jul 24, 2024
eb40600
Update build.py
nvda-mesharma Jul 24, 2024
df14142
Update build.py
nvda-mesharma Jul 24, 2024
8c349ec
Update build.py
nvda-mesharma Jul 24, 2024
d179819
Update build.py
nvda-mesharma Jul 24, 2024
fc008ce
Update build.py
nvda-mesharma Jul 24, 2024
1408822
Update build.py
nvda-mesharma Jul 24, 2024
4ec7274
Update build.py
nvda-mesharma Jul 24, 2024
c5b0e3c
Update build.py
nvda-mesharma Jul 25, 2024
7c958ce
Update build.py
nvda-mesharma Jul 25, 2024
37c9bef
Update build.py
nvda-mesharma Jul 25, 2024
d5219ae
Update TRITON_VERSION
nvda-mesharma Aug 5, 2024
545c2c2
Update build.py
nvda-mesharma Aug 5, 2024
cb9ec87
Update build.py
nvda-mesharma Aug 5, 2024
95e9f16
Merge branch 'main' into mesharma-ci
nvda-mesharma Aug 5, 2024
0070efd
Update build.py
nvda-mesharma Aug 5, 2024
6d531e0
Update build.py
nvda-mesharma Aug 5, 2024
771d115
Update build.py
nvda-mesharma Aug 6, 2024
850f2cf
Update build.py
nvda-mesharma Aug 6, 2024
7e56ec6
Merge branch 'main' into mesharma-ci
nvda-mesharma Sep 19, 2024
ce4de4f
Update build.py
nvda-mesharma Sep 19, 2024
15c8c29
Merge branch 'main' into mesharma-ci
nvda-mesharma Sep 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 75 additions & 46 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
# information.
#
# The TRITON_VERSION file indicates the Triton version and
# TRITON_VERSION_MAP is used to determine the corresponding container
# DEFAULT_TRITON_VERSION_MAP is used to determine the corresponding container
# version and upstream container version (upstream containers are
# dependencies required by Triton). These versions may be overridden.

Expand All @@ -69,16 +69,16 @@
# different versions are used then one backend or the other will
# incorrectly load the other version of the openvino libraries.
#
TRITON_VERSION_MAP = {
"2.50.0dev": (
"24.09dev", # triton container
"24.08", # upstream container
"1.18.1", # ORT
"2024.0.0", # ORT OpenVINO
"2024.0.0", # Standalone OpenVINO
"3.2.6", # DCGM version
"0.5.3.post1", # vLLM version
)

DEFAULT_TRITON_VERSION_MAP = {
mc-nv marked this conversation as resolved.
Show resolved Hide resolved
"release_version": "2.50.0dev",
"triton_container_version": "24.09dev",
"upstream_container_version": "24.08",
"ort_version": "1.18.1",
"ort_openvino_version": "2024.0.0",
"standalone_openvino_version": "2024.0.0",
"dcgm_version": "3.2.6",
"vllm_version": "0.5.3.post",
nvda-mesharma marked this conversation as resolved.
Show resolved Hide resolved
}

CORE_BACKENDS = ["ensemble"]
Expand Down Expand Up @@ -141,13 +141,11 @@ def target_machine():

def container_versions(version, container_version, upstream_container_version):
if container_version is None:
if version not in TRITON_VERSION_MAP:
fail("container version not known for {}".format(version))
container_version = TRITON_VERSION_MAP[version][0]
container_version = DEFAULT_TRITON_VERSION_MAP["triton_container_version"]
if upstream_container_version is None:
if version not in TRITON_VERSION_MAP:
fail("upstream container version not known for {}".format(version))
upstream_container_version = TRITON_VERSION_MAP[version][1]
upstream_container_version = DEFAULT_TRITON_VERSION_MAP[
"upstream_container_version"
]
return container_version, upstream_container_version


Expand Down Expand Up @@ -664,7 +662,7 @@ def onnxruntime_cmake_args(images, library_paths):
None,
os.getenv("TRITON_BUILD_ONNXRUNTIME_VERSION")
if os.getenv("TRITON_BUILD_ONNXRUNTIME_VERSION")
else TRITON_VERSION_MAP[FLAGS.version][2],
else DEFAULT_TRITON_VERSION_MAP["ort_version"],
)
]

Expand Down Expand Up @@ -697,15 +695,15 @@ def onnxruntime_cmake_args(images, library_paths):
"onnxruntime",
"TRITON_BUILD_CONTAINER_VERSION",
None,
TRITON_VERSION_MAP[FLAGS.version][1],
DEFAULT_TRITON_VERSION_MAP["triton_container_version"],
)
)

# TODO: TPRD-333 OpenVino extension is not currently supported by our manylinux build
if (
(target_machine() != "aarch64")
and (target_platform() != "rhel")
and (TRITON_VERSION_MAP[FLAGS.version][3] is not None)
and (DEFAULT_TRITON_VERSION_MAP["ort_openvino_version"] is not None)
):
cargs.append(
cmake_backend_enable(
Expand All @@ -717,7 +715,7 @@ def onnxruntime_cmake_args(images, library_paths):
"onnxruntime",
"TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION",
None,
TRITON_VERSION_MAP[FLAGS.version][3],
DEFAULT_TRITON_VERSION_MAP["ort_openvino_version"],
)
)

Expand All @@ -740,7 +738,7 @@ def openvino_cmake_args():
"openvino",
"TRITON_BUILD_OPENVINO_VERSION",
None,
TRITON_VERSION_MAP[FLAGS.version][4],
DEFAULT_TRITON_VERSION_MAP["standalone_openvino_version"],
)
]
if target_platform() == "windows":
Expand All @@ -763,7 +761,7 @@ def openvino_cmake_args():
"openvino",
"TRITON_BUILD_CONTAINER_VERSION",
None,
TRITON_VERSION_MAP[FLAGS.version][1],
DEFAULT_TRITON_VERSION_MAP["upstream_container_version"],
)
)
return cargs
Expand Down Expand Up @@ -818,7 +816,7 @@ def fil_cmake_args(images):
"fil",
"TRITON_BUILD_CONTAINER_VERSION",
None,
TRITON_VERSION_MAP[FLAGS.version][1],
DEFAULT_TRITON_VERSION_MAP["upstream_container_version"],
)
)

Expand Down Expand Up @@ -1218,7 +1216,6 @@ def create_dockerfile_linux(
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
COPY --chown=1000:1000 docker/sagemaker/serve /usr/bin/.
"""

# This is required since libcublasLt.so is not present during the build
# stage of the PyTorch backend
if not FLAGS.enable_gpu and ("pytorch" in backends):
Expand Down Expand Up @@ -1419,7 +1416,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
# vLLM needed for vLLM backend
RUN pip3 install vllm=={}
""".format(
TRITON_VERSION_MAP[FLAGS.version][6]
FLAGS.vllm_version
)

if "dali" in backends:
Expand Down Expand Up @@ -1451,7 +1448,6 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
""".format(
argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_REF"]
)

return df


Expand Down Expand Up @@ -1583,9 +1579,7 @@ def create_build_dockerfiles(
"TRITON_VERSION": FLAGS.version,
"TRITON_CONTAINER_VERSION": FLAGS.container_version,
"BASE_IMAGE": base_image,
"DCGM_VERSION": ""
if FLAGS.version is None or FLAGS.version not in TRITON_VERSION_MAP
else TRITON_VERSION_MAP[FLAGS.version][5],
"DCGM_VERSION": DEFAULT_TRITON_VERSION_MAP["dcgm_version"],
}

# For CPU-only image we need to copy some cuda libraries and dependencies
Expand Down Expand Up @@ -2400,12 +2394,6 @@ def enable_all():
required=False,
help="The Triton container version to build. If not specified the container version will be chosen automatically based on --version value.",
)
parser.add_argument(
"--upstream-container-version",
type=str,
required=False,
help="The upstream container version to use for the build. If not specified the upstream container version will be chosen automatically based on --version value.",
)
parser.add_argument(
"--container-prebuild-command",
type=str,
Expand Down Expand Up @@ -2553,6 +2541,54 @@ def enable_all():
required=False,
help="Override specified backend CMake argument in the build as <backend>:<name>=<value>. The argument is passed to CMake as -D<name>=<value>. This flag only impacts CMake arguments that are used by build.py. To unconditionally add a CMake argument to the backend build use --extra-backend-cmake-arg.",
)
parser.add_argument(
nvda-mesharma marked this conversation as resolved.
Show resolved Hide resolved
"--release-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["release_version"],
help="Provide any release version.",
)
parser.add_argument(
"--triton-container-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["triton_container_version"],
help="Provide any released version of project.",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this help message make sense? I don't think we release any dev versions.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you suggest a better help message?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this parameter indicates the current development version of the container, not necessarily a released version. Based on this, in my view, the parameter should be named something like `--triton-development-version`, and the help message would be "Provide the current development version of the container." I'm not sure how relevant it would be to provide past development versions, or how much confusion this would introduce for users. Do we really need to expose this parameter? @nv-kmcgill53 and @mc-nv, what do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any kind of logic could be added as an expansion of build.py.
My main point hasn't changed: build.py requires simplification, and any expansion needs to be reasonable.

I agree that the more moving parts we add, the more complexity it will bring over time.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK, this parameter can be used for both dev and prod versions of the Triton container. A better help text for it would be "Provide any development or released version of the project."

)
parser.add_argument(
"--upstream-container-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["upstream_container_version"],
help="Provide any upstream container version of project.",
)
parser.add_argument(
"--ort-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["ort_version"],
help="Provide any ORT version of project.",
)
parser.add_argument(
"--ort-openvino-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["ort_openvino_version"],
help="Provide any openvino version of project.",
)
parser.add_argument(
"--standalone-openvino-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["standalone_openvino_version"],
help="Provide any standalone openvino version of project.",
)
parser.add_argument(
"--dcgm-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["dcgm_version"],
help="Provide any DCGM version of project.",
)
parser.add_argument(
"--vllm-version",
required=False,
default=DEFAULT_TRITON_VERSION_MAP["vllm_version"],
help="Provide any released version of vllm project.",
)

FLAGS = parser.parse_args()

Expand Down Expand Up @@ -2610,8 +2646,7 @@ def enable_all():
# Determine the versions. Start with Triton version, if --version
# is not explicitly specified read from TRITON_VERSION file.
if FLAGS.version is None:
with open(os.path.join(THIS_SCRIPT_DIR, "TRITON_VERSION"), "r") as vfile:
FLAGS.version = vfile.readline().strip()
FLAGS.version = DEFAULT_TRITON_VERSION_MAP["release_version"]

if FLAGS.build_parallel is None:
FLAGS.build_parallel = multiprocessing.cpu_count() * 2
Expand All @@ -2629,15 +2664,9 @@ def enable_all():
# explicitly. For release branches we use the release branch as
# the default, otherwise we use 'main'.
default_repo_tag = "main"
cver = FLAGS.container_version
cver = FLAGS.upstream_container_version
if cver is None:
if FLAGS.version not in TRITON_VERSION_MAP:
fail(
"unable to determine default repo-tag, container version not known for {}".format(
FLAGS.version
)
)
cver = TRITON_VERSION_MAP[FLAGS.version][0]
cver = DEFAULT_TRITON_VERSION_MAP["triton_container_version"]
if not cver.endswith("dev"):
default_repo_tag = "r" + cver
log("default repo-tag: {}".format(default_repo_tag))
Expand Down
Loading