triton-inference-server · nvda-mesharma · Jul 23, 2024 · Jul 24, 2024 · Jul 24, 2024 · Jul 24, 2024
diff --git a/build.py b/build.py
@@ -49,7 +49,7 @@
 # information.
 #
 # The TRITON_VERSION file indicates the Triton version and
-# TRITON_VERSION_MAP is used to determine the corresponding container
+# DEFAULT_TRITON_VERSION_MAP is used to determine the corresponding container
 # version and upstream container version (upstream containers are
 # dependencies required by Triton). These versions may be overridden.
 
@@ -69,16 +69,16 @@
 # different versions are used then one backend or the other will
 # incorrectly load the other version of the openvino libraries.
 #
-TRITON_VERSION_MAP = {
-    "2.50.0dev": (
-        "24.09dev",  # triton container
-        "24.08",  # upstream container
-        "1.18.1",  # ORT
-        "2024.0.0",  # ORT OpenVINO
-        "2024.0.0",  # Standalone OpenVINO
-        "3.2.6",  # DCGM version
-        "0.5.3.post1",  # vLLM version
-    )
+
+DEFAULT_TRITON_VERSION_MAP = {
+    "release_version": "2.50.0dev",
+    "triton_container_version": "24.09dev",
+    "upstream_container_version": "24.08",
+    "ort_version": "1.18.1",
+    "ort_openvino_version": "2024.0.0",
+    "standalone_openvino_version": "2024.0.0",
+    "dcgm_version": "3.2.6",
+    "vllm_version": "0.5.3.post",
 }
 
 CORE_BACKENDS = ["ensemble"]
@@ -141,13 +141,11 @@ def target_machine():
 
 def container_versions(version, container_version, upstream_container_version):
     if container_version is None:
-        if version not in TRITON_VERSION_MAP:
-            fail("container version not known for {}".format(version))
-        container_version = TRITON_VERSION_MAP[version][0]
+        container_version = DEFAULT_TRITON_VERSION_MAP["triton_container_version"]
     if upstream_container_version is None:
-        if version not in TRITON_VERSION_MAP:
-            fail("upstream container version not known for {}".format(version))
-        upstream_container_version = TRITON_VERSION_MAP[version][1]
+        upstream_container_version = DEFAULT_TRITON_VERSION_MAP[
+            "upstream_container_version"
+        ]
     return container_version, upstream_container_version
 
 
@@ -664,7 +662,7 @@ def onnxruntime_cmake_args(images, library_paths):
             None,
             os.getenv("TRITON_BUILD_ONNXRUNTIME_VERSION")
             if os.getenv("TRITON_BUILD_ONNXRUNTIME_VERSION")
-            else TRITON_VERSION_MAP[FLAGS.version][2],
+            else DEFAULT_TRITON_VERSION_MAP["ort_version"],
         )
     ]
 
@@ -697,15 +695,15 @@ def onnxruntime_cmake_args(images, library_paths):
                     "onnxruntime",
                     "TRITON_BUILD_CONTAINER_VERSION",
                     None,
-                    TRITON_VERSION_MAP[FLAGS.version][1],
+                    DEFAULT_TRITON_VERSION_MAP["triton_container_version"],
                 )
             )
 
         # TODO: TPRD-333 OpenVino extension is not currently supported by our manylinux build
         if (
             (target_machine() != "aarch64")
             and (target_platform() != "rhel")
-            and (TRITON_VERSION_MAP[FLAGS.version][3] is not None)
+            and (DEFAULT_TRITON_VERSION_MAP["ort_openvino_version"] is not None)
         ):
             cargs.append(
                 cmake_backend_enable(
@@ -717,7 +715,7 @@ def onnxruntime_cmake_args(images, library_paths):
                     "onnxruntime",
                     "TRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION",
                     None,
-                    TRITON_VERSION_MAP[FLAGS.version][3],
+                    DEFAULT_TRITON_VERSION_MAP["ort_openvino_version"],
                 )
             )
 
@@ -740,7 +738,7 @@ def openvino_cmake_args():
             "openvino",
             "TRITON_BUILD_OPENVINO_VERSION",
             None,
-            TRITON_VERSION_MAP[FLAGS.version][4],
+            DEFAULT_TRITON_VERSION_MAP["standalone_openvino_version"],
         )
     ]
     if target_platform() == "windows":
@@ -763,7 +761,7 @@ def openvino_cmake_args():
                     "openvino",
                     "TRITON_BUILD_CONTAINER_VERSION",
                     None,
-                    TRITON_VERSION_MAP[FLAGS.version][1],
+                    DEFAULT_TRITON_VERSION_MAP["upstream_container_version"],
                 )
             )
     return cargs
@@ -818,7 +816,7 @@ def fil_cmake_args(images):
                 "fil",
                 "TRITON_BUILD_CONTAINER_VERSION",
                 None,
-                TRITON_VERSION_MAP[FLAGS.version][1],
+                DEFAULT_TRITON_VERSION_MAP["upstream_container_version"],
             )
         )
 
@@ -1218,7 +1216,6 @@ def create_dockerfile_linux(
 LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
 COPY --chown=1000:1000 docker/sagemaker/serve /usr/bin/.
 """
-
     # This is required since libcublasLt.so is not present during the build
     # stage of the PyTorch backend
     if not FLAGS.enable_gpu and ("pytorch" in backends):
@@ -1419,7 +1416,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
 # vLLM needed for vLLM backend
 RUN pip3 install vllm=={}
 """.format(
-            TRITON_VERSION_MAP[FLAGS.version][6]
+            FLAGS.vllm_version
         )
 
     if "dali" in backends:
@@ -1451,7 +1448,6 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
 """.format(
         argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_REF"]
     )
-
     return df
 
 
@@ -1583,9 +1579,7 @@ def create_build_dockerfiles(
         "TRITON_VERSION": FLAGS.version,
         "TRITON_CONTAINER_VERSION": FLAGS.container_version,
         "BASE_IMAGE": base_image,
-        "DCGM_VERSION": ""
-        if FLAGS.version is None or FLAGS.version not in TRITON_VERSION_MAP
-        else TRITON_VERSION_MAP[FLAGS.version][5],
+        "DCGM_VERSION": DEFAULT_TRITON_VERSION_MAP["dcgm_version"],
     }
 
     # For CPU-only image we need to copy some cuda libraries and dependencies
@@ -2400,12 +2394,6 @@ def enable_all():
         required=False,
         help="The Triton container version to build. If not specified the container version will be chosen automatically based on --version value.",
     )
-    parser.add_argument(
-        "--upstream-container-version",
-        type=str,
-        required=False,
-        help="The upstream container version to use for the build. If not specified the upstream container version will be chosen automatically based on --version value.",
-    )
     parser.add_argument(
         "--container-prebuild-command",
         type=str,
@@ -2553,6 +2541,54 @@ def enable_all():
         required=False,
         help="Override specified backend CMake argument in the build as <backend>:<name>=<value>. The argument is passed to CMake as -D<name>=<value>. This flag only impacts CMake arguments that are used by build.py. To unconditionally add a CMake argument to the backend build use --extra-backend-cmake-arg.",
     )
+    parser.add_argument(
+        "--release-version",
+        required=False,
+        default=DEFAULT_TRITON_VERSION_MAP["release_version"],
+        help="Provide any release version.",
+    )
+    parser.add_argument(
+        "--triton-container-version",
+        required=False,
+        default=DEFAULT_TRITON_VERSION_MAP["triton_container_version"],
+        help="Provide any released version of project.",
+    )
+    parser.add_argument(
+        "--upstream-container-version",
+        required=False,
+        default=DEFAULT_TRITON_VERSION_MAP["upstream_container_version"],
+        help="Provide any upstream container version of project.",
+    )
+    parser.add_argument(
+        "--ort-version",
+        required=False,
+        default=DEFAULT_TRITON_VERSION_MAP["ort_version"],
+        help="Provide any ORT version of project.",
+    )
+    parser.add_argument(
+        "--ort-openvino-version",
+        required=False,
+        default=DEFAULT_TRITON_VERSION_MAP["ort_openvino_version"],
+        help="Provide any openvino version of project.",
+    )
+    parser.add_argument(
+        "--standalone-openvino-version",
+        required=False,
+        default=DEFAULT_TRITON_VERSION_MAP["standalone_openvino_version"],
+        help="Provide any standalone openvino version of project.",
+    )
+    parser.add_argument(
+        "--dcgm-version",
+        required=False,
+        default=DEFAULT_TRITON_VERSION_MAP["dcgm_version"],
+        help="Provide any DCGM version of project.",
+    )
+    parser.add_argument(
+        "--vllm-version",
+        required=False,
+        default=DEFAULT_TRITON_VERSION_MAP["vllm_version"],
+        help="Provide any released version of vllm project.",
+    )
 
     FLAGS = parser.parse_args()
 
@@ -2610,8 +2646,7 @@ def enable_all():
     # Determine the versions. Start with Triton version, if --version
     # is not explicitly specified read from TRITON_VERSION file.
     if FLAGS.version is None:
-        with open(os.path.join(THIS_SCRIPT_DIR, "TRITON_VERSION"), "r") as vfile:
-            FLAGS.version = vfile.readline().strip()
+        FLAGS.version = DEFAULT_TRITON_VERSION_MAP["release_version"]
 
     if FLAGS.build_parallel is None:
         FLAGS.build_parallel = multiprocessing.cpu_count() * 2
@@ -2629,15 +2664,9 @@ def enable_all():
     # explicitly. For release branches we use the release branch as
     # the default, otherwise we use 'main'.
     default_repo_tag = "main"
-    cver = FLAGS.container_version
+    cver = FLAGS.upstream_container_version
     if cver is None:
-        if FLAGS.version not in TRITON_VERSION_MAP:
-            fail(
-                "unable to determine default repo-tag, container version not known for {}".format(
-                    FLAGS.version
-                )
-            )
-        cver = TRITON_VERSION_MAP[FLAGS.version][0]
+        cver = DEFAULT_TRITON_VERSION_MAP["triton_container_version"]
     if not cver.endswith("dev"):
         default_repo_tag = "r" + cver
     log("default repo-tag: {}".format(default_repo_tag))