From fd60efa7320428594a1769c262420843c34482dc Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Wed, 18 Dec 2024 21:54:36 -0800 Subject: [PATCH] aotriton max process count fix patch aotriton to check MAX_JOBS environment variable in aotriton v2src/CMakeLists.txt and use that for limiting the amount of python processes allowed to build and compress hsaco files. This fixes the out of memory problem in cases where the computer has a lot of CPUs compared to the amount of memory. Note that this fix only works when using Ninja. (the JOB_POOL option of cmake's add_custom_command is only honored by the Ninja generators) The MAX_JOBS environment variable and the forced use of ninja for building aotriton are set in the binfo file. fixes: https://github.com/lamikr/rocm_sdk_builder/issues/178 Signed-off-by: Mika Laitio --- binfo/core/038_aotriton.binfo | 21 +++++ .../0001-pass-extra-build-options.patch | 6 +- ...add-gpus-with-gfx-name-to-build-list.patch | 6 +- ...-which-allocates-twice-the-number-of.patch | 6 +- ...printout-aotriton-tuning-db-gpu-info.patch | 6 +- ...-add-gfx906-908-40-41-and-gfx1150-51.patch | 6 +- ...6-separate-each-gpu-files-to-own-dir.patch | 6 +- .../0007-add-preconfig_rocm.sh-script.patch | 6 +- ...rocess-compile-count-for-hsaco-files.patch | 88 +++++++++++++++++++ 9 files changed, 130 insertions(+), 21 deletions(-) create mode 100644 patches/rocm-6.1.2/aotriton/0008-max-python-process-compile-count-for-hsaco-files.patch diff --git a/binfo/core/038_aotriton.binfo b/binfo/core/038_aotriton.binfo index 8d9ab28..fa8f6b7 100755 --- a/binfo/core/038_aotriton.binfo +++ b/binfo/core/038_aotriton.binfo @@ -47,9 +47,19 @@ BINFO_APP_PRE_CONFIG_CMD_ARRAY=( "./preconfig_rocm.sh" ) +# aotriton has been patched to check MAX_JOBS environment variable +# in aotriton v2src/CMakeLists.txt and use that for limiting the +# amount of python processes allowed to build and compress hsaco files. +# This fixes the out of memory problem on cases where computer has lot of +# CPUs compared to amount of memory. 
+# Note that this fix only works when using Ninja. +# (limitation of the JOB_POOL option of cmake's add_custom_command) +export MAX_JOBS=${BUILD_CPU_COUNT_DEFAULT} + BINFO_APP_CMAKE_CFG="-DCMAKE_INSTALL_PREFIX=${INSTALL_DIR_PREFIX_SDK_ROOT}" BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DCMAKE_PREFIX_PATH=${INSTALL_DIR_PREFIX_SDK_ROOT}/lib64/cmake;${INSTALL_DIR_PREFIX_SDK_ROOT}/lib/cmake" BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} ${CFG_TEMP1}" +BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -GNinja" BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DCMAKE_C_COMPILER=${SDK_C_COMPILER_HIPCC}" BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DCMAKE_CXX_COMPILER=${SDK_CXX_COMPILER_HIPCC}" BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DROCM_PATH=${INSTALL_DIR_PREFIX_SDK_ROOT}" @@ -65,3 +75,14 @@ BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DAMDGCN_ENABLE_DUMP=1" # separate build needed to do a backend mode as if this is enabled, other part of build is skipped??? #BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DHIP_BACKEND_MODE=1" BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} ${BINFO_APP_SRC_DIR}" + +BINFO_APP_BUILD_CMD_ARRAY=( + "cd ${BINFO_APP_BUILD_DIR}" + "ninja" +) + +BINFO_APP_INSTALL_CMD_ARRAY=( + "cd ${BINFO_APP_BUILD_DIR}" + "ninja install" +) + diff --git a/patches/rocm-6.1.2/aotriton/0001-pass-extra-build-options.patch b/patches/rocm-6.1.2/aotriton/0001-pass-extra-build-options.patch index 8bd4330..41bf30a 100644 --- a/patches/rocm-6.1.2/aotriton/0001-pass-extra-build-options.patch +++ b/patches/rocm-6.1.2/aotriton/0001-pass-extra-build-options.patch @@ -1,7 +1,7 @@ -From d9d6a93e0c5f131130bd8cae6bfd455b331bdf09 Mon Sep 17 00:00:00 2001 +From 0d30e41b904e1027e559ebd54640467bd0226163 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Mon, 29 Jul 2024 00:01:32 -0700 -Subject: [PATCH 1/7] pass extra build options +Subject: [PATCH 1/8] pass extra build options Signed-off-by: Mika Laitio --- @@ -227,5 +227,5 @@ index ca7a4b5..5c7d2a2 100644 WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_PARENT_DIR}" ) -- 
-2.41.1 +2.43.0 diff --git a/patches/rocm-6.1.2/aotriton/0002-add-gpus-with-gfx-name-to-build-list.patch b/patches/rocm-6.1.2/aotriton/0002-add-gpus-with-gfx-name-to-build-list.patch index a910e0d..5e7a093 100644 --- a/patches/rocm-6.1.2/aotriton/0002-add-gpus-with-gfx-name-to-build-list.patch +++ b/patches/rocm-6.1.2/aotriton/0002-add-gpus-with-gfx-name-to-build-list.patch @@ -1,7 +1,7 @@ -From 3199757072a6291dd3276e315084a6fc32f07529 Mon Sep 17 00:00:00 2001 +From 13cb55b7856490058906ea08610edad6e4e8bbda Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Mon, 29 Jul 2024 12:48:47 -0700 -Subject: [PATCH 2/7] add gpus with gfx-name to build list +Subject: [PATCH 2/8] add gpus with gfx-name to build list add all rocm sdk gpu's to build list and use the gfx* name for them instead @@ -155,5 +155,5 @@ index 2c47e1d..2a6128b 100644 GpuArch -- -2.41.1 +2.43.0 diff --git a/patches/rocm-6.1.2/aotriton/0003-changed-the-line-which-allocates-twice-the-number-of.patch b/patches/rocm-6.1.2/aotriton/0003-changed-the-line-which-allocates-twice-the-number-of.patch index fbef5fb..9efed26 100644 --- a/patches/rocm-6.1.2/aotriton/0003-changed-the-line-which-allocates-twice-the-number-of.patch +++ b/patches/rocm-6.1.2/aotriton/0003-changed-the-line-which-allocates-twice-the-number-of.patch @@ -1,7 +1,7 @@ -From d01cd7fdad1e40b47505ba683d0703336e6be985 Mon Sep 17 00:00:00 2001 +From 02b1281ee898665efe5d713d8550016e5bcd7488 Mon Sep 17 00:00:00 2001 From: mritunjaymusale Date: Sun, 2 Jun 2024 18:00:59 +0530 -Subject: [PATCH 3/7] changed the line which allocates twice the number of max +Subject: [PATCH 3/8] changed the line which allocates twice the number of max cpu threads to triton build Signed-off-by: mritunjaymusale @@ -23,5 +23,5 @@ index 390ee8b..4030cad 100644 if check_env_flag("TRITON_BUILD_WITH_CLANG_LLD"): -- -2.41.1 +2.43.0 diff --git a/patches/rocm-6.1.2/aotriton/0004-printout-aotriton-tuning-db-gpu-info.patch 
b/patches/rocm-6.1.2/aotriton/0004-printout-aotriton-tuning-db-gpu-info.patch index d48c97d..a5059a6 100644 --- a/patches/rocm-6.1.2/aotriton/0004-printout-aotriton-tuning-db-gpu-info.patch +++ b/patches/rocm-6.1.2/aotriton/0004-printout-aotriton-tuning-db-gpu-info.patch @@ -1,7 +1,7 @@ -From 020aceb11f576ffb00d1131320acbc163f9519b1 Mon Sep 17 00:00:00 2001 +From 3bbccabde1409bbd2334d08d80e4fcdb56e942fc Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Tue, 6 Aug 2024 17:15:39 -0700 -Subject: [PATCH 4/7] printout aotriton tuning db gpu info +Subject: [PATCH 4/8] printout aotriton tuning db gpu info printout information if tuning data was available in database for kernel when build for certain gpu @@ -34,5 +34,5 @@ index 14ef241..fd1dc59 100644 @property -- -2.41.1 +2.43.0 diff --git a/patches/rocm-6.1.2/aotriton/0005-add-gfx906-908-40-41-and-gfx1150-51.patch b/patches/rocm-6.1.2/aotriton/0005-add-gfx906-908-40-41-and-gfx1150-51.patch index 6bace27..0215cf0 100644 --- a/patches/rocm-6.1.2/aotriton/0005-add-gfx906-908-40-41-and-gfx1150-51.patch +++ b/patches/rocm-6.1.2/aotriton/0005-add-gfx906-908-40-41-and-gfx1150-51.patch @@ -1,7 +1,7 @@ -From cd140c9489c60d3e1aba275c363e33f14aa5e30d Mon Sep 17 00:00:00 2001 +From 74b1c111ca07cbc02791ff63b6cf4ef20db9b52e Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Tue, 22 Oct 2024 21:55:34 -0700 -Subject: [PATCH 5/7] add gfx906/908/40/41 and gfx1150/51 +Subject: [PATCH 5/8] add gfx906/908/40/41 and gfx1150/51 Signed-off-by: Mika Laitio --- @@ -156,5 +156,5 @@ index 2a6128b..e54614a 100644 GpuArch -- -2.41.1 +2.43.0 diff --git a/patches/rocm-6.1.2/aotriton/0006-separate-each-gpu-files-to-own-dir.patch b/patches/rocm-6.1.2/aotriton/0006-separate-each-gpu-files-to-own-dir.patch index 36caec3..a3fe060 100644 --- a/patches/rocm-6.1.2/aotriton/0006-separate-each-gpu-files-to-own-dir.patch +++ b/patches/rocm-6.1.2/aotriton/0006-separate-each-gpu-files-to-own-dir.patch @@ -1,7 +1,7 @@ -From a68395374e6c9d1b127e6cbaaa6321fee971a594 Mon 
Sep 17 00:00:00 2001 +From f30bb39e4a3ffe1aae842eaf35118b21e8ef05fe Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Tue, 5 Nov 2024 00:13:12 -0800 -Subject: [PATCH 6/7] separate each gpu files to own dir +Subject: [PATCH 6/8] separate each gpu files to own dir - should help to avoid errors on situations where there are too many files in single @@ -114,5 +114,5 @@ index ec1e39f..a085ea4 100644 #include #include -- -2.41.1 +2.43.0 diff --git a/patches/rocm-6.1.2/aotriton/0007-add-preconfig_rocm.sh-script.patch b/patches/rocm-6.1.2/aotriton/0007-add-preconfig_rocm.sh-script.patch index 4cddc9e..2a909db 100644 --- a/patches/rocm-6.1.2/aotriton/0007-add-preconfig_rocm.sh-script.patch +++ b/patches/rocm-6.1.2/aotriton/0007-add-preconfig_rocm.sh-script.patch @@ -1,7 +1,7 @@ -From d95a6c6f73c06cb5e9abbe9240f43b8e564bfc31 Mon Sep 17 00:00:00 2001 +From e544d6b10c482745934203cfc085474cb1847ea1 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Thu, 12 Dec 2024 16:14:24 -0800 -Subject: [PATCH 7/7] add preconfig_rocm.sh script +Subject: [PATCH 7/8] add preconfig_rocm.sh script Signed-off-by: Mika Laitio --- @@ -29,5 +29,5 @@ index 0000000..aa630ef + fi +fi -- -2.41.1 +2.43.0 diff --git a/patches/rocm-6.1.2/aotriton/0008-max-python-process-compile-count-for-hsaco-files.patch b/patches/rocm-6.1.2/aotriton/0008-max-python-process-compile-count-for-hsaco-files.patch new file mode 100644 index 0000000..6f56a83 --- /dev/null +++ b/patches/rocm-6.1.2/aotriton/0008-max-python-process-compile-count-for-hsaco-files.patch @@ -0,0 +1,88 @@ +From 375ce42eb83856885f933bb5214be85efd0a2a88 Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Wed, 18 Dec 2024 21:38:44 -0800 +Subject: [PATCH 8/8] max python process compile count for hsaco files + +use MAX_JOBS environment variable to +limit the amount python processes to +build and compress hsaco files. 
+ +Note that this will require that aotriton uses +ninja as a builder because cmakes add_custom_command +supports only Ninja for setting the process count. + +This solves out of memory build problem in cases where +computer has low amount of memory compared to amount +of CPUs available. + +Fixes: https://github.com/lamikr/rocm_sdk_builder/issues/178 + +Signed-off-by: Mika Laitio +--- + v2src/CMakeLists.txt | 23 ++++++++++++++--------- + 1 file changed, 14 insertions(+), 9 deletions(-) + +diff --git a/v2src/CMakeLists.txt b/v2src/CMakeLists.txt +index 5c7d2a2..9509a27 100644 +--- a/v2src/CMakeLists.txt ++++ b/v2src/CMakeLists.txt +@@ -26,6 +26,17 @@ message("AOTRITON_COMPILER ${AOTRITON_COMPILER}") + # ) + # add_dependencies(aotriton_v2_gen_compile aotriton_venv_triton) + ++if(DEFINED ENV{MAX_JOBS}) ++ set(MAX_JOBS "$ENV{MAX_JOBS}") ++else() ++ cmake_host_system_information(RESULT MAX_JOBS QUERY NUMBER_OF_PHYSICAL_CORES) ++ if(MAX_JOBS LESS 2) # In case of failures. ++ set(MAX_JOBS 2) ++ endif() ++endif() ++ ++set_property(GLOBAL PROPERTY JOB_POOLS MAX_JOB_CNT__HSACO=${MAX_JOBS}) ++ + if(AOTRITON_BUILD_FOR_TUNING) + set(GENERATE_OPTION "--build_for_tuning") + else(AOTRITON_BUILD_FOR_TUNING) +@@ -36,6 +47,7 @@ execute_process( + COMMAND_ECHO STDOUT + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_PARENT_DIR}" + ) ++ + message("Bare.compile: ${AOTRITON_V2_BUILD_DIR}/Bare.compile") + file(STRINGS "${AOTRITON_V2_BUILD_DIR}/Bare.compile" HSACO_RULES) + set(ALL_HSACOS "") +@@ -65,6 +77,7 @@ foreach(RULE IN LISTS HSACO_RULES) + "--timeout" "${AOTRITON_GPU_BUILD_TIMEOUT}" + COMMAND ${ZSTD_EXEC} "-q" "-f" ${HSACO} + DEPENDS aotriton_venv_triton ++ JOB_POOL MAX_JOB_CNT__HSACO + ) + list(APPEND ALL_HSACOS "${HSACO}.zst") + else(AOTRITON_COMPRESS_KERNEL) +@@ -82,21 +95,13 @@ foreach(RULE IN LISTS HSACO_RULES) + "--signature" "${SIG}" + "--timeout" "${AOTRITON_GPU_BUILD_TIMEOUT}" + DEPENDS aotriton_venv_triton ++ JOB_POOL MAX_JOB_CNT__HSACO + ) + list(APPEND ALL_HSACOS "${HSACO}") + 
endif(AOTRITON_COMPRESS_KERNEL) + # message("HSACO ${HSACO}") + endforeach(RULE) + +-if(DEFINED ENV{MAX_JOBS}) +- set(MAX_JOBS "$ENV{MAX_JOBS}") +-else() +- cmake_host_system_information(RESULT MAX_JOBS QUERY NUMBER_OF_PHYSICAL_CORES) +- if(MAX_JOBS LESS 2) # In case of failures. +- set(MAX_JOBS 2) +- endif() +-endif() +- + add_custom_target(aotriton_v2_compile ALL DEPENDS ${ALL_HSACOS}) + + # add_custom_target(aotriton_v2_compile +-- +2.43.0 +