Skip to content

Commit

Permalink
aotriton max process count fix
Browse files Browse the repository at this point in the history
Patch aotriton so that v2src/CMakeLists.txt reads the MAX_JOBS
environment variable and uses it to limit the number of Python
processes that build and compress hsaco files in parallel.

This fixes out-of-memory failures on computers that have many
CPUs relative to the amount of memory.
Note that this fix only works when building with Ninja.
(CMake honors the JOB_POOL option of add_custom_command only with Ninja generators.)

The binfo file sets the MAX_JOBS environment variable and forces
aotriton to be built with Ninja.

fixes: #178

Signed-off-by: Mika Laitio <[email protected]>
  • Loading branch information
lamikr committed Dec 19, 2024
1 parent dad1c15 commit 3585ad7
Show file tree
Hide file tree
Showing 9 changed files with 124 additions and 21 deletions.
15 changes: 15 additions & 0 deletions binfo/core/038_aotriton.binfo
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,19 @@ BINFO_APP_PRE_CONFIG_CMD_ARRAY=(
"./preconfig_rocm.sh"
)

# aotriton has been patched so that v2src/CMakeLists.txt reads the
# MAX_JOBS environment variable and uses it to limit the number of
# Python processes allowed to build and compress hsaco files.
# This fixes out-of-memory failures on computers that have many
# CPUs relative to the amount of memory.
# Note that this fix only works when building with Ninja.
# (CMake honors the JOB_POOL option of add_custom_command only with Ninja.)
export MAX_JOBS=${BUILD_CPU_COUNT_DEFAULT}

BINFO_APP_CMAKE_CFG="-DCMAKE_INSTALL_PREFIX=${INSTALL_DIR_PREFIX_SDK_ROOT}"
BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DCMAKE_PREFIX_PATH=${INSTALL_DIR_PREFIX_SDK_ROOT}/lib64/cmake;${INSTALL_DIR_PREFIX_SDK_ROOT}/lib/cmake"
BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} ${CFG_TEMP1}"
BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -GNinja"
BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DCMAKE_C_COMPILER=${SDK_C_COMPILER_HIPCC}"
BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DCMAKE_CXX_COMPILER=${SDK_CXX_COMPILER_HIPCC}"
BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DROCM_PATH=${INSTALL_DIR_PREFIX_SDK_ROOT}"
Expand All @@ -65,3 +75,8 @@ BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DAMDGCN_ENABLE_DUMP=1"
# separate build needed to do a backend mode as if this is enabled, other part of build is skipped???
#BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DHIP_BACKEND_MODE=1"
BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} ${BINFO_APP_SRC_DIR}"

BINFO_APP_BUILD_CMD_ARRAY=(
"cd ${BINFO_APP_BUILD_DIR}"
"ninja"
)
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From d9d6a93e0c5f131130bd8cae6bfd455b331bdf09 Mon Sep 17 00:00:00 2001
From 0d30e41b904e1027e559ebd54640467bd0226163 Mon Sep 17 00:00:00 2001
From: Mika Laitio <[email protected]>
Date: Mon, 29 Jul 2024 00:01:32 -0700
Subject: [PATCH 1/7] pass extra build options
Subject: [PATCH 1/8] pass extra build options

Signed-off-by: Mika Laitio <[email protected]>
---
Expand Down Expand Up @@ -227,5 +227,5 @@ index ca7a4b5..5c7d2a2 100644
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_PARENT_DIR}"
)
--
2.41.1
2.43.0

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From 3199757072a6291dd3276e315084a6fc32f07529 Mon Sep 17 00:00:00 2001
From 13cb55b7856490058906ea08610edad6e4e8bbda Mon Sep 17 00:00:00 2001
From: Mika Laitio <[email protected]>
Date: Mon, 29 Jul 2024 12:48:47 -0700
Subject: [PATCH 2/7] add gpus with gfx-name to build list
Subject: [PATCH 2/8] add gpus with gfx-name to build list

add all rocm sdk gpu's to build list
and use the gfx* name for them instead
Expand Down Expand Up @@ -155,5 +155,5 @@ index 2c47e1d..2a6128b 100644

GpuArch
--
2.41.1
2.43.0

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From d01cd7fdad1e40b47505ba683d0703336e6be985 Mon Sep 17 00:00:00 2001
From 02b1281ee898665efe5d713d8550016e5bcd7488 Mon Sep 17 00:00:00 2001
From: mritunjaymusale <[email protected]>
Date: Sun, 2 Jun 2024 18:00:59 +0530
Subject: [PATCH 3/7] changed the line which allocates twice the number of max
Subject: [PATCH 3/8] changed the line which allocates twice the number of max
cpu threads to triton build

Signed-off-by: mritunjaymusale <[email protected]>
Expand All @@ -23,5 +23,5 @@ index 390ee8b..4030cad 100644

if check_env_flag("TRITON_BUILD_WITH_CLANG_LLD"):
--
2.41.1
2.43.0

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From 020aceb11f576ffb00d1131320acbc163f9519b1 Mon Sep 17 00:00:00 2001
From 3bbccabde1409bbd2334d08d80e4fcdb56e942fc Mon Sep 17 00:00:00 2001
From: Mika Laitio <[email protected]>
Date: Tue, 6 Aug 2024 17:15:39 -0700
Subject: [PATCH 4/7] printout aotriton tuning db gpu info
Subject: [PATCH 4/8] printout aotriton tuning db gpu info

printout information if tuning data was available
in database for kernel when build for certain gpu
Expand Down Expand Up @@ -34,5 +34,5 @@ index 14ef241..fd1dc59 100644

@property
--
2.41.1
2.43.0

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From cd140c9489c60d3e1aba275c363e33f14aa5e30d Mon Sep 17 00:00:00 2001
From 74b1c111ca07cbc02791ff63b6cf4ef20db9b52e Mon Sep 17 00:00:00 2001
From: Mika Laitio <[email protected]>
Date: Tue, 22 Oct 2024 21:55:34 -0700
Subject: [PATCH 5/7] add gfx906/908/40/41 and gfx1150/51
Subject: [PATCH 5/8] add gfx906/908/40/41 and gfx1150/51

Signed-off-by: Mika Laitio <[email protected]>
---
Expand Down Expand Up @@ -156,5 +156,5 @@ index 2a6128b..e54614a 100644

GpuArch
--
2.41.1
2.43.0

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From a68395374e6c9d1b127e6cbaaa6321fee971a594 Mon Sep 17 00:00:00 2001
From f30bb39e4a3ffe1aae842eaf35118b21e8ef05fe Mon Sep 17 00:00:00 2001
From: Mika Laitio <[email protected]>
Date: Tue, 5 Nov 2024 00:13:12 -0800
Subject: [PATCH 6/7] separate each gpu files to own dir
Subject: [PATCH 6/8] separate each gpu files to own dir

- should help to avoid errors on situations
where there are too many files in single
Expand Down Expand Up @@ -114,5 +114,5 @@ index ec1e39f..a085ea4 100644
#include <aotriton/cpp_tune.h>
#include <incbin.h>
--
2.41.1
2.43.0

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From d95a6c6f73c06cb5e9abbe9240f43b8e564bfc31 Mon Sep 17 00:00:00 2001
From e544d6b10c482745934203cfc085474cb1847ea1 Mon Sep 17 00:00:00 2001
From: Mika Laitio <[email protected]>
Date: Thu, 12 Dec 2024 16:14:24 -0800
Subject: [PATCH 7/7] add preconfig_rocm.sh script
Subject: [PATCH 7/8] add preconfig_rocm.sh script

Signed-off-by: Mika Laitio <[email protected]>
---
Expand Down Expand Up @@ -29,5 +29,5 @@ index 0000000..aa630ef
+ fi
+fi
--
2.41.1
2.43.0

Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
From 375ce42eb83856885f933bb5214be85efd0a2a88 Mon Sep 17 00:00:00 2001
From: Mika Laitio <[email protected]>
Date: Wed, 18 Dec 2024 21:38:44 -0800
Subject: [PATCH 8/8] max python process compile count for hsaco files

Use the MAX_JOBS environment variable to
limit the number of Python processes that
build and compress hsaco files.

Note that this requires aotriton to be built with
Ninja, because the JOB_POOL option of CMake's
add_custom_command is honored only by Ninja generators.

This solves the out-of-memory build problem in cases where
the computer has a small amount of memory compared to the
number of CPUs available.

Fixes: https://github.com/lamikr/rocm_sdk_builder/issues/178

Signed-off-by: Mika Laitio <[email protected]>
---
v2src/CMakeLists.txt | 23 ++++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/v2src/CMakeLists.txt b/v2src/CMakeLists.txt
index 5c7d2a2..9509a27 100644
--- a/v2src/CMakeLists.txt
+++ b/v2src/CMakeLists.txt
@@ -26,6 +26,17 @@ message("AOTRITON_COMPILER ${AOTRITON_COMPILER}")
# )
# add_dependencies(aotriton_v2_gen_compile aotriton_venv_triton)

+if(DEFINED ENV{MAX_JOBS})
+ set(MAX_JOBS "$ENV{MAX_JOBS}")
+else()
+ cmake_host_system_information(RESULT MAX_JOBS QUERY NUMBER_OF_PHYSICAL_CORES)
+ if(MAX_JOBS LESS 2) # In case of failures.
+ set(MAX_JOBS 2)
+ endif()
+endif()
+
+set_property(GLOBAL PROPERTY JOB_POOLS MAX_JOB_CNT__HSACO=${MAX_JOBS})
+
if(AOTRITON_BUILD_FOR_TUNING)
set(GENERATE_OPTION "--build_for_tuning")
else(AOTRITON_BUILD_FOR_TUNING)
@@ -36,6 +47,7 @@ execute_process(
COMMAND_ECHO STDOUT
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_PARENT_DIR}"
)
+
message("Bare.compile: ${AOTRITON_V2_BUILD_DIR}/Bare.compile")
file(STRINGS "${AOTRITON_V2_BUILD_DIR}/Bare.compile" HSACO_RULES)
set(ALL_HSACOS "")
@@ -65,6 +77,7 @@ foreach(RULE IN LISTS HSACO_RULES)
"--timeout" "${AOTRITON_GPU_BUILD_TIMEOUT}"
COMMAND ${ZSTD_EXEC} "-q" "-f" ${HSACO}
DEPENDS aotriton_venv_triton
+ JOB_POOL MAX_JOB_CNT__HSACO
)
list(APPEND ALL_HSACOS "${HSACO}.zst")
else(AOTRITON_COMPRESS_KERNEL)
@@ -82,21 +95,13 @@ foreach(RULE IN LISTS HSACO_RULES)
"--signature" "${SIG}"
"--timeout" "${AOTRITON_GPU_BUILD_TIMEOUT}"
DEPENDS aotriton_venv_triton
+ JOB_POOL MAX_JOB_CNT__HSACO
)
list(APPEND ALL_HSACOS "${HSACO}")
endif(AOTRITON_COMPRESS_KERNEL)
# message("HSACO ${HSACO}")
endforeach(RULE)

-if(DEFINED ENV{MAX_JOBS})
- set(MAX_JOBS "$ENV{MAX_JOBS}")
-else()
- cmake_host_system_information(RESULT MAX_JOBS QUERY NUMBER_OF_PHYSICAL_CORES)
- if(MAX_JOBS LESS 2) # In case of failures.
- set(MAX_JOBS 2)
- endif()
-endif()
-
add_custom_target(aotriton_v2_compile ALL DEPENDS ${ALL_HSACOS})

# add_custom_target(aotriton_v2_compile
--
2.43.0

0 comments on commit 3585ad7

Please sign in to comment.