-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Patch aotriton to check the MAX_JOBS environment variable in aotriton's v2src/CMakeLists.txt and use it to limit the number of Python processes allowed to build and compress hsaco files. This fixes the out-of-memory problem in cases where the computer has a lot of CPUs compared to the amount of memory. Note that this fix only works when using Ninja (a CMake limitation: the JOB_POOL option of add_custom_command is only honored by Ninja). The MAX_JOBS environment variable and the forced use of Ninja for building aotriton are set in the binfo file. Fixes: #178 Signed-off-by: Mika Laitio <[email protected]>
- Loading branch information
Showing
9 changed files
with
124 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
From d9d6a93e0c5f131130bd8cae6bfd455b331bdf09 Mon Sep 17 00:00:00 2001 | ||
From 0d30e41b904e1027e559ebd54640467bd0226163 Mon Sep 17 00:00:00 2001 | ||
From: Mika Laitio <[email protected]> | ||
Date: Mon, 29 Jul 2024 00:01:32 -0700 | ||
Subject: [PATCH 1/7] pass extra build options | ||
Subject: [PATCH 1/8] pass extra build options | ||
|
||
Signed-off-by: Mika Laitio <[email protected]> | ||
--- | ||
|
@@ -227,5 +227,5 @@ index ca7a4b5..5c7d2a2 100644 | |
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_PARENT_DIR}" | ||
) | ||
-- | ||
2.41.1 | ||
2.43.0 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
From 3199757072a6291dd3276e315084a6fc32f07529 Mon Sep 17 00:00:00 2001 | ||
From 13cb55b7856490058906ea08610edad6e4e8bbda Mon Sep 17 00:00:00 2001 | ||
From: Mika Laitio <[email protected]> | ||
Date: Mon, 29 Jul 2024 12:48:47 -0700 | ||
Subject: [PATCH 2/7] add gpus with gfx-name to build list | ||
Subject: [PATCH 2/8] add gpus with gfx-name to build list | ||
|
||
add all rocm sdk gpu's to build list | ||
and use the gfx* name for them instead | ||
|
@@ -155,5 +155,5 @@ index 2c47e1d..2a6128b 100644 | |
|
||
GpuArch | ||
-- | ||
2.41.1 | ||
2.43.0 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
From d01cd7fdad1e40b47505ba683d0703336e6be985 Mon Sep 17 00:00:00 2001 | ||
From 02b1281ee898665efe5d713d8550016e5bcd7488 Mon Sep 17 00:00:00 2001 | ||
From: mritunjaymusale <[email protected]> | ||
Date: Sun, 2 Jun 2024 18:00:59 +0530 | ||
Subject: [PATCH 3/7] changed the line which allocates twice the number of max | ||
Subject: [PATCH 3/8] changed the line which allocates twice the number of max | ||
cpu threads to triton build | ||
|
||
Signed-off-by: mritunjaymusale <[email protected]> | ||
|
@@ -23,5 +23,5 @@ index 390ee8b..4030cad 100644 | |
|
||
if check_env_flag("TRITON_BUILD_WITH_CLANG_LLD"): | ||
-- | ||
2.41.1 | ||
2.43.0 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
From 020aceb11f576ffb00d1131320acbc163f9519b1 Mon Sep 17 00:00:00 2001 | ||
From 3bbccabde1409bbd2334d08d80e4fcdb56e942fc Mon Sep 17 00:00:00 2001 | ||
From: Mika Laitio <[email protected]> | ||
Date: Tue, 6 Aug 2024 17:15:39 -0700 | ||
Subject: [PATCH 4/7] printout aotriton tuning db gpu info | ||
Subject: [PATCH 4/8] printout aotriton tuning db gpu info | ||
|
||
printout information if tuning data was available | ||
in database for kernel when build for certain gpu | ||
|
@@ -34,5 +34,5 @@ index 14ef241..fd1dc59 100644 | |
|
||
@property | ||
-- | ||
2.41.1 | ||
2.43.0 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
From cd140c9489c60d3e1aba275c363e33f14aa5e30d Mon Sep 17 00:00:00 2001 | ||
From 74b1c111ca07cbc02791ff63b6cf4ef20db9b52e Mon Sep 17 00:00:00 2001 | ||
From: Mika Laitio <[email protected]> | ||
Date: Tue, 22 Oct 2024 21:55:34 -0700 | ||
Subject: [PATCH 5/7] add gfx906/908/40/41 and gfx1150/51 | ||
Subject: [PATCH 5/8] add gfx906/908/40/41 and gfx1150/51 | ||
|
||
Signed-off-by: Mika Laitio <[email protected]> | ||
--- | ||
|
@@ -156,5 +156,5 @@ index 2a6128b..e54614a 100644 | |
|
||
GpuArch | ||
-- | ||
2.41.1 | ||
2.43.0 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
From a68395374e6c9d1b127e6cbaaa6321fee971a594 Mon Sep 17 00:00:00 2001 | ||
From f30bb39e4a3ffe1aae842eaf35118b21e8ef05fe Mon Sep 17 00:00:00 2001 | ||
From: Mika Laitio <[email protected]> | ||
Date: Tue, 5 Nov 2024 00:13:12 -0800 | ||
Subject: [PATCH 6/7] separate each gpu files to own dir | ||
Subject: [PATCH 6/8] separate each gpu files to own dir | ||
|
||
- should help to avoid errors on situations | ||
where there are too many files in single | ||
|
@@ -114,5 +114,5 @@ index ec1e39f..a085ea4 100644 | |
#include <aotriton/cpp_tune.h> | ||
#include <incbin.h> | ||
-- | ||
2.41.1 | ||
2.43.0 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
From d95a6c6f73c06cb5e9abbe9240f43b8e564bfc31 Mon Sep 17 00:00:00 2001 | ||
From e544d6b10c482745934203cfc085474cb1847ea1 Mon Sep 17 00:00:00 2001 | ||
From: Mika Laitio <[email protected]> | ||
Date: Thu, 12 Dec 2024 16:14:24 -0800 | ||
Subject: [PATCH 7/7] add preconfig_rocm.sh script | ||
Subject: [PATCH 7/8] add preconfig_rocm.sh script | ||
|
||
Signed-off-by: Mika Laitio <[email protected]> | ||
--- | ||
|
@@ -29,5 +29,5 @@ index 0000000..aa630ef | |
+ fi | ||
+fi | ||
-- | ||
2.41.1 | ||
2.43.0 | ||
|
88 changes: 88 additions & 0 deletions
88
patches/rocm-6.1.2/aotriton/0008-max-python-process-compile-count-for-hsaco-files.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
From 375ce42eb83856885f933bb5214be85efd0a2a88 Mon Sep 17 00:00:00 2001 | ||
From: Mika Laitio <[email protected]> | ||
Date: Wed, 18 Dec 2024 21:38:44 -0800 | ||
Subject: [PATCH 8/8] max python process compile count for hsaco files | ||
|
||
Use the MAX_JOBS environment variable to | ||
limit the number of python processes used to | ||
build and compress hsaco files. | ||
|
||
Note that this requires aotriton to be built with | ||
Ninja, because the JOB_POOL option of CMake's add_custom_command | ||
is only honored by Ninja when limiting the process count. | ||
|
||
This solves the out-of-memory build problem in cases where | ||
the computer has a low amount of memory compared to the number | ||
of CPUs available. | ||
|
||
Fixes: https://github.com/lamikr/rocm_sdk_builder/issues/178 | ||
|
||
Signed-off-by: Mika Laitio <[email protected]> | ||
--- | ||
v2src/CMakeLists.txt | 23 ++++++++++++++--------- | ||
1 file changed, 14 insertions(+), 9 deletions(-) | ||
|
||
diff --git a/v2src/CMakeLists.txt b/v2src/CMakeLists.txt | ||
index 5c7d2a2..9509a27 100644 | ||
--- a/v2src/CMakeLists.txt | ||
+++ b/v2src/CMakeLists.txt | ||
@@ -26,6 +26,17 @@ message("AOTRITON_COMPILER ${AOTRITON_COMPILER}") | ||
# ) | ||
# add_dependencies(aotriton_v2_gen_compile aotriton_venv_triton) | ||
|
||
+if(DEFINED ENV{MAX_JOBS}) | ||
+ set(MAX_JOBS "$ENV{MAX_JOBS}") | ||
+else() | ||
+ cmake_host_system_information(RESULT MAX_JOBS QUERY NUMBER_OF_PHYSICAL_CORES) | ||
+ if(MAX_JOBS LESS 2) # In case of failures. | ||
+ set(MAX_JOBS 2) | ||
+ endif() | ||
+endif() | ||
+ | ||
+set_property(GLOBAL PROPERTY JOB_POOLS MAX_JOB_CNT__HSACO=${MAX_JOBS}) | ||
+ | ||
if(AOTRITON_BUILD_FOR_TUNING) | ||
set(GENERATE_OPTION "--build_for_tuning") | ||
else(AOTRITON_BUILD_FOR_TUNING) | ||
@@ -36,6 +47,7 @@ execute_process( | ||
COMMAND_ECHO STDOUT | ||
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_PARENT_DIR}" | ||
) | ||
+ | ||
message("Bare.compile: ${AOTRITON_V2_BUILD_DIR}/Bare.compile") | ||
file(STRINGS "${AOTRITON_V2_BUILD_DIR}/Bare.compile" HSACO_RULES) | ||
set(ALL_HSACOS "") | ||
@@ -65,6 +77,7 @@ foreach(RULE IN LISTS HSACO_RULES) | ||
"--timeout" "${AOTRITON_GPU_BUILD_TIMEOUT}" | ||
COMMAND ${ZSTD_EXEC} "-q" "-f" ${HSACO} | ||
DEPENDS aotriton_venv_triton | ||
+ JOB_POOL MAX_JOB_CNT__HSACO | ||
) | ||
list(APPEND ALL_HSACOS "${HSACO}.zst") | ||
else(AOTRITON_COMPRESS_KERNEL) | ||
@@ -82,21 +95,13 @@ foreach(RULE IN LISTS HSACO_RULES) | ||
"--signature" "${SIG}" | ||
"--timeout" "${AOTRITON_GPU_BUILD_TIMEOUT}" | ||
DEPENDS aotriton_venv_triton | ||
+ JOB_POOL MAX_JOB_CNT__HSACO | ||
) | ||
list(APPEND ALL_HSACOS "${HSACO}") | ||
endif(AOTRITON_COMPRESS_KERNEL) | ||
# message("HSACO ${HSACO}") | ||
endforeach(RULE) | ||
|
||
-if(DEFINED ENV{MAX_JOBS}) | ||
- set(MAX_JOBS "$ENV{MAX_JOBS}") | ||
-else() | ||
- cmake_host_system_information(RESULT MAX_JOBS QUERY NUMBER_OF_PHYSICAL_CORES) | ||
- if(MAX_JOBS LESS 2) # In case of failures. | ||
- set(MAX_JOBS 2) | ||
- endif() | ||
-endif() | ||
- | ||
add_custom_target(aotriton_v2_compile ALL DEPENDS ${ALL_HSACOS}) | ||
|
||
# add_custom_target(aotriton_v2_compile | ||
-- | ||
2.43.0 | ||
|