From a09b9648d9026553c1f655531b9f7f3576692e5f Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Sat, 13 Jul 2024 20:02:59 -0400 Subject: [PATCH] initial gfx1036 and gfx1103 support - initial support for gfx1036 and gfx1103 as a build target - updated also the gfx1010 configuration settings to be more similar in composable kernel and miopen fixes: https://github.com/lamikr/rocm_sdk_builder/issues/101 fixes: https://github.com/lamikr/rocm_sdk_builder/issues/103 Signed-off-by: Mika Laitio --- binfo/040_02_onnxruntime_deepspeed.binfo | 2 +- binfo/user_config.sh | 4 +- ...-preconfig-and-build_install-scripts.patch | 2 +- ...ath-from-installed-pytorch-variables.patch | 2 +- ...-deepspeed-for-rocm-in-virtual-linux.patch | 14 +- ...ve-linear_kernel-which-fails-on-rocm.patch | 2 +- ...-on-install-for-missing-kdb.bz2-file.patch | 6 +- ...0002-fix-libroctx64.so-linking-error.patch | 6 +- ...3-MIOpen-gfx1010-and-gfx1035-support.patch | 6 +- .../0004-improved-gfx1010-support.patch | 6 +- .../0005-gfx1036-and-gfx1103-support.patch | 119 ++++++ ...0001-Tensile-fix-fallback-arch-build.patch | 6 +- .../0002-Tensile-add-gfx1035-support.patch | 6 +- .../Tensile/0003-llvm-path-changes.patch | 15 +- ...-gfx1035-gfx1036-and-gfx1103-support.patch | 339 ++++++++++++++++++ ...RD_DWORD-support-for-not-listed-gpus.patch | 6 +- ...mposable-kernel-jit-library-backport.patch | 6 +- ...-gfx1010-and-gfx1035-initial-support.patch | 6 +- ...0004-add-gfx1036-and-gfx1103-support.patch | 76 ++++ ...penBLAS-link-support-for-client-apps.patch | 4 +- ...-1036-and-1103-to-client-utilty-code.patch | 61 ++++ ...02-add-gfx1035-to-client-utilty-code.patch | 28 -- ...r-gtest-and-lapack-linking-in-ubuntu.patch | 4 +- .../0001-tensilelite-llvm-path-changes.patch | 4 +- ...dd-more-gpus-for-default-target-list.patch | 4 +- ...-Tensilelite-fix-fallback-arch-build.patch | 4 +- ...fx1035-1036-and-1103-to-supported-i.patch} | 26 +- .../0005-fallback-support-debug-patch.patch | 4 +- ...and-BLIS-library-search-improvements.patch | 4 +- ...ro-and-arch-linux-msgpack-search-fix.patch | 4 +- ...me-training-rocm-sdk-builder-scripts.patch | 2 +- ...l-patches-to-support-additiona-gpus.patch} | 217 ++++++++--- ...er.cc-dangling-reference-warning-fix.patch | 2 +- ...-rocm-sdk-builder-CMAKE_PREFIX_PATHS.patch | 2 +- ...-not-allowed-for-constructor-in-C-20.patch | 2 +- ...time-optimized-maybe-uninitialized-e.patch | 2 +- ...d-breaks-for-uninitialized-variables.patch | 2 +- ...-preconfig-build-and-install-scripts.patch | 10 +- ...ssage-if-ROCM_SOURCE_DIR-not-defined.patch | 6 +- ...HIP-force-ROCM-detection-and-patches.patch | 6 +- ...b-and-lib64-search-path-adjustements.patch | 8 +- ...parameter-is-null-optimization-error.patch | 6 +- ...with-min-and-max-for-fedora-40-issue.patch | 6 +- ...erbose-output-on-dumpversion-command.patch | 35 ++ .../0001-gfx1010-1030-1035-and-1036.patch | 40 --- .../rccl/0001-gfx1103-support.patch | 47 +++ ...1-add-mageia-9-support-to-install.sh.patch | 4 +- ...x1035-gfx1036-and-gfx1103-to-gpulist.patch | 110 ++++++ .../rocBLAS/0002-add-gfx1035-to-gpulist.patch | 30 -- ...and-BLIS-library-search-improvements.patch | 4 +- ...able-DPP-from-gfx1035-1036-and-1103.patch} | 10 +- ...cRAND-add-gfx1010-gfx1035-and-gfx11.patch} | 28 +- 52 files changed, 1088 insertions(+), 267 deletions(-) create mode 100644 patches/rocm-6.1.2/MIOpen/0005-gfx1036-and-gfx1103-support.patch create mode 100644 patches/rocm-6.1.2/Tensile/0004-gfx1035-gfx1036-and-gfx1103-support.patch create mode 100644 patches/rocm-6.1.2/composable_kernel/0004-add-gfx1036-and-gfx1103-support.patch create mode 100644 patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-1036-and-1103-to-client-utilty-code.patch delete mode 100644 patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-to-client-utilty-code.patch rename patches/rocm-6.1.2/hipBLASLt/{0004-Tensilelite-add-gfx1035-to-supported-isa-list.patch => 0004-Tensilelite-add-gfx1035-1036-and-1103-to-supported-i.patch} (52%) rename patches/rocm-6.1.2/onnxruntime/{0002-composable-kernel-patches-to-support-gfx1010-and-gfx.patch => 0002-composable-kernel-patches-to-support-additiona-gpus.patch} (58%) create mode 100644 patches/rocm-6.1.2/pytorch/0007-handle-hipcc-verbose-output-on-dumpversion-command.patch delete mode 100644 patches/rocm-6.1.2/rccl/0001-gfx1010-1030-1035-and-1036.patch create mode 100644 patches/rocm-6.1.2/rccl/0001-gfx1103-support.patch create mode 100644 patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-gfx1036-and-gfx1103-to-gpulist.patch delete mode 100644 patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-to-gpulist.patch rename patches/rocm-6.1.2/rocPRIM/{0001-disable-DPP-from-gfx1035.patch => 0001-disable-DPP-from-gfx1035-1036-and-1103.patch} (75%) rename patches/rocm-6.1.2/rocRAND/{0001-rocRAND-build-gfx1035-options.patch => 0001-rocRAND-add-gfx1010-gfx1035-and-gfx11.patch} (78%) diff --git a/binfo/040_02_onnxruntime_deepspeed.binfo b/binfo/040_02_onnxruntime_deepspeed.binfo index 6de4e41..f77001a 100755 --- a/binfo/040_02_onnxruntime_deepspeed.binfo +++ b/binfo/040_02_onnxruntime_deepspeed.binfo @@ -14,7 +14,7 @@ BINFO_APP_PRE_CONFIG_CMD_ARRAY=( BINFO_APP_BUILD_CMD_ARRAY=( "cd ${BINFO_APP_SRC_DIR}" - "./build_rocm.sh \"${SEMICOLON_SEPARATED_GPU_TARGET_LIST_DEFAULT}\"" + "./build_rocm.sh ${SEMICOLON_SEPARATED_GPU_TARGET_LIST_DEFAULT}" ) BINFO_APP_INSTALL_CMD_ARRAY=( diff --git a/binfo/user_config.sh b/binfo/user_config.sh index 4e23816..c5b12cb 100644 --- a/binfo/user_config.sh +++ b/binfo/user_config.sh @@ -3,7 +3,7 @@ # Function to select ROCM SDK build target GPUs func_build_cfg_user() { local message="Select ROCM SDK build target GPUs. Space to select, Enter to finish save, ESC to cancel." - local options="gfx906|gfx90a|gfx940|gfx1010|gfx1011|gfx1012|gfx1030|gfx1031|gfx1035|gfx1100|gfx1101|gfx1102|gfx1150|gfx1151" + local options="gfx906|gfx90a|gfx940|gfx1010|gfx1011|gfx1012|gfx1030|gfx1031|gfx1035|gfx1036|gfx1100|gfx1101|gfx1102|gfx1103|gfx1150|gfx1151" local script_path="./build/checkbox.sh" # Check if the script exists and is executable @@ -14,4 +14,4 @@ func_build_cfg_user() { # Execute the checkbox script with the specified parameters "$script_path" --message="$message" --options="$options" --multiple -} \ No newline at end of file +} diff --git a/patches/rocm-6.1.2/DeepSpeed/0001-deepspeed-rocm-preconfig-and-build_install-scripts.patch b/patches/rocm-6.1.2/DeepSpeed/0001-deepspeed-rocm-preconfig-and-build_install-scripts.patch index 6a082fd..b0b3a65 100644 --- a/patches/rocm-6.1.2/DeepSpeed/0001-deepspeed-rocm-preconfig-and-build_install-scripts.patch +++ b/patches/rocm-6.1.2/DeepSpeed/0001-deepspeed-rocm-preconfig-and-build_install-scripts.patch @@ -1,4 +1,4 @@ -From 108755dda9e284382bd0b63e5e351cba9f7abb16 Mon Sep 17 00:00:00 2001 +From 3d237f904f4f74618da216179f9279fd027b05e8 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Mon, 20 May 2024 22:36:23 -0700 Subject: [PATCH 1/4] deepspeed rocm preconfig and build_install scripts diff --git a/patches/rocm-6.1.2/DeepSpeed/0002-check-rocm-path-from-installed-pytorch-variables.patch b/patches/rocm-6.1.2/DeepSpeed/0002-check-rocm-path-from-installed-pytorch-variables.patch index 63e4f57..4c3c7bc 100644 --- a/patches/rocm-6.1.2/DeepSpeed/0002-check-rocm-path-from-installed-pytorch-variables.patch +++ b/patches/rocm-6.1.2/DeepSpeed/0002-check-rocm-path-from-installed-pytorch-variables.patch @@ -1,4 +1,4 @@ -From 40f44ac1ca3cc73851bfb7c3016276894022eb4f Mon Sep 17 00:00:00 2001 +From ff7f27c49a5ac7ac31f7ebc70e02c18f3aa0ac91 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Tue, 21 May 2024 07:57:53 -0700 Subject: [PATCH 2/4] check rocm path from installed pytorch variables diff --git a/patches/rocm-6.1.2/DeepSpeed/0003-allow-building-deepspeed-for-rocm-in-virtual-linux.patch b/patches/rocm-6.1.2/DeepSpeed/0003-allow-building-deepspeed-for-rocm-in-virtual-linux.patch index 587db10..9443e00 100644 --- a/patches/rocm-6.1.2/DeepSpeed/0003-allow-building-deepspeed-for-rocm-in-virtual-linux.patch +++ b/patches/rocm-6.1.2/DeepSpeed/0003-allow-building-deepspeed-for-rocm-in-virtual-linux.patch @@ -1,4 +1,4 @@ -From 59fe85c08a0900acaf07bd2aa506853af3fbd83e Mon Sep 17 00:00:00 2001 +From e423db20d2eb75934214c4487fb4193f3df389a9 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Wed, 26 Jun 2024 14:44:04 -0700 Subject: [PATCH 3/4] allow building deepspeed for rocm in virtual linux @@ -10,24 +10,26 @@ fixes: https://github.com/lamikr/rocm_sdk_builder/issues/75 Signed-off-by: Mika Laitio --- - build_rocm.sh | 2 ++ + build_rocm.sh | 4 +++- op_builder/builder.py | 2 +- setup.py | 18 ++++++++++-------- - 3 files changed, 13 insertions(+), 9 deletions(-) + 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/build_rocm.sh b/build_rocm.sh -index f4a75855..3f3ce526 100755 +index f4a75855..b48a0ad9 100755 --- a/build_rocm.sh +++ b/build_rocm.sh -@@ -15,6 +15,8 @@ mkdir -p deepspeed/ops/spatial +@@ -15,7 +15,9 @@ mkdir -p deepspeed/ops/spatial #export CFLAGS="-I/usr/include" #export LDFLAGS="-L/usr/lib64" +# needed by real accelerator.py to detect the cuda when build on virtual linux without access to real hardware +export DS_ACCELERATOR=cuda # install command will create wheel and install it. bdist_wheel comamnd would only create the wheel - AMDGPU_TARGETS=${amd_target_gpu} DS_BUILD_AIO=0 DS_BUILD_FP_QUANTIZER=0 DS_BUILD_QUANTIZER=0 DS_BUILD_SPARSE_ATTN=0 DS_BUILD_RAGGED_DEVICE_OPS=0 DS_BUILD_CUTLASS_OPS=0 DS_BUILD_EVOFORMER_ATTN=0 DS_BUILD_OPS=1 python setup.py bdist_wheel +-AMDGPU_TARGETS=${amd_target_gpu} DS_BUILD_AIO=0 DS_BUILD_FP_QUANTIZER=0 DS_BUILD_QUANTIZER=0 DS_BUILD_SPARSE_ATTN=0 DS_BUILD_RAGGED_DEVICE_OPS=0 DS_BUILD_CUTLASS_OPS=0 DS_BUILD_EVOFORMER_ATTN=0 DS_BUILD_OPS=1 python setup.py bdist_wheel ++AMDGPU_TARGETS="${amd_target_gpu}" DS_BUILD_AIO=0 DS_BUILD_FP_QUANTIZER=0 DS_BUILD_QUANTIZER=0 DS_BUILD_SPARSE_ATTN=0 DS_BUILD_RAGGED_DEVICE_OPS=0 DS_BUILD_CUTLASS_OPS=0 DS_BUILD_EVOFORMER_ATTN=0 DS_BUILD_OPS=1 python setup.py bdist_wheel + #DS_BUILD_UTILS=1 DS_BUILD_CPU_ADAGRAD=1 DS_BUILD_RANDOM_LTD=1 DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_FUSED_LAMB=1 DS_BUILD_CCL_COMM=1 python setup.py develop diff --git a/op_builder/builder.py b/op_builder/builder.py index a27b134c..4980a528 100644 --- a/op_builder/builder.py diff --git a/patches/rocm-6.1.2/DeepSpeed/0004-remove-linear_kernel-which-fails-on-rocm.patch b/patches/rocm-6.1.2/DeepSpeed/0004-remove-linear_kernel-which-fails-on-rocm.patch index 85e3d67..d599380 100644 --- a/patches/rocm-6.1.2/DeepSpeed/0004-remove-linear_kernel-which-fails-on-rocm.patch +++ b/patches/rocm-6.1.2/DeepSpeed/0004-remove-linear_kernel-which-fails-on-rocm.patch @@ -1,4 +1,4 @@ -From bae0b1c89880e23f8ec885eba5f18043726319a0 Mon Sep 17 00:00:00 2001 +From 44b7a213e104e7f7b9e6f9b7e63f5eb8ff7be88f Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Tue, 21 May 2024 11:41:20 -0700 Subject: [PATCH 4/4] remove linear_kernel which fails on rocm diff --git a/patches/rocm-6.1.2/MIOpen/0001-Do-not-fail-on-install-for-missing-kdb.bz2-file.patch b/patches/rocm-6.1.2/MIOpen/0001-Do-not-fail-on-install-for-missing-kdb.bz2-file.patch index c4e4b73..86835c3 100644 --- a/patches/rocm-6.1.2/MIOpen/0001-Do-not-fail-on-install-for-missing-kdb.bz2-file.patch +++ b/patches/rocm-6.1.2/MIOpen/0001-Do-not-fail-on-install-for-missing-kdb.bz2-file.patch @@ -1,7 +1,7 @@ -From 915a3c074ff1c3694566fd74f2e83003a1253104 Mon Sep 17 00:00:00 2001 +From 7b51b6d06842de4851b6c20b61dda2ead78e68ff Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Wed, 8 May 2024 13:48:21 -0700 -Subject: [PATCH 1/4] Do not fail on install for missing kdb.bz2 file +Subject: [PATCH 1/5] Do not fail on install for missing kdb.bz2 file Do not fail if kdb.bz2 file for some GPU does not exist because these kdb files are not mandatory. Their function is to speed up the @@ -43,5 +43,5 @@ index 32d9a2e5b..d6c2db704 100644 endif() -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/MIOpen/0002-fix-libroctx64.so-linking-error.patch b/patches/rocm-6.1.2/MIOpen/0002-fix-libroctx64.so-linking-error.patch index d0742dc..1cf141b 100644 --- a/patches/rocm-6.1.2/MIOpen/0002-fix-libroctx64.so-linking-error.patch +++ b/patches/rocm-6.1.2/MIOpen/0002-fix-libroctx64.so-linking-error.patch @@ -1,7 +1,7 @@ -From ecb981cd1b66749186404fa76c56237c758953b5 Mon Sep 17 00:00:00 2001 +From fdc0e268b2edbb0a57af8e8820b388816af38471 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 3 May 2024 14:15:09 -0700 -Subject: [PATCH 2/4] fix libroctx64.so linking error +Subject: [PATCH 2/5] fix libroctx64.so linking error search the library and if found link it from there instead of expecting it to be in the ld library path @@ -31,5 +31,5 @@ index 0741a6023..ae4405eed 100644 ############################################################ -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/MIOpen/0003-MIOpen-gfx1010-and-gfx1035-support.patch b/patches/rocm-6.1.2/MIOpen/0003-MIOpen-gfx1010-and-gfx1035-support.patch index 0f68e9e..42cfefe 100644 --- a/patches/rocm-6.1.2/MIOpen/0003-MIOpen-gfx1010-and-gfx1035-support.patch +++ b/patches/rocm-6.1.2/MIOpen/0003-MIOpen-gfx1010-and-gfx1035-support.patch @@ -1,7 +1,7 @@ -From 6578a68e3226e97716aad12d445632358f2a463e Mon Sep 17 00:00:00 2001 +From 4b65dfcb0208bfe1eb64c474fb97e31e2b8bf60f Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Tue, 19 Dec 2023 15:13:46 -0800 -Subject: [PATCH 3/4] MIOpen gfx1010 and gfx1035 support +Subject: [PATCH 3/5] MIOpen gfx1010 and gfx1035 support - todo: check gfx1010 specific parts @@ -174,5 +174,5 @@ index 16ce78f04..2ec3eaf09 100644 "gfx1101", "gfx1102"}; -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/MIOpen/0004-improved-gfx1010-support.patch b/patches/rocm-6.1.2/MIOpen/0004-improved-gfx1010-support.patch index fc47b2c..2f14843 100644 --- a/patches/rocm-6.1.2/MIOpen/0004-improved-gfx1010-support.patch +++ b/patches/rocm-6.1.2/MIOpen/0004-improved-gfx1010-support.patch @@ -1,7 +1,7 @@ -From 5e7803271cbbe475da352ab188f09b345006d9c0 Mon Sep 17 00:00:00 2001 +From 3c0552223df66774e8f3613826a6094939763cce Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Mon, 8 Jul 2024 21:44:10 +0300 -Subject: [PATCH 4/4] improved gfx1010 support +Subject: [PATCH 4/5] improved gfx1010 support - allows running pytorch gpu benchmark on gfx1010/amd rx 5700 @@ -135,5 +135,5 @@ index bf02d4d55..c3fa2bd3a 100644 {"Rembrandt", "gfx1035"}, }; -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/MIOpen/0005-gfx1036-and-gfx1103-support.patch b/patches/rocm-6.1.2/MIOpen/0005-gfx1036-and-gfx1103-support.patch new file mode 100644 index 0000000..c3d41e4 --- /dev/null +++ b/patches/rocm-6.1.2/MIOpen/0005-gfx1036-and-gfx1103-support.patch @@ -0,0 +1,119 @@ +From 08071937d4c2c34f619ed5b49bd0ced4805875fa Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Sat, 13 Jul 2024 21:07:11 -0400 +Subject: [PATCH 5/5] gfx1036 and gfx1103 support + +Signed-off-by: Mika Laitio +--- + .../composable_kernel/include/utility/config.hpp | 15 ++++++++------- + src/include/miopen/solver/ck_utility_common.hpp | 8 +++++++- + src/target_properties.cpp | 14 ++++++++++++-- + 3 files changed, 27 insertions(+), 10 deletions(-) + +diff --git a/src/composable_kernel/composable_kernel/include/utility/config.hpp b/src/composable_kernel/composable_kernel/include/utility/config.hpp +index 5957a79d8..6ca920b5e 100644 +--- a/src/composable_kernel/composable_kernel/include/utility/config.hpp ++++ b/src/composable_kernel/composable_kernel/include/utility/config.hpp +@@ -16,8 +16,8 @@ + #if !(defined(CK_AMD_GPU_GFX803) || defined(CK_AMD_GPU_GFX900) || defined(CK_AMD_GPU_GFX906) || \ + defined(CK_AMD_GPU_GFX940) || defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90A) || \ + defined(CK_AMD_GPU_GFX941) || defined(CK_AMD_GPU_GFX942) || defined(CK_AMD_GPU_GFX1010) || defined(CK_AMD_GPU_GFX1030) || \ +- defined(CK_AMD_GPU_GFX1031) || defined(CK_AMD_GPU_GFX1035) || defined(CK_AMD_GPU_GFX1100) || defined(CK_AMD_GPU_GFX1101) || \ +- defined(CK_AMD_GPU_GFX1102)) ++ defined(CK_AMD_GPU_GFX1031) || defined(CK_AMD_GPU_GFX1035) || defined(CK_AMD_GPU_GFX1036) || defined(CK_AMD_GPU_GFX1100) || defined(CK_AMD_GPU_GFX1101) || \ ++ defined(CK_AMD_GPU_GFX1102) || defined(CK_AMD_GPU_GFX1103)) + #error Need to define (only) one GPU target + #endif + +@@ -29,14 +29,15 @@ + #define CK_MIN_BLOCK_PER_CU 2 + #endif + +-// TODO: gfx1010 check CK_BUFFER_RESOURCE_3RD_DWORD ++// TODO: composable_kernel has differend CK_BUFFER_RESOURCE_3RD_DWORD for gfx110* devices + // buffer resourse + #if defined(CK_AMD_GPU_GFX803) || defined(CK_AMD_GPU_GFX900) || defined(CK_AMD_GPU_GFX906) || \ + defined(CK_AMD_GPU_GFX941) || defined(CK_AMD_GPU_GFX942) || defined(CK_AMD_GPU_GFX940) || \ +- defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90A) || defined(CK_AMD_GPU_GFX1010) ++ defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90A) + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x00020000 +-#elif defined(CK_AMD_GPU_GFX1030) || defined(CK_AMD_GPU_GFX1031) || defined(CK_AMD_GPU_GFX1035) || defined(CK_AMD_GPU_GFX1100) || \ +- defined(CK_AMD_GPU_GFX1101) || defined(CK_AMD_GPU_GFX1102) ++#elif defined(CK_AMD_GPU_GFX1010) || defined(CK_AMD_GPU_GFX1030) || defined(CK_AMD_GPU_GFX1031) || \ ++ defined(CK_AMD_GPU_GFX1035) || defined(CK_AMD_GPU_GFX1036) || \ ++ defined(CK_AMD_GPU_GFX1100) || defined(CK_AMD_GPU_GFX1101) || defined(CK_AMD_GPU_GFX1102) || defined(CK_AMD_GPU_GFX1103) + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000 + #endif + +@@ -49,7 +50,7 @@ + #elif defined(CK_AMD_GPU_GFX906) || defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90a) || \ + defined(CK_AMD_GPU_GFX941) || defined(CK_AMD_GPU_GFX942) || defined(CK_AMD_GPU_GFX940) || \ + defined(CK_AMD_GPU_GFX1030) || defined(CK_AMD_GPU_GFX1031) || defined(CK_AMD_GPU_GFX1100) || \ +- defined(CK_AMD_GPU_GFX1101) || defined(CK_AMD_GPU_GFX1102) ++ defined(CK_AMD_GPU_GFX1101) || defined(CK_AMD_GPU_GFX1102) || defined(CK_AMD_GPU_GFX1103) + #define CK_USE_AMD_V_FMAC_F32 + #define CK_USE_AMD_V_DOT2_F32_F16 + #define CK_USE_AMD_V_DOT4_I32_I8 +diff --git a/src/include/miopen/solver/ck_utility_common.hpp b/src/include/miopen/solver/ck_utility_common.hpp +index aea036066..ea5629871 100644 +--- a/src/include/miopen/solver/ck_utility_common.hpp ++++ b/src/include/miopen/solver/ck_utility_common.hpp +@@ -61,9 +61,11 @@ static inline bool is_ck_supported_hardware(const Handle& handle) + StartsWith(handle.GetDeviceName(), "gfx1030") || + StartsWith(handle.GetDeviceName(), "gfx1031") || + StartsWith(handle.GetDeviceName(), "gfx1035") || ++ StartsWith(handle.GetDeviceName(), "gfx1036") || + StartsWith(handle.GetDeviceName(), "gfx1100") || + StartsWith(handle.GetDeviceName(), "gfx1101") || +- StartsWith(handle.GetDeviceName(), "gfx1102"); ++ StartsWith(handle.GetDeviceName(), "gfx1102") || ++ StartsWith(handle.GetDeviceName(), "gfx1103"); + } + + // MI100 : gfx908 +@@ -121,12 +123,16 @@ static inline auto get_ck_common_compiler_flag(const Handle& handle) + compiler_flag << " -DCK_AMD_GPU_GFX1031"; + else if(StartsWith(device_name, "gfx1035")) + compiler_flag << " -DCK_AMD_GPU_GFX1035"; ++ else if(StartsWith(device_name, "gfx1036")) ++ compiler_flag << " -DCK_AMD_GPU_GFX1036"; + else if(StartsWith(device_name, "gfx1100")) + compiler_flag << " -DCK_AMD_GPU_GFX1100"; + else if(StartsWith(device_name, "gfx1101")) + compiler_flag << " -DCK_AMD_GPU_GFX1101"; + else if(StartsWith(device_name, "gfx1102")) + compiler_flag << " -DCK_AMD_GPU_GFX1102"; ++ else if(StartsWith(device_name, "gfx1103")) ++ compiler_flag << " -DCK_AMD_GPU_GFX1103"; + // NOLINTEND(*-braces-around-statements) + + // buffer atomic-fadd +diff --git a/src/target_properties.cpp b/src/target_properties.cpp +index c3fa2bd3a..de979aae9 100644 +--- a/src/target_properties.cpp ++++ b/src/target_properties.cpp +@@ -52,9 +52,19 @@ static std::string GetDeviceNameFromMap(const std::string& in) + {"gfx804", "gfx803"}, + {"Vega10", "gfx900"}, + {"gfx901", "gfx900"}, +- {"Navi10", "gfx1010"}, ++ {"navi10", "gfx1010"}, ++ {"navi12", "gfx1011"}, ++ {"navi14", "gfx1012"}, + {"10.3.0 Sienna_Cichlid 18", "gfx1030"}, +- {"Rembrandt", "gfx1035"}, ++ {"navi22", "gfx1031"}, ++ {"navi23", "gfx1032"}, ++ {"navi24", "gfx1034"}, ++ {"rembrandt", "gfx1035"}, ++ {"raphael", "gfx1036"}, ++ {"navi31", "gfx1100"}, ++ {"navi32", "gfx1101"}, ++ {"navi33", "gfx1102"}, ++ {"phoenix", "gfx1103"}, + }; + + const auto& dev_str = miopen::GetStringEnv(ENV(MIOPEN_DEBUG_ENFORCE_DEVICE)); +-- +2.45.2 + diff --git a/patches/rocm-6.1.2/Tensile/0001-Tensile-fix-fallback-arch-build.patch b/patches/rocm-6.1.2/Tensile/0001-Tensile-fix-fallback-arch-build.patch index 4fc1570..75db742 100644 --- a/patches/rocm-6.1.2/Tensile/0001-Tensile-fix-fallback-arch-build.patch +++ b/patches/rocm-6.1.2/Tensile/0001-Tensile-fix-fallback-arch-build.patch @@ -1,7 +1,7 @@ -From dd39290f90df98f928452720e622c1497b4fd7da Mon Sep 17 00:00:00 2001 +From 3e2d2f891001a8d2a8f74a46884cbec84a2fd8c8 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 10 May 2024 20:34:13 -0700 -Subject: [PATCH 1/3] Tensile fix fallback arch build +Subject: [PATCH 1/4] Tensile fix fallback arch build fixes build error which happens if only the rx 5700 is enabled (only GPU_BUILD_AMD_NAVI10_GFX1010=1 enabled in rocm_sdk_builder envsetup.sh) @@ -61,5 +61,5 @@ index ca3ef322..9e37b4b0 100644 for arch in archs: if arch in architectureMap: -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/Tensile/0002-Tensile-add-gfx1035-support.patch b/patches/rocm-6.1.2/Tensile/0002-Tensile-add-gfx1035-support.patch index 83e65a2..d8397b2 100644 --- a/patches/rocm-6.1.2/Tensile/0002-Tensile-add-gfx1035-support.patch +++ b/patches/rocm-6.1.2/Tensile/0002-Tensile-add-gfx1035-support.patch @@ -1,7 +1,7 @@ -From 73281f46189f7012334d3b1a7e52baffade5295f Mon Sep 17 00:00:00 2001 +From cbec649b52abb1e45d72a3755fa01a77cf9784e7 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 3 May 2024 13:13:02 -0700 -Subject: [PATCH 2/3] Tensile, add gfx1035 support +Subject: [PATCH 2/4] Tensile, add gfx1035 support Signed-off-by: Mika Laitio --- @@ -84,5 +84,5 @@ index 6ececf1c..a89b7c39 100644 else: printWarning("Assembler not present, asm caps loaded from cache are unverified") -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/Tensile/0003-llvm-path-changes.patch b/patches/rocm-6.1.2/Tensile/0003-llvm-path-changes.patch index 7daa1e0..77db49b 100644 --- a/patches/rocm-6.1.2/Tensile/0003-llvm-path-changes.patch +++ b/patches/rocm-6.1.2/Tensile/0003-llvm-path-changes.patch @@ -1,7 +1,7 @@ -From 3ffffba383528d997372861fb5d940d09ebe2996 Mon Sep 17 00:00:00 2001 +From 80776357e8f44019675a224474e314bbb551bc8a Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Wed, 15 May 2024 21:09:56 -0700 -Subject: [PATCH 3/3] llvm path changes +Subject: [PATCH 3/4] llvm path changes Signed-off-by: Mika Laitio --- @@ -18,17 +18,18 @@ index a89b7c39..9b24aee7 100644 else: - globalParameters["AssemblerPath"] = locateExe(os.path.join(globalParameters["ROCmPath"], "llvm/bin"), "clang++") + globalParameters["AssemblerPath"] = locateExe(os.path.join(globalParameters["ROCmPath"], "bin"), "clang++") - + globalParameters["ROCmSMIPath"] = locateExe(globalParameters["ROCmBinPath"], "rocm-smi") - + @@ -2277,7 +2277,7 @@ def assignGlobalParameters( config ): if os.name == "nt": globalParameters["ClangOffloadBundlerPath"] = locateExe(globalParameters["ROCmBinPath"], "clang-offload-bundler.exe") else: - globalParameters["ClangOffloadBundlerPath"] = locateExe(os.path.join(globalParameters["ROCmPath"], "llvm/bin"), "clang-offload-bundler") + globalParameters["ClangOffloadBundlerPath"] = locateExe(os.path.join(globalParameters["ROCmPath"], "bin"), "clang-offload-bundler") - + if "ROCmAgentEnumeratorPath" in config: globalParameters["ROCmAgentEnumeratorPath"] = config["ROCmAgentEnumeratorPath"] --- -2.41.0 +-- +2.45.2 + diff --git a/patches/rocm-6.1.2/Tensile/0004-gfx1035-gfx1036-and-gfx1103-support.patch b/patches/rocm-6.1.2/Tensile/0004-gfx1035-gfx1036-and-gfx1103-support.patch new file mode 100644 index 0000000..5e9aad7 --- /dev/null +++ b/patches/rocm-6.1.2/Tensile/0004-gfx1035-gfx1036-and-gfx1103-support.patch @@ -0,0 +1,339 @@ +From 35954a100a66f26fd3ad356101fb5723c2eaac74 Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Sat, 13 Jul 2024 15:50:11 -0400 +Subject: [PATCH 4/4] gfx1035,gfx1036 and gfx1103 support + +Signed-off-by: Mika Laitio +--- + Tensile/AsmCaps.py | 84 +++++++++++++++++++ + Tensile/Common.py | 10 +-- + Tensile/SolutionLibrary.py | 4 +- + Tensile/Source/CMakeLists.txt | 4 +- + Tensile/Source/lib/include/Tensile/AMDGPU.hpp | 20 ++++- + .../include/Tensile/PlaceholderLibrary.hpp | 6 ++ + .../Tensile/Serialization/Predicates.hpp | 2 + + .../preload_kernel_arguments_always_half.yaml | 5 +- + pytest.ini | 4 + + 9 files changed, 127 insertions(+), 12 deletions(-) + +diff --git a/Tensile/AsmCaps.py b/Tensile/AsmCaps.py +index ef93f53f..4ba97887 100644 +--- a/Tensile/AsmCaps.py ++++ b/Tensile/AsmCaps.py +@@ -629,6 +629,48 @@ CACHED_ASM_CAPS = \ + 'v_mad_mix_f32': False, + 'v_pk_fma_f16': True, + 'v_pk_fmac_f16': False}, ++ (10, 3, 6): {'HasAddLshl': True, ++ 'HasAtomicAdd': False, ++ 'HasDirectToLdsDest': False, ++ 'HasDirectToLdsNoDest': True, ++ 'HasExplicitCO': True, ++ 'HasExplicitNC': True, ++ 'HasGLCModifier': True, ++ 'HasLshlOr': True, ++ 'HasMFMA': False, ++ 'HasMFMA_b8': False, ++ 'HasMFMA_bf16_1k': False, ++ 'HasMFMA_bf16_original': False, ++ 'HasMFMA_constSrc': False, ++ 'HasMFMA_f64': False, ++ 'HasMFMA_f8': False, ++ 'HasMFMA_i8_908': False, ++ 'HasMFMA_i8_940': False, ++ 'HasMFMA_vgpr': False, ++ 'HasMFMA_xf32': False, ++ 'HasSMulHi': True, ++ 'HasWMMA': False, ++ 'KernargPreloading': False, ++ 'MaxLgkmcnt': 15, ++ 'MaxVmcnt': 63, ++ 'SupportedISA': True, ++ 'SupportedSource': True, ++ 'VOP3v_dot4_i32_i8': True, ++ 'v_dot2_f32_f16': True, ++ 'v_dot2c_f32_f16': True, ++ 'v_dot4_i32_i8': False, ++ 'v_dot4c_i32_i8': True, ++ 'v_fma_f16': True, ++ 'v_fma_f32': True, ++ 'v_fma_f64': True, ++ 'v_fma_mix_f32': True, ++ 'v_fmac_f16': False, ++ 'v_fmac_f32': True, ++ 'v_mac_f16': False, ++ 'v_mac_f32': False, ++ 'v_mad_mix_f32': False, ++ 'v_pk_fma_f16': True, ++ 'v_pk_fmac_f16': False}, + (11, 0, 0): {'HasAddLshl': True, + 'HasAtomicAdd': True, + 'HasDirectToLdsDest': False, +@@ -714,6 +756,48 @@ CACHED_ASM_CAPS = \ + 'v_pk_fma_f16': True, + 'v_pk_fmac_f16': False}, + (11, 0, 2): {'HasAddLshl': True, ++ 'HasAtomicAdd': True, ++ 'HasDirectToLdsDest': False, ++ 'HasDirectToLdsNoDest': False, ++ 'HasExplicitCO': True, ++ 'HasExplicitNC': True, ++ 'HasGLCModifier': True, ++ 'HasLshlOr': True, ++ 'HasMFMA': False, ++ 'HasMFMA_b8': False, ++ 'HasMFMA_bf16_1k': False, ++ 'HasMFMA_bf16_original': False, ++ 'HasMFMA_constSrc': False, ++ 'HasMFMA_f64': False, ++ 'HasMFMA_f8': False, ++ 'HasMFMA_i8_908': False, ++ 'HasMFMA_i8_940': False, ++ 'HasMFMA_vgpr': False, ++ 'HasMFMA_xf32': False, ++ 'HasSMulHi': True, ++ 'HasWMMA': True, ++ 'KernargPreloading': False, ++ 'MaxLgkmcnt': 15, ++ 'MaxVmcnt': 63, ++ 'SupportedISA': True, ++ 'SupportedSource': True, ++ 'VOP3v_dot4_i32_i8': False, ++ 'v_dot2_f32_f16': True, ++ 'v_dot2c_f32_f16': True, ++ 'v_dot4_i32_i8': False, ++ 'v_dot4c_i32_i8': False, ++ 'v_fma_f16': True, ++ 'v_fma_f32': True, ++ 'v_fma_f64': True, ++ 'v_fma_mix_f32': True, ++ 'v_fmac_f16': False, ++ 'v_fmac_f32': True, ++ 'v_mac_f16': False, ++ 'v_mac_f32': False, ++ 'v_mad_mix_f32': False, ++ 'v_pk_fma_f16': True, ++ 'v_pk_fmac_f16': False}, ++ (11, 0, 3): {'HasAddLshl': True, + 'HasAtomicAdd': True, + 'HasDirectToLdsDest': False, + 'HasDirectToLdsNoDest': False, +diff --git a/Tensile/Common.py b/Tensile/Common.py +index 9b24aee7..13a5fd3b 100644 +--- a/Tensile/Common.py ++++ b/Tensile/Common.py +@@ -228,7 +228,7 @@ globalParameters["SupportedISA"] = [(8,0,3), + (9,0,0), (9,0,6), (9,0,8), (9,0,10), + (9,4,0), (9,4,1), (9,4,2), + (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1), (10,3,5), +- (11,0,0), (11,0,1), (11,0,2)] # assembly kernels writer supports these architectures ++ (11,0,0), (11,0,1), (11,0,2), (11,0,3)] # assembly kernels writer supports these architectures + + globalParameters["CleanupBuildFiles"] = False # cleanup build files (e.g. kernel assembly) once no longer needed + globalParameters["GenerateManifestAndExit"] = False # Output manifest file with list of expected library objects and exit +@@ -305,8 +305,8 @@ architectureMap = { + 'gfx941':'aquavanjaram941', 'gfx941:xnack+':'aquavanjaram941', 'gfx941:xnack-':'aquavanjaram941', + 'gfx942':'aquavanjaram942', 'gfx942:xnack+':'aquavanjaram942', 'gfx942:xnack-':'aquavanjaram942', + 'gfx1010':'navi10', 'gfx1011':'navi12', 'gfx1012':'navi14', +- 'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt', +- 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33' ++ 'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt', 'gfx1036':'raphael', ++ 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33', 'gfx1103':'phoenix' + } + + def getArchitectureName(gfxName): +@@ -2291,8 +2291,8 @@ def assignGlobalParameters( config ): + globalParameters["CurrentISA"] = (9,0,6) + printWarning("Failed to detect ISA so forcing (gfx906) on windows") + if globalParameters["CurrentISA"] == (9,4,1) or globalParameters["CurrentISA"] == (9,4,2) or globalParameters["CurrentISA"] == (11,0,0) or \ +- globalParameters["CurrentISA"] == (11,0,1) or globalParameters["CurrentISA"] == (11,0,2): +- printWarning("HardwareMonitor currently disabled for gfx941/942 or gfx1100/gfx1101/gfx1102") ++ globalParameters["CurrentISA"] == (11,0,1) or globalParameters["CurrentISA"] == (11,0,2) or globalParameters["CurrentISA"] == (11,0,3): ++ printWarning("HardwareMonitor currently disabled for gfx941/942 or gfx1100/gfx1101/gfx1102/gfx1103") + globalParameters["HardwareMonitor"] = False + + # For ubuntu platforms, call dpkg to grep the version of hip-clang. This check is platform specific, and in the future +diff --git a/Tensile/SolutionLibrary.py b/Tensile/SolutionLibrary.py +index 66f2e86a..9953aff9 100644 +--- a/Tensile/SolutionLibrary.py ++++ b/Tensile/SolutionLibrary.py +@@ -255,8 +255,8 @@ class MasterSolutionLibrary: + def ArchitectureIndexMap(cls, architectureName): + # 'fallback', 'gfx803', 'gfx900', 'gfx906', 'gfx908', 'gfx90a', + # 'gfx940', 'gfx941', 'gfx942', 'gfx1010', 'gfx1011', 'gfx1012', +- # 'gfx1030', 'gfx1031', 'gfx1032', 'gfx1034', 'gfx1035', 'gfx1100', +- # 'gfx1101', 'gfx1102' ++ # 'gfx1030', 'gfx1031', 'gfx1032', 'gfx1034', 'gfx1035', 'gfx1036', ++ # 'gfx1100', 'gfx1101', 'gfx1102', 'gfx1103' + archval = -1 + if architectureName == "fallback": + archval = 0 +diff --git a/Tensile/Source/CMakeLists.txt b/Tensile/Source/CMakeLists.txt +index e973a9ed..ecd18dd5 100644 +--- a/Tensile/Source/CMakeLists.txt ++++ b/Tensile/Source/CMakeLists.txt +@@ -51,9 +51,9 @@ if(NOT DEFINED CXX_VERSION_STRING) + endif() + + if(CMAKE_CXX_COMPILER STREQUAL "hipcc") +- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures") ++ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1036 gfx1100 gfx1101 gfx1102 gfx1103 CACHE STRING "GPU architectures") + else() +- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures") ++ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1036 gfx1100 gfx1101 gfx1102 gfx1103 CACHE STRING "GPU architectures") + endif() + + include(CMakeDependentOption) +diff --git a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp +index c845d749..d1c673a1 100644 +--- a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp ++++ b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp +@@ -71,9 +71,11 @@ namespace Tensile + gfx1032 = 1032, + gfx1034 = 1034, + gfx1035 = 1035, ++ gfx1036 = 1036, + gfx1100 = 1100, + gfx1101 = 1101, +- gfx1102 = 1102 ++ gfx1102 = 1102, ++ gfx1103 = 1103 + }; + + static std::string toString(Processor p) +@@ -112,12 +114,16 @@ namespace Tensile + return "gfx1034"; + case AMDGPU::Processor::gfx1035: + return "gfx1035"; ++ case AMDGPU::Processor::gfx1036: ++ return "gfx1036"; + case AMDGPU::Processor::gfx1100: + return "gfx1100"; + case AMDGPU::Processor::gfx1101: + return "gfx1101"; + case AMDGPU::Processor::gfx1102: + return "gfx1102"; ++ case AMDGPU::Processor::gfx1103: ++ return "gfx1103"; + } + return ""; + } +@@ -172,6 +178,14 @@ namespace Tensile + { + return AMDGPU::Processor::gfx1030; + } ++ else if(deviceString.find("gfx1035") != std::string::npos) ++ { ++ return AMDGPU::Processor::gfx1035; ++ } ++ else if(deviceString.find("gfx1036") != std::string::npos) ++ { ++ return AMDGPU::Processor::gfx1036; ++ } + else if(deviceString.find("gfx1100") != std::string::npos) + { + return AMDGPU::Processor::gfx1100; +@@ -184,6 +198,10 @@ namespace Tensile + { + return AMDGPU::Processor::gfx1102; + } ++ else if(deviceString.find("gfx1103") != std::string::npos) ++ { ++ return AMDGPU::Processor::gfx1103; ++ } + else + { + return static_cast(0); +diff --git a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp +index 10898ec2..38e99640 100644 +--- a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp ++++ b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp +@@ -55,9 +55,11 @@ namespace Tensile + gfx1032, + gfx1034, + gfx1035, ++ gfx1036, + gfx1100, + gfx1101, + gfx1102, ++ gfx1103, + All + }; + +@@ -100,12 +102,16 @@ namespace Tensile + return "TensileLibrary_*_gfx1034"; + case LazyLoadingInit::gfx1035: + return "TensileLibrary_*_gfx1035"; ++ case LazyLoadingInit::gfx1036: ++ return "TensileLibrary_*_gfx1036"; + case LazyLoadingInit::gfx1100: + return "TensileLibrary_*_gfx1100"; + case LazyLoadingInit::gfx1101: + return "TensileLibrary_*_gfx1101"; + case LazyLoadingInit::gfx1102: + return "TensileLibrary_*_gfx1102"; ++ case LazyLoadingInit::gfx1103: ++ return "TensileLibrary_*_gfx1103"; + case LazyLoadingInit::None: + return ""; + } +diff --git a/Tensile/Source/lib/include/Tensile/Serialization/Predicates.hpp b/Tensile/Source/lib/include/Tensile/Serialization/Predicates.hpp +index 87fc0d24..b52c9d38 100644 +--- a/Tensile/Source/lib/include/Tensile/Serialization/Predicates.hpp ++++ b/Tensile/Source/lib/include/Tensile/Serialization/Predicates.hpp +@@ -229,9 +229,11 @@ namespace Tensile + iot::enumCase(io, value, "gfx1032", AMDGPU::Processor::gfx1032); + iot::enumCase(io, value, "gfx1034", AMDGPU::Processor::gfx1034); + iot::enumCase(io, value, "gfx1035", AMDGPU::Processor::gfx1035); ++ iot::enumCase(io, value, "gfx1036", AMDGPU::Processor::gfx1036); + iot::enumCase(io, value, "gfx1100", AMDGPU::Processor::gfx1100); + iot::enumCase(io, value, "gfx1101", AMDGPU::Processor::gfx1101); + iot::enumCase(io, value, "gfx1102", AMDGPU::Processor::gfx1102); ++ iot::enumCase(io, value, "gfx1103", AMDGPU::Processor::gfx1103); + } + }; + +diff --git a/Tensile/Tests/pre_checkin/preload_kernel_arguments_always_half.yaml b/Tensile/Tests/pre_checkin/preload_kernel_arguments_always_half.yaml +index 7b916d94..825100b0 100644 +--- a/Tensile/Tests/pre_checkin/preload_kernel_arguments_always_half.yaml ++++ b/Tensile/Tests/pre_checkin/preload_kernel_arguments_always_half.yaml +@@ -2,8 +2,9 @@ TestParameters: + marks: [skip-gfx900, skip-gfx906, skip-gfx908, + skip-gfx1010, skip-gfx1011, skip-gfx1012, + skip-gfx1030, skip-gfx1031, skip-gfx1032, +- skip-gfx1034, skip-gfx1035, skip-gfx1100, +- skip-gfx1101, skip-gfx1102] ++ skip-gfx1034, skip-gfx1035, skip-gfx1036, ++ skip-gfx1100, skip-gfx1101, skip-gfx1102, ++ skip-gfx1103] + + GlobalParameters: + MinimumRequiredVersion: 4.2.0 +diff --git a/pytest.ini b/pytest.ini +index 2dc9a329..f16a65e3 100644 +--- a/pytest.ini ++++ b/pytest.ini +@@ -104,9 +104,11 @@ markers = + xfail-gfx1032: architecture + xfail-gfx1034: architecture + xfail-gfx1035: architecture ++ xfail-gfx1036: architecture + xfail-gfx1100: architecture + xfail-gfx1101: architecture + xfail-gfx1102: architecture ++ xfail-gfx1103: architecture + skip-gfx000: architecture + skip-gfx900: architecture + skip-gfx906: architecture +@@ -123,6 +125,8 @@ markers = + skip-gfx1032: architecture + skip-gfx1034: architecture + skip-gfx1035: architecture ++ skip-gfx1036: architecture + skip-gfx1100: architecture + skip-gfx1101: architecture + skip-gfx1102: architecture ++ skip-gfx1103: architecture +-- +2.45.2 + diff --git a/patches/rocm-6.1.2/composable_kernel/0001-by-default-no-3RD_DWORD-support-for-not-listed-gpus.patch b/patches/rocm-6.1.2/composable_kernel/0001-by-default-no-3RD_DWORD-support-for-not-listed-gpus.patch index 382188e..6781de3 100644 --- a/patches/rocm-6.1.2/composable_kernel/0001-by-default-no-3RD_DWORD-support-for-not-listed-gpus.patch +++ b/patches/rocm-6.1.2/composable_kernel/0001-by-default-no-3RD_DWORD-support-for-not-listed-gpus.patch @@ -1,7 +1,7 @@ -From fe5fb410b74f6c5b0ea8471469f7b77e3b916b01 Mon Sep 17 00:00:00 2001 +From 63f717b9e4019902a4c1e705a3a907c3b455aca2 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Tue, 19 Dec 2023 15:16:58 -0800 -Subject: [PATCH 1/3] by default no 3RD_DWORD support for not listed gpus +Subject: [PATCH 1/4] by default no 3RD_DWORD support for not listed gpus Signed-off-by: Mika Laitio --- @@ -22,5 +22,5 @@ index 88efb0277..da5bb86c6 100644 // FMA instruction -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/composable_kernel/0002-composable-kernel-jit-library-backport.patch b/patches/rocm-6.1.2/composable_kernel/0002-composable-kernel-jit-library-backport.patch index 6dc4332..b08ee60 100644 --- a/patches/rocm-6.1.2/composable_kernel/0002-composable-kernel-jit-library-backport.patch +++ b/patches/rocm-6.1.2/composable_kernel/0002-composable-kernel-jit-library-backport.patch @@ -1,7 +1,7 @@ -From e49dd3637bca9e1c2e228f12c8ecd9ebfbaa7afb Mon Sep 17 00:00:00 2001 +From 7c9d19e1fe72007106c8a71a5fef848c5e54c404 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 3 May 2024 13:49:47 -0700 -Subject: [PATCH 2/3] composable kernel jit library backport +Subject: [PATCH 2/4] composable kernel jit library backport - backported from the composable_kernel migraphx branch revision 57cdd70b7cb14e5e3b60cd9a5f96ba8dc343763e @@ -2287,5 +2287,5 @@ index 000000000..a5057da10 + } +} -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/composable_kernel/0003-gfx1010-and-gfx1035-initial-support.patch b/patches/rocm-6.1.2/composable_kernel/0003-gfx1010-and-gfx1035-initial-support.patch index 28a7e5e..8c95fc8 100644 --- a/patches/rocm-6.1.2/composable_kernel/0003-gfx1010-and-gfx1035-initial-support.patch +++ b/patches/rocm-6.1.2/composable_kernel/0003-gfx1010-and-gfx1035-initial-support.patch @@ -1,7 +1,7 @@ -From 26942bd2a0a7a1d341969095e51eeb2625c6b8b4 Mon Sep 17 00:00:00 2001 +From 7c02a14553826261d1052da5e1fb17487e15d2d4 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 24 May 2024 03:50:43 -0700 -Subject: [PATCH 3/3] gfx1010 and gfx1035 initial support +Subject: [PATCH 3/4] gfx1010 and gfx1035 initial support - needs more testing @@ -59,5 +59,5 @@ index e8dabc997..0908f6757 100644 const auto name = raw_name.substr(0, raw_name.find(':')); // str.substr(0, npos) returns str. -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/composable_kernel/0004-add-gfx1036-and-gfx1103-support.patch b/patches/rocm-6.1.2/composable_kernel/0004-add-gfx1036-and-gfx1103-support.patch new file mode 100644 index 0000000..ae8d2a2 --- /dev/null +++ b/patches/rocm-6.1.2/composable_kernel/0004-add-gfx1036-and-gfx1103-support.patch @@ -0,0 +1,76 @@ +From d3a76a05875c148f25985ee08c54099c69b75edb Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Sat, 13 Jul 2024 21:35:33 -0400 +Subject: [PATCH 4/4] add gfx1036 and gfx1103 support + +Signed-off-by: Mika Laitio +--- + include/ck/ck.hpp | 12 +++++++----- + include/ck/host_utility/device_prop.hpp | 7 ++++++- + 2 files changed, 13 insertions(+), 6 deletions(-) + +diff --git a/include/ck/ck.hpp b/include/ck/ck.hpp +index 266460342..379261ba3 100644 +--- a/include/ck/ck.hpp ++++ b/include/ck/ck.hpp +@@ -49,11 +49,11 @@ + #define CK_BUFFER_RESOURCE_3RD_DWORD -1 + #elif defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__) || defined(__gfx908__) || \ + defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || \ +- defined(__gfx942__) || defined(__gfx1010__) // for GPU code ++ defined(__gfx942__) // for GPU code + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x00020000 +-#elif defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1035__) // for GPU code ++#elif defined(__gfx1010__) || defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1035__) || defined(__gfx1036__) // for GPU code + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000 +-#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) // for GPU code ++#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) // for GPU code + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31004000 + #else + #define CK_BUFFER_RESOURCE_3RD_DWORD -1 +@@ -66,12 +66,14 @@ + #ifndef __HIP_DEVICE_COMPILE__ // for host code, define nothing + #elif defined(__gfx803__) || defined(__gfx900__) // for GPU code + #define CK_USE_AMD_V_MAC_F32 ++#elif defined(__gfx1010__) ++#define CK_USE_AMD_V_FMAC_F32 + #elif defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__) || defined(__gfx1031__) || \ + defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) // for GPU code + #define CK_USE_AMD_V_FMAC_F32 + #define CK_USE_AMD_V_DOT2_F32_F16 + #define CK_USE_AMD_V_DOT4_I32_I8 +-#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) ++#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) + #define CK_USE_AMD_V_FMAC_F32 + #define CK_USE_AMD_V_DOT2_F32_F16 + #define CK_USE_AMD_V_DOT4_I32_I8_GFX11 +@@ -96,7 +98,7 @@ + // WMMA instruction + #ifndef __HIP_DEVICE_COMPILE__ // for host code + #define CK_USE_AMD_WMMA +-#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) // for GPU code ++#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) // for GPU code + #define CK_USE_AMD_WMMA + #endif + +diff --git a/include/ck/host_utility/device_prop.hpp b/include/ck/host_utility/device_prop.hpp +index 0908f6757..294a60f9a 100644 +--- a/include/ck/host_utility/device_prop.hpp ++++ b/include/ck/host_utility/device_prop.hpp +@@ -43,7 +43,12 @@ inline std::string get_device_name() + {"navi10", "gfx1010"}, + {"gfx1031", "gfx1030"}, + {"10.3.0 Sienna_Cichlid 18", "gfx1030"}, +- {"Rembrandt", "gfx1035"}, ++ {"rembrandt", "gfx1035"}, ++ {"raphael", "gfx1036"}, ++ {"navi31", "gfx1100"}, ++ {"navi32", "gfx1101"}, ++ {"navi33", "gfx1102"}, ++ {"phoenix", "gfx1103"}, + }; + + const auto name = raw_name.substr(0, raw_name.find(':')); // str.substr(0, npos) returns str. +-- +2.45.2 + diff --git a/patches/rocm-6.1.2/hipBLAS/0001-add-OpenBLAS-link-support-for-client-apps.patch b/patches/rocm-6.1.2/hipBLAS/0001-add-OpenBLAS-link-support-for-client-apps.patch index 2df06d7..7db6814 100644 --- a/patches/rocm-6.1.2/hipBLAS/0001-add-OpenBLAS-link-support-for-client-apps.patch +++ b/patches/rocm-6.1.2/hipBLAS/0001-add-OpenBLAS-link-support-for-client-apps.patch @@ -1,4 +1,4 @@ -From 9dfc58e3856e4a8e3bcd75c3bdc308a0b50eaf14 Mon Sep 17 00:00:00 2001 +From 2f4b5e5e72f16210ce1ec33aa508c34367aabaef Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Sat, 18 May 2024 18:34:49 -0700 Subject: [PATCH 1/3] add OpenBLAS link support for client apps @@ -49,5 +49,5 @@ index 4a6e5c8..0240293 100644 endif() set( ${cblas_libs} ${libs} PARENT_SCOPE ) -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-1036-and-1103-to-client-utilty-code.patch b/patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-1036-and-1103-to-client-utilty-code.patch new file mode 100644 index 0000000..2f671c7 --- /dev/null +++ b/patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-1036-and-1103-to-client-utilty-code.patch @@ -0,0 +1,61 @@ +From 604e6e5f8313bda35b64ab06d549ad1b567488be Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Sat, 18 May 2024 18:35:05 -0700 +Subject: [PATCH 2/3] add gfx1035,1036 and 1103 to client utilty code + +Signed-off-by: Mika Laitio +--- + clients/common/utility.cpp | 12 ++++++++++++ + clients/include/utility.h | 4 +++- + 2 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/clients/common/utility.cpp b/clients/common/utility.cpp +index 9dd74e7..968b708 100644 +--- a/clients/common/utility.cpp ++++ b/clients/common/utility.cpp +@@ -476,6 +476,14 @@ hipblasClientProcessor getArch() + { + return hipblasClientProcessor::gfx1030; + } ++ else if(deviceString.find("gfx1035") != std::string::npos) ++ { ++ return hipblasClientProcessor::gfx1035; ++ } ++ else if(deviceString.find("gfx1036") != std::string::npos) ++ { ++ return hipblasClientProcessor::gfx1036; ++ } + else if(deviceString.find("gfx1100") != std::string::npos) + { + return hipblasClientProcessor::gfx1100; +@@ -488,6 +496,10 @@ hipblasClientProcessor getArch() + { + return hipblasClientProcessor::gfx1102; + } ++ else if(deviceString.find("gfx1103") != std::string::npos) ++ { ++ return hipblasClientProcessor::gfx1103; ++ } + return static_cast(0); + } + +diff --git a/clients/include/utility.h b/clients/include/utility.h +index 0891137..fd440a8 100644 +--- a/clients/include/utility.h ++++ b/clients/include/utility.h +@@ -867,9 +867,11 @@ typedef enum hipblasClientProcessor + gfx1032 = 1032, + gfx1034 = 1034, + gfx1035 = 1035, ++ gfx1036 = 1036, + gfx1100 = 1100, + gfx1101 = 1101, +- gfx1102 = 1102 ++ gfx1102 = 1102, ++ gfx1103 = 1103 + } hipblasClientProcessor; + + /* get architecture number */ +-- +2.45.2 + diff --git a/patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-to-client-utilty-code.patch b/patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-to-client-utilty-code.patch deleted file mode 100644 index 47e7798..0000000 --- a/patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-to-client-utilty-code.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 68e6d6b2323a635085eca7487647a2d6f4591910 Mon Sep 17 00:00:00 2001 -From: Mika Laitio -Date: Sat, 18 May 2024 18:35:05 -0700 -Subject: [PATCH 2/3] add gfx1035 to client utilty code - -Signed-off-by: Mika Laitio ---- - clients/common/utility.cpp | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/clients/common/utility.cpp b/clients/common/utility.cpp -index 9dd74e7..2a2d214 100644 ---- a/clients/common/utility.cpp -+++ b/clients/common/utility.cpp -@@ -476,6 +476,10 @@ hipblasClientProcessor getArch() - { - return hipblasClientProcessor::gfx1030; - } -+ else if(deviceString.find("gfx1035") != std::string::npos) -+ { -+ return hipblasClientProcessor::gfx1035; -+ } - else if(deviceString.find("gfx1100") != std::string::npos) - { - return hipblasClientProcessor::gfx1100; --- -2.41.0 - diff --git a/patches/rocm-6.1.2/hipBLAS/0003-fixes-for-gtest-and-lapack-linking-in-ubuntu.patch b/patches/rocm-6.1.2/hipBLAS/0003-fixes-for-gtest-and-lapack-linking-in-ubuntu.patch index d22e809..6e5ed71 100644 --- a/patches/rocm-6.1.2/hipBLAS/0003-fixes-for-gtest-and-lapack-linking-in-ubuntu.patch +++ b/patches/rocm-6.1.2/hipBLAS/0003-fixes-for-gtest-and-lapack-linking-in-ubuntu.patch @@ -1,4 +1,4 @@ -From d7a6f0002b35ebcb6b6f861a5bf839aafb20c8b5 Mon Sep 17 00:00:00 2001 +From 95c06ec97e891829b2d525c9bee1da21a52fdfc8 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Thu, 23 May 2024 11:51:49 -0700 Subject: [PATCH 3/3] fixes for gtest and lapack linking in ubuntu @@ -68,5 +68,5 @@ index b670407..8c66252 100644 CXX_EXTENSIONS OFF RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLASLt/0001-tensilelite-llvm-path-changes.patch b/patches/rocm-6.1.2/hipBLASLt/0001-tensilelite-llvm-path-changes.patch index 9bc517a..390453e 100644 --- a/patches/rocm-6.1.2/hipBLASLt/0001-tensilelite-llvm-path-changes.patch +++ b/patches/rocm-6.1.2/hipBLASLt/0001-tensilelite-llvm-path-changes.patch @@ -1,4 +1,4 @@ -From 05c7fa87dabe346ed68a338aaa967a90bdfc9832 Mon Sep 17 00:00:00 2001 +From e5daa8fa4cf061e18c4b6bfb27c318f0d79bf594 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 3 May 2024 13:16:31 -0700 Subject: [PATCH 1/7] tensilelite llvm path changes @@ -49,5 +49,5 @@ index 230c4469..81a1a015 100644 . ${venv}/bin/activate -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLASLt/0002-add-more-gpus-for-default-target-list.patch b/patches/rocm-6.1.2/hipBLASLt/0002-add-more-gpus-for-default-target-list.patch index 6f76be2..400c041 100644 --- a/patches/rocm-6.1.2/hipBLASLt/0002-add-more-gpus-for-default-target-list.patch +++ b/patches/rocm-6.1.2/hipBLASLt/0002-add-more-gpus-for-default-target-list.patch @@ -1,4 +1,4 @@ -From b8b8b1831a52364980a70841eadcc7dc6667bbaa Mon Sep 17 00:00:00 2001 +From 490d18000263e3e4849ed78ab4ac7975529a86a7 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Wed, 15 May 2024 19:20:44 -0700 Subject: [PATCH 2/7] add more gpus for default target list @@ -26,5 +26,5 @@ index fec03c1b..c6a16e01 100644 if (AMDGPU_TARGETS) -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLASLt/0003-Tensilelite-fix-fallback-arch-build.patch b/patches/rocm-6.1.2/hipBLASLt/0003-Tensilelite-fix-fallback-arch-build.patch index 5a21c7f..40f942c 100644 --- a/patches/rocm-6.1.2/hipBLASLt/0003-Tensilelite-fix-fallback-arch-build.patch +++ b/patches/rocm-6.1.2/hipBLASLt/0003-Tensilelite-fix-fallback-arch-build.patch @@ -1,4 +1,4 @@ -From 4d0cd887735f8b24687d4007ce4310a4a2df3951 Mon Sep 17 00:00:00 2001 +From 61b76b1bbdaeac7f08ef13200ffcd4b3fce8f5a6 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Wed, 15 May 2024 19:30:31 -0700 Subject: [PATCH 3/7] Tensilelite fix fallback arch build @@ -62,5 +62,5 @@ index 8a37403d..d2ac2166 100644 for arch in archs: if arch in architectureMap: -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLASLt/0004-Tensilelite-add-gfx1035-to-supported-isa-list.patch b/patches/rocm-6.1.2/hipBLASLt/0004-Tensilelite-add-gfx1035-1036-and-1103-to-supported-i.patch similarity index 52% rename from patches/rocm-6.1.2/hipBLASLt/0004-Tensilelite-add-gfx1035-to-supported-isa-list.patch rename to patches/rocm-6.1.2/hipBLASLt/0004-Tensilelite-add-gfx1035-1036-and-1103-to-supported-i.patch index 52e1761..3b5af24 100644 --- a/patches/rocm-6.1.2/hipBLASLt/0004-Tensilelite-add-gfx1035-to-supported-isa-list.patch +++ b/patches/rocm-6.1.2/hipBLASLt/0004-Tensilelite-add-gfx1035-1036-and-1103-to-supported-i.patch @@ -1,15 +1,16 @@ -From b4f84e3aa32dee684c200ed3d5a395db9d36811d Mon Sep 17 00:00:00 2001 +From a687a8343961e0a3c732101b44e0071150f38d51 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Wed, 15 May 2024 20:56:51 -0700 -Subject: [PATCH 4/7] Tensilelite add gfx1035 to supported isa list +Subject: [PATCH 4/7] Tensilelite add gfx1035, 1036 and 1103 to supported isa + list Signed-off-by: Mika Laitio --- - tensilelite/Tensile/Common.py | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) + tensilelite/Tensile/Common.py | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensilelite/Tensile/Common.py b/tensilelite/Tensile/Common.py -index c47a8752..1751e0b4 100644 +index c47a8752..3630ee41 100644 --- a/tensilelite/Tensile/Common.py +++ b/tensilelite/Tensile/Common.py @@ -209,7 +209,7 @@ globalParameters["MergeFiles"] = True # F=store every solution and k @@ -17,10 +18,21 @@ index c47a8752..1751e0b4 100644 globalParameters["MaxFileName"] = 64 # If a file name would be longer than this, shorten it with a hash. -globalParameters["SupportedISA"] = [(8,0,3), (9,0,0), (9,0,6), (9,0,8), (9,0,10), (9,4,0), (9,4,1), (9,4,2), (10,1,0), (10,1,1), (10,1,2), (10,3,0), (11,0,0), (11,0,1), (11,0,2)] # assembly kernels writer supports these architectures -+globalParameters["SupportedISA"] = [(8,0,3), (9,0,0), (9,0,6), (9,0,8), (9,0,10), (9,4,0), (9,4,1), (9,4,2), (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1), (10,3,5), (11,0,0), (11,0,1), (11,0,2)] # assembly kernels writer supports these architectures ++globalParameters["SupportedISA"] = [(8,0,3), (9,0,0), (9,0,6), (9,0,8), (9,0,10), (9,4,0), (9,4,1), (9,4,2), (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1), (10,3,5), (10,3,6), (11,0,0), (11,0,1), (11,0,2), (11,0,3)] # assembly kernels writer supports these architectures globalParameters["GenerateManifestAndExit"] = False # Output manifest file with list of expected library objects and exit globalParameters["NewClient"] = 2 # Old client deprecated: NewClient must be set to 2. +@@ -286,8 +286,8 @@ architectureMap = { + 'gfx940':'aquavanjaram', 'gfx940:xnack+':'aquavanjaram', 'gfx940:xnack-':'aquavanjaram', + 'gfx941':'aquavanjaram', 'gfx941:xnack+':'aquavanjaram', 'gfx941:xnack-':'aquavanjaram', + 'gfx942':'aquavanjaram', 'gfx942:xnack+':'aquavanjaram', 'gfx942:xnack-':'aquavanjaram', +- 'gfx1010':'navi10', 'gfx1011':'navi12', 'gfx1012':'navi14', 'gfx1030':'navi21', +- 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33' ++ 'gfx1010':'navi10', 'gfx1011':'navi12', 'gfx1012':'navi14', 'gfx1030':'navi21', 'gfx1035':'rembrandt', 'gfx1036':'raphael', ++ 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33', 'gfx1103':'phoenix' + } + + def getArchitectureName(gfxName): -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLASLt/0005-fallback-support-debug-patch.patch b/patches/rocm-6.1.2/hipBLASLt/0005-fallback-support-debug-patch.patch index 5093087..2d004c7 100644 --- a/patches/rocm-6.1.2/hipBLASLt/0005-fallback-support-debug-patch.patch +++ b/patches/rocm-6.1.2/hipBLASLt/0005-fallback-support-debug-patch.patch @@ -1,4 +1,4 @@ -From 29e64fd34440137d7406bc7044b19d470d785e27 Mon Sep 17 00:00:00 2001 +From 6b32bb226ef045db90c86f955fb3836d3f01ca98 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Sat, 18 May 2024 18:39:43 -0700 Subject: [PATCH 5/7] fallback support debug patch @@ -23,5 +23,5 @@ index d2ac2166..ce6c0d5f 100644 if globalParameters["LazyLibraryLoading"] and not (globalParameters["MergeFiles"] and globalParameters["SeparateArchitectures"]): printExit("--lazy-library-loading requires --merge-files and --separate-architectures enabled") -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLASLt/0006-OpenBLAS-and-BLIS-library-search-improvements.patch b/patches/rocm-6.1.2/hipBLASLt/0006-OpenBLAS-and-BLIS-library-search-improvements.patch index d628ff6..2437e75 100644 --- a/patches/rocm-6.1.2/hipBLASLt/0006-OpenBLAS-and-BLIS-library-search-improvements.patch +++ b/patches/rocm-6.1.2/hipBLASLt/0006-OpenBLAS-and-BLIS-library-search-improvements.patch @@ -1,4 +1,4 @@ -From 24f6c1010ce3941833b40b4faad775de320caa3f Mon Sep 17 00:00:00 2001 +From 992e946dddb9e1714da704bd1277b291496f5f69 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Sat, 18 May 2024 18:40:43 -0700 Subject: [PATCH 6/7] OpenBLAS and BLIS library search improvements @@ -156,5 +156,5 @@ index bc9109d0..53b854e6 100644 message("BLIS lib found: ${BLIS_LIB}") -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLASLt/0007-manjaro-and-arch-linux-msgpack-search-fix.patch b/patches/rocm-6.1.2/hipBLASLt/0007-manjaro-and-arch-linux-msgpack-search-fix.patch index b4ce482..3a9f210 100644 --- a/patches/rocm-6.1.2/hipBLASLt/0007-manjaro-and-arch-linux-msgpack-search-fix.patch +++ b/patches/rocm-6.1.2/hipBLASLt/0007-manjaro-and-arch-linux-msgpack-search-fix.patch @@ -1,4 +1,4 @@ -From f93b0ac353b7c8ce61d88c8f6a1b84598119c685 Mon Sep 17 00:00:00 2001 +From f320e89bad147c2b9e6cb6fd00261d5bfd0526be Mon Sep 17 00:00:00 2001 From: Daniele <57776841+daniandtheweb@users.noreply.github.com> Date: Tue, 18 Jun 2024 21:46:30 -0700 Subject: [PATCH 7/7] manjaro and arch linux msgpack search fix @@ -28,5 +28,5 @@ index 43206527..e9476b3d 100644 if(TARGET msgpackc-cxx) -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/onnxruntime/0001-onnxruntime-training-rocm-sdk-builder-scripts.patch b/patches/rocm-6.1.2/onnxruntime/0001-onnxruntime-training-rocm-sdk-builder-scripts.patch index e0e036d..46ba411 100644 --- a/patches/rocm-6.1.2/onnxruntime/0001-onnxruntime-training-rocm-sdk-builder-scripts.patch +++ b/patches/rocm-6.1.2/onnxruntime/0001-onnxruntime-training-rocm-sdk-builder-scripts.patch @@ -1,4 +1,4 @@ -From dd64c46c98dbe8655fa26827df23a84b51e835f0 Mon Sep 17 00:00:00 2001 +From 929438e2cbadac5ad9fb8cdb415baeb9e6d598ee Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Mon, 20 May 2024 14:02:57 -0700 Subject: [PATCH 1/7] onnxruntime training rocm sdk builder scripts diff --git a/patches/rocm-6.1.2/onnxruntime/0002-composable-kernel-patches-to-support-gfx1010-and-gfx.patch b/patches/rocm-6.1.2/onnxruntime/0002-composable-kernel-patches-to-support-additiona-gpus.patch similarity index 58% rename from patches/rocm-6.1.2/onnxruntime/0002-composable-kernel-patches-to-support-gfx1010-and-gfx.patch rename to patches/rocm-6.1.2/onnxruntime/0002-composable-kernel-patches-to-support-additiona-gpus.patch index 4b495b5..b076016 100644 --- a/patches/rocm-6.1.2/onnxruntime/0002-composable-kernel-patches-to-support-gfx1010-and-gfx.patch +++ b/patches/rocm-6.1.2/onnxruntime/0002-composable-kernel-patches-to-support-additiona-gpus.patch @@ -1,20 +1,25 @@ -From 0422716712c83f720a6db4b99eed5366f8c5a8fa Mon Sep 17 00:00:00 2001 +From 014cf53d98668c6f5db201eb26f8fba8ff82f965 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 24 May 2024 16:03:37 -0700 -Subject: [PATCH 2/7] composable kernel patches to support gfx1010 and gfx1035 +Subject: [PATCH 2/7] composable kernel patches to support additiona gpus -add onnxruntime composable kernel patches required -to build onxxruntime for gfx1010 or gfx1030 +onnxruntime composable kernel patches to support +- gfx1010 +- gfx1035 +- gfx1036 +- gfx1103 Signed-off-by: Mika Laitio --- cmake/external/composable_kernel.cmake | 2 +- - ...RD_DWORD-support-for-not-listed-gpus.patch | 25 ++++ - ...-gfx1010-and-gfx1035-initial-support.patch | 62 ++++++++ - .../composable_kernel_patches_combined.patch | 135 ++++++++++++++++++ - 4 files changed, 223 insertions(+), 1 deletion(-) + ...RD_DWORD-support-for-not-listed-gpus.patch | 26 +++ + ...-gfx1010-and-gfx1035-initial-support.patch | 63 +++++++ + ...0004-add-gfx1036-and-gfx1103-support.patch | 76 +++++++++ + .../composable_kernel_patches_combined.patch | 155 ++++++++++++++++++ + 5 files changed, 321 insertions(+), 1 deletion(-) create mode 100644 cmake/patches/composable_kernel/0001-by-default-no-3RD_DWORD-support-for-not-listed-gpus.patch create mode 100644 cmake/patches/composable_kernel/0003-gfx1010-and-gfx1035-initial-support.patch + create mode 100644 cmake/patches/composable_kernel/0004-add-gfx1036-and-gfx1103-support.patch create mode 100644 cmake/patches/composable_kernel/composable_kernel_patches_combined.patch diff --git a/cmake/external/composable_kernel.cmake b/cmake/external/composable_kernel.cmake @@ -29,14 +34,14 @@ index b4e6c834c8..123bafa9b9 100644 FetchContent_Declare(composable_kernel diff --git a/cmake/patches/composable_kernel/0001-by-default-no-3RD_DWORD-support-for-not-listed-gpus.patch b/cmake/patches/composable_kernel/0001-by-default-no-3RD_DWORD-support-for-not-listed-gpus.patch new file mode 100644 -index 0000000000..7fe3f07b50 +index 0000000000..6781de399b --- /dev/null +++ b/cmake/patches/composable_kernel/0001-by-default-no-3RD_DWORD-support-for-not-listed-gpus.patch -@@ -0,0 +1,25 @@ -+From fe5fb410b74f6c5b0ea8471469f7b77e3b916b01 Mon Sep 17 00:00:00 2001 +@@ -0,0 +1,26 @@ ++From 63f717b9e4019902a4c1e705a3a907c3b455aca2 Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Tue, 19 Dec 2023 15:16:58 -0800 -+Subject: [PATCH 1/3] by default no 3RD_DWORD support for not listed gpus ++Subject: [PATCH 1/4] by default no 3RD_DWORD support for not listed gpus + +Signed-off-by: Mika Laitio +--- @@ -54,20 +59,21 @@ index 0000000000..7fe3f07b50 ++#else ++#define CK_BUFFER_RESOURCE_3RD_DWORD -1 + #endif -+ ++ + // FMA instruction -+-- -+2.41.0 ++-- ++2.45.2 ++ diff --git a/cmake/patches/composable_kernel/0003-gfx1010-and-gfx1035-initial-support.patch b/cmake/patches/composable_kernel/0003-gfx1010-and-gfx1035-initial-support.patch new file mode 100644 -index 0000000000..624a7edd50 +index 0000000000..8c95fc8117 --- /dev/null +++ b/cmake/patches/composable_kernel/0003-gfx1010-and-gfx1035-initial-support.patch -@@ -0,0 +1,62 @@ -+From 26942bd2a0a7a1d341969095e51eeb2625c6b8b4 Mon Sep 17 00:00:00 2001 +@@ -0,0 +1,63 @@ ++From 7c02a14553826261d1052da5e1fb17487e15d2d4 Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Fri, 24 May 2024 03:50:43 -0700 -+Subject: [PATCH 3/3] gfx1010 and gfx1035 initial support ++Subject: [PATCH 3/4] gfx1010 and gfx1035 initial support + +- needs more testing + @@ -96,7 +102,7 @@ index 0000000000..624a7edd50 +@@ -59,11 +59,14 @@ + #define CK_BUFFER_RESOURCE_3RD_DWORD -1 + #endif -+ ++ ++// whether to use assembly or rely on compiler for these instructions ++// TODO: rdna1/gfx1010 has CK_USE_AMD_V_FMAC_F32 but not CK_USE_AMD_V_DOT2_F32_F16 CK_USE_AMD_V_DOT4_I32_I8 ++// TODO: check defined(__gfx1035__) @@ -122,32 +128,115 @@ index 0000000000..624a7edd50 + {"10.3.0 Sienna_Cichlid 18", "gfx1030"}, ++ {"Rembrandt", "gfx1035"}, + }; ++ ++ const auto name = raw_name.substr(0, raw_name.find(':')); // str.substr(0, npos) returns str. ++-- ++2.45.2 ++ +diff --git a/cmake/patches/composable_kernel/0004-add-gfx1036-and-gfx1103-support.patch b/cmake/patches/composable_kernel/0004-add-gfx1036-and-gfx1103-support.patch +new file mode 100644 +index 0000000000..339b7109e5 +--- /dev/null ++++ b/cmake/patches/composable_kernel/0004-add-gfx1036-and-gfx1103-support.patch +@@ -0,0 +1,76 @@ ++From d3a76a05875c148f25985ee08c54099c69b75edb Mon Sep 17 00:00:00 2001 ++From: Mika Laitio ++Date: Sat, 13 Jul 2024 21:35:33 -0400 ++Subject: [PATCH 4/4] add gfx1036 and gfx1103 support ++ ++Signed-off-by: Mika Laitio ++--- ++ include/ck/ck.hpp | 12 +++++++----- ++ include/ck/host_utility/device_prop.hpp | 7 ++++++- ++ 2 files changed, 13 insertions(+), 6 deletions(-) + ++diff --git a/include/ck/ck.hpp b/include/ck/ck.hpp ++index 266460342..379261ba3 100644 ++--- a/include/ck/ck.hpp +++++ b/include/ck/ck.hpp ++@@ -49,11 +49,11 @@ ++ #define CK_BUFFER_RESOURCE_3RD_DWORD -1 ++ #elif defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__) || defined(__gfx908__) || \ ++ defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || \ ++- defined(__gfx942__) || defined(__gfx1010__) // for GPU code +++ defined(__gfx942__) // for GPU code ++ #define CK_BUFFER_RESOURCE_3RD_DWORD 0x00020000 ++-#elif defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1035__) // for GPU code +++#elif defined(__gfx1010__) || defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1035__) || defined(__gfx1036__) // for GPU code ++ #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000 ++-#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) // for GPU code +++#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) // for GPU code ++ #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31004000 ++ #else ++ #define CK_BUFFER_RESOURCE_3RD_DWORD -1 ++@@ -66,12 +66,14 @@ ++ #ifndef __HIP_DEVICE_COMPILE__ // for host code, define nothing ++ #elif defined(__gfx803__) || defined(__gfx900__) // for GPU code ++ #define CK_USE_AMD_V_MAC_F32 +++#elif defined(__gfx1010__) +++#define CK_USE_AMD_V_FMAC_F32 ++ #elif defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__) || defined(__gfx1031__) || \ ++ defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) // for GPU code ++ #define CK_USE_AMD_V_FMAC_F32 ++ #define CK_USE_AMD_V_DOT2_F32_F16 ++ #define CK_USE_AMD_V_DOT4_I32_I8 ++-#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) +++#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) ++ #define CK_USE_AMD_V_FMAC_F32 ++ #define CK_USE_AMD_V_DOT2_F32_F16 ++ #define CK_USE_AMD_V_DOT4_I32_I8_GFX11 ++@@ -96,7 +98,7 @@ ++ // WMMA instruction ++ #ifndef __HIP_DEVICE_COMPILE__ // for host code ++ #define CK_USE_AMD_WMMA ++-#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) // for GPU code +++#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) // for GPU code ++ #define CK_USE_AMD_WMMA ++ #endif ++ ++diff --git a/include/ck/host_utility/device_prop.hpp b/include/ck/host_utility/device_prop.hpp ++index 0908f6757..294a60f9a 100644 ++--- a/include/ck/host_utility/device_prop.hpp +++++ b/include/ck/host_utility/device_prop.hpp ++@@ -43,7 +43,12 @@ inline std::string get_device_name() ++ {"navi10", "gfx1010"}, ++ {"gfx1031", "gfx1030"}, ++ {"10.3.0 Sienna_Cichlid 18", "gfx1030"}, ++- {"Rembrandt", "gfx1035"}, +++ {"rembrandt", "gfx1035"}, +++ {"raphael", "gfx1036"}, +++ {"navi31", "gfx1100"}, +++ {"navi32", "gfx1101"}, +++ {"navi33", "gfx1102"}, +++ {"phoenix", "gfx1103"}, ++ }; ++ + const auto name = raw_name.substr(0, raw_name.find(':')); // str.substr(0, npos) returns str. -+-- -+2.41.0 ++-- ++2.45.2 ++ diff --git a/cmake/patches/composable_kernel/composable_kernel_patches_combined.patch b/cmake/patches/composable_kernel/composable_kernel_patches_combined.patch new file mode 100644 -index 0000000000..ee8eafe1bf +index 0000000000..6ca90805e7 --- /dev/null +++ b/cmake/patches/composable_kernel/composable_kernel_patches_combined.patch -@@ -0,0 +1,135 @@ -+diff -Naur composable_kernel-src-orig/CMakeLists.txt composable_kernel-src/CMakeLists.txt -+--- composable_kernel-src-orig/CMakeLists.txt 2024-05-24 16:17:29.000000000 -0700 -++++ composable_kernel-src/CMakeLists.txt 2024-05-24 16:19:07.628761994 -0700 +@@ -0,0 +1,155 @@ ++diff -Naur composable_kernel_orig_5356c4a943/CMakeLists.txt composable_kernel/CMakeLists.txt ++--- composable_kernel_orig_5356c4a943/CMakeLists.txt 2023-11-13 12:16:04.000000000 -0500 +++++ composable_kernel/CMakeLists.txt 2024-07-14 17:26:06.779625019 -0400 +@@ -19,7 +19,7 @@ -+ ++ + set(version 1.1.0) + # Check support for CUDA/HIP in Cmake +-project(composable_kernel VERSION ${version}) ++project(composable_kernel VERSION ${version} LANGUAGES CXX HIP) -+ ++ + list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") -+ ++ +@@ -173,27 +173,6 @@ + set(CMAKE_CXX_EXTENSIONS OFF) + message("CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}") -+ ++ +-## OpenMP +-if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") +- # workaround issue hipcc in rocm3.5 cannot find openmp @@ -178,7 +267,7 @@ index 0000000000..ee8eafe1bf + message(STATUS "Build with HIP ${HIP_VERSION}") +-link_libraries(hip::device) +-add_compile_definitions(__HIP_PLATFORM_HCC__=1) -+ ++ + ## tidy + include(EnableCompilerWarnings) +@@ -376,7 +353,9 @@ @@ -190,10 +279,10 @@ index 0000000000..ee8eafe1bf ++# add_compile_options(-Oz -flto=thin) ++add_compile_options(-Oz) + message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") -+ ++ + add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR}) +@@ -482,11 +461,3 @@ -+ ++ + set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") + set(CPACK_RPM_PACKAGE_LICENSE "MIT") +- @@ -204,25 +293,23 @@ index 0000000000..ee8eafe1bf +- LDCONFIG +- HEADER_ONLY +-) -+diff -Naur composable_kernel-src-orig/include/ck/ck.hpp composable_kernel-src/include/ck/ck.hpp -+--- composable_kernel-src-orig/include/ck/ck.hpp 2024-05-24 16:06:02.000000000 -0700 -++++ composable_kernel-src/include/ck/ck.hpp 2024-05-24 16:19:17.709088800 -0700 -+@@ -49,19 +49,24 @@ -+ #define CK_BUFFER_RESOURCE_3RD_DWORD -1 -+ #elif defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__) || defined(__gfx908__) || \ ++diff -Naur composable_kernel_orig_5356c4a943/include/ck/ck.hpp composable_kernel/include/ck/ck.hpp ++--- composable_kernel_orig_5356c4a943/include/ck/ck.hpp 2023-11-13 12:16:04.000000000 -0500 +++++ composable_kernel/include/ck/ck.hpp 2024-07-14 17:24:43.730836771 -0400 ++@@ -51,22 +51,29 @@ + defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || \ -+- defined(__gfx942__) // for GPU code -++ defined(__gfx942__) || defined(__gfx1010__) // for GPU code ++ defined(__gfx942__) // for GPU code + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x00020000 +-#elif defined(__gfx1030__) // for GPU code -++#elif defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1035__) // for GPU code +++#elif defined(__gfx1010__) || defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1035__) || defined(__gfx1036__) // for GPU code + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000 -+ #elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) // for GPU code ++-#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) // for GPU code +++#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) // for GPU code + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31004000 ++#else ++#define CK_BUFFER_RESOURCE_3RD_DWORD -1 + #endif -+ ++ ++// whether to use assembly or rely on compiler for these instructions ++// TODO: rdna1/gfx1010 has CK_USE_AMD_V_FMAC_F32 but not CK_USE_AMD_V_DOT2_F32_F16 CK_USE_AMD_V_DOT4_I32_I8 ++// TODO: check defined(__gfx1035__) @@ -231,27 +318,49 @@ index 0000000000..ee8eafe1bf + #elif defined(__gfx803__) || defined(__gfx900__) // for GPU code + #define CK_USE_AMD_V_MAC_F32 +-#elif defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__) || \ +++#elif defined(__gfx1010__) +++#define CK_USE_AMD_V_FMAC_F32 ++#elif defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__) || defined(__gfx1031__) || \ + defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) // for GPU code + #define CK_USE_AMD_V_FMAC_F32 + #define CK_USE_AMD_V_DOT2_F32_F16 -+diff -Naur composable_kernel-src-orig/include/ck/host_utility/device_prop.hpp composable_kernel-src/include/ck/host_utility/device_prop.hpp -+--- composable_kernel-src-orig/include/ck/host_utility/device_prop.hpp 2024-05-24 16:06:02.000000000 -0700 -++++ composable_kernel-src/include/ck/host_utility/device_prop.hpp 2024-05-24 16:19:17.709088800 -0700 -+@@ -40,7 +40,10 @@ ++ #define CK_USE_AMD_V_DOT4_I32_I8 ++-#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) +++#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) ++ #define CK_USE_AMD_V_FMAC_F32 ++ #define CK_USE_AMD_V_DOT2_F32_F16 ++ #define CK_USE_AMD_V_DOT4_I32_I8_GFX11 ++@@ -91,7 +98,7 @@ ++ // WMMA instruction ++ #ifndef __HIP_DEVICE_COMPILE__ // for host code ++ #define CK_USE_AMD_WMMA ++-#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) // for GPU code +++#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) // for GPU code ++ #define CK_USE_AMD_WMMA ++ #endif ++ ++diff -Naur composable_kernel_orig_5356c4a943/include/ck/host_utility/device_prop.hpp composable_kernel/include/ck/host_utility/device_prop.hpp ++--- composable_kernel_orig_5356c4a943/include/ck/host_utility/device_prop.hpp 2023-11-13 12:16:04.000000000 -0500 +++++ composable_kernel/include/ck/host_utility/device_prop.hpp 2024-07-14 17:24:43.730836771 -0400 ++@@ -40,7 +40,15 @@ + {"gfx804", "gfx803"}, + {"Vega10", "gfx900"}, + {"gfx901", "gfx900"}, ++ {"navi10", "gfx1010"}, ++ {"gfx1031", "gfx1030"}, + {"10.3.0 Sienna_Cichlid 18", "gfx1030"}, -++ {"Rembrandt", "gfx1035"}, +++ {"rembrandt", "gfx1035"}, +++ {"raphael", "gfx1036"}, +++ {"navi31", "gfx1100"}, +++ {"navi32", "gfx1101"}, +++ {"navi33", "gfx1102"}, +++ {"phoenix", "gfx1103"}, + }; -+ ++ + const auto name = raw_name.substr(0, raw_name.find(':')); // str.substr(0, npos) returns str. -+diff -Naur composable_kernel-src-orig/library/src/tensor_operation_instance/gpu/CMakeLists.txt composable_kernel-src/library/src/tensor_operation_instance/gpu/CMakeLists.txt -+--- composable_kernel-src-orig/library/src/tensor_operation_instance/gpu/CMakeLists.txt 2024-05-24 16:17:29.000000000 -0700 -++++ composable_kernel-src/library/src/tensor_operation_instance/gpu/CMakeLists.txt 2024-05-24 16:19:07.629762026 -0700 ++diff -Naur composable_kernel_orig_5356c4a943/library/src/tensor_operation_instance/gpu/CMakeLists.txt composable_kernel/library/src/tensor_operation_instance/gpu/CMakeLists.txt ++--- composable_kernel_orig_5356c4a943/library/src/tensor_operation_instance/gpu/CMakeLists.txt 2023-11-13 12:16:04.000000000 -0500 +++++ composable_kernel/library/src/tensor_operation_instance/gpu/CMakeLists.txt 2024-07-14 17:26:06.779625019 -0400 +@@ -44,8 +44,14 @@ + endforeach() + #only continue if there are some source files left on the list diff --git a/patches/rocm-6.1.2/onnxruntime/0003-gradient_builder.cc-dangling-reference-warning-fix.patch b/patches/rocm-6.1.2/onnxruntime/0003-gradient_builder.cc-dangling-reference-warning-fix.patch index c6e0fbf..4b45bca 100644 --- a/patches/rocm-6.1.2/onnxruntime/0003-gradient_builder.cc-dangling-reference-warning-fix.patch +++ b/patches/rocm-6.1.2/onnxruntime/0003-gradient_builder.cc-dangling-reference-warning-fix.patch @@ -1,4 +1,4 @@ -From 7c9284f3711c57552c2b38ab6b3194d7ebbec5fc Mon Sep 17 00:00:00 2001 +From 99b06be5ecf3d3388e69e5bcca7c676bdb9e440f Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 24 May 2024 16:47:18 -0700 Subject: [PATCH 3/7] gradient_builder.cc dangling reference warning fix diff --git a/patches/rocm-6.1.2/onnxruntime/0004-rocm-sdk-builder-CMAKE_PREFIX_PATHS.patch b/patches/rocm-6.1.2/onnxruntime/0004-rocm-sdk-builder-CMAKE_PREFIX_PATHS.patch index 5a863f0..123aaf1 100644 --- a/patches/rocm-6.1.2/onnxruntime/0004-rocm-sdk-builder-CMAKE_PREFIX_PATHS.patch +++ b/patches/rocm-6.1.2/onnxruntime/0004-rocm-sdk-builder-CMAKE_PREFIX_PATHS.patch @@ -1,4 +1,4 @@ -From 5ed9381ce48cc4f9e83b609b4004dc6d106b8150 Mon Sep 17 00:00:00 2001 +From 6cf6ca95d440d8ce633d287e13f795e8acaf2f82 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 24 May 2024 21:16:06 -0700 Subject: [PATCH 4/7] rocm sdk builder CMAKE_PREFIX_PATHS diff --git a/patches/rocm-6.1.2/onnxruntime/0005-template-id-not-allowed-for-constructor-in-C-20.patch b/patches/rocm-6.1.2/onnxruntime/0005-template-id-not-allowed-for-constructor-in-C-20.patch index 3421524..8c17283 100644 --- a/patches/rocm-6.1.2/onnxruntime/0005-template-id-not-allowed-for-constructor-in-C-20.patch +++ b/patches/rocm-6.1.2/onnxruntime/0005-template-id-not-allowed-for-constructor-in-C-20.patch @@ -1,4 +1,4 @@ -From 0d639cea739ecaa963835836fc0d3c68a596edfc Mon Sep 17 00:00:00 2001 +From 04f2bf647b3ff4b5b7bffc1565492f6381f7e3ff Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 31 May 2024 11:08:25 -0700 Subject: [PATCH 5/7] template-id not allowed for constructor in C++20 diff --git a/patches/rocm-6.1.2/onnxruntime/0006-fedora40-onnxruntime-optimized-maybe-uninitialized-e.patch b/patches/rocm-6.1.2/onnxruntime/0006-fedora40-onnxruntime-optimized-maybe-uninitialized-e.patch index 7973e12..d96e2f1 100644 --- a/patches/rocm-6.1.2/onnxruntime/0006-fedora40-onnxruntime-optimized-maybe-uninitialized-e.patch +++ b/patches/rocm-6.1.2/onnxruntime/0006-fedora40-onnxruntime-optimized-maybe-uninitialized-e.patch @@ -1,4 +1,4 @@ -From b03a7bfd7c702e9acac7257329bdf1c0d38d0f4b Mon Sep 17 00:00:00 2001 +From 7eff1b6f37e8786100d4a940da070daf1ef86de6 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 31 May 2024 11:13:28 -0700 Subject: [PATCH 6/7] fedora40 onnxruntime optimized maybe-uninitialized error diff --git a/patches/rocm-6.1.2/onnxruntime/0007-fedora40-build-breaks-for-uninitialized-variables.patch b/patches/rocm-6.1.2/onnxruntime/0007-fedora40-build-breaks-for-uninitialized-variables.patch index ceee7bf..0499fea 100644 --- a/patches/rocm-6.1.2/onnxruntime/0007-fedora40-build-breaks-for-uninitialized-variables.patch +++ b/patches/rocm-6.1.2/onnxruntime/0007-fedora40-build-breaks-for-uninitialized-variables.patch @@ -1,4 +1,4 @@ -From 8648b381fa28d797532e00d8fe280c68f1911bb4 Mon Sep 17 00:00:00 2001 +From c463c3fe97893f1e3dcaccc979ded77e7fd9da3a Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 14 Jun 2024 14:32:34 -0400 Subject: [PATCH 7/7] fedora40 build breaks for uninitialized variables diff --git a/patches/rocm-6.1.2/pytorch/0001-pytorch_rocm-preconfig-build-and-install-scripts.patch b/patches/rocm-6.1.2/pytorch/0001-pytorch_rocm-preconfig-build-and-install-scripts.patch index 1df7b71..089f437 100644 --- a/patches/rocm-6.1.2/pytorch/0001-pytorch_rocm-preconfig-build-and-install-scripts.patch +++ b/patches/rocm-6.1.2/pytorch/0001-pytorch_rocm-preconfig-build-and-install-scripts.patch @@ -1,7 +1,7 @@ -From 7e9c55ab472aa09a6595d2dc941bb322edc7197a Mon Sep 17 00:00:00 2001 +From 7622aa9b48c8f540a6665802391266657164afde Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Mon, 11 Dec 2023 09:20:07 -0800 -Subject: [PATCH 1/6] pytorch_rocm preconfig, build and install scripts +Subject: [PATCH 1/7] pytorch_rocm preconfig, build and install scripts - clean previous build, build wheel and install wheel scripts "-Wno-error=maybe-uninitialized" is needed during @@ -21,7 +21,7 @@ Signed-off-by: Jeroen Mostert diff --git a/build_rocm.sh b/build_rocm.sh new file mode 100755 -index 0000000000..2cc3a7773b +index 00000000000..2cc3a7773b2 --- /dev/null +++ b/build_rocm.sh @@ -0,0 +1,23 @@ @@ -50,7 +50,7 @@ index 0000000000..2cc3a7773b +USE_FLASH_ATTENTION=OFF ROCM_PATH=${install_dir_prefix_rocm} ROCM_SOURCE_DIR=${install_dir_prefix_rocm} CMAKE_CXX_FLAGS="$CMAKE_CXX_FLAGS -Wno-error=maybe-uninitialized" CMAKE_PREFIX_PATH="${install_dir_prefix_rocm};${install_dir_prefix_rocm}/lib64/cmake;${install_dir_prefix_rocm}/lib/cmake;${install_dir_prefix_rocm}/lib64;${install_dir_prefix_rocm}/lib" ROCM_VERSION=${rocm_version_str} HIP_ROOT_DIR=${install_dir_prefix_rocm} USE_ROCM=1 PYTORCH_BUILD_VERSION="$(git describe --tags --abbrev=0 | sed 's/^v//')" PYTORCH_BUILD_NUMBER=1 python setup.py bdist_wheel diff --git a/install_rocm.sh b/install_rocm.sh new file mode 100755 -index 0000000000..38ed0fc21d +index 00000000000..38ed0fc21d0 --- /dev/null +++ b/install_rocm.sh @@ -0,0 +1,23 @@ @@ -79,7 +79,7 @@ index 0000000000..38ed0fc21d +fi diff --git a/preconfig_rocm.sh b/preconfig_rocm.sh new file mode 100755 -index 0000000000..92aacb9f0c +index 00000000000..92aacb9f0c0 --- /dev/null +++ b/preconfig_rocm.sh @@ -0,0 +1,21 @@ diff --git a/patches/rocm-6.1.2/pytorch/0002-show-error-message-if-ROCM_SOURCE_DIR-not-defined.patch b/patches/rocm-6.1.2/pytorch/0002-show-error-message-if-ROCM_SOURCE_DIR-not-defined.patch index c0e6e19..81d8851 100644 --- a/patches/rocm-6.1.2/pytorch/0002-show-error-message-if-ROCM_SOURCE_DIR-not-defined.patch +++ b/patches/rocm-6.1.2/pytorch/0002-show-error-message-if-ROCM_SOURCE_DIR-not-defined.patch @@ -1,7 +1,7 @@ -From 909642767f13385d2acf7d904df5db06b1d3a8fa Mon Sep 17 00:00:00 2001 +From 6ebac7a6c6682b3b116e5d9107e39204cc53915b Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 10 May 2024 10:16:19 -0700 -Subject: [PATCH 2/6] show error message if ROCM_SOURCE_DIR not defined +Subject: [PATCH 2/7] show error message if ROCM_SOURCE_DIR not defined ROCM_SOURCE_DIR is required by by third_party/kineto module and if it is not set, kineto will not find the @@ -16,7 +16,7 @@ Signed-off-by: Mika Laitio 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index a96075245a..8befd5b829 100644 +index a96075245ae..8befd5b8294 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1967,8 +1967,8 @@ if(USE_KINETO) diff --git a/patches/rocm-6.1.2/pytorch/0003-LoadHIP-force-ROCM-detection-and-patches.patch b/patches/rocm-6.1.2/pytorch/0003-LoadHIP-force-ROCM-detection-and-patches.patch index f77dd6d..17950aa 100644 --- a/patches/rocm-6.1.2/pytorch/0003-LoadHIP-force-ROCM-detection-and-patches.patch +++ b/patches/rocm-6.1.2/pytorch/0003-LoadHIP-force-ROCM-detection-and-patches.patch @@ -1,7 +1,7 @@ -From 622a6eb412ef5839c5910da0d03bb4e061d4e91f Mon Sep 17 00:00:00 2001 +From 03128cee44f1d75f69bda80c972caa691bda054a Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 10 May 2024 10:32:33 -0700 -Subject: [PATCH 3/6] LoadHIP force ROCM detection and patches +Subject: [PATCH 3/7] LoadHIP force ROCM detection and patches - set HIP_ROOT_DIR to ROCM_PATH which is set by the build scripts @@ -13,7 +13,7 @@ Signed-off-by: Mika Laitio 1 file changed, 10 insertions(+) diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake -index f6ca263c5e..0f7a61cf14 100644 +index f6ca263c5e5..0f7a61cf141 100644 --- a/cmake/public/LoadHIP.cmake +++ b/cmake/public/LoadHIP.cmake @@ -41,6 +41,7 @@ endmacro() diff --git a/patches/rocm-6.1.2/pytorch/0004-LoadHIP-lib-and-lib64-search-path-adjustements.patch b/patches/rocm-6.1.2/pytorch/0004-LoadHIP-lib-and-lib64-search-path-adjustements.patch index a2c8b32..a426f51 100644 --- a/patches/rocm-6.1.2/pytorch/0004-LoadHIP-lib-and-lib64-search-path-adjustements.patch +++ b/patches/rocm-6.1.2/pytorch/0004-LoadHIP-lib-and-lib64-search-path-adjustements.patch @@ -1,7 +1,7 @@ -From 043f85f904000acc0c9680c4094dcdbf56d187e7 Mon Sep 17 00:00:00 2001 +From f79c569e76365a07a950ed35a366307affb6e542 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 10 May 2024 11:11:47 -0700 -Subject: [PATCH 4/6] LoadHIP lib and lib64 search path adjustements +Subject: [PATCH 4/7] LoadHIP lib and lib64 search path adjustements - search both lib and lib64 directories (note some libs still installed to lib-dir @@ -16,7 +16,7 @@ Signed-off-by: Mika Laitio 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 8befd5b829..e8c7a97b83 100644 +index 8befd5b8294..e8c7a97b83e 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1334,8 +1334,9 @@ if(USE_ROCM) @@ -32,7 +32,7 @@ index 8befd5b829..e8c7a97b83 100644 caffe2_update_option(USE_MEM_EFF_ATTENTION OFF) endif() diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake -index 0f7a61cf14..dfa4bab4df 100644 +index 0f7a61cf141..dfa4bab4df2 100644 --- a/cmake/public/LoadHIP.cmake +++ b/cmake/public/LoadHIP.cmake @@ -17,8 +17,8 @@ endif() diff --git a/patches/rocm-6.1.2/pytorch/0005-fix-gcc-parameter-is-null-optimization-error.patch b/patches/rocm-6.1.2/pytorch/0005-fix-gcc-parameter-is-null-optimization-error.patch index d50d2d9..b14c5d7 100644 --- a/patches/rocm-6.1.2/pytorch/0005-fix-gcc-parameter-is-null-optimization-error.patch +++ b/patches/rocm-6.1.2/pytorch/0005-fix-gcc-parameter-is-null-optimization-error.patch @@ -1,7 +1,7 @@ -From 9b939062b118322f53a8c284f4cd99721b887fd5 Mon Sep 17 00:00:00 2001 +From d08961e9f356e083624f65237ac5dbffc7457e94 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 10 May 2024 19:25:50 -0700 -Subject: [PATCH 5/6] fix gcc parameter is null optimization error +Subject: [PATCH 5/7] fix gcc parameter is null optimization error https://github.com/pytorch/pytorch/issues/112089 and @@ -13,7 +13,7 @@ Signed-off-by: Mika Laitio 1 file changed, 5 insertions(+) diff --git a/test/cpp/api/CMakeLists.txt b/test/cpp/api/CMakeLists.txt -index 42b67d8cb2..0bc7f79b70 100644 +index 42b67d8cb25..0bc7f79b709 100644 --- a/test/cpp/api/CMakeLists.txt +++ b/test/cpp/api/CMakeLists.txt @@ -67,6 +67,11 @@ if(NOT MSVC) diff --git a/patches/rocm-6.1.2/pytorch/0006-replace-clamp-with-min-and-max-for-fedora-40-issue.patch b/patches/rocm-6.1.2/pytorch/0006-replace-clamp-with-min-and-max-for-fedora-40-issue.patch index 1af3fc7..4b55c33 100644 --- a/patches/rocm-6.1.2/pytorch/0006-replace-clamp-with-min-and-max-for-fedora-40-issue.patch +++ b/patches/rocm-6.1.2/pytorch/0006-replace-clamp-with-min-and-max-for-fedora-40-issue.patch @@ -1,7 +1,7 @@ -From a6c2edaae6571560cbbd5f61661fc08e88c713d6 Mon Sep 17 00:00:00 2001 +From 496ad51cc0855575f2dd31f023c03459f638e1c1 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 31 May 2024 18:35:12 -0700 -Subject: [PATCH 6/6] replace clamp with min and max for fedora 40 issue +Subject: [PATCH 6/7] replace clamp with min and max for fedora 40 issue Fedora 40/gcc 14 throws following error during build time for clamp function usage during pytorch build time. @@ -25,7 +25,7 @@ Signed-off-by: Mika Laitio 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aten/src/ATen/native/cuda/IndexKernel.cu b/aten/src/ATen/native/cuda/IndexKernel.cu -index 5682ba2757..862bcb9614 100644 +index 5682ba27573..862bcb9614d 100644 --- a/aten/src/ATen/native/cuda/IndexKernel.cu +++ b/aten/src/ATen/native/cuda/IndexKernel.cu @@ -249,7 +249,9 @@ void index_put_kernel_quantized_cuda(TensorIterator& iter, const IntArrayRef ind diff --git a/patches/rocm-6.1.2/pytorch/0007-handle-hipcc-verbose-output-on-dumpversion-command.patch b/patches/rocm-6.1.2/pytorch/0007-handle-hipcc-verbose-output-on-dumpversion-command.patch new file mode 100644 index 0000000..50ebd83 --- /dev/null +++ b/patches/rocm-6.1.2/pytorch/0007-handle-hipcc-verbose-output-on-dumpversion-command.patch @@ -0,0 +1,35 @@ +From 20d5f12ea0cf48dcdd463ae231d7ee4139514c58 Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Mon, 15 Jul 2024 00:57:55 -0400 +Subject: [PATCH 7/7] handle hipcc verbose output on dumpversion command + +Signed-off-by: Mika Laitio +--- + torch/utils/cpp_extension.py | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/torch/utils/cpp_extension.py b/torch/utils/cpp_extension.py +index d3d264d6172..30da0f7fc4d 100644 +--- a/torch/utils/cpp_extension.py ++++ b/torch/utils/cpp_extension.py +@@ -372,8 +372,16 @@ def get_compiler_abi_compatibility_and_version(compiler) -> Tuple[bool, TorchVer + try: + if IS_LINUX: + minimum_required_version = MINIMUM_GCC_VERSION ++ # if compiler is the HIPCC and environment variable HIPCC_VERBOSE > 0, then ++ # hipcc prints extra information on \n character separated lines before the real ++ # output on last line. ++ # split therefore the lines to an string array and then select the last line for versionstr. + versionstr = subprocess.check_output([compiler, '-dumpfullversion', '-dumpversion']) +- version = versionstr.decode(*SUBPROCESS_DECODE_ARGS).strip().split('.') ++ versionstr = versionstr.decode(*SUBPROCESS_DECODE_ARGS).strip() ++ versionstr_list = versionstr.splitlines() ++ versionstr = versionstr_list[-1] ++ #print("versionstr: " + str(versionstr)) ++ version = versionstr.split('.') + else: + minimum_required_version = MINIMUM_MSVC_VERSION + compiler_info = subprocess.check_output(compiler, stderr=subprocess.STDOUT) +-- +2.45.2 + diff --git a/patches/rocm-6.1.2/rccl/0001-gfx1010-1030-1035-and-1036.patch b/patches/rocm-6.1.2/rccl/0001-gfx1010-1030-1035-and-1036.patch deleted file mode 100644 index 7322954..0000000 --- a/patches/rocm-6.1.2/rccl/0001-gfx1010-1030-1035-and-1036.patch +++ /dev/null @@ -1,40 +0,0 @@ -From fbec847c285080c07c8d5bb6a49ff49196438b42 Mon Sep 17 00:00:00 2001 -From: Mika Laitio -Date: Sat, 6 Jan 2024 01:29:45 +0200 -Subject: [PATCH] gfx1010, 1030, 1035 and 1036 - -Signed-off-by: Mika Laitio ---- - CMakeLists.txt | 15 +++------------ - 1 file changed, 3 insertions(+), 12 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index ba4bd72..d17a9c3 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -26,19 +26,10 @@ option(TRACE "Enable additional tracing" - # Default GPU architectures to build - #================================================================================================== - set(DEFAULT_GPUS -- gfx803 -- gfx900:xnack- -- gfx906:xnack- -- gfx908:xnack- -- gfx90a:xnack- -- gfx90a:xnack+ -- gfx940 -- gfx941 -- gfx942 -+ gfx1010 - gfx1030 -- gfx1100 -- gfx1101 -- gfx1102) -+ gfx1035 -+ gfx1036) - - # Load CMake modules - #================================================================================================== --- -2.41.0 - diff --git a/patches/rocm-6.1.2/rccl/0001-gfx1103-support.patch b/patches/rocm-6.1.2/rccl/0001-gfx1103-support.patch new file mode 100644 index 0000000..a0575a3 --- /dev/null +++ b/patches/rocm-6.1.2/rccl/0001-gfx1103-support.patch @@ -0,0 +1,47 @@ +From 3fcd05e8663accc97ba118dbc02bb48e3ee656a9 Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Sat, 13 Jul 2024 04:57:37 -0400 +Subject: [PATCH] gfx1103 support + +Signed-off-by: Mika Laitio +--- + src/collectives/device/common.h | 4 ++-- + tools/JitterBench/Common.hpp | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/collectives/device/common.h b/src/collectives/device/common.h +index 093a26c..c51bd4a 100644 +--- a/src/collectives/device/common.h ++++ b/src/collectives/device/common.h +@@ -43,7 +43,7 @@ class ncclFunction { + #endif + }; + +-#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) ++#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) + #define __trace_hwreg() + #else + #define __trace_hwreg() \ +@@ -393,4 +393,4 @@ __device__ __attribute__((noinline)) void NCCL_FUNC_NAME(func, algo, proto, dev + + #define NCCL_NVLS_ENABLED (__CUDA_ARCH__ >= 900 && NCCL_NVLS_SUPPORTS(NCCL_TYPE, NCCL_OP)) + +-#endif +\ No newline at end of file ++#endif +diff --git a/tools/JitterBench/Common.hpp b/tools/JitterBench/Common.hpp +index 8fad0ac..8728833 100644 +--- a/tools/JitterBench/Common.hpp ++++ b/tools/JitterBench/Common.hpp +@@ -43,7 +43,7 @@ THE SOFTWARE. + #endif + + // Macro for collecting HW_REG_HW_ID +-#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__NVCC__) ++#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) || defined(__NVCC__) + #define GetHwId(val) \ + val = 0 + #else +-- +2.45.2 + diff --git a/patches/rocm-6.1.2/rocBLAS/0001-add-mageia-9-support-to-install.sh.patch b/patches/rocm-6.1.2/rocBLAS/0001-add-mageia-9-support-to-install.sh.patch index 6aaa228..008efe1 100644 --- a/patches/rocm-6.1.2/rocBLAS/0001-add-mageia-9-support-to-install.sh.patch +++ b/patches/rocm-6.1.2/rocBLAS/0001-add-mageia-9-support-to-install.sh.patch @@ -1,4 +1,4 @@ -From b9ebd530ecd258a25c87fbf2d37a770203e16367 Mon Sep 17 00:00:00 2001 +From b4e555f2d5c996b528cc13602f78012671383f3a Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Sat, 18 May 2024 18:17:42 -0700 Subject: [PATCH 1/3] add mageia 9 support to install.sh @@ -54,5 +54,5 @@ index fc644b87..46c95775 100755 elevate_if_not_root zypper -n --no-gpg-checks install rocblas-*.rpm ;; -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-gfx1036-and-gfx1103-to-gpulist.patch b/patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-gfx1036-and-gfx1103-to-gpulist.patch new file mode 100644 index 0000000..b97ec46 --- /dev/null +++ b/patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-gfx1036-and-gfx1103-to-gpulist.patch @@ -0,0 +1,110 @@ +From 98c87b3db281d5048524ecb0c14c33c1fac0719c Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Sat, 18 May 2024 18:18:33 -0700 +Subject: [PATCH 2/3] add gfx1035,gfx1036 and gfx1103 to gpulist + +Signed-off-by: Mika Laitio +--- + CMakeLists.txt | 6 +++--- + library/src/handle.cpp | 12 ++++++++++++ + library/src/include/handle.hpp | 4 +++- + library/src/tensile_host.cpp | 12 ++++++++++++ + 4 files changed, 30 insertions(+), 4 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 92f227f7..69ab2478 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -108,9 +108,9 @@ rocm_setup_version( VERSION ${VERSION_STRING} ) + list( APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/llvm ${ROCM_PATH} ${ROCM_PATH}/hip /opt/rocm/llvm /opt/rocm /opt/rocm/hip ) + + # setting target list based on ROCm version +-set( TARGET_LIST_ROCM_5.6 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102") +-set( TARGET_LIST_ROCM_5.7 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102") +-set( TARGET_LIST_ROCM_6.0 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102") ++set( TARGET_LIST_ROCM_5.6 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1035;gfx1036;gfx1100;gfx1101;gfx1102;gfx1103") ++set( TARGET_LIST_ROCM_5.7 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1035;gfx1036;gfx1100;gfx1101;gfx1102;gfx1103") ++set( TARGET_LIST_ROCM_6.0 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1035;gfx1036;gfx1100;gfx1101;gfx1102;gfx1103") + + if(ROCM_PLATFORM_VERSION) + if(${ROCM_PLATFORM_VERSION} VERSION_LESS 5.7.0) +diff --git a/library/src/handle.cpp b/library/src/handle.cpp +index 7b08a934..eb2254dd 100644 +--- a/library/src/handle.cpp ++++ b/library/src/handle.cpp +@@ -129,6 +129,14 @@ static Processor getActiveArch(int deviceId) + { + return Processor::gfx1030; + } ++ else if(deviceString.find("gfx1035") != std::string::npos) ++ { ++ return Processor::gfx1035; ++ } ++ else if(deviceString.find("gfx1036") != std::string::npos) ++ { ++ return Processor::gfx1036; ++ } + else if(deviceString.find("gfx1100") != std::string::npos) + { + return Processor::gfx1100; +@@ -141,6 +149,10 @@ static Processor getActiveArch(int deviceId) + { + return Processor::gfx1102; + } ++ else if(deviceString.find("gfx1103") != std::string::npos) ++ { ++ return Processor::gfx1103; ++ } + return static_cast(0); + } + +diff --git a/library/src/include/handle.hpp b/library/src/include/handle.hpp +index 282edb8f..cc3e6c0f 100644 +--- a/library/src/include/handle.hpp ++++ b/library/src/include/handle.hpp +@@ -90,9 +90,11 @@ enum class Processor : int + gfx1032 = 1032, + gfx1034 = 1034, + gfx1035 = 1035, ++ gfx1036 = 1036, + gfx1100 = 1100, + gfx1101 = 1101, +- gfx1102 = 1102 ++ gfx1102 = 1102, ++ gfx1103 = 1103 + }; + + // helper function in handle.cpp +diff --git a/library/src/tensile_host.cpp b/library/src/tensile_host.cpp +index 1f0349fd..073bb244 100644 +--- a/library/src/tensile_host.cpp ++++ b/library/src/tensile_host.cpp +@@ -257,6 +257,14 @@ namespace + { + return Tensile::LazyLoadingInit::gfx1030; + } ++ else if(deviceString.find("gfx1035") != std::string::npos) ++ { ++ return Tensile::LazyLoadingInit::gfx1035; ++ } ++ else if(deviceString.find("gfx1036") != std::string::npos) ++ { ++ return Tensile::LazyLoadingInit::gfx1036; ++ } + else if(deviceString.find("gfx1100") != std::string::npos) + { + return Tensile::LazyLoadingInit::gfx1100; +@@ -269,6 +277,10 @@ namespace + { + return Tensile::LazyLoadingInit::gfx1102; + } ++ else if(deviceString.find("gfx1103") != std::string::npos) ++ { ++ return Tensile::LazyLoadingInit::gfx1103; ++ } + return Tensile::LazyLoadingInit::None; + } + +-- +2.45.2 + diff --git a/patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-to-gpulist.patch b/patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-to-gpulist.patch deleted file mode 100644 index 42427f0..0000000 --- a/patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-to-gpulist.patch +++ /dev/null @@ -1,30 +0,0 @@ -From d39e2f6ea3251f403e88ce388a7e0b0b61bbee1d Mon Sep 17 00:00:00 2001 -From: Mika Laitio -Date: Sat, 18 May 2024 18:18:33 -0700 -Subject: [PATCH 2/3] add gfx1035 to gpulist - -Signed-off-by: Mika Laitio ---- - CMakeLists.txt | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index ae76864d..234e6b07 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -108,9 +108,9 @@ rocm_setup_version( VERSION ${VERSION_STRING} ) - list( APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/llvm ${ROCM_PATH} ${ROCM_PATH}/hip /opt/rocm/llvm /opt/rocm /opt/rocm/hip ) - - # setting target list based on ROCm version --set( TARGET_LIST_ROCM_5.6 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102") --set( TARGET_LIST_ROCM_5.7 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102") --set( TARGET_LIST_ROCM_6.0 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102") -+set( TARGET_LIST_ROCM_5.6 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1035;gfx1100;gfx1101;gfx1102") -+set( TARGET_LIST_ROCM_5.7 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1035;gfx1100;gfx1101;gfx1102") -+set( TARGET_LIST_ROCM_6.0 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1035;gfx1100;gfx1101;gfx1102") - - if(ROCM_PLATFORM_VERSION) - if(${ROCM_PLATFORM_VERSION} VERSION_LESS 5.7.0) --- -2.41.0 - diff --git a/patches/rocm-6.1.2/rocBLAS/0003-OpenBLAS-and-BLIS-library-search-improvements.patch b/patches/rocm-6.1.2/rocBLAS/0003-OpenBLAS-and-BLIS-library-search-improvements.patch index 8612a8b..9c8a31b 100644 --- a/patches/rocm-6.1.2/rocBLAS/0003-OpenBLAS-and-BLIS-library-search-improvements.patch +++ b/patches/rocm-6.1.2/rocBLAS/0003-OpenBLAS-and-BLIS-library-search-improvements.patch @@ -1,4 +1,4 @@ -From 695530043ca5d5654f1e236744db90ad590043cd Mon Sep 17 00:00:00 2001 +From 8e12ea9b4770c29d36ae205d55e19f085229ab1a Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Sat, 18 May 2024 18:15:13 -0700 Subject: [PATCH 3/3] OpenBLAS and BLIS library search improvements @@ -50,5 +50,5 @@ index dc8040ea..704414b5 100755 else() # WIN32 set( BLAS_INCLUDE_DIR ${OPENBLAS_DIR}/include CACHE PATH "OpenBLAS library include path" ) -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/rocPRIM/0001-disable-DPP-from-gfx1035.patch b/patches/rocm-6.1.2/rocPRIM/0001-disable-DPP-from-gfx1035-1036-and-1103.patch similarity index 75% rename from patches/rocm-6.1.2/rocPRIM/0001-disable-DPP-from-gfx1035.patch rename to patches/rocm-6.1.2/rocPRIM/0001-disable-DPP-from-gfx1035-1036-and-1103.patch index 4f9b113..07c2383 100644 --- a/patches/rocm-6.1.2/rocPRIM/0001-disable-DPP-from-gfx1035.patch +++ b/patches/rocm-6.1.2/rocPRIM/0001-disable-DPP-from-gfx1035-1036-and-1103.patch @@ -1,7 +1,7 @@ -From 0741820ac794fa369685f2ed4fdfdbc8970d36a7 Mon Sep 17 00:00:00 2001 +From 855c5e9e4121043c80b5330bab26d0bf8f6faf5e Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 3 May 2024 13:04:15 -0700 -Subject: [PATCH] disable DPP from gfx1035 +Subject: [PATCH] disable DPP from gfx1035 1036 and 1103 Signed-off-by: Mika Laitio --- @@ -9,14 +9,14 @@ Signed-off-by: Mika Laitio 1 file changed, 7 insertions(+) diff --git a/rocprim/include/rocprim/config.hpp b/rocprim/include/rocprim/config.hpp -index b78afff2..0b189c31 100644 +index b78afff2..b563a416 100644 --- a/rocprim/include/rocprim/config.hpp +++ b/rocprim/include/rocprim/config.hpp @@ -80,6 +80,13 @@ #define ROCPRIM_DETAIL_HAS_DPP 1 #endif -+#if ( defined(__gfx1035__)) ++#if defined(__gfx1035__) || defined(__gfx1036__) || defined(__gfx1103__) + #ifndef ROCPRIM_DISABLE_DPP + #define ROCPRIM_DISABLE_DPP + #define ROCPRIM_DETAIL_USE_DPP 0 @@ -27,5 +27,5 @@ index b78afff2..0b189c31 100644 #define ROCPRIM_DETAIL_USE_DPP 1 #else -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/rocRAND/0001-rocRAND-build-gfx1035-options.patch b/patches/rocm-6.1.2/rocRAND/0001-rocRAND-add-gfx1010-gfx1035-and-gfx11.patch similarity index 78% rename from patches/rocm-6.1.2/rocRAND/0001-rocRAND-build-gfx1035-options.patch rename to patches/rocm-6.1.2/rocRAND/0001-rocRAND-add-gfx1010-gfx1035-and-gfx11.patch index 6f939ce..32cc4cf 100644 --- a/patches/rocm-6.1.2/rocRAND/0001-rocRAND-build-gfx1035-options.patch +++ b/patches/rocm-6.1.2/rocRAND/0001-rocRAND-add-gfx1010-gfx1035-and-gfx11.patch @@ -1,14 +1,14 @@ -From 55bb9a72de1235d8df19dfe496ae784e5fc05e6b Mon Sep 17 00:00:00 2001 +From 39a9b24ea6c945795f101364b2f65d24fe552f8b Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 5 Jan 2024 10:43:18 +0200 -Subject: [PATCH] rocRAND build gfx1035 options +Subject: [PATCH] rocRAND add gfx1010 gfx1035 and gfx11 Signed-off-by: Mika Laitio --- - CMakeLists.txt | 2 +- - library/include/rocrand/rocrand_common.h | 3 ++- - rmake.py | 4 ++-- - 3 files changed, 5 insertions(+), 4 deletions(-) + CMakeLists.txt | 2 +- + library/include/rocrand/rocrand_common.h | 11 ++++++++++- + rmake.py | 4 ++-- + 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e1dec5..f05f64d 100644 @@ -24,16 +24,24 @@ index 6e1dec5..f05f64d 100644 if (AMDGPU_TARGETS) if( AMDGPU_TARGETS STREQUAL "all" ) diff --git a/library/include/rocrand/rocrand_common.h b/library/include/rocrand/rocrand_common.h -index 1aad64f..0311ac2 100644 +index 1aad64f..cc4883a 100644 --- a/library/include/rocrand/rocrand_common.h +++ b/library/include/rocrand/rocrand_common.h -@@ -61,7 +61,8 @@ namespace detail { +@@ -61,7 +61,16 @@ namespace detail { defined(__gfx906__) || \ defined(__gfx908__) || \ defined(__gfx909__) || \ - defined(__gfx1030__) ) ++ defined(__gfx1010__) || \ + defined(__gfx1030__) || \ -+ defined(__gfx1035__) ) ++ defined(__gfx1031__) || \ ++ defined(__gfx1032__) || \ ++ defined(__gfx1035__) || \ ++ defined(__gfx1036__) || \ ++ defined(__gfx1100__) || \ ++ defined(__gfx1101__) || \ ++ defined(__gfx1102__) || \ ++ defined(__gfx1103__) ) #if !defined(ROCRAND_ENABLE_INLINE_ASM) #define ROCRAND_ENABLE_INLINE_ASM #endif @@ -53,5 +61,5 @@ index 93e4dc1..5955cc2 100644 help='Verbose build (default: False)') return parser.parse_args() -- -2.41.0 +2.45.2