From 750fe4c2133b435456cd2970eade1e5453ba70cf Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Sat, 13 Jul 2024 20:02:59 -0400 Subject: [PATCH] initial gfx1036 and gfx1103 support - initial support for gfx1036 and gfx1103 as a build target - updated also the gfx1010 configuration settings to be more similar in composable kernel and miopen fixes: https://github.com/lamikr/rocm_sdk_builder/issues/101 fixes: https://github.com/lamikr/rocm_sdk_builder/issues/103 Signed-off-by: Mika Laitio --- ...-on-install-for-missing-kdb.bz2-file.patch | 6 +- ...0002-fix-libroctx64.so-linking-error.patch | 6 +- ...3-MIOpen-gfx1010-and-gfx1035-support.patch | 6 +- .../0004-improved-gfx1010-support.patch | 6 +- .../0005-gfx1036-and-gfx1103-support.patch | 119 ++++++ ...0001-Tensile-fix-fallback-arch-build.patch | 6 +- .../0002-Tensile-add-gfx1035-support.patch | 6 +- .../Tensile/0003-llvm-path-changes.patch | 15 +- ...-gfx1035-gfx1036-and-gfx1103-support.patch | 339 ++++++++++++++++++ ...RD_DWORD-support-for-not-listed-gpus.patch | 6 +- ...mposable-kernel-jit-library-backport.patch | 6 +- ...-gfx1010-and-gfx1035-initial-support.patch | 6 +- ...0004-add-gfx1036-and-gfx1103-support.patch | 76 ++++ ...penBLAS-link-support-for-client-apps.patch | 4 +- ...-1036-and-1103-to-client-utilty-code.patch | 61 ++++ ...02-add-gfx1035-to-client-utilty-code.patch | 28 -- ...r-gtest-and-lapack-linking-in-ubuntu.patch | 4 +- .../0001-tensilelite-llvm-path-changes.patch | 4 +- ...dd-more-gpus-for-default-target-list.patch | 4 +- ...-Tensilelite-fix-fallback-arch-build.patch | 4 +- ...fx1035-1036-and-1103-to-supported-i.patch} | 26 +- .../0005-fallback-support-debug-patch.patch | 4 +- ...and-BLIS-library-search-improvements.patch | 4 +- ...ro-and-arch-linux-msgpack-search-fix.patch | 4 +- .../0001-gfx1010-1030-1035-and-1036.patch | 40 --- .../rccl/0001-gfx1103-support.patch | 47 +++ ...1-add-mageia-9-support-to-install.sh.patch | 4 +- ...x1035-gfx1036-and-gfx1103-to-gpulist.patch | 110 ++++++ .../rocBLAS/0002-add-gfx1035-to-gpulist.patch | 30 -- ...and-BLIS-library-search-improvements.patch | 4 +- ...able-DPP-from-gfx1035-1036-and-1103.patch} | 10 +- ...cRAND-add-gfx1010-gfx1035-and-gfx11.patch} | 28 +- 32 files changed, 849 insertions(+), 174 deletions(-) create mode 100644 patches/rocm-6.1.2/MIOpen/0005-gfx1036-and-gfx1103-support.patch create mode 100644 patches/rocm-6.1.2/Tensile/0004-gfx1035-gfx1036-and-gfx1103-support.patch create mode 100644 patches/rocm-6.1.2/composable_kernel/0004-add-gfx1036-and-gfx1103-support.patch create mode 100644 patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-1036-and-1103-to-client-utilty-code.patch delete mode 100644 patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-to-client-utilty-code.patch rename patches/rocm-6.1.2/hipBLASLt/{0004-Tensilelite-add-gfx1035-to-supported-isa-list.patch => 0004-Tensilelite-add-gfx1035-1036-and-1103-to-supported-i.patch} (52%) delete mode 100644 patches/rocm-6.1.2/rccl/0001-gfx1010-1030-1035-and-1036.patch create mode 100644 patches/rocm-6.1.2/rccl/0001-gfx1103-support.patch create mode 100644 patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-gfx1036-and-gfx1103-to-gpulist.patch delete mode 100644 patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-to-gpulist.patch rename patches/rocm-6.1.2/rocPRIM/{0001-disable-DPP-from-gfx1035.patch => 0001-disable-DPP-from-gfx1035-1036-and-1103.patch} (75%) rename patches/rocm-6.1.2/rocRAND/{0001-rocRAND-build-gfx1035-options.patch => 0001-rocRAND-add-gfx1010-gfx1035-and-gfx11.patch} (78%) diff --git a/patches/rocm-6.1.2/MIOpen/0001-Do-not-fail-on-install-for-missing-kdb.bz2-file.patch b/patches/rocm-6.1.2/MIOpen/0001-Do-not-fail-on-install-for-missing-kdb.bz2-file.patch index c4e4b73..86835c3 100644 --- a/patches/rocm-6.1.2/MIOpen/0001-Do-not-fail-on-install-for-missing-kdb.bz2-file.patch +++ b/patches/rocm-6.1.2/MIOpen/0001-Do-not-fail-on-install-for-missing-kdb.bz2-file.patch @@ -1,7 +1,7 @@ -From 915a3c074ff1c3694566fd74f2e83003a1253104 Mon Sep 17 00:00:00 2001 +From 7b51b6d06842de4851b6c20b61dda2ead78e68ff Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Wed, 8 May 2024 13:48:21 -0700 -Subject: [PATCH 1/4] Do not fail on install for missing kdb.bz2 file +Subject: [PATCH 1/5] Do not fail on install for missing kdb.bz2 file Do not fail if kdb.bz2 file for some GPU does not exist because these kdb files are not mandatory. Their function is to speed up the @@ -43,5 +43,5 @@ index 32d9a2e5b..d6c2db704 100644 endif() -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/MIOpen/0002-fix-libroctx64.so-linking-error.patch b/patches/rocm-6.1.2/MIOpen/0002-fix-libroctx64.so-linking-error.patch index d0742dc..1cf141b 100644 --- a/patches/rocm-6.1.2/MIOpen/0002-fix-libroctx64.so-linking-error.patch +++ b/patches/rocm-6.1.2/MIOpen/0002-fix-libroctx64.so-linking-error.patch @@ -1,7 +1,7 @@ -From ecb981cd1b66749186404fa76c56237c758953b5 Mon Sep 17 00:00:00 2001 +From fdc0e268b2edbb0a57af8e8820b388816af38471 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 3 May 2024 14:15:09 -0700 -Subject: [PATCH 2/4] fix libroctx64.so linking error +Subject: [PATCH 2/5] fix libroctx64.so linking error search the library and if found link it from there instead of expecting it to be in the ld library path @@ -31,5 +31,5 @@ index 0741a6023..ae4405eed 100644 ############################################################ -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/MIOpen/0003-MIOpen-gfx1010-and-gfx1035-support.patch b/patches/rocm-6.1.2/MIOpen/0003-MIOpen-gfx1010-and-gfx1035-support.patch index 0f68e9e..42cfefe 100644 --- a/patches/rocm-6.1.2/MIOpen/0003-MIOpen-gfx1010-and-gfx1035-support.patch +++ b/patches/rocm-6.1.2/MIOpen/0003-MIOpen-gfx1010-and-gfx1035-support.patch @@ -1,7 +1,7 @@ -From 6578a68e3226e97716aad12d445632358f2a463e Mon Sep 17 00:00:00 2001 +From 4b65dfcb0208bfe1eb64c474fb97e31e2b8bf60f Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Tue, 19 Dec 2023 15:13:46 -0800 -Subject: [PATCH 3/4] MIOpen gfx1010 and gfx1035 support +Subject: [PATCH 3/5] MIOpen gfx1010 and gfx1035 support - todo: check gfx1010 specific parts @@ -174,5 +174,5 @@ index 16ce78f04..2ec3eaf09 100644 "gfx1101", "gfx1102"}; -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/MIOpen/0004-improved-gfx1010-support.patch b/patches/rocm-6.1.2/MIOpen/0004-improved-gfx1010-support.patch index fc47b2c..2f14843 100644 --- a/patches/rocm-6.1.2/MIOpen/0004-improved-gfx1010-support.patch +++ b/patches/rocm-6.1.2/MIOpen/0004-improved-gfx1010-support.patch @@ -1,7 +1,7 @@ -From 5e7803271cbbe475da352ab188f09b345006d9c0 Mon Sep 17 00:00:00 2001 +From 3c0552223df66774e8f3613826a6094939763cce Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Mon, 8 Jul 2024 21:44:10 +0300 -Subject: [PATCH 4/4] improved gfx1010 support +Subject: [PATCH 4/5] improved gfx1010 support - allows running pytorch gpu benchmark on gfx1010/amd rx 5700 @@ -135,5 +135,5 @@ index bf02d4d55..c3fa2bd3a 100644 {"Rembrandt", "gfx1035"}, }; -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/MIOpen/0005-gfx1036-and-gfx1103-support.patch b/patches/rocm-6.1.2/MIOpen/0005-gfx1036-and-gfx1103-support.patch new file mode 100644 index 0000000..1c51d15 --- /dev/null +++ b/patches/rocm-6.1.2/MIOpen/0005-gfx1036-and-gfx1103-support.patch @@ -0,0 +1,119 @@ +From 08071937d4c2c34f619ed5b49bd0ced4805875fa Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Sat, 13 Jul 2024 21:07:11 -0400 +Subject: [PATCH 5/5] gfx1036 and gfx1103 support + +Signed-off-by: Mika Laitio +--- + .../composable_kernel/include/utility/config.hpp | 15 ++++++++------- + src/include/miopen/solver/ck_utility_common.hpp | 8 +++++++- + src/target_properties.cpp | 14 ++++++++++++-- + 3 files changed, 27 insertions(+), 10 deletions(-) + +diff --git a/src/composable_kernel/composable_kernel/include/utility/config.hpp b/src/composable_kernel/composable_kernel/include/utility/config.hpp +index 5957a79d8..6ca920b5e 100644 +--- a/src/composable_kernel/composable_kernel/include/utility/config.hpp ++++ b/src/composable_kernel/composable_kernel/include/utility/config.hpp +@@ -16,8 +16,8 @@ + #if !(defined(CK_AMD_GPU_GFX803) || defined(CK_AMD_GPU_GFX900) || defined(CK_AMD_GPU_GFX906) || \ + defined(CK_AMD_GPU_GFX940) || defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90A) || \ + defined(CK_AMD_GPU_GFX941) || defined(CK_AMD_GPU_GFX942) || defined(CK_AMD_GPU_GFX1010) || defined(CK_AMD_GPU_GFX1030) || \ +- defined(CK_AMD_GPU_GFX1031) || defined(CK_AMD_GPU_GFX1035) || defined(CK_AMD_GPU_GFX1100) || defined(CK_AMD_GPU_GFX1101) || \ +- defined(CK_AMD_GPU_GFX1102)) ++ defined(CK_AMD_GPU_GFX1031) || defined(CK_AMD_GPU_GFX1035) || defined(CK_AMD_GPU_GFX1036) || defined(CK_AMD_GPU_GFX1100) || defined(CK_AMD_GPU_GFX1101) || \ ++ defined(CK_AMD_GPU_GFX1102) || defined(CK_AMD_GPU_GFX1103)) + #error Need to define (only) one GPU target + #endif + +@@ -29,14 +29,15 @@ + #define CK_MIN_BLOCK_PER_CU 2 + #endif + +-// TODO: gfx1010 check CK_BUFFER_RESOURCE_3RD_DWORD ++// TODO: composable_kernel has differend CK_BUFFER_RESOURCE_3RD_DWORD for gfx110* devices + // buffer resourse + #if defined(CK_AMD_GPU_GFX803) || defined(CK_AMD_GPU_GFX900) || defined(CK_AMD_GPU_GFX906) || \ + defined(CK_AMD_GPU_GFX941) || defined(CK_AMD_GPU_GFX942) || defined(CK_AMD_GPU_GFX940) || \ +- defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90A) || defined(CK_AMD_GPU_GFX1010) ++ defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90A) + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x00020000 +-#elif defined(CK_AMD_GPU_GFX1030) || defined(CK_AMD_GPU_GFX1031) || defined(CK_AMD_GPU_GFX1035) || defined(CK_AMD_GPU_GFX1100) || \ +- defined(CK_AMD_GPU_GFX1101) || defined(CK_AMD_GPU_GFX1102) ++#elif defined(CK_AMD_GPU_GFX1010) || defined(CK_AMD_GPU_GFX1030) || defined(CK_AMD_GPU_GFX1031) || \ ++ defined(CK_AMD_GPU_GFX1035) || defined(CK_AMD_GPU_GFX1036) || \ ++ defined(CK_AMD_GPU_GFX1100) || defined(CK_AMD_GPU_GFX1101) || defined(CK_AMD_GPU_GFX1102) || defined(CK_AMD_GPU_GFX1103) + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000 + #endif + +@@ -49,7 +50,7 @@ + #elif defined(CK_AMD_GPU_GFX906) || defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90a) || \ + defined(CK_AMD_GPU_GFX941) || defined(CK_AMD_GPU_GFX942) || defined(CK_AMD_GPU_GFX940) || \ + defined(CK_AMD_GPU_GFX1030) || defined(CK_AMD_GPU_GFX1031) || defined(CK_AMD_GPU_GFX1100) || \ +- defined(CK_AMD_GPU_GFX1101) || defined(CK_AMD_GPU_GFX1102) ++ defined(CK_AMD_GPU_GFX1101) || defined(CK_AMD_GPU_GFX1102) || defined(CK_AMD_GPU_GFX1103) + #define CK_USE_AMD_V_FMAC_F32 + #define CK_USE_AMD_V_DOT2_F32_F16 + #define CK_USE_AMD_V_DOT4_I32_I8 +diff --git a/src/include/miopen/solver/ck_utility_common.hpp b/src/include/miopen/solver/ck_utility_common.hpp +index aea036066..ea5629871 100644 +--- a/src/include/miopen/solver/ck_utility_common.hpp ++++ b/src/include/miopen/solver/ck_utility_common.hpp +@@ -61,9 +61,11 @@ static inline bool is_ck_supported_hardware(const Handle& handle) + StartsWith(handle.GetDeviceName(), "gfx1030") || + StartsWith(handle.GetDeviceName(), "gfx1031") || + StartsWith(handle.GetDeviceName(), "gfx1035") || ++ StartsWith(handle.GetDeviceName(), "gfx1036") || + StartsWith(handle.GetDeviceName(), "gfx1100") || + StartsWith(handle.GetDeviceName(), "gfx1101") || +- StartsWith(handle.GetDeviceName(), "gfx1102"); ++ StartsWith(handle.GetDeviceName(), "gfx1102") || ++ StartsWith(handle.GetDeviceName(), "gfx1103"); + } + + // MI100 : gfx908 +@@ -121,12 +123,16 @@ static inline auto get_ck_common_compiler_flag(const Handle& handle) + compiler_flag << " -DCK_AMD_GPU_GFX1031"; + else if(StartsWith(device_name, "gfx1035")) + compiler_flag << " -DCK_AMD_GPU_GFX1035"; ++ else if(StartsWith(device_name, "gfx1036")) ++ compiler_flag << " -DCK_AMD_GPU_GFX1036"; + else if(StartsWith(device_name, "gfx1100")) + compiler_flag << " -DCK_AMD_GPU_GFX1100"; + else if(StartsWith(device_name, "gfx1101")) + compiler_flag << " -DCK_AMD_GPU_GFX1101"; + else if(StartsWith(device_name, "gfx1102")) + compiler_flag << " -DCK_AMD_GPU_GFX1102"; ++ else if(StartsWith(device_name, "gfx1103")) ++ compiler_flag << " -DCK_AMD_GPU_GFX1103"; + // NOLINTEND(*-braces-around-statements) + + // buffer atomic-fadd +diff --git a/src/target_properties.cpp b/src/target_properties.cpp +index c3fa2bd3a..de979aae9 100644 +--- a/src/target_properties.cpp ++++ b/src/target_properties.cpp +@@ -52,9 +52,19 @@ static std::string GetDeviceNameFromMap(const std::string& in) + {"gfx804", "gfx803"}, + {"Vega10", "gfx900"}, + {"gfx901", "gfx900"}, +- {"Navi10", "gfx1010"}, ++ {"navi10", "gfx1010"}, ++ {"navi12", "gfx1011"}, ++ {"navi14", "gfx1012"}, + {"10.3.0 Sienna_Cichlid 18", "gfx1030"}, +- {"Rembrandt", "gfx1035"}, ++ {"navi22", "gfx1031"}, ++ {"navi23", "gfx1032"}, ++ {"navi24", "gfx1034"}, ++ {"rembrandt", "gfx1035"}, ++ {"rembrandt1036", "gfx1036"}, ++ {"navi31", "gfx1100"}, ++ {"navi32", "gfx1101"}, ++ {"navi33", "gfx1102"}, ++ {"phoenix", "gfx1103"}, + }; + + const auto& dev_str = miopen::GetStringEnv(ENV(MIOPEN_DEBUG_ENFORCE_DEVICE)); +-- +2.45.2 + diff --git a/patches/rocm-6.1.2/Tensile/0001-Tensile-fix-fallback-arch-build.patch b/patches/rocm-6.1.2/Tensile/0001-Tensile-fix-fallback-arch-build.patch index 4fc1570..75db742 100644 --- a/patches/rocm-6.1.2/Tensile/0001-Tensile-fix-fallback-arch-build.patch +++ b/patches/rocm-6.1.2/Tensile/0001-Tensile-fix-fallback-arch-build.patch @@ -1,7 +1,7 @@ -From dd39290f90df98f928452720e622c1497b4fd7da Mon Sep 17 00:00:00 2001 +From 3e2d2f891001a8d2a8f74a46884cbec84a2fd8c8 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 10 May 2024 20:34:13 -0700 -Subject: [PATCH 1/3] Tensile fix fallback arch build +Subject: [PATCH 1/4] Tensile fix fallback arch build fixes build error which happens if only the rx 5700 is enabled (only GPU_BUILD_AMD_NAVI10_GFX1010=1 enabled in rocm_sdk_builder envsetup.sh) @@ -61,5 +61,5 @@ index ca3ef322..9e37b4b0 100644 for arch in archs: if arch in architectureMap: -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/Tensile/0002-Tensile-add-gfx1035-support.patch b/patches/rocm-6.1.2/Tensile/0002-Tensile-add-gfx1035-support.patch index 83e65a2..d8397b2 100644 --- a/patches/rocm-6.1.2/Tensile/0002-Tensile-add-gfx1035-support.patch +++ b/patches/rocm-6.1.2/Tensile/0002-Tensile-add-gfx1035-support.patch @@ -1,7 +1,7 @@ -From 73281f46189f7012334d3b1a7e52baffade5295f Mon Sep 17 00:00:00 2001 +From cbec649b52abb1e45d72a3755fa01a77cf9784e7 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 3 May 2024 13:13:02 -0700 -Subject: [PATCH 2/3] Tensile, add gfx1035 support +Subject: [PATCH 2/4] Tensile, add gfx1035 support Signed-off-by: Mika Laitio --- @@ -84,5 +84,5 @@ index 6ececf1c..a89b7c39 100644 else: printWarning("Assembler not present, asm caps loaded from cache are unverified") -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/Tensile/0003-llvm-path-changes.patch b/patches/rocm-6.1.2/Tensile/0003-llvm-path-changes.patch index 7daa1e0..77db49b 100644 --- a/patches/rocm-6.1.2/Tensile/0003-llvm-path-changes.patch +++ b/patches/rocm-6.1.2/Tensile/0003-llvm-path-changes.patch @@ -1,7 +1,7 @@ -From 3ffffba383528d997372861fb5d940d09ebe2996 Mon Sep 17 00:00:00 2001 +From 80776357e8f44019675a224474e314bbb551bc8a Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Wed, 15 May 2024 21:09:56 -0700 -Subject: [PATCH 3/3] llvm path changes +Subject: [PATCH 3/4] llvm path changes Signed-off-by: Mika Laitio --- @@ -18,17 +18,18 @@ index a89b7c39..9b24aee7 100644 else: - globalParameters["AssemblerPath"] = locateExe(os.path.join(globalParameters["ROCmPath"], "llvm/bin"), "clang++") + globalParameters["AssemblerPath"] = locateExe(os.path.join(globalParameters["ROCmPath"], "bin"), "clang++") - + globalParameters["ROCmSMIPath"] = locateExe(globalParameters["ROCmBinPath"], "rocm-smi") - + @@ -2277,7 +2277,7 @@ def assignGlobalParameters( config ): if os.name == "nt": globalParameters["ClangOffloadBundlerPath"] = locateExe(globalParameters["ROCmBinPath"], "clang-offload-bundler.exe") else: - globalParameters["ClangOffloadBundlerPath"] = locateExe(os.path.join(globalParameters["ROCmPath"], "llvm/bin"), "clang-offload-bundler") + globalParameters["ClangOffloadBundlerPath"] = locateExe(os.path.join(globalParameters["ROCmPath"], "bin"), "clang-offload-bundler") - + if "ROCmAgentEnumeratorPath" in config: globalParameters["ROCmAgentEnumeratorPath"] = config["ROCmAgentEnumeratorPath"] --- -2.41.0 +-- +2.45.2 + diff --git a/patches/rocm-6.1.2/Tensile/0004-gfx1035-gfx1036-and-gfx1103-support.patch b/patches/rocm-6.1.2/Tensile/0004-gfx1035-gfx1036-and-gfx1103-support.patch new file mode 100644 index 0000000..9ba562e --- /dev/null +++ b/patches/rocm-6.1.2/Tensile/0004-gfx1035-gfx1036-and-gfx1103-support.patch @@ -0,0 +1,339 @@ +From 35954a100a66f26fd3ad356101fb5723c2eaac74 Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Sat, 13 Jul 2024 15:50:11 -0400 +Subject: [PATCH 4/4] gfx1035,gfx1036 and gfx1103 support + +Signed-off-by: Mika Laitio +--- + Tensile/AsmCaps.py | 84 +++++++++++++++++++ + Tensile/Common.py | 10 +-- + Tensile/SolutionLibrary.py | 4 +- + Tensile/Source/CMakeLists.txt | 4 +- + Tensile/Source/lib/include/Tensile/AMDGPU.hpp | 20 ++++- + .../include/Tensile/PlaceholderLibrary.hpp | 6 ++ + .../Tensile/Serialization/Predicates.hpp | 2 + + .../preload_kernel_arguments_always_half.yaml | 5 +- + pytest.ini | 4 + + 9 files changed, 127 insertions(+), 12 deletions(-) + +diff --git a/Tensile/AsmCaps.py b/Tensile/AsmCaps.py +index ef93f53f..4ba97887 100644 +--- a/Tensile/AsmCaps.py ++++ b/Tensile/AsmCaps.py +@@ -629,6 +629,48 @@ CACHED_ASM_CAPS = \ + 'v_mad_mix_f32': False, + 'v_pk_fma_f16': True, + 'v_pk_fmac_f16': False}, ++ (10, 3, 6): {'HasAddLshl': True, ++ 'HasAtomicAdd': False, ++ 'HasDirectToLdsDest': False, ++ 'HasDirectToLdsNoDest': True, ++ 'HasExplicitCO': True, ++ 'HasExplicitNC': True, ++ 'HasGLCModifier': True, ++ 'HasLshlOr': True, ++ 'HasMFMA': False, ++ 'HasMFMA_b8': False, ++ 'HasMFMA_bf16_1k': False, ++ 'HasMFMA_bf16_original': False, ++ 'HasMFMA_constSrc': False, ++ 'HasMFMA_f64': False, ++ 'HasMFMA_f8': False, ++ 'HasMFMA_i8_908': False, ++ 'HasMFMA_i8_940': False, ++ 'HasMFMA_vgpr': False, ++ 'HasMFMA_xf32': False, ++ 'HasSMulHi': True, ++ 'HasWMMA': False, ++ 'KernargPreloading': False, ++ 'MaxLgkmcnt': 15, ++ 'MaxVmcnt': 63, ++ 'SupportedISA': True, ++ 'SupportedSource': True, ++ 'VOP3v_dot4_i32_i8': True, ++ 'v_dot2_f32_f16': True, ++ 'v_dot2c_f32_f16': True, ++ 'v_dot4_i32_i8': False, ++ 'v_dot4c_i32_i8': True, ++ 'v_fma_f16': True, ++ 'v_fma_f32': True, ++ 'v_fma_f64': True, ++ 'v_fma_mix_f32': True, ++ 'v_fmac_f16': False, ++ 'v_fmac_f32': True, ++ 'v_mac_f16': False, ++ 'v_mac_f32': False, ++ 'v_mad_mix_f32': False, ++ 'v_pk_fma_f16': True, ++ 'v_pk_fmac_f16': False}, + (11, 0, 0): {'HasAddLshl': True, + 'HasAtomicAdd': True, + 'HasDirectToLdsDest': False, +@@ -714,6 +756,48 @@ CACHED_ASM_CAPS = \ + 'v_pk_fma_f16': True, + 'v_pk_fmac_f16': False}, + (11, 0, 2): {'HasAddLshl': True, ++ 'HasAtomicAdd': True, ++ 'HasDirectToLdsDest': False, ++ 'HasDirectToLdsNoDest': False, ++ 'HasExplicitCO': True, ++ 'HasExplicitNC': True, ++ 'HasGLCModifier': True, ++ 'HasLshlOr': True, ++ 'HasMFMA': False, ++ 'HasMFMA_b8': False, ++ 'HasMFMA_bf16_1k': False, ++ 'HasMFMA_bf16_original': False, ++ 'HasMFMA_constSrc': False, ++ 'HasMFMA_f64': False, ++ 'HasMFMA_f8': False, ++ 'HasMFMA_i8_908': False, ++ 'HasMFMA_i8_940': False, ++ 'HasMFMA_vgpr': False, ++ 'HasMFMA_xf32': False, ++ 'HasSMulHi': True, ++ 'HasWMMA': True, ++ 'KernargPreloading': False, ++ 'MaxLgkmcnt': 15, ++ 'MaxVmcnt': 63, ++ 'SupportedISA': True, ++ 'SupportedSource': True, ++ 'VOP3v_dot4_i32_i8': False, ++ 'v_dot2_f32_f16': True, ++ 'v_dot2c_f32_f16': True, ++ 'v_dot4_i32_i8': False, ++ 'v_dot4c_i32_i8': False, ++ 'v_fma_f16': True, ++ 'v_fma_f32': True, ++ 'v_fma_f64': True, ++ 'v_fma_mix_f32': True, ++ 'v_fmac_f16': False, ++ 'v_fmac_f32': True, ++ 'v_mac_f16': False, ++ 'v_mac_f32': False, ++ 'v_mad_mix_f32': False, ++ 'v_pk_fma_f16': True, ++ 'v_pk_fmac_f16': False}, ++ (11, 0, 3): {'HasAddLshl': True, + 'HasAtomicAdd': True, + 'HasDirectToLdsDest': False, + 'HasDirectToLdsNoDest': False, +diff --git a/Tensile/Common.py b/Tensile/Common.py +index 9b24aee7..13a5fd3b 100644 +--- a/Tensile/Common.py ++++ b/Tensile/Common.py +@@ -228,7 +228,7 @@ globalParameters["SupportedISA"] = [(8,0,3), + (9,0,0), (9,0,6), (9,0,8), (9,0,10), + (9,4,0), (9,4,1), (9,4,2), + (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1), (10,3,5), +- (11,0,0), (11,0,1), (11,0,2)] # assembly kernels writer supports these architectures ++ (11,0,0), (11,0,1), (11,0,2), (11,0,3)] # assembly kernels writer supports these architectures + + globalParameters["CleanupBuildFiles"] = False # cleanup build files (e.g. kernel assembly) once no longer needed + globalParameters["GenerateManifestAndExit"] = False # Output manifest file with list of expected library objects and exit +@@ -305,8 +305,8 @@ architectureMap = { + 'gfx941':'aquavanjaram941', 'gfx941:xnack+':'aquavanjaram941', 'gfx941:xnack-':'aquavanjaram941', + 'gfx942':'aquavanjaram942', 'gfx942:xnack+':'aquavanjaram942', 'gfx942:xnack-':'aquavanjaram942', + 'gfx1010':'navi10', 'gfx1011':'navi12', 'gfx1012':'navi14', +- 'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt', +- 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33' ++ 'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt', 'gfx1036':'rembrandt1036', ++ 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33', 'gfx1103':'phoenix' + } + + def getArchitectureName(gfxName): +@@ -2291,8 +2291,8 @@ def assignGlobalParameters( config ): + globalParameters["CurrentISA"] = (9,0,6) + printWarning("Failed to detect ISA so forcing (gfx906) on windows") + if globalParameters["CurrentISA"] == (9,4,1) or globalParameters["CurrentISA"] == (9,4,2) or globalParameters["CurrentISA"] == (11,0,0) or \ +- globalParameters["CurrentISA"] == (11,0,1) or globalParameters["CurrentISA"] == (11,0,2): +- printWarning("HardwareMonitor currently disabled for gfx941/942 or gfx1100/gfx1101/gfx1102") ++ globalParameters["CurrentISA"] == (11,0,1) or globalParameters["CurrentISA"] == (11,0,2) or globalParameters["CurrentISA"] == (11,0,3): ++ printWarning("HardwareMonitor currently disabled for gfx941/942 or gfx1100/gfx1101/gfx1102/gfx1103") + globalParameters["HardwareMonitor"] = False + + # For ubuntu platforms, call dpkg to grep the version of hip-clang. This check is platform specific, and in the future +diff --git a/Tensile/SolutionLibrary.py b/Tensile/SolutionLibrary.py +index 66f2e86a..9953aff9 100644 +--- a/Tensile/SolutionLibrary.py ++++ b/Tensile/SolutionLibrary.py +@@ -255,8 +255,8 @@ class MasterSolutionLibrary: + def ArchitectureIndexMap(cls, architectureName): + # 'fallback', 'gfx803', 'gfx900', 'gfx906', 'gfx908', 'gfx90a', + # 'gfx940', 'gfx941', 'gfx942', 'gfx1010', 'gfx1011', 'gfx1012', +- # 'gfx1030', 'gfx1031', 'gfx1032', 'gfx1034', 'gfx1035', 'gfx1100', +- # 'gfx1101', 'gfx1102' ++ # 'gfx1030', 'gfx1031', 'gfx1032', 'gfx1034', 'gfx1035', 'gfx1036', ++ # 'gfx1100', 'gfx1101', 'gfx1102', 'gfx1103' + archval = -1 + if architectureName == "fallback": + archval = 0 +diff --git a/Tensile/Source/CMakeLists.txt b/Tensile/Source/CMakeLists.txt +index e973a9ed..ecd18dd5 100644 +--- a/Tensile/Source/CMakeLists.txt ++++ b/Tensile/Source/CMakeLists.txt +@@ -51,9 +51,9 @@ if(NOT DEFINED CXX_VERSION_STRING) + endif() + + if(CMAKE_CXX_COMPILER STREQUAL "hipcc") +- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures") ++ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1036 gfx1100 gfx1101 gfx1102 gfx1103 CACHE STRING "GPU architectures") + else() +- set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures") ++ set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1036 gfx1100 gfx1101 gfx1102 gfx1103 CACHE STRING "GPU architectures") + endif() + + include(CMakeDependentOption) +diff --git a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp +index c845d749..d1c673a1 100644 +--- a/Tensile/Source/lib/include/Tensile/AMDGPU.hpp ++++ b/Tensile/Source/lib/include/Tensile/AMDGPU.hpp +@@ -71,9 +71,11 @@ namespace Tensile + gfx1032 = 1032, + gfx1034 = 1034, + gfx1035 = 1035, ++ gfx1036 = 1036, + gfx1100 = 1100, + gfx1101 = 1101, +- gfx1102 = 1102 ++ gfx1102 = 1102, ++ gfx1103 = 1103 + }; + + static std::string toString(Processor p) +@@ -112,12 +114,16 @@ namespace Tensile + return "gfx1034"; + case AMDGPU::Processor::gfx1035: + return "gfx1035"; ++ case AMDGPU::Processor::gfx1036: ++ return "gfx1036"; + case AMDGPU::Processor::gfx1100: + return "gfx1100"; + case AMDGPU::Processor::gfx1101: + return "gfx1101"; + case AMDGPU::Processor::gfx1102: + return "gfx1102"; ++ case AMDGPU::Processor::gfx1103: ++ return "gfx1103"; + } + return ""; + } +@@ -172,6 +178,14 @@ namespace Tensile + { + return AMDGPU::Processor::gfx1030; + } ++ else if(deviceString.find("gfx1035") != std::string::npos) ++ { ++ return AMDGPU::Processor::gfx1035; ++ } ++ else if(deviceString.find("gfx1036") != std::string::npos) ++ { ++ return AMDGPU::Processor::gfx1036; ++ } + else if(deviceString.find("gfx1100") != std::string::npos) + { + return AMDGPU::Processor::gfx1100; +@@ -184,6 +198,10 @@ namespace Tensile + { + return AMDGPU::Processor::gfx1102; + } ++ else if(deviceString.find("gfx1103") != std::string::npos) ++ { ++ return AMDGPU::Processor::gfx1103; ++ } + else + { + return static_cast(0); +diff --git a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp +index 10898ec2..38e99640 100644 +--- a/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp ++++ b/Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp +@@ -55,9 +55,11 @@ namespace Tensile + gfx1032, + gfx1034, + gfx1035, ++ gfx1036, + gfx1100, + gfx1101, + gfx1102, ++ gfx1103, + All + }; + +@@ -100,12 +102,16 @@ namespace Tensile + return "TensileLibrary_*_gfx1034"; + case LazyLoadingInit::gfx1035: + return "TensileLibrary_*_gfx1035"; ++ case LazyLoadingInit::gfx1036: ++ return "TensileLibrary_*_gfx1036"; + case LazyLoadingInit::gfx1100: + return "TensileLibrary_*_gfx1100"; + case LazyLoadingInit::gfx1101: + return "TensileLibrary_*_gfx1101"; + case LazyLoadingInit::gfx1102: + return "TensileLibrary_*_gfx1102"; ++ case LazyLoadingInit::gfx1103: ++ return "TensileLibrary_*_gfx1103"; + case LazyLoadingInit::None: + return ""; + } +diff --git a/Tensile/Source/lib/include/Tensile/Serialization/Predicates.hpp b/Tensile/Source/lib/include/Tensile/Serialization/Predicates.hpp +index 87fc0d24..b52c9d38 100644 +--- a/Tensile/Source/lib/include/Tensile/Serialization/Predicates.hpp ++++ b/Tensile/Source/lib/include/Tensile/Serialization/Predicates.hpp +@@ -229,9 +229,11 @@ namespace Tensile + iot::enumCase(io, value, "gfx1032", AMDGPU::Processor::gfx1032); + iot::enumCase(io, value, "gfx1034", AMDGPU::Processor::gfx1034); + iot::enumCase(io, value, "gfx1035", AMDGPU::Processor::gfx1035); ++ iot::enumCase(io, value, "gfx1036", AMDGPU::Processor::gfx1036); + iot::enumCase(io, value, "gfx1100", AMDGPU::Processor::gfx1100); + iot::enumCase(io, value, "gfx1101", AMDGPU::Processor::gfx1101); + iot::enumCase(io, value, "gfx1102", AMDGPU::Processor::gfx1102); ++ iot::enumCase(io, value, "gfx1103", AMDGPU::Processor::gfx1103); + } + }; + +diff --git a/Tensile/Tests/pre_checkin/preload_kernel_arguments_always_half.yaml b/Tensile/Tests/pre_checkin/preload_kernel_arguments_always_half.yaml +index 7b916d94..825100b0 100644 +--- a/Tensile/Tests/pre_checkin/preload_kernel_arguments_always_half.yaml ++++ b/Tensile/Tests/pre_checkin/preload_kernel_arguments_always_half.yaml +@@ -2,8 +2,9 @@ TestParameters: + marks: [skip-gfx900, skip-gfx906, skip-gfx908, + skip-gfx1010, skip-gfx1011, skip-gfx1012, + skip-gfx1030, skip-gfx1031, skip-gfx1032, +- skip-gfx1034, skip-gfx1035, skip-gfx1100, +- skip-gfx1101, skip-gfx1102] ++ skip-gfx1034, skip-gfx1035, skip-gfx1036, ++ skip-gfx1100, skip-gfx1101, skip-gfx1102, ++ skip-gfx1103] + + GlobalParameters: + MinimumRequiredVersion: 4.2.0 +diff --git a/pytest.ini b/pytest.ini +index 2dc9a329..f16a65e3 100644 +--- a/pytest.ini ++++ b/pytest.ini +@@ -104,9 +104,11 @@ markers = + xfail-gfx1032: architecture + xfail-gfx1034: architecture + xfail-gfx1035: architecture ++ xfail-gfx1036: architecture + xfail-gfx1100: architecture + xfail-gfx1101: architecture + xfail-gfx1102: architecture ++ xfail-gfx1103: architecture + skip-gfx000: architecture + skip-gfx900: architecture + skip-gfx906: architecture +@@ -123,6 +125,8 @@ markers = + skip-gfx1032: architecture + skip-gfx1034: architecture + skip-gfx1035: architecture ++ skip-gfx1036: architecture + skip-gfx1100: architecture + skip-gfx1101: architecture + skip-gfx1102: architecture ++ skip-gfx1103: architecture +-- +2.45.2 + diff --git a/patches/rocm-6.1.2/composable_kernel/0001-by-default-no-3RD_DWORD-support-for-not-listed-gpus.patch b/patches/rocm-6.1.2/composable_kernel/0001-by-default-no-3RD_DWORD-support-for-not-listed-gpus.patch index 382188e..6781de3 100644 --- a/patches/rocm-6.1.2/composable_kernel/0001-by-default-no-3RD_DWORD-support-for-not-listed-gpus.patch +++ b/patches/rocm-6.1.2/composable_kernel/0001-by-default-no-3RD_DWORD-support-for-not-listed-gpus.patch @@ -1,7 +1,7 @@ -From fe5fb410b74f6c5b0ea8471469f7b77e3b916b01 Mon Sep 17 00:00:00 2001 +From 63f717b9e4019902a4c1e705a3a907c3b455aca2 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Tue, 19 Dec 2023 15:16:58 -0800 -Subject: [PATCH 1/3] by default no 3RD_DWORD support for not listed gpus +Subject: [PATCH 1/4] by default no 3RD_DWORD support for not listed gpus Signed-off-by: Mika Laitio --- @@ -22,5 +22,5 @@ index 88efb0277..da5bb86c6 100644 // FMA instruction -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/composable_kernel/0002-composable-kernel-jit-library-backport.patch b/patches/rocm-6.1.2/composable_kernel/0002-composable-kernel-jit-library-backport.patch index 6dc4332..b08ee60 100644 --- a/patches/rocm-6.1.2/composable_kernel/0002-composable-kernel-jit-library-backport.patch +++ b/patches/rocm-6.1.2/composable_kernel/0002-composable-kernel-jit-library-backport.patch @@ -1,7 +1,7 @@ -From e49dd3637bca9e1c2e228f12c8ecd9ebfbaa7afb Mon Sep 17 00:00:00 2001 +From 7c9d19e1fe72007106c8a71a5fef848c5e54c404 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 3 May 2024 13:49:47 -0700 -Subject: [PATCH 2/3] composable kernel jit library backport +Subject: [PATCH 2/4] composable kernel jit library backport - backported from the composable_kernel migraphx branch revision 57cdd70b7cb14e5e3b60cd9a5f96ba8dc343763e @@ -2287,5 +2287,5 @@ index 000000000..a5057da10 + } +} -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/composable_kernel/0003-gfx1010-and-gfx1035-initial-support.patch b/patches/rocm-6.1.2/composable_kernel/0003-gfx1010-and-gfx1035-initial-support.patch index 28a7e5e..8c95fc8 100644 --- a/patches/rocm-6.1.2/composable_kernel/0003-gfx1010-and-gfx1035-initial-support.patch +++ b/patches/rocm-6.1.2/composable_kernel/0003-gfx1010-and-gfx1035-initial-support.patch @@ -1,7 +1,7 @@ -From 26942bd2a0a7a1d341969095e51eeb2625c6b8b4 Mon Sep 17 00:00:00 2001 +From 7c02a14553826261d1052da5e1fb17487e15d2d4 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 24 May 2024 03:50:43 -0700 -Subject: [PATCH 3/3] gfx1010 and gfx1035 initial support +Subject: [PATCH 3/4] gfx1010 and gfx1035 initial support - needs more testing @@ -59,5 +59,5 @@ index e8dabc997..0908f6757 100644 const auto name = raw_name.substr(0, raw_name.find(':')); // str.substr(0, npos) returns str. -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/composable_kernel/0004-add-gfx1036-and-gfx1103-support.patch b/patches/rocm-6.1.2/composable_kernel/0004-add-gfx1036-and-gfx1103-support.patch new file mode 100644 index 0000000..339b710 --- /dev/null +++ b/patches/rocm-6.1.2/composable_kernel/0004-add-gfx1036-and-gfx1103-support.patch @@ -0,0 +1,76 @@ +From d3a76a05875c148f25985ee08c54099c69b75edb Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Sat, 13 Jul 2024 21:35:33 -0400 +Subject: [PATCH 4/4] add gfx1036 and gfx1103 support + +Signed-off-by: Mika Laitio +--- + include/ck/ck.hpp | 12 +++++++----- + include/ck/host_utility/device_prop.hpp | 7 ++++++- + 2 files changed, 13 insertions(+), 6 deletions(-) + +diff --git a/include/ck/ck.hpp b/include/ck/ck.hpp +index 266460342..379261ba3 100644 +--- a/include/ck/ck.hpp ++++ b/include/ck/ck.hpp +@@ -49,11 +49,11 @@ + #define CK_BUFFER_RESOURCE_3RD_DWORD -1 + #elif defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__) || defined(__gfx908__) || \ + defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || \ +- defined(__gfx942__) || defined(__gfx1010__) // for GPU code ++ defined(__gfx942__) // for GPU code + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x00020000 +-#elif defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1035__) // for GPU code ++#elif defined(__gfx1010__) || defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1035__) || defined(__gfx1036__) // for GPU code + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000 +-#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) // for GPU code ++#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) // for GPU code + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31004000 + #else + #define CK_BUFFER_RESOURCE_3RD_DWORD -1 +@@ -66,12 +66,14 @@ + #ifndef __HIP_DEVICE_COMPILE__ // for host code, define nothing + #elif defined(__gfx803__) || defined(__gfx900__) // for GPU code + #define CK_USE_AMD_V_MAC_F32 ++#elif defined(__gfx1010__) ++#define CK_USE_AMD_V_FMAC_F32 + #elif defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__) || defined(__gfx1031__) || \ + defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) // for GPU code + #define CK_USE_AMD_V_FMAC_F32 + #define CK_USE_AMD_V_DOT2_F32_F16 + #define CK_USE_AMD_V_DOT4_I32_I8 +-#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) ++#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) + #define CK_USE_AMD_V_FMAC_F32 + #define CK_USE_AMD_V_DOT2_F32_F16 + #define CK_USE_AMD_V_DOT4_I32_I8_GFX11 +@@ -96,7 +98,7 @@ + // WMMA instruction + #ifndef __HIP_DEVICE_COMPILE__ // for host code + #define CK_USE_AMD_WMMA +-#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) // for GPU code ++#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) // for GPU code + #define CK_USE_AMD_WMMA + #endif + +diff --git a/include/ck/host_utility/device_prop.hpp b/include/ck/host_utility/device_prop.hpp +index 0908f6757..294a60f9a 100644 +--- a/include/ck/host_utility/device_prop.hpp ++++ b/include/ck/host_utility/device_prop.hpp +@@ -43,7 +43,12 @@ inline std::string get_device_name() + {"navi10", "gfx1010"}, + {"gfx1031", "gfx1030"}, + {"10.3.0 Sienna_Cichlid 18", "gfx1030"}, +- {"Rembrandt", "gfx1035"}, ++ {"rembrandt", "gfx1035"}, ++ {"rembrandt1036", "gfx1036"}, ++ {"navi31", "gfx1100"}, ++ {"navi32", "gfx1101"}, ++ {"navi33", "gfx1102"}, ++ {"phoenix", "gfx1103"}, + }; + + const auto name = raw_name.substr(0, raw_name.find(':')); // str.substr(0, npos) returns str. +-- +2.45.2 + diff --git a/patches/rocm-6.1.2/hipBLAS/0001-add-OpenBLAS-link-support-for-client-apps.patch b/patches/rocm-6.1.2/hipBLAS/0001-add-OpenBLAS-link-support-for-client-apps.patch index 2df06d7..7db6814 100644 --- a/patches/rocm-6.1.2/hipBLAS/0001-add-OpenBLAS-link-support-for-client-apps.patch +++ b/patches/rocm-6.1.2/hipBLAS/0001-add-OpenBLAS-link-support-for-client-apps.patch @@ -1,4 +1,4 @@ -From 9dfc58e3856e4a8e3bcd75c3bdc308a0b50eaf14 Mon Sep 17 00:00:00 2001 +From 2f4b5e5e72f16210ce1ec33aa508c34367aabaef Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Sat, 18 May 2024 18:34:49 -0700 Subject: [PATCH 1/3] add OpenBLAS link support for client apps @@ -49,5 +49,5 @@ index 4a6e5c8..0240293 100644 endif() set( ${cblas_libs} ${libs} PARENT_SCOPE ) -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-1036-and-1103-to-client-utilty-code.patch b/patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-1036-and-1103-to-client-utilty-code.patch new file mode 100644 index 0000000..2f671c7 --- /dev/null +++ b/patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-1036-and-1103-to-client-utilty-code.patch @@ -0,0 +1,61 @@ +From 604e6e5f8313bda35b64ab06d549ad1b567488be Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Sat, 18 May 2024 18:35:05 -0700 +Subject: [PATCH 2/3] add gfx1035,1036 and 1103 to client utilty code + +Signed-off-by: Mika Laitio +--- + clients/common/utility.cpp | 12 ++++++++++++ + clients/include/utility.h | 4 +++- + 2 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/clients/common/utility.cpp b/clients/common/utility.cpp +index 9dd74e7..968b708 100644 +--- a/clients/common/utility.cpp ++++ b/clients/common/utility.cpp +@@ -476,6 +476,14 @@ hipblasClientProcessor getArch() + { + return hipblasClientProcessor::gfx1030; + } ++ else if(deviceString.find("gfx1035") != std::string::npos) ++ { ++ return hipblasClientProcessor::gfx1035; ++ } ++ else if(deviceString.find("gfx1036") != std::string::npos) ++ { ++ return hipblasClientProcessor::gfx1036; ++ } + else if(deviceString.find("gfx1100") != std::string::npos) + { + return hipblasClientProcessor::gfx1100; +@@ -488,6 +496,10 @@ hipblasClientProcessor getArch() + { + return hipblasClientProcessor::gfx1102; + } ++ else if(deviceString.find("gfx1103") != std::string::npos) ++ { ++ return hipblasClientProcessor::gfx1103; ++ } + return static_cast(0); + } + +diff --git a/clients/include/utility.h b/clients/include/utility.h +index 0891137..fd440a8 100644 +--- a/clients/include/utility.h ++++ b/clients/include/utility.h +@@ -867,9 +867,11 @@ typedef enum hipblasClientProcessor + gfx1032 = 1032, + gfx1034 = 1034, + gfx1035 = 1035, ++ gfx1036 = 1036, + gfx1100 = 1100, + gfx1101 = 1101, +- gfx1102 = 1102 ++ gfx1102 = 1102, ++ gfx1103 = 1103 + } hipblasClientProcessor; + + /* get architecture number */ +-- +2.45.2 + diff --git a/patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-to-client-utilty-code.patch b/patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-to-client-utilty-code.patch deleted file mode 100644 index 47e7798..0000000 --- a/patches/rocm-6.1.2/hipBLAS/0002-add-gfx1035-to-client-utilty-code.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 68e6d6b2323a635085eca7487647a2d6f4591910 Mon Sep 17 00:00:00 2001 -From: Mika Laitio -Date: Sat, 18 May 2024 18:35:05 -0700 -Subject: [PATCH 2/3] add gfx1035 to client utilty code - -Signed-off-by: Mika Laitio ---- - clients/common/utility.cpp | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/clients/common/utility.cpp b/clients/common/utility.cpp -index 9dd74e7..2a2d214 100644 ---- a/clients/common/utility.cpp -+++ b/clients/common/utility.cpp -@@ -476,6 +476,10 @@ hipblasClientProcessor getArch() - { - return hipblasClientProcessor::gfx1030; - } -+ else if(deviceString.find("gfx1035") != std::string::npos) -+ { -+ return hipblasClientProcessor::gfx1035; -+ } - else if(deviceString.find("gfx1100") != std::string::npos) - { - return hipblasClientProcessor::gfx1100; --- -2.41.0 - diff --git a/patches/rocm-6.1.2/hipBLAS/0003-fixes-for-gtest-and-lapack-linking-in-ubuntu.patch b/patches/rocm-6.1.2/hipBLAS/0003-fixes-for-gtest-and-lapack-linking-in-ubuntu.patch index d22e809..6e5ed71 100644 --- a/patches/rocm-6.1.2/hipBLAS/0003-fixes-for-gtest-and-lapack-linking-in-ubuntu.patch +++ b/patches/rocm-6.1.2/hipBLAS/0003-fixes-for-gtest-and-lapack-linking-in-ubuntu.patch @@ -1,4 +1,4 @@ -From d7a6f0002b35ebcb6b6f861a5bf839aafb20c8b5 Mon Sep 17 00:00:00 2001 +From 95c06ec97e891829b2d525c9bee1da21a52fdfc8 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Thu, 23 May 2024 11:51:49 -0700 Subject: [PATCH 3/3] fixes for gtest and lapack linking in ubuntu @@ -68,5 +68,5 @@ index b670407..8c66252 100644 CXX_EXTENSIONS OFF RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLASLt/0001-tensilelite-llvm-path-changes.patch b/patches/rocm-6.1.2/hipBLASLt/0001-tensilelite-llvm-path-changes.patch index 9bc517a..390453e 100644 --- a/patches/rocm-6.1.2/hipBLASLt/0001-tensilelite-llvm-path-changes.patch +++ b/patches/rocm-6.1.2/hipBLASLt/0001-tensilelite-llvm-path-changes.patch @@ -1,4 +1,4 @@ -From 05c7fa87dabe346ed68a338aaa967a90bdfc9832 Mon Sep 17 00:00:00 2001 +From e5daa8fa4cf061e18c4b6bfb27c318f0d79bf594 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 3 May 2024 13:16:31 -0700 Subject: [PATCH 1/7] tensilelite llvm path changes @@ -49,5 +49,5 @@ index 230c4469..81a1a015 100644 . ${venv}/bin/activate -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLASLt/0002-add-more-gpus-for-default-target-list.patch b/patches/rocm-6.1.2/hipBLASLt/0002-add-more-gpus-for-default-target-list.patch index 6f76be2..400c041 100644 --- a/patches/rocm-6.1.2/hipBLASLt/0002-add-more-gpus-for-default-target-list.patch +++ b/patches/rocm-6.1.2/hipBLASLt/0002-add-more-gpus-for-default-target-list.patch @@ -1,4 +1,4 @@ -From b8b8b1831a52364980a70841eadcc7dc6667bbaa Mon Sep 17 00:00:00 2001 +From 490d18000263e3e4849ed78ab4ac7975529a86a7 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Wed, 15 May 2024 19:20:44 -0700 Subject: [PATCH 2/7] add more gpus for default target list @@ -26,5 +26,5 @@ index fec03c1b..c6a16e01 100644 if (AMDGPU_TARGETS) -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLASLt/0003-Tensilelite-fix-fallback-arch-build.patch b/patches/rocm-6.1.2/hipBLASLt/0003-Tensilelite-fix-fallback-arch-build.patch index 5a21c7f..40f942c 100644 --- a/patches/rocm-6.1.2/hipBLASLt/0003-Tensilelite-fix-fallback-arch-build.patch +++ b/patches/rocm-6.1.2/hipBLASLt/0003-Tensilelite-fix-fallback-arch-build.patch @@ -1,4 +1,4 @@ -From 4d0cd887735f8b24687d4007ce4310a4a2df3951 Mon Sep 17 00:00:00 2001 +From 61b76b1bbdaeac7f08ef13200ffcd4b3fce8f5a6 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Wed, 15 May 2024 19:30:31 -0700 Subject: [PATCH 3/7] Tensilelite fix fallback arch build @@ -62,5 +62,5 @@ index 8a37403d..d2ac2166 100644 for arch in archs: if arch in architectureMap: -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLASLt/0004-Tensilelite-add-gfx1035-to-supported-isa-list.patch b/patches/rocm-6.1.2/hipBLASLt/0004-Tensilelite-add-gfx1035-1036-and-1103-to-supported-i.patch similarity index 52% rename from patches/rocm-6.1.2/hipBLASLt/0004-Tensilelite-add-gfx1035-to-supported-isa-list.patch rename to patches/rocm-6.1.2/hipBLASLt/0004-Tensilelite-add-gfx1035-1036-and-1103-to-supported-i.patch index 52e1761..f3a7c33 100644 --- a/patches/rocm-6.1.2/hipBLASLt/0004-Tensilelite-add-gfx1035-to-supported-isa-list.patch +++ b/patches/rocm-6.1.2/hipBLASLt/0004-Tensilelite-add-gfx1035-1036-and-1103-to-supported-i.patch @@ -1,15 +1,16 @@ -From b4f84e3aa32dee684c200ed3d5a395db9d36811d Mon Sep 17 00:00:00 2001 +From a687a8343961e0a3c732101b44e0071150f38d51 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Wed, 15 May 2024 20:56:51 -0700 -Subject: [PATCH 4/7] Tensilelite add gfx1035 to supported isa list +Subject: [PATCH 4/7] Tensilelite add gfx1035, 1036 and 1103 to supported isa + list Signed-off-by: Mika Laitio --- - tensilelite/Tensile/Common.py | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) + tensilelite/Tensile/Common.py | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensilelite/Tensile/Common.py b/tensilelite/Tensile/Common.py -index c47a8752..1751e0b4 100644 +index c47a8752..3630ee41 100644 --- a/tensilelite/Tensile/Common.py +++ b/tensilelite/Tensile/Common.py @@ -209,7 +209,7 @@ globalParameters["MergeFiles"] = True # F=store every solution and k @@ -17,10 +18,21 @@ index c47a8752..1751e0b4 100644 globalParameters["MaxFileName"] = 64 # If a file name would be longer than this, shorten it with a hash. -globalParameters["SupportedISA"] = [(8,0,3), (9,0,0), (9,0,6), (9,0,8), (9,0,10), (9,4,0), (9,4,1), (9,4,2), (10,1,0), (10,1,1), (10,1,2), (10,3,0), (11,0,0), (11,0,1), (11,0,2)] # assembly kernels writer supports these architectures -+globalParameters["SupportedISA"] = [(8,0,3), (9,0,0), (9,0,6), (9,0,8), (9,0,10), (9,4,0), (9,4,1), (9,4,2), (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1), (10,3,5), (11,0,0), (11,0,1), (11,0,2)] # assembly kernels writer supports these architectures ++globalParameters["SupportedISA"] = [(8,0,3), (9,0,0), (9,0,6), (9,0,8), (9,0,10), (9,4,0), (9,4,1), (9,4,2), (10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1), (10,3,5), (10,3,6), (11,0,0), (11,0,1), (11,0,2), (11,0,3)] # assembly kernels writer supports these architectures globalParameters["GenerateManifestAndExit"] = False # Output manifest file with list of expected library objects and exit globalParameters["NewClient"] = 2 # Old client deprecated: NewClient must be set to 2. +@@ -286,8 +286,8 @@ architectureMap = { + 'gfx940':'aquavanjaram', 'gfx940:xnack+':'aquavanjaram', 'gfx940:xnack-':'aquavanjaram', + 'gfx941':'aquavanjaram', 'gfx941:xnack+':'aquavanjaram', 'gfx941:xnack-':'aquavanjaram', + 'gfx942':'aquavanjaram', 'gfx942:xnack+':'aquavanjaram', 'gfx942:xnack-':'aquavanjaram', +- 'gfx1010':'navi10', 'gfx1011':'navi12', 'gfx1012':'navi14', 'gfx1030':'navi21', +- 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33' ++ 'gfx1010':'navi10', 'gfx1011':'navi12', 'gfx1012':'navi14', 'gfx1030':'navi21', 'gfx1035':'rembrandt', 'gfx1036':'rembrandt1036', ++ 'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33', 'gfx1103':'phoenix' + } + + def getArchitectureName(gfxName): -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLASLt/0005-fallback-support-debug-patch.patch b/patches/rocm-6.1.2/hipBLASLt/0005-fallback-support-debug-patch.patch index 5093087..2d004c7 100644 --- a/patches/rocm-6.1.2/hipBLASLt/0005-fallback-support-debug-patch.patch +++ b/patches/rocm-6.1.2/hipBLASLt/0005-fallback-support-debug-patch.patch @@ -1,4 +1,4 @@ -From 29e64fd34440137d7406bc7044b19d470d785e27 Mon Sep 17 00:00:00 2001 +From 6b32bb226ef045db90c86f955fb3836d3f01ca98 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Sat, 18 May 2024 18:39:43 -0700 Subject: [PATCH 5/7] fallback support debug patch @@ -23,5 +23,5 @@ index d2ac2166..ce6c0d5f 100644 if globalParameters["LazyLibraryLoading"] and not (globalParameters["MergeFiles"] and globalParameters["SeparateArchitectures"]): printExit("--lazy-library-loading requires --merge-files and --separate-architectures enabled") -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLASLt/0006-OpenBLAS-and-BLIS-library-search-improvements.patch b/patches/rocm-6.1.2/hipBLASLt/0006-OpenBLAS-and-BLIS-library-search-improvements.patch index d628ff6..2437e75 100644 --- a/patches/rocm-6.1.2/hipBLASLt/0006-OpenBLAS-and-BLIS-library-search-improvements.patch +++ b/patches/rocm-6.1.2/hipBLASLt/0006-OpenBLAS-and-BLIS-library-search-improvements.patch @@ -1,4 +1,4 @@ -From 24f6c1010ce3941833b40b4faad775de320caa3f Mon Sep 17 00:00:00 2001 +From 992e946dddb9e1714da704bd1277b291496f5f69 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Sat, 18 May 2024 18:40:43 -0700 Subject: [PATCH 6/7] OpenBLAS and BLIS library search improvements @@ -156,5 +156,5 @@ index bc9109d0..53b854e6 100644 message("BLIS lib found: ${BLIS_LIB}") -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/hipBLASLt/0007-manjaro-and-arch-linux-msgpack-search-fix.patch b/patches/rocm-6.1.2/hipBLASLt/0007-manjaro-and-arch-linux-msgpack-search-fix.patch index b4ce482..3a9f210 100644 --- a/patches/rocm-6.1.2/hipBLASLt/0007-manjaro-and-arch-linux-msgpack-search-fix.patch +++ b/patches/rocm-6.1.2/hipBLASLt/0007-manjaro-and-arch-linux-msgpack-search-fix.patch @@ -1,4 +1,4 @@ -From f93b0ac353b7c8ce61d88c8f6a1b84598119c685 Mon Sep 17 00:00:00 2001 +From f320e89bad147c2b9e6cb6fd00261d5bfd0526be Mon Sep 17 00:00:00 2001 From: Daniele <57776841+daniandtheweb@users.noreply.github.com> Date: Tue, 18 Jun 2024 21:46:30 -0700 Subject: [PATCH 7/7] manjaro and arch linux msgpack search fix @@ -28,5 +28,5 @@ index 43206527..e9476b3d 100644 if(TARGET msgpackc-cxx) -- -2.41.1 +2.45.2 diff --git a/patches/rocm-6.1.2/rccl/0001-gfx1010-1030-1035-and-1036.patch b/patches/rocm-6.1.2/rccl/0001-gfx1010-1030-1035-and-1036.patch deleted file mode 100644 index 7322954..0000000 --- a/patches/rocm-6.1.2/rccl/0001-gfx1010-1030-1035-and-1036.patch +++ /dev/null @@ -1,40 +0,0 @@ -From fbec847c285080c07c8d5bb6a49ff49196438b42 Mon Sep 17 00:00:00 2001 -From: Mika Laitio -Date: Sat, 6 Jan 2024 01:29:45 +0200 -Subject: [PATCH] gfx1010, 1030, 1035 and 1036 - -Signed-off-by: Mika Laitio ---- - CMakeLists.txt | 15 +++------------ - 1 file changed, 3 insertions(+), 12 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index ba4bd72..d17a9c3 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -26,19 +26,10 @@ option(TRACE "Enable additional tracing" - # Default GPU architectures to build - #================================================================================================== - set(DEFAULT_GPUS -- gfx803 -- gfx900:xnack- -- gfx906:xnack- -- gfx908:xnack- -- gfx90a:xnack- -- gfx90a:xnack+ -- gfx940 -- gfx941 -- gfx942 -+ gfx1010 - gfx1030 -- gfx1100 -- gfx1101 -- gfx1102) -+ gfx1035 -+ gfx1036) - - # Load CMake modules - #================================================================================================== --- -2.41.0 - diff --git a/patches/rocm-6.1.2/rccl/0001-gfx1103-support.patch b/patches/rocm-6.1.2/rccl/0001-gfx1103-support.patch new file mode 100644 index 0000000..a0575a3 --- /dev/null +++ b/patches/rocm-6.1.2/rccl/0001-gfx1103-support.patch @@ -0,0 +1,47 @@ +From 3fcd05e8663accc97ba118dbc02bb48e3ee656a9 Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Sat, 13 Jul 2024 04:57:37 -0400 +Subject: [PATCH] gfx1103 support + +Signed-off-by: Mika Laitio +--- + src/collectives/device/common.h | 4 ++-- + tools/JitterBench/Common.hpp | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/collectives/device/common.h b/src/collectives/device/common.h +index 093a26c..c51bd4a 100644 +--- a/src/collectives/device/common.h ++++ b/src/collectives/device/common.h +@@ -43,7 +43,7 @@ class ncclFunction { + #endif + }; + +-#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) ++#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) + #define __trace_hwreg() + #else + #define __trace_hwreg() \ +@@ -393,4 +393,4 @@ __device__ __attribute__((noinline)) void NCCL_FUNC_NAME(func, algo, proto, dev + + #define NCCL_NVLS_ENABLED (__CUDA_ARCH__ >= 900 && NCCL_NVLS_SUPPORTS(NCCL_TYPE, NCCL_OP)) + +-#endif +\ No newline at end of file ++#endif +diff --git a/tools/JitterBench/Common.hpp b/tools/JitterBench/Common.hpp +index 8fad0ac..8728833 100644 +--- a/tools/JitterBench/Common.hpp ++++ b/tools/JitterBench/Common.hpp +@@ -43,7 +43,7 @@ THE SOFTWARE. + #endif + + // Macro for collecting HW_REG_HW_ID +-#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__NVCC__) ++#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) || defined(__NVCC__) + #define GetHwId(val) \ + val = 0 + #else +-- +2.45.2 + diff --git a/patches/rocm-6.1.2/rocBLAS/0001-add-mageia-9-support-to-install.sh.patch b/patches/rocm-6.1.2/rocBLAS/0001-add-mageia-9-support-to-install.sh.patch index 6aaa228..008efe1 100644 --- a/patches/rocm-6.1.2/rocBLAS/0001-add-mageia-9-support-to-install.sh.patch +++ b/patches/rocm-6.1.2/rocBLAS/0001-add-mageia-9-support-to-install.sh.patch @@ -1,4 +1,4 @@ -From b9ebd530ecd258a25c87fbf2d37a770203e16367 Mon Sep 17 00:00:00 2001 +From b4e555f2d5c996b528cc13602f78012671383f3a Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Sat, 18 May 2024 18:17:42 -0700 Subject: [PATCH 1/3] add mageia 9 support to install.sh @@ -54,5 +54,5 @@ index fc644b87..46c95775 100755 elevate_if_not_root zypper -n --no-gpg-checks install rocblas-*.rpm ;; -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-gfx1036-and-gfx1103-to-gpulist.patch b/patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-gfx1036-and-gfx1103-to-gpulist.patch new file mode 100644 index 0000000..b97ec46 --- /dev/null +++ b/patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-gfx1036-and-gfx1103-to-gpulist.patch @@ -0,0 +1,110 @@ +From 98c87b3db281d5048524ecb0c14c33c1fac0719c Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Sat, 18 May 2024 18:18:33 -0700 +Subject: [PATCH 2/3] add gfx1035,gfx1036 and gfx1103 to gpulist + +Signed-off-by: Mika Laitio +--- + CMakeLists.txt | 6 +++--- + library/src/handle.cpp | 12 ++++++++++++ + library/src/include/handle.hpp | 4 +++- + library/src/tensile_host.cpp | 12 ++++++++++++ + 4 files changed, 30 insertions(+), 4 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 92f227f7..69ab2478 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -108,9 +108,9 @@ rocm_setup_version( VERSION ${VERSION_STRING} ) + list( APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/llvm ${ROCM_PATH} ${ROCM_PATH}/hip /opt/rocm/llvm /opt/rocm /opt/rocm/hip ) + + # setting target list based on ROCm version +-set( TARGET_LIST_ROCM_5.6 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102") +-set( TARGET_LIST_ROCM_5.7 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102") +-set( TARGET_LIST_ROCM_6.0 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102") ++set( TARGET_LIST_ROCM_5.6 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1035;gfx1036;gfx1100;gfx1101;gfx1102;gfx1103") ++set( TARGET_LIST_ROCM_5.7 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1035;gfx1036;gfx1100;gfx1101;gfx1102;gfx1103") ++set( TARGET_LIST_ROCM_6.0 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1035;gfx1036;gfx1100;gfx1101;gfx1102;gfx1103") + + if(ROCM_PLATFORM_VERSION) + if(${ROCM_PLATFORM_VERSION} VERSION_LESS 5.7.0) +diff --git a/library/src/handle.cpp b/library/src/handle.cpp +index 7b08a934..eb2254dd 100644 +--- a/library/src/handle.cpp ++++ b/library/src/handle.cpp +@@ -129,6 +129,14 @@ static Processor getActiveArch(int deviceId) + { + return Processor::gfx1030; + } ++ else if(deviceString.find("gfx1035") != std::string::npos) ++ { ++ return Processor::gfx1035; ++ } ++ else if(deviceString.find("gfx1036") != std::string::npos) ++ { ++ return Processor::gfx1036; ++ } + else if(deviceString.find("gfx1100") != std::string::npos) + { + return Processor::gfx1100; +@@ -141,6 +149,10 @@ static Processor getActiveArch(int deviceId) + { + return Processor::gfx1102; + } ++ else if(deviceString.find("gfx1103") != std::string::npos) ++ { ++ return Processor::gfx1103; ++ } + return static_cast(0); + } + +diff --git a/library/src/include/handle.hpp b/library/src/include/handle.hpp +index 282edb8f..cc3e6c0f 100644 +--- a/library/src/include/handle.hpp ++++ b/library/src/include/handle.hpp +@@ -90,9 +90,11 @@ enum class Processor : int + gfx1032 = 1032, + gfx1034 = 1034, + gfx1035 = 1035, ++ gfx1036 = 1036, + gfx1100 = 1100, + gfx1101 = 1101, +- gfx1102 = 1102 ++ gfx1102 = 1102, ++ gfx1103 = 1103 + }; + + // helper function in handle.cpp +diff --git a/library/src/tensile_host.cpp b/library/src/tensile_host.cpp +index 1f0349fd..073bb244 100644 +--- a/library/src/tensile_host.cpp ++++ b/library/src/tensile_host.cpp +@@ -257,6 +257,14 @@ namespace + { + return Tensile::LazyLoadingInit::gfx1030; + } ++ else if(deviceString.find("gfx1035") != std::string::npos) ++ { ++ return Tensile::LazyLoadingInit::gfx1035; ++ } ++ else if(deviceString.find("gfx1036") != std::string::npos) ++ { ++ return Tensile::LazyLoadingInit::gfx1036; ++ } + else if(deviceString.find("gfx1100") != std::string::npos) + { + return Tensile::LazyLoadingInit::gfx1100; +@@ -269,6 +277,10 @@ namespace + { + return Tensile::LazyLoadingInit::gfx1102; + } ++ else if(deviceString.find("gfx1103") != std::string::npos) ++ { ++ return Tensile::LazyLoadingInit::gfx1103; ++ } + return Tensile::LazyLoadingInit::None; + } + +-- +2.45.2 + diff --git a/patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-to-gpulist.patch b/patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-to-gpulist.patch deleted file mode 100644 index 42427f0..0000000 --- a/patches/rocm-6.1.2/rocBLAS/0002-add-gfx1035-to-gpulist.patch +++ /dev/null @@ -1,30 +0,0 @@ -From d39e2f6ea3251f403e88ce388a7e0b0b61bbee1d Mon Sep 17 00:00:00 2001 -From: Mika Laitio -Date: Sat, 18 May 2024 18:18:33 -0700 -Subject: [PATCH 2/3] add gfx1035 to gpulist - -Signed-off-by: Mika Laitio ---- - CMakeLists.txt | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index ae76864d..234e6b07 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -108,9 +108,9 @@ rocm_setup_version( VERSION ${VERSION_STRING} ) - list( APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/llvm ${ROCM_PATH} ${ROCM_PATH}/hip /opt/rocm/llvm /opt/rocm /opt/rocm/hip ) - - # setting target list based on ROCm version --set( TARGET_LIST_ROCM_5.6 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102") --set( TARGET_LIST_ROCM_5.7 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102") --set( TARGET_LIST_ROCM_6.0 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102") -+set( TARGET_LIST_ROCM_5.6 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1035;gfx1100;gfx1101;gfx1102") -+set( TARGET_LIST_ROCM_5.7 "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1035;gfx1100;gfx1101;gfx1102") -+set( TARGET_LIST_ROCM_6.0 "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1035;gfx1100;gfx1101;gfx1102") - - if(ROCM_PLATFORM_VERSION) - if(${ROCM_PLATFORM_VERSION} VERSION_LESS 5.7.0) --- -2.41.0 - diff --git a/patches/rocm-6.1.2/rocBLAS/0003-OpenBLAS-and-BLIS-library-search-improvements.patch b/patches/rocm-6.1.2/rocBLAS/0003-OpenBLAS-and-BLIS-library-search-improvements.patch index 8612a8b..9c8a31b 100644 --- a/patches/rocm-6.1.2/rocBLAS/0003-OpenBLAS-and-BLIS-library-search-improvements.patch +++ b/patches/rocm-6.1.2/rocBLAS/0003-OpenBLAS-and-BLIS-library-search-improvements.patch @@ -1,4 +1,4 @@ -From 695530043ca5d5654f1e236744db90ad590043cd Mon Sep 17 00:00:00 2001 +From 8e12ea9b4770c29d36ae205d55e19f085229ab1a Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Sat, 18 May 2024 18:15:13 -0700 Subject: [PATCH 3/3] OpenBLAS and BLIS library search improvements @@ -50,5 +50,5 @@ index dc8040ea..704414b5 100755 else() # WIN32 set( BLAS_INCLUDE_DIR ${OPENBLAS_DIR}/include CACHE PATH "OpenBLAS library include path" ) -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/rocPRIM/0001-disable-DPP-from-gfx1035.patch b/patches/rocm-6.1.2/rocPRIM/0001-disable-DPP-from-gfx1035-1036-and-1103.patch similarity index 75% rename from patches/rocm-6.1.2/rocPRIM/0001-disable-DPP-from-gfx1035.patch rename to patches/rocm-6.1.2/rocPRIM/0001-disable-DPP-from-gfx1035-1036-and-1103.patch index 4f9b113..07c2383 100644 --- a/patches/rocm-6.1.2/rocPRIM/0001-disable-DPP-from-gfx1035.patch +++ b/patches/rocm-6.1.2/rocPRIM/0001-disable-DPP-from-gfx1035-1036-and-1103.patch @@ -1,7 +1,7 @@ -From 0741820ac794fa369685f2ed4fdfdbc8970d36a7 Mon Sep 17 00:00:00 2001 +From 855c5e9e4121043c80b5330bab26d0bf8f6faf5e Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 3 May 2024 13:04:15 -0700 -Subject: [PATCH] disable DPP from gfx1035 +Subject: [PATCH] disable DPP from gfx1035 1036 and 1103 Signed-off-by: Mika Laitio --- @@ -9,14 +9,14 @@ Signed-off-by: Mika Laitio 1 file changed, 7 insertions(+) diff --git a/rocprim/include/rocprim/config.hpp b/rocprim/include/rocprim/config.hpp -index b78afff2..0b189c31 100644 +index b78afff2..b563a416 100644 --- a/rocprim/include/rocprim/config.hpp +++ b/rocprim/include/rocprim/config.hpp @@ -80,6 +80,13 @@ #define ROCPRIM_DETAIL_HAS_DPP 1 #endif -+#if ( defined(__gfx1035__)) ++#if defined(__gfx1035__) || defined(__gfx1036__) || defined(__gfx1103__) + #ifndef ROCPRIM_DISABLE_DPP + #define ROCPRIM_DISABLE_DPP + #define ROCPRIM_DETAIL_USE_DPP 0 @@ -27,5 +27,5 @@ index b78afff2..0b189c31 100644 #define ROCPRIM_DETAIL_USE_DPP 1 #else -- -2.41.0 +2.45.2 diff --git a/patches/rocm-6.1.2/rocRAND/0001-rocRAND-build-gfx1035-options.patch b/patches/rocm-6.1.2/rocRAND/0001-rocRAND-add-gfx1010-gfx1035-and-gfx11.patch similarity index 78% rename from patches/rocm-6.1.2/rocRAND/0001-rocRAND-build-gfx1035-options.patch rename to patches/rocm-6.1.2/rocRAND/0001-rocRAND-add-gfx1010-gfx1035-and-gfx11.patch index 6f939ce..32cc4cf 100644 --- a/patches/rocm-6.1.2/rocRAND/0001-rocRAND-build-gfx1035-options.patch +++ b/patches/rocm-6.1.2/rocRAND/0001-rocRAND-add-gfx1010-gfx1035-and-gfx11.patch @@ -1,14 +1,14 @@ -From 55bb9a72de1235d8df19dfe496ae784e5fc05e6b Mon Sep 17 00:00:00 2001 +From 39a9b24ea6c945795f101364b2f65d24fe552f8b Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 5 Jan 2024 10:43:18 +0200 -Subject: [PATCH] rocRAND build gfx1035 options +Subject: [PATCH] rocRAND add gfx1010 gfx1035 and gfx11 Signed-off-by: Mika Laitio --- - CMakeLists.txt | 2 +- - library/include/rocrand/rocrand_common.h | 3 ++- - rmake.py | 4 ++-- - 3 files changed, 5 insertions(+), 4 deletions(-) + CMakeLists.txt | 2 +- + library/include/rocrand/rocrand_common.h | 11 ++++++++++- + rmake.py | 4 ++-- + 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e1dec5..f05f64d 100644 @@ -24,16 +24,24 @@ index 6e1dec5..f05f64d 100644 if (AMDGPU_TARGETS) if( AMDGPU_TARGETS STREQUAL "all" ) diff --git a/library/include/rocrand/rocrand_common.h b/library/include/rocrand/rocrand_common.h -index 1aad64f..0311ac2 100644 +index 1aad64f..cc4883a 100644 --- a/library/include/rocrand/rocrand_common.h +++ b/library/include/rocrand/rocrand_common.h -@@ -61,7 +61,8 @@ namespace detail { +@@ -61,7 +61,16 @@ namespace detail { defined(__gfx906__) || \ defined(__gfx908__) || \ defined(__gfx909__) || \ - defined(__gfx1030__) ) ++ defined(__gfx1010__) || \ + defined(__gfx1030__) || \ -+ defined(__gfx1035__) ) ++ defined(__gfx1031__) || \ ++ defined(__gfx1032__) || \ ++ defined(__gfx1035__) || \ ++ defined(__gfx1036__) || \ ++ defined(__gfx1100__) || \ ++ defined(__gfx1101__) || \ ++ defined(__gfx1102__) || \ ++ defined(__gfx1103__) ) #if !defined(ROCRAND_ENABLE_INLINE_ASM) #define ROCRAND_ENABLE_INLINE_ASM #endif @@ -53,5 +61,5 @@ index 93e4dc1..5955cc2 100644 help='Verbose build (default: False)') return parser.parse_args() -- -2.41.0 +2.45.2