Skip to content

Commit e2489b0

Browse files
kpyzhov authored and zhang2amd committed
[AMDGPU] Use "hostcall" module flag instead of searching for ockl_hostcall_internal() declaration.
The current way of detecting hostcalls, by looking for the "ockl_hostcall_internal()" function in the module, is not reliable enough. LTO may rename the "ockl_hostcall_internal()" function when an application is compiled with "-fgpu-rdc", causing the MetadataStreamer pass to fail to detect hostcalls and therefore not set the "hidden_hostcall_buffer" kernel argument. This change adds a new module flag, "amdgpu_hostcall", that can be used to detect whether GPU functions use hostcalls for printf. Differential revision: https://reviews.llvm.org/D110337 [AMDGPU] Correction to 095c48f. Differential Revision: https://reviews.llvm.org/D110337 Change-Id: I5eb847884f4cb98687dcfdef85f78d2d2c380bcd
1 parent f54f00d commit e2489b0

File tree

3 files changed

+13
-8
lines changed

3 files changed

+13
-8
lines changed

llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -794,7 +794,8 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
794794
if (!HiddenArgNumBytes)
795795
return;
796796

797-
auto &DL = Func.getParent()->getDataLayout();
797+
const Module *M = Func.getParent();
798+
auto &DL = M->getDataLayout();
798799
auto Int64Ty = Type::getInt64Ty(Func.getContext());
799800

800801
if (HiddenArgNumBytes >= 8)
@@ -810,16 +811,16 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
810811
auto Int8PtrTy =
811812
Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
812813

813-
// Emit "printf buffer" argument if printf is used, otherwise emit dummy
814-
// "none" argument.
814+
// Emit "printf buffer" argument if printf is used, emit "hostcall buffer"
815+
// if "hostcall" module flag is set, otherwise emit dummy "none" argument.
815816
if (HiddenArgNumBytes >= 32) {
816-
if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
817+
if (M->getNamedMetadata("llvm.printf.fmts"))
817818
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
818819
Args);
819-
else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
820+
else if (M->getModuleFlag("amdgpu_hostcall")) {
820821
// The printf runtime binding pass should have ensured that hostcall and
821822
// printf are not used in the same module.
822-
assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
823+
assert(!M->getNamedMetadata("llvm.printf.fmts"));
823824
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
824825
Args);
825826
} else

llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
6363
auto Int64Ty = Builder.getInt64Ty();
6464
auto M = Builder.GetInsertBlock()->getModule();
6565
auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
66+
if (!M->getModuleFlag("amdgpu_hostcall")) {
67+
M->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
68+
}
6669
return Builder.CreateCall(Fn, Version);
6770
}
6871

llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@
2929
; CHECK: .name: test_kernel
3030
; CHECK: .symbol: test_kernel.kd
3131

32-
declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64)
33-
3432
define amdgpu_kernel void @test_kernel(i8 %a) #0
3533
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
3634
!kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
@@ -51,4 +49,7 @@ attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
5149
!opencl.ocl.version = !{!90}
5250
!90 = !{i32 2, i32 0}
5351

52+
!llvm.module.flags = !{!0}
53+
!0 = !{i32 1, !"amdgpu_hostcall", i32 1}
54+
5455
; PARSER: AMDGPU HSA Metadata Parser Test: PASS

0 commit comments

Comments
 (0)