1212
1313#include " AMDGPU.h"
1414#include " GCNSubtarget.h"
15+ #include " Utils/AMDGPUBaseInfo.h"
1516#include " llvm/CodeGen/TargetPassConfig.h"
1617#include " llvm/IR/IntrinsicsAMDGPU.h"
1718#include " llvm/IR/IntrinsicsR600.h"
2223
2324using namespace llvm ;
2425
// X-macro: with this definition in effect, every AMDGPU_ATTRIBUTE(Name, Str)
// entry pulled in from AMDGPUAttributes.def expands to the enumerator Name_POS.
// NOTE(review): the macro is redefined further down in this file without a
// visible #undef, so the .def file presumably undefines it at its end - confirm.
26+ #define AMDGPU_ATTRIBUTE (Name, Str ) Name##_POS,
27+
// Bit index of each implicit argument. The enumerators carry no explicit
// values, so they are numbered consecutively from 0 in .def order and
// LAST_ARG_POS follows the final entry.
28+ enum ImplicitArgumentPositions {
29+ #include " AMDGPUAttributes.def"
30+ LAST_ARG_POS
31+ };
32+
// X-macro: with this definition in effect, every AMDGPU_ATTRIBUTE(Name, Str)
// entry from AMDGPUAttributes.def expands to `Name = 1 << Name_POS`, i.e. a
// single-bit mask at the position recorded in ImplicitArgumentPositions.
33+ #define AMDGPU_ATTRIBUTE (Name, Str ) Name = 1 << Name##_POS,
34+
// Bit mask with one bit per implicit argument; NOT_IMPLICIT_INPUT is the
// empty mask.
2535enum ImplicitArgumentMask {
2636 NOT_IMPLICIT_INPUT = 0 ,
27-
28- // SGPRs
29- DISPATCH_PTR = 1 << 0 ,
30- QUEUE_PTR = 1 << 1 ,
31- DISPATCH_ID = 1 << 2 ,
32- IMPLICIT_ARG_PTR = 1 << 3 ,
33- WORKGROUP_ID_X = 1 << 4 ,
34- WORKGROUP_ID_Y = 1 << 5 ,
35- WORKGROUP_ID_Z = 1 << 6 ,
36-
37- // VGPRS:
38- WORKITEM_ID_X = 1 << 7 ,
39- WORKITEM_ID_Y = 1 << 8 ,
40- WORKITEM_ID_Z = 1 << 9 ,
41- ALL_ARGUMENT_MASK = (1 << 10 ) - 1
37+ #include " AMDGPUAttributes.def"
// All bits below LAST_ARG_POS set, so the mask grows with the .def file
// instead of relying on a hard-coded bit count.
38+ ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
4239};
4340
// X-macro: with this definition in effect, every AMDGPU_ATTRIBUTE(Name, Str)
// entry from AMDGPUAttributes.def expands to the pair initializer {Name, Str}.
41+ #define AMDGPU_ATTRIBUTE (Name, Str ) {Name, Str},
// Table pairing each ImplicitArgumentMask bit (.first) with the function
// attribute string associated with it (.second).
4442static constexpr std::pair<ImplicitArgumentMask,
4543 StringLiteral> ImplicitAttrs[] = {
46- {DISPATCH_PTR, " amdgpu-no-dispatch-ptr" },
47- {QUEUE_PTR, " amdgpu-no-queue-ptr" },
48- {DISPATCH_ID, " amdgpu-no-dispatch-id" },
49- {IMPLICIT_ARG_PTR, " amdgpu-no-implicitarg-ptr" },
50- {WORKGROUP_ID_X, " amdgpu-no-workgroup-id-x" },
51- {WORKGROUP_ID_Y, " amdgpu-no-workgroup-id-y" },
52- {WORKGROUP_ID_Z, " amdgpu-no-workgroup-id-z" },
53- {WORKITEM_ID_X, " amdgpu-no-workitem-id-x" },
54- {WORKITEM_ID_Y, " amdgpu-no-workitem-id-y" },
55- {WORKITEM_ID_Z, " amdgpu-no-workitem-id-z" }
44+ #include " AMDGPUAttributes.def"
5645};
5746
5847// We do not need to note the x workitem or workgroup id because they are always
@@ -90,7 +79,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
9079 case Intrinsic::amdgcn_queue_ptr:
9180 case Intrinsic::amdgcn_is_shared:
9281 case Intrinsic::amdgcn_is_private:
93- // TODO: Does not require queue ptr on gfx9+
82+ // TODO: Does not require the queue pointer on gfx9+
9483 case Intrinsic::trap:
9584 case Intrinsic::debugtrap:
9685 IsQueuePtr = true ;
@@ -112,6 +101,17 @@ static bool isDSAddress(const Constant *C) {
112101 return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
113102}
114103
104+ // / Returns true if the function requires the implicit argument be passed
105+ // / regardless of the function contents.
106+ static bool funcRequiresHostcallPtr (const Function &F) {
107+ // Sanitizers require the hostcall buffer passed in the implicit arguments.
108+ return F.hasFnAttribute (Attribute::SanitizeAddress) ||
109+ F.hasFnAttribute (Attribute::SanitizeThread) ||
110+ F.hasFnAttribute (Attribute::SanitizeMemory) ||
111+ F.hasFnAttribute (Attribute::SanitizeHWAddress) ||
112+ F.hasFnAttribute (Attribute::SanitizeMemTag);
113+ }
114+
115115class AMDGPUInformationCache : public InformationCache {
116116public:
117117 AMDGPUInformationCache (const Module &M, AnalysisGetter &AG,
@@ -129,7 +129,7 @@ class AMDGPUInformationCache : public InformationCache {
129129 }
130130
131131private:
132- // / Check if the ConstantExpr \p CE requires queue ptr attribute .
132+ // / Check if the ConstantExpr \p CE requires the queue pointer .
133133 static bool visitConstExpr (const ConstantExpr *CE) {
134134 if (CE->getOpcode () == Instruction::AddrSpaceCast) {
135135 unsigned SrcAS = CE->getOperand (0 )->getType ()->getPointerAddressSpace ();
@@ -163,7 +163,7 @@ class AMDGPUInformationCache : public InformationCache {
163163 }
164164
165165public:
166- // / Returns true if \p Fn needs a queue ptr attribute because of \p C.
166+ // / Returns true if \p Fn needs the queue pointer because of \p C.
167167 bool needsQueuePtr (const Constant *C, Function &Fn) {
168168 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC (Fn.getCallingConv ());
169169 bool HasAperture = hasApertureRegs (Fn);
@@ -182,7 +182,7 @@ class AMDGPUInformationCache : public InformationCache {
182182 }
183183
184184private:
185- // / Used to determine if the Constant needs a queue ptr attribute .
185+ // / Used to determine if the Constant needs the queue pointer .
186186 DenseMap<const Constant *, uint8_t > ConstantStatus;
187187};
188188
@@ -327,7 +327,20 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
327327
328328 void initialize (Attributor &A) override {
329329 Function *F = getAssociatedFunction ();
330+
331+ // If the function requires the implicit arg pointer due to sanitizers,
332+ // assume it's needed even if explicitly marked as not requiring it.
333+ const bool NeedsHostcall = funcRequiresHostcallPtr (*F);
334+ if (NeedsHostcall) {
335+ removeAssumedBits (IMPLICIT_ARG_PTR);
336+ removeAssumedBits (HOSTCALL_PTR);
337+ }
338+
330339 for (auto Attr : ImplicitAttrs) {
340+ if (NeedsHostcall &&
341+ (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
342+ continue ;
343+
331344 if (F->hasFnAttribute (Attr.second ))
332345 addKnownBits (Attr.first );
333346 }
@@ -355,7 +368,6 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
355368 return indicatePessimisticFixpoint ();
356369
357370 bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC (F->getCallingConv ());
358- auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
359371
360372 bool NeedsQueuePtr = false ;
361373
@@ -377,13 +389,58 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
377389 }
378390 }
379391
380- // If we found that we need amdgpu-queue-ptr, nothing else to do.
392+ if (!NeedsQueuePtr) {
393+ NeedsQueuePtr = checkForQueuePtr (A);
394+ }
395+
381396 if (NeedsQueuePtr) {
382397 removeAssumedBits (QUEUE_PTR);
383- return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED :
384- ChangeStatus::UNCHANGED;
385398 }
386399
400+ if (funcRetrievesHostcallPtr (A)) {
401+ removeAssumedBits (IMPLICIT_ARG_PTR);
402+ removeAssumedBits (HOSTCALL_PTR);
403+ }
404+
405+ return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED
406+ : ChangeStatus::UNCHANGED;
407+ }
408+
409+ ChangeStatus manifest (Attributor &A) override {
410+ SmallVector<Attribute, 8 > AttrList;
411+ LLVMContext &Ctx = getAssociatedFunction ()->getContext ();
412+
413+ for (auto Attr : ImplicitAttrs) {
414+ if (isKnown (Attr.first ))
415+ AttrList.push_back (Attribute::get (Ctx, Attr.second ));
416+ }
417+
418+ return IRAttributeManifest::manifestAttrs (A, getIRPosition (), AttrList,
419+ /* ForceReplace */ true );
420+ }
421+
// Builds the textual form of this attribute: an opening tag, then the
// attribute string of every ImplicitAttrs entry, each preceded by a single
// separator character, then a closing bracket.
422+ const std::string getAsStr () const override {
423+ std::string Str;
424+ raw_string_ostream OS (Str);
425+ OS << " AMDInfo[" ;
// The loop is unconditional: every table entry is listed, whether or not its
// bit is currently known or assumed.
426+ for (auto Attr : ImplicitAttrs)
427+ OS << ' ' << Attr.second ;
428+ OS << " ]" ;
429+ return OS.str ();
430+ }
431+
432+ // / See AbstractAttribute::trackStatistics()
// The body is empty: this attribute records no statistics.
433+ void trackStatistics () const override {}
434+
435+ private:
436+ bool checkForQueuePtr (Attributor &A) {
437+ Function *F = getAssociatedFunction ();
438+ bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC (F->getCallingConv ());
439+
440+ auto &InfoCache = static_cast <AMDGPUInformationCache &>(A.getInfoCache ());
441+
442+ bool NeedsQueuePtr = false ;
443+
387444 auto CheckAddrSpaceCasts = [&](Instruction &I) {
388445 unsigned SrcAS = static_cast <AddrSpaceCastInst &>(I).getSrcAddressSpace ();
389446 if (castRequiresQueuePtr (SrcAS)) {
@@ -398,69 +455,63 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
398455 // `checkForAllInstructions` is much cheaper than going through all
399456 // instructions, try it first.
400457
401- // amdgpu- queue-ptr is not needed if aperture regs is present.
458+ // The queue pointer is not needed if aperture regs are present.
402459 if (!HasApertureRegs) {
403460 bool UsedAssumedInformation = false ;
404461 A.checkForAllInstructions (CheckAddrSpaceCasts, *this ,
405462 {Instruction::AddrSpaceCast},
406463 UsedAssumedInformation);
407464 }
408465
409- // If we found that we need amdgpu-queue-ptr, nothing else to do.
410- if (NeedsQueuePtr) {
411- removeAssumedBits (QUEUE_PTR);
412- return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED :
413- ChangeStatus::UNCHANGED;
414- }
466+ // If we found that we need the queue pointer, nothing else to do.
467+ if (NeedsQueuePtr)
468+ return true ;
415469
416- if (!IsNonEntryFunc && HasApertureRegs) {
417- return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED :
418- ChangeStatus::UNCHANGED;
419- }
470+ if (!IsNonEntryFunc && HasApertureRegs)
471+ return false ;
420472
421473 for (BasicBlock &BB : *F) {
422474 for (Instruction &I : BB) {
423475 for (const Use &U : I.operands ()) {
424476 if (const auto *C = dyn_cast<Constant>(U)) {
425- if (InfoCache.needsQueuePtr (C, *F)) {
426- removeAssumedBits (QUEUE_PTR);
427- return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED :
428- ChangeStatus::UNCHANGED;
429- }
477+ if (InfoCache.needsQueuePtr (C, *F))
478+ return true ;
430479 }
431480 }
432481 }
433482 }
434483
435- return getAssumed () != OrigAssumed ? ChangeStatus::CHANGED :
436- ChangeStatus::UNCHANGED;
484+ return false ;
437485 }
438486
439- ChangeStatus manifest (Attributor &A) override {
440- SmallVector<Attribute, 8 > AttrList;
441- LLVMContext &Ctx = getAssociatedFunction ()->getContext ();
442-
443- for (auto Attr : ImplicitAttrs) {
444- if (isKnown (Attr.first ))
445- AttrList.push_back (Attribute::get (Ctx, Attr.second ));
446- }
// Returns true unless checkForAllCallLikeInstructions reports that the
// DoesNotLeadToHostcallPtr predicate held for every call-like instruction it
// visited. A true result therefore means "may retrieve the hostcall pointer".
// NOTE(review): a failure of the traversal itself also yields true - confirm
// this conservative outcome is intended.
487+ bool funcRetrievesHostcallPtr (Attributor &A) {
// Offset used as the start of the queried range below.
// NOTE(review): presumably a byte offset into the implicit argument block - confirm.
488+ auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition ();
489+
490+ // Check if this is a call to the implicitarg_ptr builtin and it
491+ // is used to retrieve the hostcall pointer. The implicit arg for
492+ // hostcall is not used only if every use of the implicitarg_ptr
493+ // is a load that clearly does not retrieve any byte of the
494+ // hostcall pointer. We check this by tracing all the uses of the
495+ // initial call to the implicitarg_ptr intrinsic.
496+ auto DoesNotLeadToHostcallPtr = [&](Instruction &I) {
497+ auto &Call = cast<CallBase>(I);
// Calls to anything other than the implicitarg_ptr intrinsic are harmless.
498+ if (Call.getIntrinsicID () != Intrinsic::amdgcn_implicitarg_ptr)
499+ return true ;
500+
// Query pointer-access information for the value returned by this call.
501+ const auto &PointerInfoAA = A.getAAFor <AAPointerInfo>(
502+ *this , IRPosition::callsite_returned (Call), DepClassTy::REQUIRED);
503+
// Range of size 8 starting at Pos.
// NOTE(review): 8 is presumably the byte width of the hostcall pointer - confirm.
504+ AAPointerInfo::OffsetAndSize OAS (Pos, 8 );
// The call is harmless only if every access interfering with that range comes
// from an instruction for which isDroppable() returns true.
505+ return PointerInfoAA.forallInterferingAccesses (
506+ OAS, [](const AAPointerInfo::Access &Acc, bool IsExact) {
507+ return Acc.getRemoteInst ()->isDroppable ();
508+ });
509+ };
447510
448- return IRAttributeManifest::manifestAttrs (A, getIRPosition (), AttrList,
449- /* ForceReplace */ true );
511+ bool UsedAssumedInformation = false ;
// The predicate answers "does NOT lead to the hostcall pointer", so the
// traversal result is negated to answer "retrieves the hostcall pointer".
512+ return !A.checkForAllCallLikeInstructions (DoesNotLeadToHostcallPtr, *this ,
513+ UsedAssumedInformation);
450514 }
451-
452- const std::string getAsStr () const override {
453- std::string Str;
454- raw_string_ostream OS (Str);
455- OS << " AMDInfo[" ;
456- for (auto Attr : ImplicitAttrs)
457- OS << ' ' << Attr.second ;
458- OS << " ]" ;
459- return OS.str ();
460- }
461-
462- // / See AbstractAttribute::trackStatistics()
463- void trackStatistics () const override {}
464515};
465516
466517AAAMDAttributes &AAAMDAttributes::createForPosition (const IRPosition &IRP,
@@ -497,7 +548,8 @@ class AMDGPUAttributor : public ModulePass {
497548 BumpPtrAllocator Allocator;
498549 AMDGPUInformationCache InfoCache (M, AG, Allocator, nullptr , *TM);
499550 DenseSet<const char *> Allowed (
500- {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, &AACallEdges::ID});
551+ {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
552+ &AACallEdges::ID, &AAPointerInfo::ID});
501553
502554 Attributor A (Functions, InfoCache, CGUpdater, &Allowed);
503555
0 commit comments