Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CIR][CUDA] Add target-specific attributes #1457

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion clang/include/clang/CIR/Dialect/IR/CIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -3560,11 +3560,12 @@ def CC_C : I32EnumAttrCase<"C", 1, "c">;
def CC_SpirKernel : I32EnumAttrCase<"SpirKernel", 2, "spir_kernel">;
def CC_SpirFunction : I32EnumAttrCase<"SpirFunction", 3, "spir_function">;
def CC_OpenCLKernel : I32EnumAttrCase<"OpenCLKernel", 4, "opencl_kernel">;
def CC_PTXKernel : I32EnumAttrCase<"PTXKernel", 5, "ptx_kernel">;

def CallingConv : I32EnumAttr<
"CallingConv",
"calling convention",
[CC_C, CC_SpirKernel, CC_SpirFunction, CC_OpenCLKernel]> {
[CC_C, CC_SpirKernel, CC_SpirFunction, CC_OpenCLKernel, CC_PTXKernel]> {
let cppNamespace = "::cir";
}

Expand Down
4 changes: 1 addition & 3 deletions clang/include/clang/CIR/MissingFeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ struct MissingFeatures {
static bool exceptions() { return false; }
static bool metaDataNode() { return false; }
static bool emitDeclMetadata() { return false; }
static bool emitNVVMMetadata() { return false; }
static bool emitScalarRangeCheck() { return false; }
static bool stmtExprEvaluation() { return false; }
static bool setCallingConv() { return false; }
Expand Down Expand Up @@ -470,9 +471,6 @@ struct MissingFeatures {
// can optimize away the store and load ops. Seems like an early optimization.
static bool returnValueDominatingStoreOptmiization() { return false; }

// Globals (vars and functions) may have attributes that are target dependent.
static bool setTargetAttributes() { return false; }

// CIR modules parsed from text form may not carry the triple or data layout
// specs. We should make it always present.
static bool makeTripleAlwaysPresent() { return false; }
Expand Down
10 changes: 6 additions & 4 deletions clang/lib/CIR/CodeGen/CIRGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -903,7 +903,7 @@ void CIRGenModule::setNonAliasAttributes(GlobalDecl gd, mlir::Operation *go) {
if (f)
assert(!cir::MissingFeatures::setSectionForFuncOp());
}
assert(!cir::MissingFeatures::setTargetAttributes());
getTargetCIRGenInfo().setTargetAttributes(d, go, *this);
}

static llvm::SmallVector<int64_t> indexesOfArrayAttr(mlir::ArrayAttr indexes) {
Expand Down Expand Up @@ -1211,10 +1211,8 @@ CIRGenModule::getOrCreateCIRGlobal(StringRef mangledName, mlir::Type ty,
// something closer to GlobalValue::isDeclaration instead of checking for
// initializer.
if (gv.isDeclaration()) {
// TODO(cir): set target attributes
getTargetCIRGenInfo().setTargetAttributes(d, gv, *this);

// External HIP managed variables needed to be recorded for transformation
// in both device and host compilations.
// External HIP managed variables needed to be recorded for transformation
// in both device and host compilations.
if (getLangOpts().CUDA && d && d->hasAttr<HIPManagedAttr>() &&
Expand Down Expand Up @@ -2920,6 +2918,10 @@ void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl,
// TODO(cir): Complete the remaining part of the function.
assert(!cir::MissingFeatures::setFunctionAttributes());

if (!isIncompleteFunction && func.isDeclaration())
getTargetCIRGenInfo().setTargetAttributes(globalDecl.getDecl(), func,
*this);

// TODO(cir): This needs a lot of work to better match CodeGen. That
// ultimately ends up in setGlobalVisibility, which already has the linkage of
// the LLVM GV (corresponding to our FuncOp) computed, so it doesn't have to
Expand Down
28 changes: 28 additions & 0 deletions clang/lib/CIR/CodeGen/TargetInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,11 +345,39 @@ class NVPTXTargetCIRGenInfo : public TargetCIRGenInfo {
public:
NVPTXTargetCIRGenInfo(CIRGenTypes &cgt)
: TargetCIRGenInfo(std::make_unique<NVPTXABIInfo>(cgt)) {}

mlir::Type getCUDADeviceBuiltinSurfaceDeviceType() const override {
  // On the device side a texture/surface reference is not a pointer: it is
  // modeled as an opaque handle carried in a signed 64-bit integer.
  mlir::MLIRContext *mlirCtx = &getABIInfo().CGT.getMLIRContext();
  return cir::IntType::get(mlirCtx, /*width=*/64, /*isSigned=*/true);
}

/// Attach NVPTX-specific attributes to the given global.
/// \param decl the originating AST declaration; may be null for
///        compiler-synthesized globals, in which case nothing is done.
/// \param global the CIR operation emitted for the declaration.
/// \param cgm the module being emitted.
void setTargetAttributes(const clang::Decl *decl, mlir::Operation *global,
                         CIRGenModule &cgm) const override {
  // Variables only need NVVM annotation metadata, which is not
  // implemented yet.
  if (decl && clang::isa<clang::VarDecl>(decl)) {
    assert(!cir::MissingFeatures::emitNVVMMetadata());
    return;
  }

  const auto *fd = clang::dyn_cast_or_null<clang::FunctionDecl>(decl);
  if (!fd)
    return;

  cir::FuncOp func = mlir::cast<cir::FuncOp>(global);
  // Attributes below only apply to definitions.
  if (func.isDeclaration())
    return;

  if (cgm.getLangOpts().CUDA && fd->hasAttr<CUDAGlobalAttr>()) {
    // CUDA __global__ kernels use the PTX kernel calling convention.
    func.setCallingConv(cir::CallingConv::PTXKernel);

    // In LLVM we should create metadata like:
    // !{<func-ref>, metadata !"kernel", i32 1}
    assert(!cir::MissingFeatures::emitNVVMMetadata());
  }

  // Use hasAttr (not getAttr) for a pure presence check, matching the
  // CUDAGlobalAttr check above.
  if (fd->hasAttr<CUDALaunchBoundsAttr>())
    llvm_unreachable("NYI");
}
};

} // namespace
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/CIR/CodeGen/TargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,15 @@ class TargetCIRGenInfo {
/// Returns ABI info helper for the target.
const ABIInfo &getABIInfo() const { return *Info; }

/// Hook for targets to attach extra target-specific attributes to the
/// given global; the default implementation does nothing.
/// In OG CodeGen the equivalent hook receives an llvm::GlobalValue.
/// However, functions and global variables are separate operation types
/// in Clang IR, so a general mlir::Operation* is used instead.
/// \param decl the originating AST declaration (may be null for
///        compiler-synthesized globals — implementations must check).
virtual void setTargetAttributes(const clang::Decl *decl,
mlir::Operation *global,
CIRGenModule &module) const {}

virtual bool isScalarizableAsmOperand(CIRGenFunction &CGF,
mlir::Type Ty) const {
return false;
Expand Down
6 changes: 4 additions & 2 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -763,11 +763,11 @@ mlir::LLVM::Linkage convertLinkage(cir::GlobalLinkageKind linkage) {
};
}

mlir::LLVM::CConv convertCallingConv(cir::CallingConv callinvConv) {
mlir::LLVM::CConv convertCallingConv(cir::CallingConv callingConv) {
using CIR = cir::CallingConv;
using LLVM = mlir::LLVM::CConv;

switch (callinvConv) {
switch (callingConv) {
case CIR::C:
return LLVM::C;
case CIR::SpirKernel:
Expand All @@ -776,6 +776,8 @@ mlir::LLVM::CConv convertCallingConv(cir::CallingConv callinvConv) {
return LLVM::SPIR_FUNC;
case CIR::OpenCLKernel:
llvm_unreachable("NYI");
case CIR::PTXKernel:
return LLVM::PTX_Kernel;
}
llvm_unreachable("Unknown calling convention");
}
Expand Down
33 changes: 17 additions & 16 deletions clang/test/CIR/CodeGen/CUDA/simple.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ __device__ void device_fn(int* a, double b, float c) {}
// CIR-DEVICE: cir.func @_Z9device_fnPidf

__global__ void global_fn(int a) {}
// CIR-DEVICE: @_Z9global_fni
// CIR-DEVICE: @_Z9global_fni({{.*}} cc(ptx_kernel)
// LLVM-DEVICE: define dso_local ptx_kernel void @_Z9global_fni

// Check for device stub emission.

Expand All @@ -32,9 +33,9 @@ __global__ void global_fn(int a) {}
// CIR-HOST: cir.get_global @_Z24__device_stub__global_fni
// CIR-HOST: cir.call @cudaLaunchKernel

// COM: LLVM-HOST: void @_Z24__device_stub__global_fni
// COM: LLVM-HOST: call i32 @__cudaPopCallConfiguration
// COM: LLVM-HOST: call i32 @cudaLaunchKernel(ptr @_Z24__device_stub__global_fni
// LLVM-HOST: void @_Z24__device_stub__global_fni
// LLVM-HOST: call i32 @__cudaPopCallConfiguration
// LLVM-HOST: call i32 @cudaLaunchKernel(ptr @_Z24__device_stub__global_fni

int main() {
global_fn<<<1, 1>>>(1);
Expand All @@ -51,15 +52,15 @@ int main() {
// CIR-HOST: cir.call @_Z24__device_stub__global_fni([[Arg]])
// CIR-HOST: }

// COM: LLVM-HOST: define dso_local i32 @main
// COM: LLVM-HOST: alloca %struct.dim3
// COM: LLVM-HOST: alloca %struct.dim3
// COM: LLVM-HOST: call void @_ZN4dim3C1Ejjj
// COM: LLVM-HOST: call void @_ZN4dim3C1Ejjj
// COM: LLVM-HOST: [[LLVMConfigOK:%[0-9]+]] = call i32 @__cudaPushCallConfiguration
// COM: LLVM-HOST: br [[LLVMConfigOK]], label %[[Good:[0-9]+]], label [[Bad:[0-9]+]]
// COM: LLVM-HOST: [[Good]]:
// COM: LLVM-HOST: call void @_Z24__device_stub__global_fni
// COM: LLVM-HOST: br label [[Bad]]
// COM: LLVM-HOST: [[Bad]]:
// COM: LLVM-HOST: ret i32
// LLVM-HOST: define dso_local i32 @main
// LLVM-HOST: alloca %struct.dim3
// LLVM-HOST: alloca %struct.dim3
// LLVM-HOST: call void @_ZN4dim3C1Ejjj
// LLVM-HOST: call void @_ZN4dim3C1Ejjj
// LLVM-HOST: [[LLVMConfigOK:%[0-9]+]] = call i32 @__cudaPushCallConfiguration
// LLVM-HOST: br [[LLVMConfigOK]], label %[[Good:[0-9]+]], label [[Bad:[0-9]+]]
// LLVM-HOST: [[Good]]:
// LLVM-HOST: call void @_Z24__device_stub__global_fni
// LLVM-HOST: br label [[Bad]]
// LLVM-HOST: [[Bad]]:
// LLVM-HOST: ret i32