From a92e64d9a769d3d7b58d5f028fc157e56b879282 Mon Sep 17 00:00:00 2001 From: Joao Saffran Date: Tue, 29 Oct 2024 19:39:31 +0000 Subject: [PATCH 1/4] adding llvm intrinsic --- clang/include/clang/Basic/Builtins.td | 6 ++++++ clang/lib/CodeGen/CGBuiltin.cpp | 10 ++++++++++ clang/lib/CodeGen/CGHLSLRuntime.h | 2 +- clang/lib/Headers/hlsl/hlsl_intrinsics.h | 17 +++++++++++++++++ clang/lib/Sema/SemaHLSL.cpp | 8 ++++++++ clang/test/CodeGenHLSL/builtins/clip.hlsl | 14 ++++++++++++++ llvm/include/llvm/IR/IntrinsicsDirectX.td | 1 + llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 + 8 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGenHLSL/builtins/clip.hlsl diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 9bd67e0cefebc3..1b8697b209bd70 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4877,6 +4877,12 @@ def HLSLSplitDouble: LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLClip: LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_clip"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(bool)"; +} + // Builtins for XRay. 
def XRayCustomEvent : Builtin { let Spellings = ["__xray_customevent"]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 34fedd67114751..1588c4917abbfb 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19093,6 +19093,16 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: { "asuint operands types mismatch"); return handleHlslSplitdouble(E, this); } + case Builtin::BI__builtin_hlsl_elementwise_clip: + + assert(E->getArg(0)->getType()->hasFloatingRepresentation() && + "clip operands types mismatch"); + + Value *Op0 = EmitScalarExpr(E->getArg(0)); + auto *CMP = + Builder.CreateFCmpOLT(Op0, ConstantFP::get(Builder.getFloatTy(), 0.0)); + return Builder.CreateIntrinsic( + VoidTy, CGM.getHLSLRuntime().getClipIntrinsic(), {CMP}, nullptr); } return nullptr; } diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index cd533cad84e9fb..06abc95bdb734f 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -91,7 +91,7 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane) - + GENERATE_HLSL_INTRINSIC_FUNCTION(Clip, clip) GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, handle_fromBinding) //===----------------------------------------------------------------------===// diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index d9f3a17ea23d8e..424c2f7e7c23ee 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -655,6 +655,23 @@ double3 clamp(double3, double3, double3); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp) double4 clamp(double4, double4, double4); +//===----------------------------------------------------------------------===// +// clip builtins 
+//===----------------------------------------------------------------------===// + +/// \fn void clip(T Val) +/// \brief Discards the current pixel if the specified value is less than zero. +/// \param Val The input value. + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip) +void clip(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip) +void clip(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip) +void clip(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clip) +void clip(float4); + //===----------------------------------------------------------------------===// // cos builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index a472538236e2d9..e360c54dc0760e 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2110,6 +2110,14 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; break; } + case Builtin::BI__builtin_hlsl_elementwise_clip: { + if (SemaRef.checkArgCount(TheCall, 1)) + return true; + + if (CheckScalarOrVector(&SemaRef, TheCall, SemaRef.Context.FloatTy, 0)) + return true; + break; + } case Builtin::BI__builtin_elementwise_acos: case Builtin::BI__builtin_elementwise_asin: case Builtin::BI__builtin_elementwise_atan: diff --git a/clang/test/CodeGenHLSL/builtins/clip.hlsl b/clang/test/CodeGenHLSL/builtins/clip.hlsl new file mode 100644 index 00000000000000..426ec8f128436a --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/clip.hlsl @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-pixel %s -fnative-half-type -emit-llvm -o - | FileCheck %s + +RWBuffer Buf; + +//CHECK-LABEL: define void @main() +float4 main( ) { + float4 p1 = Buf[0]; + //CHECK: [[LOAD:%.*]] = load <4 x float>, ptr %p1{{.*}}, align 16 + //CHECK-NEXT: [[EXTR:%.*]] = extractelement <4 x float> [[LOAD]], i32 3 + //CHECK-NEXT: [[FCMP:%.*]] 
= fcmp olt float [[EXTR]], 0.000000e+00 + //CHECK-NEXT: call void @llvm.dx.clip(i1 [[FCMP]]) + clip(p1.a); + return p1; +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index e30d37f69f781e..0d8dc4ead02c8a 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -92,4 +92,5 @@ def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, L def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>], [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>; def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; +def int_dx_clip : DefaultAttrsIntrinsic<[], [llvm_i1_ty], [IntrNoMem]>; } diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index ddb47390537412..edafe235036e1a 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -88,6 +88,7 @@ let TargetPrefix = "spv" in { def int_spv_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>; def int_spv_radians : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>; + def int_spv_clip : DefaultAttrsIntrinsic<[], [llvm_i1_ty], [IntrNoMem]>; // Create resource handle given the binding information. 
Returns a // type appropriate for the kind of resource given the set id, binding id, From 0ce2597325c940c85994950a5db1eb7be5dfaeef Mon Sep 17 00:00:00 2001 From: Joao Saffran Date: Wed, 30 Oct 2024 23:29:44 +0000 Subject: [PATCH 2/4] adding DXIL and SPIRV codegen --- clang/lib/CodeGen/CGBuiltin.cpp | 32 +++++++++--- clang/lib/CodeGen/CGHLSLRuntime.h | 1 - clang/test/CodeGenHLSL/builtins/clip.hlsl | 9 ++-- llvm/include/llvm/IR/IntrinsicsSPIRV.td | 2 +- llvm/lib/Target/DirectX/DXIL.td | 51 +++++++++++-------- llvm/lib/Target/SPIRV/SPIRVInstrInfo.td | 1 + .../Target/SPIRV/SPIRVInstructionSelector.cpp | 20 ++++++++ llvm/test/CodeGen/DirectX/clip.ll | 11 ++++ .../CodeGen/SPIRV/hlsl-intrinsics/clip.ll | 18 +++++++ 9 files changed, 110 insertions(+), 35 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/clip.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 1588c4917abbfb..21c6d29cf3ec2d 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -99,6 +99,31 @@ static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, I->addAnnotationMetadata("auto-init"); } +static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) { + Value *Op0 = CGF->EmitScalarExpr(E->getArg(0)); + auto *CMP = CGF->Builder.CreateFCmpOLT( + Op0, ConstantFP::get(CGF->Builder.getFloatTy(), 0.0)); + + if (CGF->CGM.getTarget().getTriple().isDXIL()) + return CGF->Builder.CreateIntrinsic(CGF->VoidTy, llvm::Intrinsic::dx_clip, + {CMP}, nullptr); + + BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn); + BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn); + + CGF->Builder.CreateCondBr(CMP, LT0, End); + + CGF->Builder.SetInsertPoint(LT0); + + auto *IntrCall = CGF->Builder.CreateIntrinsic( + CGF->VoidTy, llvm::Intrinsic::spv_clip, {}, nullptr); + + CGF->Builder.CreateBr(End); + + CGF->Builder.SetInsertPoint(End); + return 
IntrCall; +} + static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) { Value *Op0 = CGF->EmitScalarExpr(E->getArg(0)); const auto *OutArg1 = dyn_cast(E->getArg(1)); @@ -19097,12 +19122,7 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: { assert(E->getArg(0)->getType()->hasFloatingRepresentation() && "clip operands types mismatch"); - - Value *Op0 = EmitScalarExpr(E->getArg(0)); - auto *CMP = - Builder.CreateFCmpOLT(Op0, ConstantFP::get(Builder.getFloatTy(), 0.0)); - return Builder.CreateIntrinsic( - VoidTy, CGM.getHLSLRuntime().getClipIntrinsic(), {CMP}, nullptr); + return handleHlslClip(E, this); } return nullptr; } diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 06abc95bdb734f..568cff65d04df2 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -91,7 +91,6 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane) - GENERATE_HLSL_INTRINSIC_FUNCTION(Clip, clip) GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, handle_fromBinding) //===----------------------------------------------------------------------===// diff --git a/clang/test/CodeGenHLSL/builtins/clip.hlsl b/clang/test/CodeGenHLSL/builtins/clip.hlsl index 426ec8f128436a..994a7c9f3311c3 100644 --- a/clang/test/CodeGenHLSL/builtins/clip.hlsl +++ b/clang/test/CodeGenHLSL/builtins/clip.hlsl @@ -1,14 +1,11 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-pixel %s -fnative-half-type -emit-llvm -o - | FileCheck %s - -RWBuffer Buf; +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-vulkan-pixel %s -fnative-half-type -emit-llvm -o - | FileCheck %s //CHECK-LABEL: define void @main() -float4 main( ) { - float4 p1 = Buf[0]; +void test(float Buf) { //CHECK: [[LOAD:%.*]] = load <4 x float>, ptr %p1{{.*}}, align 16 
//CHECK-NEXT: [[EXTR:%.*]] = extractelement <4 x float> [[LOAD]], i32 3 //CHECK-NEXT: [[FCMP:%.*]] = fcmp olt float [[EXTR]], 0.000000e+00 //CHECK-NEXT: call void @llvm.dx.clip(i1 [[FCMP]]) - clip(p1.a); - return p1; + clip(Buf); } diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index edafe235036e1a..9d2f85315f63e0 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -88,7 +88,7 @@ let TargetPrefix = "spv" in { def int_spv_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>; def int_spv_radians : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; def int_spv_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>; - def int_spv_clip : DefaultAttrsIntrinsic<[], [llvm_i1_ty], [IntrNoMem]>; + def int_spv_clip : Intrinsic<[], [], []>; // Create resource handle given the binding information. Returns a // type appropriate for the kind of resource given the set id, binding id, diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 1e8dc63ffa257e..94b1f6ff9cc088 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -739,6 +739,15 @@ def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> { let stages = [Stages]; } +def Discard : DXILOp<82, discard> { + let Doc = "discard the current pixel"; + let LLVMIntrinsic = int_dx_clip; + let arguments = [Int1Ty]; + let result = VoidTy; + let stages = [Stages]; + let attributes = [Attributes]; +} + def ThreadId : DXILOp<93, threadId> { let Doc = "Reads the thread ID"; let LLVMIntrinsic = int_dx_thread_id; @@ -788,20 +797,6 @@ def SplitDouble : DXILOp<102, splitDouble> { let attributes = [Attributes]; } -def AnnotateHandle : DXILOp<217, annotateHandle> { - let Doc = "annotate handle with resource properties"; - let arguments = [HandleTy, ResPropsTy]; - let result = HandleTy; - let 
stages = [Stages]; -} - -def CreateHandleFromBinding : DXILOp<218, createHandleFromBinding> { - let Doc = "create resource handle from binding"; - let arguments = [ResBindTy, Int32Ty, Int1Ty]; - let result = HandleTy; - let stages = [Stages]; -} - def WaveIsFirstLane : DXILOp<110, waveIsFirstLane> { let Doc = "returns 1 for the first lane in the wave"; let LLVMIntrinsic = int_dx_wave_is_first_lane; @@ -811,6 +806,15 @@ def WaveIsFirstLane : DXILOp<110, waveIsFirstLane> { let attributes = [Attributes]; } +def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> { + let Doc = "returns the index of the current lane in the wave"; + let LLVMIntrinsic = int_dx_wave_getlaneindex; + let arguments = []; + let result = Int32Ty; + let stages = [Stages]; + let attributes = [Attributes]; +} + def WaveReadLaneAt: DXILOp<117, waveReadLaneAt> { let Doc = "returns the value from the specified lane"; let LLVMIntrinsic = int_dx_wave_readlane; @@ -821,11 +825,16 @@ def WaveReadLaneAt: DXILOp<117, waveReadLaneAt> { let attributes = [Attributes]; } -def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> { - let Doc = "returns the index of the current lane in the wave"; - let LLVMIntrinsic = int_dx_wave_getlaneindex; - let arguments = []; - let result = Int32Ty; - let stages = [Stages]; - let attributes = [Attributes]; +def AnnotateHandle : DXILOp<217, annotateHandle> { + let Doc = "annotate handle with resource properties"; + let arguments = [HandleTy, ResPropsTy]; + let result = HandleTy; + let stages = [Stages]; +} + +def CreateHandleFromBinding : DXILOp<218, createHandleFromBinding> { + let Doc = "create resource handle from binding"; + let arguments = [ResBindTy, Int32Ty, Int1Ty]; + let result = HandleTy; + let stages = [Stages]; } diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index ee6b70a16417f4..d8e27153bd7d24 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -636,6 +636,7 @@ let 
isReturn = 1, hasDelaySlot = 0, isBarrier = 0, isTerminator = 1, isNotDuplic } def OpLifetimeStart: Op<256, (outs), (ins ID:$ptr, i32imm:$sz), "OpLifetimeStart $ptr, $sz">; def OpLifetimeStop: Op<257, (outs), (ins ID:$ptr, i32imm:$sz), "OpLifetimeStop $ptr, $sz">; +def OpDemoteToHelperInvocation: SimpleOp<"OpDemoteToHelperInvocation", 5380>; // 3.42.18 Atomic Instructions diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 892912a5680113..1cab51139aa5b3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -147,6 +147,9 @@ class SPIRVInstructionSelector : public InstructionSelector { unsigned comparisonOpcode, MachineInstr &I) const; bool selectCross(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; + bool selectClip(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + bool selectICmp(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; bool selectFCmp(Register ResVReg, const SPIRVType *ResType, @@ -1966,6 +1969,20 @@ bool SPIRVInstructionSelector::selectSplatVector(Register ResVReg, return MIB.constrainAllUses(TII, TRI, RBI); } +bool SPIRVInstructionSelector::selectClip(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const { + + const auto Opcode = + STI.getTargetTriple().getVulkanVersion() < llvm::VersionTuple(1, 3) + ? 
SPIRV::OpKill + : SPIRV::OpDemoteToHelperInvocation; + + MachineBasicBlock &BB = *I.getParent(); + return BuildMI(BB, I, I.getDebugLoc(), TII.get(Opcode)) + .constrainAllUses(TII, TRI, RBI); +} + bool SPIRVInstructionSelector::selectCmp(Register ResVReg, const SPIRVType *ResType, unsigned CmpOpc, @@ -2599,6 +2616,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, selectHandleFromBinding(ResVReg, ResType, I); return true; } + case Intrinsic::spv_clip: { + return selectClip(ResVReg, ResType, I); + } default: { std::string DiagMsg; raw_string_ostream OS(DiagMsg); diff --git a/llvm/test/CodeGen/DirectX/clip.ll b/llvm/test/CodeGen/DirectX/clip.ll new file mode 100644 index 00000000000000..54ff924675dbb3 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/clip.ll @@ -0,0 +1,11 @@ +; RUN: opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-pixel %s | FileCheck %s + +; CHECK-LABEL: define void @test_dxil_lowering +; CHECK: call void @dx.op.discard(i32 82, i1 %0) +; +define void @test_dxil_lowering(float noundef %p) #0 { +entry: + %0 = fcmp olt float %p, 0.000000e+00 + call void @llvm.dx.clip(i1 %0) + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll new file mode 100644 index 00000000000000..4c133258f69ba2 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll @@ -0,0 +1,18 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + + +; CHECK-LABEL: define void @test_dxil_lowering +; CHECK: call void @dx.op.discard(i32 82, i1 %0) +; +define spir_func void @test_dxil_lowering(float noundef %Buf) #0 { +entry: + %Buf.addr = alloca float, align 4 + store float %Buf, ptr %Buf.addr, align 4 + %1 = load float, ptr %Buf.addr, align 4 + %2 = fcmp olt float %1, 0.000000e+00 + call void @llvm.spv.clip(i1 %2) + ret void 
+} + +declare void @llvm.spv.clip(i1) #1 From 20af135e25ffd21e13cf9f73f1e1997177b4a4d4 Mon Sep 17 00:00:00 2001 From: Joao Saffran Date: Thu, 31 Oct 2024 19:30:14 +0000 Subject: [PATCH 3/4] adding tests --- clang/include/clang/Basic/Builtins.td | 2 +- clang/lib/CodeGen/CGBuiltin.cpp | 23 ++++++++++- clang/test/CodeGenHLSL/builtins/clip.hlsl | 39 +++++++++++++++---- llvm/include/llvm/IR/IntrinsicsDirectX.td | 2 +- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 9 ++--- .../CodeGen/SPIRV/hlsl-intrinsics/clip.ll | 39 +++++++++++++++---- 6 files changed, 90 insertions(+), 24 deletions(-) diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 1b8697b209bd70..13ba369e323f72 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4880,7 +4880,7 @@ def HLSLSplitDouble: LangBuiltin<"HLSL_LANG"> { def HLSLClip: LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_elementwise_clip"]; let Attributes = [NoThrow, Const]; - let Prototype = "void(bool)"; + let Prototype = "void(...)"; } // Builtins for XRay. 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 21c6d29cf3ec2d..865cc73d166301 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -41,8 +41,10 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/IntrinsicsAMDGPU.h" @@ -55,12 +57,14 @@ #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/IntrinsicsSPIRV.h" #include "llvm/IR/IntrinsicsVE.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" #include "llvm/IR/MemoryModelRelaxationAnnotations.h" +#include "llvm/IR/Type.h" #include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" @@ -101,13 +105,28 @@ static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) { Value *Op0 = CGF->EmitScalarExpr(E->getArg(0)); - auto *CMP = CGF->Builder.CreateFCmpOLT( - Op0, ConstantFP::get(CGF->Builder.getFloatTy(), 0.0)); + + Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy); + Value *CMP; + + if (const auto *VecTy = E->getArg(0)->getType()->getAs()) { + FZeroConst = ConstantVector::getSplat( + ElementCount::getFixed(VecTy->getNumElements()), FZeroConst); + CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst); + } else + CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst); if (CGF->CGM.getTarget().getTriple().isDXIL()) return CGF->Builder.CreateIntrinsic(CGF->VoidTy, llvm::Intrinsic::dx_clip, {CMP}, nullptr); + if (const auto *VecTy = E->getArg(0)->getType()->getAs()){ + + CMP = 
CGF->Builder.CreateIntrinsic(CGF->Builder.getInt1Ty(), llvm::Intrinsic::spv_any, + {CMP}, nullptr); + } + + BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn); BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn); diff --git a/clang/test/CodeGenHLSL/builtins/clip.hlsl b/clang/test/CodeGenHLSL/builtins/clip.hlsl index 994a7c9f3311c3..0445db8a5d3e06 100644 --- a/clang/test/CodeGenHLSL/builtins/clip.hlsl +++ b/clang/test/CodeGenHLSL/builtins/clip.hlsl @@ -1,11 +1,36 @@ // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-pixel %s -fnative-half-type -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-vulkan-pixel %s -fnative-half-type -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-vulkan-pixel %s -fnative-half-type -emit-llvm -o - | FileCheck %s --check-prefix=SPIRV -//CHECK-LABEL: define void @main() -void test(float Buf) { - //CHECK: [[LOAD:%.*]] = load <4 x float>, ptr %p1{{.*}}, align 16 - //CHECK-NEXT: [[EXTR:%.*]] = extractelement <4 x float> [[LOAD]], i32 3 - //CHECK-NEXT: [[FCMP:%.*]] = fcmp olt float [[EXTR]], 0.000000e+00 - //CHECK-NEXT: call void @llvm.dx.clip(i1 [[FCMP]]) + +void test_scalar(float Buf) { + // CHECK: define void @{{.*}}test_scalar{{.*}}(float {{.*}} [[VALP:%.*]]) + // CHECK: [[LOAD:%.*]] = load float, ptr [[VALP]].addr, align 4 + // CHECK-NEXT: [[FCMP:%.*]] = fcmp olt float [[LOAD]], 0.000000e+00 + // CHECK-NEXT: call void @llvm.dx.clip.i1(i1 [[FCMP]]) + // + // SPIRV: define spir_func void @{{.*}}test_scalar{{.*}}(float {{.*}} [[VALP:%.*]]) + // SPIRV: [[LOAD:%.*]] = load float, ptr [[VALP]].addr, align 4 + // SPIRV-NEXT: [[FCMP:%.*]] = fcmp olt float [[LOAD]], 0.000000e+00 + // SPIRV-NEXT: br i1 [[FCMP]], label %[[LTL:.*]], label %[[ENDL:.*]] + // SPIRV: [[LTL]]: ; preds = %entry + // SPIRV-NEXT: call void @llvm.spv.clip() + // SPIRV: br label %[[ENDL]] + clip(Buf); +} + +void test_vector4(float4 Buf) 
{ + // CHECK: define void @{{.*}}test_vector{{.*}}(<4 x float> {{.*}} [[VALP:%.*]]) + // CHECK: [[LOAD:%.*]] = load <4 x float>, ptr [[VALP]].addr, align 16 + // CHECK-NEXT: [[FCMP:%.*]] = fcmp olt <4 x float> [[LOAD]], zeroinitializer + // CHECK-NEXT: call void @llvm.dx.clip.v4i1(<4 x i1> [[FCMP]]) + // + // SPIRV: define spir_func void @{{.*}}test_vector{{.*}}(<4 x float> {{.*}} [[VALP:%.*]]) + // SPIRV: [[LOAD:%.*]] = load <4 x float>, ptr [[VALP]].addr, align 16 + // SPIRV-NEXT: [[FCMP:%.*]] = fcmp olt <4 x float> [[LOAD]], zeroinitializer + // SPIRV-NEXT: [[RED:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FCMP]]) + // SPIRV-NEXT: br i1 [[RED]], label %[[LTL:.*]], label %[[ENDL:.*]] + // SPIRV: [[LTL]]: ; preds = %entry + // SPIRV-NEXT: call void @llvm.spv.clip() + // SPIRV-NEXT: br label %[[ENDL]] clip(Buf); } diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 0d8dc4ead02c8a..644c9f31972362 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -92,5 +92,5 @@ def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, L def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>], [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>; def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; -def int_dx_clip : DefaultAttrsIntrinsic<[], [llvm_i1_ty], [IntrNoMem]>; +def int_dx_clip : DefaultAttrsIntrinsic<[], [llvm_anyint_ty], [IntrNoMem]>; } diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 1cab51139aa5b3..5856cd3f4db754 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/IntrinsicsSPIRV.h" #include "llvm/Support/Debug.h" +#include 
"llvm/Support/VersionTuple.h" #define DEBUG_TYPE "spirv-isel" @@ -1972,11 +1973,9 @@ bool SPIRVInstructionSelector::selectSplatVector(Register ResVReg, bool SPIRVInstructionSelector::selectClip(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { - - const auto Opcode = - STI.getTargetTriple().getVulkanVersion() < llvm::VersionTuple(1, 3) - ? SPIRV::OpKill - : SPIRV::OpDemoteToHelperInvocation; + const auto Opcode = (STI.isAtLeastSPIRVVer(VersionTuple(1, 6))) + ? SPIRV::OpDemoteToHelperInvocation + : SPIRV::OpKill; MachineBasicBlock &BB = *I.getParent(); return BuildMI(BB, I, I.getDebugLoc(), TII.get(Opcode)) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll index 4c133258f69ba2..8c1214c9e67cbc 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll @@ -2,17 +2,40 @@ ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} -; CHECK-LABEL: define void @test_dxil_lowering -; CHECK: call void @dx.op.discard(i32 82, i1 %0) -; -define spir_func void @test_dxil_lowering(float noundef %Buf) #0 { +define void @test_scalar_lowering(float noundef %Buf) { entry: %Buf.addr = alloca float, align 4 store float %Buf, ptr %Buf.addr, align 4 - %1 = load float, ptr %Buf.addr, align 4 - %2 = fcmp olt float %1, 0.000000e+00 - call void @llvm.spv.clip(i1 %2) + %0 = load float, ptr %Buf.addr, align 4 + %1 = fcmp olt float %0, 0.000000e+00 + br i1 %1, label %lt0, label %end + +lt0: ; preds = %entry + call void @llvm.spv.clip() + br label %end + +end: ; preds = %lt0, %entry + ret void +} + +declare void @llvm.spv.clip() + + +define void @test_vector(<4 x float> noundef %Buf) { +entry: + %Buf.addr = alloca <4 x float>, align 16 + store <4 x float> %Buf, ptr %Buf.addr, align 16 + %1 = load <4 x float>, ptr %Buf.addr, align 16 + %2 = fcmp olt <4 x float> %1, zeroinitializer + %3 = call i1 
@llvm.vector.reduce.or.v4i1(<4 x i1> %2) + br i1 %3, label %lt0, label %end + +lt0: ; preds = %entry + call void @llvm.spv.clip() + br label %end + +end: ; preds = %lt0, %entry ret void } -declare void @llvm.spv.clip(i1) #1 +declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>) #3 From 20587a9ac9bbe125845c483ecedcb9c79975438c Mon Sep 17 00:00:00 2001 From: Joao Saffran Date: Fri, 1 Nov 2024 18:11:35 +0000 Subject: [PATCH 4/4] adding tests --- clang/lib/CodeGen/CGBuiltin.cpp | 24 +++----- clang/lib/CodeGen/CGHLSLRuntime.h | 1 + clang/test/CodeGenHLSL/builtins/clip.hlsl | 17 +++--- clang/test/SemaHLSL/BuiltIns/clip-errors.hlsl | 22 ++++++++ llvm/include/llvm/IR/IntrinsicsDirectX.td | 2 +- llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp | 4 +- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 21 +++++-- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 11 ++++ .../lib/Target/SPIRV/SPIRVSymbolicOperands.td | 1 + llvm/test/CodeGen/DirectX/clip.ll | 24 +++++++- .../CodeGen/SPIRV/hlsl-intrinsics/clip.ll | 56 +++++++++++++++---- 11 files changed, 141 insertions(+), 42 deletions(-) create mode 100644 clang/test/SemaHLSL/BuiltIns/clip-errors.hlsl diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 865cc73d166301..8177b144639180 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -41,10 +41,8 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/IntrinsicsAMDGPU.h" @@ -57,14 +55,12 @@ #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/IR/IntrinsicsS390.h" -#include "llvm/IR/IntrinsicsSPIRV.h" #include "llvm/IR/IntrinsicsVE.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include 
"llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" #include "llvm/IR/MemoryModelRelaxationAnnotations.h" -#include "llvm/IR/Type.h" #include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" @@ -112,7 +108,10 @@ static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) { if (const auto *VecTy = E->getArg(0)->getType()->getAs()) { FZeroConst = ConstantVector::getSplat( ElementCount::getFixed(VecTy->getNumElements()), FZeroConst); - CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst); + auto *FCompInst = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst); + CMP = CGF->Builder.CreateIntrinsic( + CGF->Builder.getInt1Ty(), CGF->CGM.getHLSLRuntime().getAnyIntrinsic(), + {FCompInst}, nullptr); } else CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst); @@ -120,13 +119,6 @@ static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) { return CGF->Builder.CreateIntrinsic(CGF->VoidTy, llvm::Intrinsic::dx_clip, {CMP}, nullptr); - if (const auto *VecTy = E->getArg(0)->getType()->getAs()){ - - CMP = CGF->Builder.CreateIntrinsic(CGF->Builder.getInt1Ty(), llvm::Intrinsic::spv_any, - {CMP}, nullptr); - } - - BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn); BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn); @@ -134,13 +126,13 @@ static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) { CGF->Builder.SetInsertPoint(LT0); - auto *IntrCall = CGF->Builder.CreateIntrinsic( - CGF->VoidTy, llvm::Intrinsic::spv_clip, {}, nullptr); + CGF->Builder.CreateIntrinsic(CGF->VoidTy, llvm::Intrinsic::spv_clip, {}, + nullptr); - CGF->Builder.CreateBr(End); + auto *BrCall = CGF->Builder.CreateBr(End); CGF->Builder.SetInsertPoint(End); - return IntrCall; + return BrCall; } static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) { diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 568cff65d04df2..cd533cad84e9fb 100644 --- 
a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -91,6 +91,7 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane) + GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, handle_fromBinding) //===----------------------------------------------------------------------===// diff --git a/clang/test/CodeGenHLSL/builtins/clip.hlsl b/clang/test/CodeGenHLSL/builtins/clip.hlsl index 0445db8a5d3e06..81976839bbe7d9 100644 --- a/clang/test/CodeGenHLSL/builtins/clip.hlsl +++ b/clang/test/CodeGenHLSL/builtins/clip.hlsl @@ -6,15 +6,17 @@ void test_scalar(float Buf) { // CHECK: define void @{{.*}}test_scalar{{.*}}(float {{.*}} [[VALP:%.*]]) // CHECK: [[LOAD:%.*]] = load float, ptr [[VALP]].addr, align 4 // CHECK-NEXT: [[FCMP:%.*]] = fcmp olt float [[LOAD]], 0.000000e+00 - // CHECK-NEXT: call void @llvm.dx.clip.i1(i1 [[FCMP]]) + // CHECK-NOT: call i1 @llvm.dx.any + // CHECK-NEXT: call void @llvm.dx.clip(i1 [[FCMP]]) // // SPIRV: define spir_func void @{{.*}}test_scalar{{.*}}(float {{.*}} [[VALP:%.*]]) // SPIRV: [[LOAD:%.*]] = load float, ptr [[VALP]].addr, align 4 // SPIRV-NEXT: [[FCMP:%.*]] = fcmp olt float [[LOAD]], 0.000000e+00 + // SPIRV-NOT: call i1 @llvm.spv.any // SPIRV-NEXT: br i1 [[FCMP]], label %[[LTL:.*]], label %[[ENDL:.*]] - // SPIRV: [[LTL]]: ; preds = %entry + // SPIRV: [[LTL]]: ; preds = %entry // SPIRV-NEXT: call void @llvm.spv.clip() - // SPIRV: br label %[[ENDL]] + // SPIRV: br label %[[ENDL]] clip(Buf); } @@ -22,14 +24,15 @@ void test_vector4(float4 Buf) { // CHECK: define void @{{.*}}test_vector{{.*}}(<4 x float> {{.*}} [[VALP:%.*]]) // CHECK: [[LOAD:%.*]] = load <4 x float>, ptr [[VALP]].addr, align 16 // CHECK-NEXT: [[FCMP:%.*]] = fcmp olt <4 x float> [[LOAD]], zeroinitializer - // CHECK-NEXT: call void @llvm.dx.clip.v4i1(<4 x i1> [[FCMP]]) + // CHECK-NEXT: [[ANYC:%.*]] = call i1 
@llvm.dx.any.v4i1(<4 x i1> [[FCMP]]) + // CHECK-NEXT: call void @llvm.dx.clip(i1 [[ANYC]]) // // SPIRV: define spir_func void @{{.*}}test_vector{{.*}}(<4 x float> {{.*}} [[VALP:%.*]]) // SPIRV: [[LOAD:%.*]] = load <4 x float>, ptr [[VALP]].addr, align 16 // SPIRV-NEXT: [[FCMP:%.*]] = fcmp olt <4 x float> [[LOAD]], zeroinitializer - // SPIRV-NEXT: [[RED:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FCMP]]) - // SPIRV-NEXT: br i1 [[RED]], label %[[LTL:.*]], label %[[ENDL:.*]] - // SPIRV: [[LTL]]: ; preds = %entry + // SPIRV-NEXT: [[ANYC:%.*]] = call i1 @llvm.spv.any.v4i1(<4 x i1> [[FCMP]]) + // SPIRV-NEXT: br i1 [[ANYC]], label %[[LTL:.*]], label %[[ENDL:.*]] + // SPIRV: [[LTL]]: ; preds = %entry // SPIRV-NEXT: call void @llvm.spv.clip() // SPIRV-NEXT: br label %[[ENDL]] clip(Buf); diff --git a/clang/test/SemaHLSL/BuiltIns/clip-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/clip-errors.hlsl new file mode 100644 index 00000000000000..570b4bc18dcd4b --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/clip-errors.hlsl @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -verify + + +void test_arg_missing() { + __builtin_hlsl_elementwise_clip(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} +} + +void test_too_many_args(float p1, float p2) { + __builtin_hlsl_elementwise_clip(p1, p2); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} +} + +void test_first_arg_type_mismatch(bool p) { + __builtin_hlsl_elementwise_clip(p); + // expected-error@-1 {{invalid operand of type 'bool' where 'float' or a vector of such type is required}} +} + +void test_first_arg_type_mismatch_2(half p) { + __builtin_hlsl_elementwise_clip(p); + // expected-error@-1 {{invalid operand of type 'double' where 'float' or a vector of such type is required}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 
644c9f31972362..0d8dc4ead02c8a 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -92,5 +92,5 @@ def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, L def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>], [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>; def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; -def int_dx_clip : DefaultAttrsIntrinsic<[], [llvm_anyint_ty], [IntrNoMem]>; +def int_dx_clip : DefaultAttrsIntrinsic<[], [llvm_i1_ty], [IntrNoMem]>; } diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp index dbfc133864bba4..23221cacca7df3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp @@ -73,7 +73,9 @@ static const std::map {"SPV_KHR_cooperative_matrix", SPIRV::Extension::Extension::SPV_KHR_cooperative_matrix}, {"SPV_KHR_non_semantic_info", - SPIRV::Extension::Extension::SPV_KHR_non_semantic_info}}; + SPIRV::Extension::Extension::SPV_KHR_non_semantic_info}, + {"SPV_EXT_demote_to_helper_invocation", + SPIRV::Extension::Extension::SPV_EXT_demote_to_helper_invocation}}; bool SPIRVExtensionsParser::parse(cl::Option &O, llvm::StringRef ArgName, llvm::StringRef ArgValue, diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 5856cd3f4db754..be3852ed482c83 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -32,7 +32,6 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/IntrinsicsSPIRV.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/VersionTuple.h" #define DEBUG_TYPE "spirv-isel" @@ -1973,9 +1972,23 @@ bool SPIRVInstructionSelector::selectSplatVector(Register ResVReg, bool SPIRVInstructionSelector::selectClip(Register ResVReg, const 
SPIRVType *ResType, MachineInstr &I) const { - const auto Opcode = (STI.isAtLeastSPIRVVer(VersionTuple(1, 6))) - ? SPIRV::OpDemoteToHelperInvocation - : SPIRV::OpKill; + + unsigned Opcode; + + if (STI.isAtLeastSPIRVVer(VersionTuple(1, 6))) { + if (!STI.canUseExtension( + SPIRV::Extension::SPV_EXT_demote_to_helper_invocation)) + report_fatal_error( + "llvm.spv.clip intrinsic: this instruction requires the following " + "SPIR-V extension: SPV_EXT_demote_to_helper_invocation", + false); + Opcode = SPIRV::OpDemoteToHelperInvocation; + } else { + Opcode = SPIRV::OpKill; + // OpKill must be the last operation of any basic block. + MachineInstr *NextI = I.getNextNode(); + NextI->removeFromParent(); + } MachineBasicBlock &BB = *I.getParent(); return BuildMI(BB, I, I.getDebugLoc(), TII.get(Opcode)) diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index 29ce60d9983e38..bc41ee96f2af66 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -1376,6 +1376,17 @@ void addInstrRequirements(const MachineInstr &MI, Reqs.addCapability(SPIRV::Capability::SplitBarrierINTEL); } break; + + case SPIRV::OpKill: { + Reqs.addCapability(SPIRV::Capability::Shader); + } break; + case SPIRV::OpDemoteToHelperInvocation: + if (ST.canUseExtension( + SPIRV::Extension::SPV_EXT_demote_to_helper_invocation)) { + Reqs.addExtension(SPIRV::Extension::SPV_EXT_demote_to_helper_invocation); + Reqs.addCapability(SPIRV::Capability::DemoteToHelperInvocation); + } + break; default: break; } diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index d63438baca7e76..edf6e5547631a1 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -456,6 +456,7 @@ defm VulkanMemoryModelDeviceScopeKHR : CapabilityOperand<5346, 0, 0, [], []>; defm ImageFootprintNV : CapabilityOperand<5282, 0, 0, [], 
[]>; defm FragmentBarycentricNV : CapabilityOperand<5284, 0, 0, [], []>; defm ComputeDerivativeGroupQuadsNV : CapabilityOperand<5288, 0, 0, [], []>; +defm DemoteToHelperInvocation : CapabilityOperand<5379, 0, 0, [SPV_EXT_demote_to_helper_invocation], []>; defm ComputeDerivativeGroupLinearNV : CapabilityOperand<5350, 0, 0, [], []>; defm FragmentDensityEXT : CapabilityOperand<5291, 0, 0, [], [Shader]>; defm PhysicalStorageBufferAddressesEXT : CapabilityOperand<5347, 0, 0, [], [Shader]>; diff --git a/llvm/test/CodeGen/DirectX/clip.ll b/llvm/test/CodeGen/DirectX/clip.ll index 54ff924675dbb3..71789e7048363a 100644 --- a/llvm/test/CodeGen/DirectX/clip.ll +++ b/llvm/test/CodeGen/DirectX/clip.ll @@ -1,11 +1,29 @@ -; RUN: opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-pixel %s | FileCheck %s +; RUN: opt -passes='function(scalarizer),module(dxil-op-lower,dxil-intrinsic-expansion)' -S -mtriple=dxil-pc-shadermodel6.3-pixel %s | FileCheck %s -; CHECK-LABEL: define void @test_dxil_lowering +; CHECK-LABEL: define void @test_scalar ; CHECK: call void @dx.op.discard(i32 82, i1 %0) ; -define void @test_dxil_lowering(float noundef %p) #0 { +define void @test_scalar(float noundef %p) #0 { entry: %0 = fcmp olt float %p, 0.000000e+00 call void @llvm.dx.clip(i1 %0) ret void } + +; CHECK-LABEL: define void @test_vector +; CHECK: [[EXTR0:%.*]] = extractelement <4 x i1> [[INP:%.*]], i64 0 +; CHECK-NEXT: [[EXTR1:%.*]] = extractelement <4 x i1> [[INP:%.*]], i64 1 +; CHECK-NEXT: [[OR1:%.*]] = or i1 [[EXTR0]], [[EXTR1]] +; CHECK-NEXT: [[EXTR2:%.*]] = extractelement <4 x i1> [[INP:%.*]], i64 2 +; CHECK-NEXT: [[OR2:%.*]] = or i1 [[OR1]], [[EXTR2]] +; CHECK-NEXT: [[EXTR3:%.*]] = extractelement <4 x i1> [[INP:%.*]], i64 3 +; CHECK-NEXT: [[OR3:%.*]] = or i1 [[OR2]], [[EXTR3]] +; CHECK-NEXT: call void @dx.op.discard(i32 82, i1 [[OR3]]) +; +define void @test_vector(<4 x float> noundef %p) #0 { +entry: + %0 = fcmp olt <4 x float> %p, zeroinitializer + %1 = call i1 
@llvm.dx.any.v4i1(<4 x i1> %0) + call void @llvm.dx.clip(i1 %1) + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll index 8c1214c9e67cbc..89db4be3494ebd 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll @@ -1,14 +1,40 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,SPIRV15 +; RUN: llc -verify-machineinstrs -spirv-ext=+SPV_EXT_demote_to_helper_invocation -O0 -mtriple=spirv32v1.6-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,SPIRV16 ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} -define void @test_scalar_lowering(float noundef %Buf) { +; Make sure lowering is correctly generating spirv code. + +; CHECK-DAG: %[[#float:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#void:]] = OpTypeVoid +; CHECK-DAG: %[[#bool:]] = OpTypeBool +; CHECK-DAG: %[[#v4bool:]] = OpTypeVector %[[#bool]] 4 +; CHECK-DAG: %[[#v4float:]] = OpTypeVector %[[#float]] 4 +; CHECK-DAG: %[[#fzero:]] = OpConstant %[[#float]] 0 +; CHECK-DAG: %[[#v4fzero:]] = OpConstantNull %[[#v4float]] +; SPIRV16-DAG: %[[#vecfuncopptr:]] = OpTypePointer Function %[[#v4float]] +; SPIRV16-DAG: %[[#funcopptr:]] = OpTypePointer Function %[[#float]] + +define void @test_scalar(float noundef %Buf) { entry: +; CHECK-LABEL: ; -- Begin function test_scalar +; SPIRV16: %[[#param:]] = OpVariable %[[#funcopptr]] Function +; SPIRV16: %[[#load:]] = OpLoad %[[#float]] %[[#param]] Aligned 4 +; SPIRV15: %[[#load:]] = OpFunctionParameter %[[#float]] +; CHECK: %[[#cmplt:]] = OpFOrdLessThan %[[#bool]] %[[#load]] %[[#fzero]] +; CHECK: OpBranchConditional %[[#cmplt]] %[[#truel:]] %[[#endl:]] +; CHECK: %[[#truel]] = OpLabel +; SPIRV15: OpKill +; SPIRV16-NOT: OpKill +; SPIRV15-NOT: OpBranch
%[[#endl]] +; SPIRV16: OpDemoteToHelperInvocation +; SPIRV16: OpBranch %[[#endl]] +; CHECK: %[[#endl]] = OpLabel %Buf.addr = alloca float, align 4 store float %Buf, ptr %Buf.addr, align 4 - %0 = load float, ptr %Buf.addr, align 4 - %1 = fcmp olt float %0, 0.000000e+00 - br i1 %1, label %lt0, label %end + %1 = load float, ptr %Buf.addr, align 4 + %2 = fcmp olt float %1, 0.000000e+00 + br i1 %2, label %lt0, label %end lt0: ; preds = %entry call void @llvm.spv.clip() @@ -17,17 +43,29 @@ lt0: ; preds = %entry end: ; preds = %lt0, %entry ret void } - declare void @llvm.spv.clip() - define void @test_vector(<4 x float> noundef %Buf) { entry: +; CHECK-LABEL: ; -- Begin function test_vector +; SPIRV16: %[[#param:]] = OpVariable %[[#vecfuncopptr]] Function +; SPIRV16: %[[#loadvec:]] = OpLoad %[[#v4float]] %[[#param]] Aligned 16 +; SPIRV15: %[[#loadvec:]] = OpFunctionParameter %[[#v4float]] +; CHECK: %[[#cmplt:]] = OpFOrdLessThan %[[#v4bool]] %[[#loadvec]] %[[#v4fzero]] +; CHECK: %[[#opany:]] = OpAny %[[#bool]] %[[#cmplt]] +; CHECK: OpBranchConditional %[[#opany]] %[[#truel:]] %[[#endl:]] +; CHECK: %[[#truel]] = OpLabel +; SPIRV15: OpKill +; SPIRV16-NOT: OpKill +; SPIRV15-NOT: OpBranch %[[#endl]] +; SPIRV16: OpDemoteToHelperInvocation +; SPIRV16: OpBranch %[[#endl]] +; CHECK: %[[#endl]] = OpLabel %Buf.addr = alloca <4 x float>, align 16 store <4 x float> %Buf, ptr %Buf.addr, align 16 %1 = load <4 x float>, ptr %Buf.addr, align 16 %2 = fcmp olt <4 x float> %1, zeroinitializer - %3 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %2) + %3 = call i1 @llvm.spv.any.v4i1(<4 x i1> %2) br i1 %3, label %lt0, label %end lt0: ; preds = %entry @@ -37,5 +75,3 @@ lt0: ; preds = %entry end: ; preds = %lt0, %entry ret void } - -declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>) #3