diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 1b8697b209bd70..13ba369e323f72 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4880,7 +4880,7 @@ def HLSLSplitDouble: LangBuiltin<"HLSL_LANG"> {
 def HLSLClip: LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_elementwise_clip"];
   let Attributes = [NoThrow, Const];
-  let Prototype = "void(bool)";
+  let Prototype = "void(...)";
 }
 
 // Builtins for XRay.
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 21c6d29cf3ec2d..865cc73d166301 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -41,8 +41,10 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
@@ -55,12 +57,14 @@
 #include "llvm/IR/IntrinsicsR600.h"
 #include "llvm/IR/IntrinsicsRISCV.h"
 #include "llvm/IR/IntrinsicsS390.h"
+#include "llvm/IR/IntrinsicsSPIRV.h"
 #include "llvm/IR/IntrinsicsVE.h"
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/MatrixBuilder.h"
 #include "llvm/IR/MemoryModelRelaxationAnnotations.h"
+#include "llvm/IR/Type.h"
 #include "llvm/Support/AMDGPUAddrSpace.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/MathExtras.h"
@@ -101,13 +105,28 @@ static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
 
 static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) {
   Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
-  auto *CMP = CGF->Builder.CreateFCmpOLT(
-      Op0, ConstantFP::get(CGF->Builder.getFloatTy(), 0.0));
+
+  // Compare the operand against zero. For a vector argument the zero constant
+  // is splatted to the operand's element count so a single fcmp covers both
+  // the scalar and the vector form.
+  const auto *VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>();
+  Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy);
+  if (VecTy)
+    FZeroConst = ConstantVector::getSplat(
+        ElementCount::getFixed(VecTy->getNumElements()), FZeroConst);
+  Value *CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
 
   if (CGF->CGM.getTarget().getTriple().isDXIL())
     return CGF->Builder.CreateIntrinsic(CGF->VoidTy, llvm::Intrinsic::dx_clip,
                                         {CMP}, nullptr);
 
+  // The non-DXIL path below branches on one i1, so collapse a vector
+  // comparison to a single "any lane is negative" flag first.
+  if (VecTy)
+    CMP = CGF->Builder.CreateIntrinsic(CGF->Builder.getInt1Ty(),
+                                       llvm::Intrinsic::spv_any, {CMP},
+                                       nullptr);
+
   BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn);
   BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn);
 
diff --git a/clang/test/CodeGenHLSL/builtins/clip.hlsl b/clang/test/CodeGenHLSL/builtins/clip.hlsl
index 994a7c9f3311c3..0445db8a5d3e06 100644
--- a/clang/test/CodeGenHLSL/builtins/clip.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/clip.hlsl
@@ -1,11 +1,36 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-pixel %s -fnative-half-type -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-vulkan-pixel %s -fnative-half-type -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-vulkan-pixel %s -fnative-half-type -emit-llvm -o - | FileCheck %s --check-prefix=SPIRV
 
-//CHECK-LABEL: define void @main()
-void test(float Buf) {
-  //CHECK: [[LOAD:%.*]] = load <4 x float>, ptr %p1{{.*}}, align 16
-  //CHECK-NEXT: [[EXTR:%.*]] = extractelement <4 x float> [[LOAD]], i32 3
-  //CHECK-NEXT: [[FCMP:%.*]] = fcmp olt float [[EXTR]], 0.000000e+00
-  //CHECK-NEXT: call void @llvm.dx.clip(i1 [[FCMP]])
+
+void test_scalar(float Buf) {
+  // CHECK: define void @{{.*}}test_scalar{{.*}}(float {{.*}} [[VALP:%.*]])
+  // CHECK: [[LOAD:%.*]] = load float, ptr [[VALP]].addr, align 4
+  // CHECK-NEXT: [[FCMP:%.*]] = fcmp olt float [[LOAD]], 0.000000e+00
+  // CHECK-NEXT: call void @llvm.dx.clip.i1(i1 [[FCMP]])
+  //
+  // SPIRV: define spir_func void @{{.*}}test_scalar{{.*}}(float {{.*}} [[VALP:%.*]])
+  // SPIRV: [[LOAD:%.*]] = load float, ptr [[VALP]].addr, align 4
+  // SPIRV-NEXT: [[FCMP:%.*]] = fcmp olt float [[LOAD]], 0.000000e+00
+  // SPIRV-NEXT: br i1 [[FCMP]], label %[[LTL:.*]], label %[[ENDL:.*]]
+  // SPIRV: [[LTL]]: ; preds = %entry
+  // SPIRV-NEXT: call void @llvm.spv.clip()
+  // SPIRV: br label %[[ENDL]]
+  clip(Buf);
+}
+
+void test_vector4(float4 Buf) {
+  // CHECK: define void @{{.*}}test_vector{{.*}}(<4 x float> {{.*}} [[VALP:%.*]])
+  // CHECK: [[LOAD:%.*]] = load <4 x float>, ptr [[VALP]].addr, align 16
+  // CHECK-NEXT: [[FCMP:%.*]] = fcmp olt <4 x float> [[LOAD]], zeroinitializer
+  // CHECK-NEXT: call void @llvm.dx.clip.v4i1(<4 x i1> [[FCMP]])
+  //
+  // SPIRV: define spir_func void @{{.*}}test_vector{{.*}}(<4 x float> {{.*}} [[VALP:%.*]])
+  // SPIRV: [[LOAD:%.*]] = load <4 x float>, ptr [[VALP]].addr, align 16
+  // SPIRV-NEXT: [[FCMP:%.*]] = fcmp olt <4 x float> [[LOAD]], zeroinitializer
+  // SPIRV-NEXT: [[RED:%.*]] = call i1 @llvm.spv.any.v4i1(<4 x i1> [[FCMP]])
+  // SPIRV-NEXT: br i1 [[RED]], label %[[LTL:.*]], label %[[ENDL:.*]]
+  // SPIRV: [[LTL]]: ; preds = %entry
+  // SPIRV-NEXT: call void @llvm.spv.clip()
+  // SPIRV-NEXT: br label %[[ENDL]]
   clip(Buf);
 }
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 0d8dc4ead02c8a..644c9f31972362 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -92,5 +92,7 @@ def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, L
 def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
     [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
 def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-def int_dx_clip : DefaultAttrsIntrinsic<[], [llvm_i1_ty], [IntrNoMem]>;
+// clip returns nothing and exists only for its side effect, so it must not be
+// IntrNoMem: a willreturn/nounwind call that touches no memory is trivially dead.
+def int_dx_clip : DefaultAttrsIntrinsic<[], [llvm_anyint_ty], []>;
 }
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 1cab51139aa5b3..5856cd3f4db754 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -32,6 +32,7 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/IR/IntrinsicsSPIRV.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/VersionTuple.h"
 
 #define DEBUG_TYPE "spirv-isel"
 
@@ -1972,11 +1973,11 @@ bool SPIRVInstructionSelector::selectSplatVector(Register ResVReg,
 bool SPIRVInstructionSelector::selectClip(Register ResVReg,
                                           const SPIRVType *ResType,
                                           MachineInstr &I) const {
-
-  const auto Opcode =
-      STI.getTargetTriple().getVulkanVersion() < llvm::VersionTuple(1, 3)
-          ? SPIRV::OpKill
-          : SPIRV::OpDemoteToHelperInvocation;
+  // Pick OpDemoteToHelperInvocation when the subtarget reports SPIR-V 1.6 or
+  // newer; every older version falls back to OpKill.
+  const auto Opcode = STI.isAtLeastSPIRVVer(VersionTuple(1, 6))
+                          ? SPIRV::OpDemoteToHelperInvocation
+                          : SPIRV::OpKill;
 
   MachineBasicBlock &BB = *I.getParent();
   return BuildMI(BB, I, I.getDebugLoc(), TII.get(Opcode))
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll
index 4c133258f69ba2..8c1214c9e67cbc 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/clip.ll
@@ -2,17 +2,40 @@
 ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
 
 
-; CHECK-LABEL: define void @test_dxil_lowering
-; CHECK: call void @dx.op.discard(i32 82, i1 %0)
-;
-define spir_func void @test_dxil_lowering(float noundef %Buf) #0 {
+define void @test_scalar_lowering(float noundef %Buf) {
 entry:
   %Buf.addr = alloca float, align 4
   store float %Buf, ptr %Buf.addr, align 4
-  %1 = load float, ptr %Buf.addr, align 4
-  %2 = fcmp olt float %1, 0.000000e+00
-  call void @llvm.spv.clip(i1 %2)
+  %0 = load float, ptr %Buf.addr, align 4
+  %1 = fcmp olt float %0, 0.000000e+00
+  br i1 %1, label %lt0, label %end
+
+lt0:                                              ; preds = %entry
+  call void @llvm.spv.clip()
+  br label %end
+
+end:                                              ; preds = %lt0, %entry
+  ret void
+}
+
+declare void @llvm.spv.clip()
+
+
+define void @test_vector(<4 x float> noundef %Buf) {
+entry:
+  %Buf.addr = alloca <4 x float>, align 16
+  store <4 x float> %Buf, ptr %Buf.addr, align 16
+  %1 = load <4 x float>, ptr %Buf.addr, align 16
+  %2 = fcmp olt <4 x float> %1, zeroinitializer
+  %3 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %2)
+  br i1 %3, label %lt0, label %end
+
+lt0:                                              ; preds = %entry
+  call void @llvm.spv.clip()
+  br label %end
+
+end:                                              ; preds = %lt0, %entry
   ret void
 }
 
-declare void @llvm.spv.clip(i1) #1
+declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>) #3