18
18
#include " LoongArchSubtarget.h"
19
19
#include " MCTargetDesc/LoongArchBaseInfo.h"
20
20
#include " MCTargetDesc/LoongArchMCTargetDesc.h"
21
+ #include " MCTargetDesc/LoongArchMatInt.h"
21
22
#include " llvm/ADT/SmallSet.h"
22
23
#include " llvm/ADT/Statistic.h"
23
24
#include " llvm/ADT/StringExtras.h"
@@ -41,6 +42,34 @@ using namespace llvm;
41
42
42
43
STATISTIC (NumTailCalls, " Number of tail calls" );
43
44
45
/// Values accepted by the -loongarch-materialize-float-imm option.
///
/// Each non-zero enumerator's numeric value is the instruction budget it
/// names, so the option can be compared directly against an instruction
/// count. This is deliberately an unscoped enum: the implicit conversion to
/// an integer is what makes that comparison possible.
enum MaterializeFPImm {
  /// Do not materialize through integer registers; use the constant pool.
  NoMaterializeFPImm = 0,
  /// Materialize FP immediates within at most 2 instructions.
  MaterializeFPImm2Ins = 2,
  /// Materialize FP immediates within at most 3 instructions.
  MaterializeFPImm3Ins = 3,
  /// Materialize FP immediates within at most 4 instructions.
  MaterializeFPImm4Ins = 4,
  /// Materialize FP immediates within at most 5 instructions.
  MaterializeFPImm5Ins = 5,
  /// Materialize FP immediates within at most 6 instructions.
  MaterializeFPImm6Ins = 6
};
53
+
54
+ static cl::opt<MaterializeFPImm> MaterializeFPImmInsNum (
55
+ " loongarch-materialize-float-imm" , cl::Hidden,
56
+ cl::desc (" Maximum number of instructions used (including code sequence "
57
+ " to generate the value and moving the value to FPR) when "
58
+ " materializing floating-point immediates (default = 3)" ),
59
+ cl::init(MaterializeFPImm3Ins),
60
+ cl::values(clEnumValN(NoMaterializeFPImm, " 0" , " Use constant pool" ),
61
+ clEnumValN(MaterializeFPImm2Ins, " 2" ,
62
+ " Materialize FP immediate within 2 instructions" ),
63
+ clEnumValN(MaterializeFPImm3Ins, " 3" ,
64
+ " Materialize FP immediate within 3 instructions" ),
65
+ clEnumValN(MaterializeFPImm4Ins, " 4" ,
66
+ " Materialize FP immediate within 4 instructions" ),
67
+ clEnumValN(MaterializeFPImm5Ins, " 5" ,
68
+ " Materialize FP immediate within 5 instructions" ),
69
+ clEnumValN(MaterializeFPImm6Ins, " 6" ,
70
+ " Materialize FP immediate within 6 instructions "
71
+ " (behaves same as 5 on loongarch64)" )));
72
+
44
73
/// Hidden command-line option requesting a trap on integer division by zero.
/// Disabled by default.
static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));
@@ -190,6 +219,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
190
219
setTruncStoreAction (MVT::f32 , MVT::bf16 , Expand);
191
220
setCondCodeAction (FPCCToExpand, MVT::f32 , Expand);
192
221
222
+ setOperationAction (ISD::ConstantFP, MVT::f32 , Custom);
193
223
setOperationAction (ISD::SELECT_CC, MVT::f32 , Expand);
194
224
setOperationAction (ISD::BR_CC, MVT::f32 , Expand);
195
225
setOperationAction (ISD::FMA, MVT::f32 , Legal);
@@ -237,6 +267,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
237
267
setTruncStoreAction (MVT::f64 , MVT::f32 , Expand);
238
268
setCondCodeAction (FPCCToExpand, MVT::f64 , Expand);
239
269
270
+ setOperationAction (ISD::ConstantFP, MVT::f64 , Custom);
240
271
setOperationAction (ISD::SELECT_CC, MVT::f64 , Expand);
241
272
setOperationAction (ISD::BR_CC, MVT::f64 , Expand);
242
273
setOperationAction (ISD::STRICT_FSETCCS, MVT::f64 , Legal);
@@ -557,10 +588,67 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
557
588
case ISD::VECREDUCE_UMAX:
558
589
case ISD::VECREDUCE_UMIN:
559
590
return lowerVECREDUCE (Op, DAG);
591
+ case ISD::ConstantFP:
592
+ return lowerConstantFP (Op, DAG);
560
593
}
561
594
return SDValue ();
562
595
}
563
596
597
+ SDValue LoongArchTargetLowering::lowerConstantFP (SDValue Op,
598
+ SelectionDAG &DAG) const {
599
+ EVT VT = Op.getValueType ();
600
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
601
+ const APFloat &FPVal = CFP->getValueAPF ();
602
+ SDLoc DL (CFP);
603
+
604
+ assert ((VT == MVT::f32 && Subtarget.hasBasicF ()) ||
605
+ (VT == MVT::f64 && Subtarget.hasBasicD ()));
606
+
607
+ // If value is 0.0 or -0.0, just ignore it.
608
+ if (FPVal.isZero ())
609
+ return SDValue ();
610
+
611
+ // If lsx enabled, use cheaper 'vldi' instruction if possible.
612
+ if (isFPImmVLDILegal (FPVal, VT))
613
+ return SDValue ();
614
+
615
+ // Construct as integer, and move to float register.
616
+ APInt INTVal = FPVal.bitcastToAPInt ();
617
+
618
+ // If more than MaterializeFPImmInsNum instructions will be used to
619
+ // generate the INTVal and move it to float register, fallback to
620
+ // use floating point load from the constant pool.
621
+ auto Seq = LoongArchMatInt::generateInstSeq (INTVal.getSExtValue ());
622
+ int InsNum = Seq.size () + ((VT == MVT::f64 && !Subtarget.is64Bit ()) ? 2 : 1 );
623
+ if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue (+1.0 ))
624
+ return SDValue ();
625
+
626
+ switch (VT.getSimpleVT ().SimpleTy ) {
627
+ default :
628
+ llvm_unreachable (" Unexpected floating point type!" );
629
+ break ;
630
+ case MVT::f32 : {
631
+ SDValue NewVal = DAG.getConstant (INTVal, DL, MVT::i32 );
632
+ if (Subtarget.is64Bit ())
633
+ NewVal = DAG.getNode (ISD::ZERO_EXTEND, DL, MVT::i64 , NewVal);
634
+ return DAG.getNode (Subtarget.is64Bit () ? LoongArchISD::MOVGR2FR_W_LA64
635
+ : LoongArchISD::MOVGR2FR_W,
636
+ DL, VT, NewVal);
637
+ }
638
+ case MVT::f64 : {
639
+ if (Subtarget.is64Bit ()) {
640
+ SDValue NewVal = DAG.getConstant (INTVal, DL, MVT::i64 );
641
+ return DAG.getNode (LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
642
+ }
643
+ SDValue Lo = DAG.getConstant (INTVal.trunc (32 ), DL, MVT::i32 );
644
+ SDValue Hi = DAG.getConstant (INTVal.lshr (32 ).trunc (32 ), DL, MVT::i32 );
645
+ return DAG.getNode (LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
646
+ }
647
+ }
648
+
649
+ return SDValue ();
650
+ }
651
+
564
652
// Lower vecreduce_add using vhaddw instructions.
565
653
// For Example:
566
654
// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
@@ -7152,7 +7240,10 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
7152
7240
NODE_NAME_CASE (SRL_W)
7153
7241
NODE_NAME_CASE (BSTRINS)
7154
7242
NODE_NAME_CASE (BSTRPICK)
7243
+ NODE_NAME_CASE (MOVGR2FR_W)
7155
7244
NODE_NAME_CASE (MOVGR2FR_W_LA64)
7245
+ NODE_NAME_CASE (MOVGR2FR_D)
7246
+ NODE_NAME_CASE (MOVGR2FR_D_LO_HI)
7156
7247
NODE_NAME_CASE (MOVFR2GR_S_LA64)
7157
7248
NODE_NAME_CASE (FTINT)
7158
7249
NODE_NAME_CASE (BUILD_PAIR_F64)
0 commit comments