Skip to content

Commit e128108

Browse files
committed
[DAGCombiner] Attempt to fold 'add' nodes to funnel-shift or rotate
1 parent 605ac34 commit e128108

File tree

2 files changed

+57
-71
lines changed

2 files changed

+57
-71
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

+43-43
Original file line number | Diff line number | Diff line change
@@ -662,14 +662,15 @@ namespace {
662662
bool DemandHighBits = true);
663663
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
664664
SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
665-
SDValue InnerPos, SDValue InnerNeg, bool HasPos,
666-
unsigned PosOpcode, unsigned NegOpcode,
667-
const SDLoc &DL);
665+
SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
666+
bool HasPos, unsigned PosOpcode,
667+
unsigned NegOpcode, const SDLoc &DL);
668668
SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
669-
SDValue InnerPos, SDValue InnerNeg, bool HasPos,
670-
unsigned PosOpcode, unsigned NegOpcode,
671-
const SDLoc &DL);
672-
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
669+
SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
670+
bool HasPos, unsigned PosOpcode,
671+
unsigned NegOpcode, const SDLoc &DL);
672+
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
673+
bool FromAdd);
673674
SDValue MatchLoadCombine(SDNode *N);
674675
SDValue mergeTruncStores(StoreSDNode *N);
675676
SDValue reduceLoadWidth(SDNode *N);
@@ -2992,6 +2993,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
29922993
if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
29932994
return V;
29942995

2996+
if (SDValue V = MatchRotate(N0, N1, SDLoc(N), /*FromAdd=*/true))
2997+
return V;
2998+
29952999
// Try to match AVGFLOOR fixedwidth pattern
29963000
if (SDValue V = foldAddToAvg(N, DL))
29973001
return V;
@@ -8161,7 +8165,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
81618165
return V;
81628166

81638167
// See if this is some rotate idiom.
8164-
if (SDValue Rot = MatchRotate(N0, N1, DL))
8168+
if (SDValue Rot = MatchRotate(N0, N1, DL, /*FromAdd=*/false))
81658169
return Rot;
81668170

81678171
if (SDValue Load = MatchLoadCombine(N))
@@ -8350,7 +8354,7 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
83508354
// The IsRotate flag should be set when the LHS of both shifts is the same.
83518355
// Otherwise if matching a general funnel shift, it should be clear.
83528356
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8353-
SelectionDAG &DAG, bool IsRotate) {
8357+
SelectionDAG &DAG, bool IsRotate, bool FromAdd) {
83548358
const auto &TLI = DAG.getTargetLoweringInfo();
83558359
// If EltSize is a power of 2 then:
83568360
//
@@ -8389,7 +8393,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
83898393
// NOTE: We can only do this when matching operations which won't modify the
83908394
// least Log2(EltSize) significant bits and not a general funnel shift.
83918395
unsigned MaskLoBits = 0;
8392-
if (IsRotate && isPowerOf2_64(EltSize)) {
8396+
if (IsRotate && !FromAdd && isPowerOf2_64(EltSize)) {
83938397
unsigned Bits = Log2_64(EltSize);
83948398
unsigned NegBits = Neg.getScalarValueSizeInBits();
83958399
if (NegBits >= Bits) {
@@ -8472,9 +8476,9 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
84728476
// Neg with outer conversions stripped away.
84738477
SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
84748478
SDValue Neg, SDValue InnerPos,
8475-
SDValue InnerNeg, bool HasPos,
8476-
unsigned PosOpcode, unsigned NegOpcode,
8477-
const SDLoc &DL) {
8479+
SDValue InnerNeg, bool FromAdd,
8480+
bool HasPos, unsigned PosOpcode,
8481+
unsigned NegOpcode, const SDLoc &DL) {
84788482
// fold (or (shl x, (*ext y)),
84798483
// (srl x, (*ext (sub 32, y)))) ->
84808484
// (rotl x, y) or (rotr x, (sub 32, y))
@@ -8484,10 +8488,9 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
84848488
// (rotr x, y) or (rotl x, (sub 32, y))
84858489
EVT VT = Shifted.getValueType();
84868490
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8487-
/*IsRotate*/ true)) {
8491+
/*IsRotate*/ true, FromAdd))
84888492
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
84898493
HasPos ? Pos : Neg);
8490-
}
84918494

84928495
return SDValue();
84938496
}
@@ -8500,9 +8503,9 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
85008503
// TODO: Merge with MatchRotatePosNeg.
85018504
SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
85028505
SDValue Neg, SDValue InnerPos,
8503-
SDValue InnerNeg, bool HasPos,
8504-
unsigned PosOpcode, unsigned NegOpcode,
8505-
const SDLoc &DL) {
8506+
SDValue InnerNeg, bool FromAdd,
8507+
bool HasPos, unsigned PosOpcode,
8508+
unsigned NegOpcode, const SDLoc &DL) {
85068509
EVT VT = N0.getValueType();
85078510
unsigned EltBits = VT.getScalarSizeInBits();
85088511

@@ -8513,10 +8516,10 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
85138516
// fold (or (shl x0, (*ext (sub 32, y))),
85148517
// (srl x1, (*ext y))) ->
85158518
// (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8516-
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8519+
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1,
8520+
FromAdd))
85178521
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
85188522
HasPos ? Pos : Neg);
8519-
}
85208523

85218524
// Matching the shift+xor cases, we can't easily use the xor'd shift amount
85228525
// so for now just use the PosOpcode case if it's legal.
@@ -8561,11 +8564,12 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
85618564
return SDValue();
85628565
}
85638566

8564-
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8565-
// idioms for rotate, and if the target supports rotation instructions, generate
8566-
// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8567-
// with different shifted sources.
8568-
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8567+
// MatchRotate - Handle an 'or' or 'add' of two operands. If this is one of the
8568+
// many idioms for rotate, and if the target supports rotation instructions,
8569+
// generate a rot[lr]. This also matches funnel shift patterns, similar to
8570+
// rotation but with different shifted sources.
8571+
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
8572+
bool FromAdd) {
85698573
EVT VT = LHS.getValueType();
85708574

85718575
// The target must have at least one rotate/funnel flavor.
@@ -8592,9 +8596,9 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
85928596
if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
85938597
LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
85948598
assert(LHS.getValueType() == RHS.getValueType());
8595-
if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8599+
if (SDValue Rot =
8600+
MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL, FromAdd))
85968601
return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8597-
}
85988602
}
85998603

86008604
// Match "(X shl/srl V1) & V2" where V2 may not be present.
@@ -8774,29 +8778,25 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
87748778
}
87758779

87768780
if (IsRotate && (HasROTL || HasROTR)) {
8777-
SDValue TryL =
8778-
MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8779-
RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8780-
if (TryL)
8781+
if (SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8782+
LExtOp0, RExtOp0, FromAdd, HasROTL,
8783+
ISD::ROTL, ISD::ROTR, DL))
87818784
return TryL;
87828785

8783-
SDValue TryR =
8784-
MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8785-
LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8786-
if (TryR)
8786+
if (SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8787+
RExtOp0, LExtOp0, FromAdd, HasROTR,
8788+
ISD::ROTR, ISD::ROTL, DL))
87878789
return TryR;
87888790
}
87898791

8790-
SDValue TryL =
8791-
MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8792-
LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8793-
if (TryL)
8792+
if (SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt,
8793+
RHSShiftAmt, LExtOp0, RExtOp0, FromAdd,
8794+
HasFSHL, ISD::FSHL, ISD::FSHR, DL))
87948795
return TryL;
87958796

8796-
SDValue TryR =
8797-
MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8798-
RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8799-
if (TryR)
8797+
if (SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt,
8798+
LHSShiftAmt, RExtOp0, LExtOp0, FromAdd,
8799+
HasFSHR, ISD::FSHR, ISD::FSHL, DL))
88008800
return TryR;
88018801

88028802
return SDValue();

llvm/test/CodeGen/NVPTX/add-rotate.ll

+14-28
Original file line number | Diff line number | Diff line change
@@ -38,16 +38,13 @@ define i32 @test_rotr(i32 %x) {
3838
define i32 @test_rotl_var(i32 %x, i32 %y) {
3939
; CHECK-LABEL: test_rotl_var(
4040
; CHECK: {
41-
; CHECK-NEXT: .reg .b32 %r<7>;
41+
; CHECK-NEXT: .reg .b32 %r<4>;
4242
; CHECK-EMPTY:
4343
; CHECK-NEXT: // %bb.0:
4444
; CHECK-NEXT: ld.param.u32 %r1, [test_rotl_var_param_0];
4545
; CHECK-NEXT: ld.param.u32 %r2, [test_rotl_var_param_1];
46-
; CHECK-NEXT: shl.b32 %r3, %r1, %r2;
47-
; CHECK-NEXT: sub.s32 %r4, 32, %r2;
48-
; CHECK-NEXT: shr.u32 %r5, %r1, %r4;
49-
; CHECK-NEXT: add.s32 %r6, %r3, %r5;
50-
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
46+
; CHECK-NEXT: shf.l.wrap.b32 %r3, %r1, %r1, %r2;
47+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
5148
; CHECK-NEXT: ret;
5249
%shl = shl i32 %x, %y
5350
%sub = sub i32 32, %y
@@ -59,16 +56,13 @@ define i32 @test_rotl_var(i32 %x, i32 %y) {
5956
define i32 @test_rotr_var(i32 %x, i32 %y) {
6057
; CHECK-LABEL: test_rotr_var(
6158
; CHECK: {
62-
; CHECK-NEXT: .reg .b32 %r<7>;
59+
; CHECK-NEXT: .reg .b32 %r<4>;
6360
; CHECK-EMPTY:
6461
; CHECK-NEXT: // %bb.0:
6562
; CHECK-NEXT: ld.param.u32 %r1, [test_rotr_var_param_0];
6663
; CHECK-NEXT: ld.param.u32 %r2, [test_rotr_var_param_1];
67-
; CHECK-NEXT: shr.u32 %r3, %r1, %r2;
68-
; CHECK-NEXT: sub.s32 %r4, 32, %r2;
69-
; CHECK-NEXT: shl.b32 %r5, %r1, %r4;
70-
; CHECK-NEXT: add.s32 %r6, %r3, %r5;
71-
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
64+
; CHECK-NEXT: shf.r.wrap.b32 %r3, %r1, %r1, %r2;
65+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
7266
; CHECK-NEXT: ret;
7367
%shr = lshr i32 %x, %y
7468
%sub = sub i32 32, %y
@@ -126,18 +120,14 @@ define i32 @test_rotr_var_and(i32 %x, i32 %y) {
126120
define i32 @test_fshl_special_case(i32 %x0, i32 %x1, i32 %y) {
127121
; CHECK-LABEL: test_fshl_special_case(
128122
; CHECK: {
129-
; CHECK-NEXT: .reg .b32 %r<9>;
123+
; CHECK-NEXT: .reg .b32 %r<5>;
130124
; CHECK-EMPTY:
131125
; CHECK-NEXT: // %bb.0:
132126
; CHECK-NEXT: ld.param.u32 %r1, [test_fshl_special_case_param_0];
133-
; CHECK-NEXT: ld.param.u32 %r2, [test_fshl_special_case_param_2];
134-
; CHECK-NEXT: shl.b32 %r3, %r1, %r2;
135-
; CHECK-NEXT: ld.param.u32 %r4, [test_fshl_special_case_param_1];
136-
; CHECK-NEXT: shr.u32 %r5, %r4, 1;
137-
; CHECK-NEXT: xor.b32 %r6, %r2, 31;
138-
; CHECK-NEXT: shr.u32 %r7, %r5, %r6;
139-
; CHECK-NEXT: add.s32 %r8, %r3, %r7;
140-
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
127+
; CHECK-NEXT: ld.param.u32 %r2, [test_fshl_special_case_param_1];
128+
; CHECK-NEXT: ld.param.u32 %r3, [test_fshl_special_case_param_2];
129+
; CHECK-NEXT: shf.l.wrap.b32 %r4, %r2, %r1, %r3;
130+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
141131
; CHECK-NEXT: ret;
142132
%shl = shl i32 %x0, %y
143133
%srli = lshr i32 %x1, 1
@@ -150,18 +140,14 @@ define i32 @test_fshl_special_case(i32 %x0, i32 %x1, i32 %y) {
150140
define i32 @test_fshr_special_case(i32 %x0, i32 %x1, i32 %y) {
151141
; CHECK-LABEL: test_fshr_special_case(
152142
; CHECK: {
153-
; CHECK-NEXT: .reg .b32 %r<9>;
143+
; CHECK-NEXT: .reg .b32 %r<5>;
154144
; CHECK-EMPTY:
155145
; CHECK-NEXT: // %bb.0:
156146
; CHECK-NEXT: ld.param.u32 %r1, [test_fshr_special_case_param_0];
157147
; CHECK-NEXT: ld.param.u32 %r2, [test_fshr_special_case_param_1];
158148
; CHECK-NEXT: ld.param.u32 %r3, [test_fshr_special_case_param_2];
159-
; CHECK-NEXT: shr.u32 %r4, %r2, %r3;
160-
; CHECK-NEXT: shl.b32 %r5, %r1, 1;
161-
; CHECK-NEXT: xor.b32 %r6, %r3, 31;
162-
; CHECK-NEXT: shl.b32 %r7, %r5, %r6;
163-
; CHECK-NEXT: add.s32 %r8, %r4, %r7;
164-
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
149+
; CHECK-NEXT: shf.r.wrap.b32 %r4, %r2, %r1, %r3;
150+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
165151
; CHECK-NEXT: ret;
166152
%shl = lshr i32 %x1, %y
167153
%srli = shl i32 %x0, 1

0 commit comments

Comments
 (0)