-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ValueTracking] Add rotate idiom to haveNoCommonBitsSet special cases #122165
[ValueTracking] Add rotate idiom to haveNoCommonBitsSet special cases #122165
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: Alex MacLean (AlexMaclean) Changes: An occasional idiom for rotation is "(A << B) + (A >> (BitWidth - B))". Currently this is not well handled on targets with native funnel-shift/rotate support. Add a special case to haveNoCommonBitsSet to ensure that the addition is converted to a disjoint or in InstCombine so during instruction selection the idiom can be converted to an efficient rotation implementation. Proof: https://alive2.llvm.org/ce/z/3PBaX5 Full diff: https://github.com/llvm/llvm-project/pull/122165.diff 2 Files Affected:
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 2f6e869ae7b735..bf94491988bcc8 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -229,6 +229,19 @@ static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
return true;
}
+ // Look for: (X << V) op (Y >> (BitWidth - V))
+ // or (X >> V) op (Y << (BitWidth - V))
+ {
+ const Value *V;
+ const APInt *R;
+ if (((match(RHS, m_Shl(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) &&
+ match(LHS, m_LShr(m_Value(), m_Specific(V)))) ||
+ (match(RHS, m_LShr(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) &&
+ match(LHS, m_Shl(m_Value(), m_Specific(V))))) &&
+ R->uge(LHS->getType()->getScalarType()->getIntegerBitWidth()))
+ return true;
+ }
+
return false;
}
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
index bae50736de0c33..524e04106f1850 100644
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -191,7 +191,7 @@ define i32 @rotl_i32(i32 %x, i32 %y) {
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
-; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
; CHECK-NEXT: ret i32 [[R]]
;
%sub = sub i32 32, %y
@@ -208,7 +208,7 @@ define i37 @rotr_i37(i37 %x, i37 %y) {
; CHECK-NEXT: [[SUB:%.*]] = sub i37 37, [[Y:%.*]]
; CHECK-NEXT: [[SHL:%.*]] = shl i37 [[X:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i37 [[X]], [[Y]]
-; CHECK-NEXT: [[R:%.*]] = or i37 [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = or disjoint i37 [[SHR]], [[SHL]]
; CHECK-NEXT: ret i37 [[R]]
;
%sub = sub i37 37, %y
@@ -225,7 +225,7 @@ define i8 @rotr_i8_commute(i8 %x, i8 %y) {
; CHECK-NEXT: [[SUB:%.*]] = sub i8 8, [[Y:%.*]]
; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i8 [[X]], [[Y]]
-; CHECK-NEXT: [[R:%.*]] = or i8 [[SHL]], [[SHR]]
+; CHECK-NEXT: [[R:%.*]] = or disjoint i8 [[SHL]], [[SHR]]
; CHECK-NEXT: ret i8 [[R]]
;
%sub = sub i8 8, %y
@@ -242,7 +242,7 @@ define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> splat (i32 32), [[Y:%.*]]
; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y]]
; CHECK-NEXT: [[SHR:%.*]] = lshr <4 x i32> [[X]], [[SUB]]
-; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[SHL]], [[SHR]]
+; CHECK-NEXT: [[R:%.*]] = or disjoint <4 x i32> [[SHL]], [[SHR]]
; CHECK-NEXT: ret <4 x i32> [[R]]
;
%sub = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
@@ -259,7 +259,7 @@ define <3 x i42> @rotr_v3i42(<3 x i42> %x, <3 x i42> %y) {
; CHECK-NEXT: [[SUB:%.*]] = sub <3 x i42> splat (i42 42), [[Y:%.*]]
; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i42> [[X:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr <3 x i42> [[X]], [[Y]]
-; CHECK-NEXT: [[R:%.*]] = or <3 x i42> [[SHR]], [[SHL]]
+; CHECK-NEXT: [[R:%.*]] = or disjoint <3 x i42> [[SHR]], [[SHL]]
; CHECK-NEXT: ret <3 x i42> [[R]]
;
%sub = sub <3 x i42> <i42 42, i42 42, i42 42>, %y
@@ -838,7 +838,7 @@ define i24 @rotl_select_weird_type(i24 %x, i24 %shamt) {
; CHECK-NEXT: [[SUB:%.*]] = sub i24 24, [[SHAMT]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i24 [[X:%.*]], [[SUB]]
; CHECK-NEXT: [[SHL:%.*]] = shl i24 [[X]], [[SHAMT]]
-; CHECK-NEXT: [[OR:%.*]] = or i24 [[SHL]], [[SHR]]
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i24 [[SHL]], [[SHR]]
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i24 [[X]], i24 [[OR]]
; CHECK-NEXT: ret i24 [[R]]
;
@@ -981,3 +981,33 @@ define i16 @check_rotate_masked_16bit(i8 %shamt, i32 %cond) {
%trunc = trunc i32 %or to i16
ret i16 %trunc
}
+
+define i32 @rotl_i32_add(i32 %x, i32 %y) {
+; CHECK-LABEL: @rotl_i32_add(
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
+; CHECK-NEXT: [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %sub = sub i32 32, %y
+ %shl = shl i32 %x, %y
+ %shr = lshr i32 %x, %sub
+ %r = add i32 %shr, %shl
+ ret i32 %r
+}
+
+define i32 @rotr_i32_add(i32 %x, i32 %y) {
+; CHECK-LABEL: @rotr_i32_add(
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
+; CHECK-NEXT: [[SHL:%.*]] = lshr i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[SHR:%.*]] = shl i32 [[X]], [[SUB]]
+; CHECK-NEXT: [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %sub = sub i32 32, %y
+ %shl = lshr i32 %x, %y
+ %shr = shl i32 %x, %sub
+ %r = add i32 %shr, %shl
+ ret i32 %r
+}
|
llvm/lib/Analysis/ValueTracking.cpp
Outdated
match(LHS, m_LShr(m_Value(), m_Specific(V)))) || | ||
(match(RHS, m_LShr(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) && | ||
match(LHS, m_Shl(m_Value(), m_Specific(V))))) && | ||
R->uge(LHS->getType()->getScalarType()->getIntegerBitWidth())) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you are missing a check that the `shl` and `lshr` are operating on the same `X`, i.e. `m_Value(X)` in the `m_Shl` matchers and `m_Specific(X)` in the `m_LShr` matchers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While that is the case I find most interesting, it is not required for correctness. Even if they are different values they will have no common bits set (https://alive2.llvm.org/ce/z/3PBaX5). For different values a target may have funnel shift support allowing the resulting `or` to be further folded during ISel.
llvm/lib/Analysis/ValueTracking.cpp
Outdated
match(LHS, m_LShr(m_Value(), m_Specific(V)))) || | ||
(match(RHS, m_LShr(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) && | ||
match(LHS, m_Shl(m_Value(), m_Specific(V))))) && | ||
R->uge(LHS->getType()->getScalarType()->getIntegerBitWidth())) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggested change:
- R->uge(LHS->getType()->getScalarType()->getIntegerBitWidth()))
+ R->uge(LHS->getType()->getScalarSizeInBits()))
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed
%shr = shl i32 %x, %sub | ||
%r = add i32 %shr, %shl | ||
ret i32 %r | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please also add tests for the additional cases you handle, i.e. the shift LHS being different values, and the constant being > bitwidth?
We should also have a negative test where the constant is < bitwidth.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good, I've added all these cases.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
…llvm#122165) An occasional idiom for rotation is "(A << B) + (A >> (BitWidth - B))". Currently this is not well handled on targets with native funnel-shift/rotate support. Add a special case to haveNoCommonBitsSet to ensure that the addition is converted to a disjoint or in InstCombine so during instruction selection the idiom can be converted to an efficient rotation implementation. Proof: https://alive2.llvm.org/ce/z/WdCZsN
@AlexMaclean, do you think that this should also be done at the SelectionDAG level (using the helpful framework introduced with 5874874c2)?
Good point, I've opted to generalize the rotate matching in DAGCombiner to ensure all valid rotate idioms can be folded when using an 'add' instead of an 'or'. See #125612
An occasional idiom for rotation is "(A << B) + (A >> (BitWidth - B))". Currently this is not well handled on targets with native funnel-shift/rotate support. Add a special case to haveNoCommonBitsSet to ensure that the addition is converted to a disjoint or in InstCombine so during instruction selection the idiom can be converted to an efficient rotation implementation.
Proof: https://alive2.llvm.org/ce/z/WdCZsN