[ValueTracking] Add rotate idiom to haveNoCommonBitsSet special cases #122165

AlexMaclean · 2025-01-08T20:15:41Z

An occasional idiom for rotation is "(A << B) + (A >> (BitWidth - B))". Currently this is not well handled on targets with native funnel-shift/rotate support. Add a special case to haveNoCommonBitsSet so that InstCombine converts the addition to an `or disjoint`; instruction selection can then turn the idiom into an efficient rotation implementation.

Proof: https://alive2.llvm.org/ce/z/WdCZsN

llvmbot · 2025-01-08T20:16:16Z

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-llvm-analysis

Author: Alex MacLean (AlexMaclean)

Changes

An occasional idiom for rotation is "(A << B) + (A >> (BitWidth - B))". Currently this is not well handled on targets with native funnel-shift/rotate support. Add a special case to haveNoCommonBitsSet so that InstCombine converts the addition to an `or disjoint`; instruction selection can then turn the idiom into an efficient rotation implementation.

Proof: https://alive2.llvm.org/ce/z/3PBaX5

Full diff: https://github.com/llvm/llvm-project/pull/122165.diff

2 Files Affected:

(modified) llvm/lib/Analysis/ValueTracking.cpp (+13)
(modified) llvm/test/Transforms/InstCombine/rotate.ll (+36-6)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 2f6e869ae7b735..bf94491988bcc8 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -229,6 +229,19 @@ static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
       return true;
   }
 
+  // Look for: (X << V) op (Y >> (BitWidth - V))
+  // or        (X >> V) op (Y << (BitWidth - V))
+  {
+    const Value *V;
+    const APInt *R;
+    if (((match(RHS, m_Shl(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) &&
+          match(LHS, m_LShr(m_Value(), m_Specific(V)))) ||
+         (match(RHS, m_LShr(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) &&
+          match(LHS, m_Shl(m_Value(), m_Specific(V))))) &&
+        R->uge(LHS->getType()->getScalarType()->getIntegerBitWidth()))
+      return true;
+  }
+
   return false;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
index bae50736de0c33..524e04106f1850 100644
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -191,7 +191,7 @@ define i32 @rotl_i32(i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
 ; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
-; CHECK-NEXT:    [[R:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %sub = sub i32 32, %y
@@ -208,7 +208,7 @@ define i37 @rotr_i37(i37 %x, i37 %y) {
 ; CHECK-NEXT:    [[SUB:%.*]] = sub i37 37, [[Y:%.*]]
 ; CHECK-NEXT:    [[SHL:%.*]] = shl i37 [[X:%.*]], [[SUB]]
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i37 [[X]], [[Y]]
-; CHECK-NEXT:    [[R:%.*]] = or i37 [[SHR]], [[SHL]]
+; CHECK-NEXT:    [[R:%.*]] = or disjoint i37 [[SHR]], [[SHL]]
 ; CHECK-NEXT:    ret i37 [[R]]
 ;
   %sub = sub i37 37, %y
@@ -225,7 +225,7 @@ define i8 @rotr_i8_commute(i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[SUB:%.*]] = sub i8 8, [[Y:%.*]]
 ; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[X:%.*]], [[SUB]]
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 [[X]], [[Y]]
-; CHECK-NEXT:    [[R:%.*]] = or i8 [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[R:%.*]] = or disjoint i8 [[SHL]], [[SHR]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sub = sub i8 8, %y
@@ -242,7 +242,7 @@ define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i32> splat (i32 32), [[Y:%.*]]
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y]]
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <4 x i32> [[X]], [[SUB]]
-; CHECK-NEXT:    [[R:%.*]] = or <4 x i32> [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[R:%.*]] = or disjoint <4 x i32> [[SHL]], [[SHR]]
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %sub = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
@@ -259,7 +259,7 @@ define <3 x i42> @rotr_v3i42(<3 x i42> %x, <3 x i42> %y) {
 ; CHECK-NEXT:    [[SUB:%.*]] = sub <3 x i42> splat (i42 42), [[Y:%.*]]
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i42> [[X:%.*]], [[SUB]]
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <3 x i42> [[X]], [[Y]]
-; CHECK-NEXT:    [[R:%.*]] = or <3 x i42> [[SHR]], [[SHL]]
+; CHECK-NEXT:    [[R:%.*]] = or disjoint <3 x i42> [[SHR]], [[SHL]]
 ; CHECK-NEXT:    ret <3 x i42> [[R]]
 ;
   %sub = sub <3 x i42> <i42 42, i42 42, i42 42>, %y
@@ -838,7 +838,7 @@ define i24 @rotl_select_weird_type(i24 %x, i24 %shamt) {
 ; CHECK-NEXT:    [[SUB:%.*]] = sub i24 24, [[SHAMT]]
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr i24 [[X:%.*]], [[SUB]]
 ; CHECK-NEXT:    [[SHL:%.*]] = shl i24 [[X]], [[SHAMT]]
-; CHECK-NEXT:    [[OR:%.*]] = or i24 [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[OR:%.*]] = or disjoint i24 [[SHL]], [[SHR]]
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP]], i24 [[X]], i24 [[OR]]
 ; CHECK-NEXT:    ret i24 [[R]]
 ;
@@ -981,3 +981,33 @@ define i16 @check_rotate_masked_16bit(i8 %shamt, i32 %cond) {
   %trunc = trunc i32 %or to i16
   ret i16 %trunc
 }
+
+define i32 @rotl_i32_add(i32 %x, i32 %y) {
+; CHECK-LABEL: @rotl_i32_add(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
+; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %sub = sub i32 32, %y
+  %shl = shl i32 %x, %y
+  %shr = lshr i32 %x, %sub
+  %r = add i32 %shr, %shl
+  ret i32 %r
+}
+
+define i32 @rotr_i32_add(i32 %x, i32 %y) {
+; CHECK-LABEL: @rotr_i32_add(
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
+; CHECK-NEXT:    [[SHL:%.*]] = lshr i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[SHR:%.*]] = shl i32 [[X]], [[SUB]]
+; CHECK-NEXT:    [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %sub = sub i32 32, %y
+  %shl = lshr i32 %x, %y
+  %shr = shl i32 %x, %sub
+  %r = add i32 %shr, %shl
+  ret i32 %r
+}

goldsteinn · 2025-01-08T22:21:40Z

llvm/lib/Analysis/ValueTracking.cpp

+          match(LHS, m_LShr(m_Value(), m_Specific(V)))) ||
+         (match(RHS, m_LShr(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) &&
+          match(LHS, m_Shl(m_Value(), m_Specific(V))))) &&
+        R->uge(LHS->getType()->getScalarType()->getIntegerBitWidth()))


I think you are missing a check that the shl and lshr are operating on the same X, i.e. m_Value(X) in the m_Shl matchers and m_Specific(X) in the m_LShr matchers.

While that is the case I find most interesting, it is not required for correctness. Even if the shifted values are different, the two shift results will have no common bits set (https://alive2.llvm.org/ce/z/3PBaX5). For different values, a target may have funnel-shift support, allowing the resulting `or` to be further folded during ISel.

dtcxzyw · 2025-01-09T08:35:34Z

llvm/lib/Analysis/ValueTracking.cpp

+          match(LHS, m_LShr(m_Value(), m_Specific(V)))) ||
+         (match(RHS, m_LShr(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) &&
+          match(LHS, m_Shl(m_Value(), m_Specific(V))))) &&
+        R->uge(LHS->getType()->getScalarType()->getIntegerBitWidth()))


Suggested change

- R->uge(LHS->getType()->getScalarType()->getIntegerBitWidth()))

+ R->uge(LHS->getType()->getScalarSizeInBits()))

nikic · 2025-01-09T09:41:33Z

llvm/test/Transforms/InstCombine/rotate.ll

+  %shr = shl i32 %x, %sub
+  %r = add i32 %shr, %shl
+  ret i32 %r
+}


Can you please also add tests for the additional cases you handle, i.e. the two shifts operating on different values, and the constant being greater than the bit width?

We should also have a negative test where the constant is < bitwidth.

Sounds good, I've added all these cases.

dtcxzyw

LGTM.

…llvm#122165) An occasional idiom for rotation is "(A << B) + (A >> (BitWidth - B))". Currently this is not well handled on targets with native funnel-shift/rotate support. Add a special case to haveNoCommonBitsSet to ensure that the addition is converted to a disjoint or in InstCombine so during instruction selection the idiom can be converted to an efficient rotation implementation. Proof: https://alive2.llvm.org/ce/z/WdCZsN

Bhramar-vatsa · 2025-02-01T03:18:24Z

@AlexMaclean, do you think that this should also be done at the SelectionDAG level (using helpful framework introduced with 5874874c2)?

AlexMaclean · 2025-02-04T01:13:41Z

@AlexMaclean, do you think that this should also be done at the SelectionDAG level (using helpful framework introduced with 5874874c2)?

Good point, I've opted to generalize the rotate matching in DAGCombiner to ensure all valid rotate idioms can be folded when using an 'add' instead of an 'or'. See #125612

AlexMaclean added 2 commits January 8, 2025 20:06

pre-commit tests

6d98d2a

[ValueTracking] Add rotate idiom to haveNoCommonBitsSet special cases

f7e238b

AlexMaclean requested a review from dtcxzyw January 8, 2025 20:15

AlexMaclean self-assigned this Jan 8, 2025

AlexMaclean requested a review from nikic as a code owner January 8, 2025 20:15

llvmbot added llvm:instcombine llvm:analysis llvm:transforms labels Jan 8, 2025

goldsteinn reviewed Jan 8, 2025

View reviewed changes

nikic mentioned this pull request Jan 9, 2025

Task submission dtcxzyw/llvm-opt-benchmark#1312

Open

dtcxzyw mentioned this pull request Jan 9, 2025

pre-commit: PR122165 dtcxzyw/llvm-opt-benchmark#1932

Closed

dtcxzyw reviewed Jan 9, 2025

View reviewed changes

nikic reviewed Jan 9, 2025

View reviewed changes

dtcxzyw mentioned this pull request Jan 9, 2025

Fuzz PR122165 dtcxzyw/llvm-mutation-based-fuzz-service#20

Closed

address comments

d6722b1

dtcxzyw approved these changes Jan 10, 2025

View reviewed changes

nikic approved these changes Jan 10, 2025

View reviewed changes

AlexMaclean merged commit 59ced72 into llvm:main Jan 10, 2025
8 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[ValueTracking] Add rotate idiom to haveNoCommonBitsSet special cases #122165

[ValueTracking] Add rotate idiom to haveNoCommonBitsSet special cases #122165

AlexMaclean commented Jan 8, 2025 •

edited by dtcxzyw

Loading

llvmbot commented Jan 8, 2025 •

edited

Loading

goldsteinn Jan 8, 2025

AlexMaclean Jan 8, 2025

dtcxzyw Jan 9, 2025

AlexMaclean Jan 9, 2025

nikic Jan 9, 2025

AlexMaclean Jan 9, 2025

dtcxzyw left a comment

Bhramar-vatsa commented Feb 1, 2025 •

edited

Loading

AlexMaclean commented Feb 4, 2025

	R->uge(LHS->getType()->getScalarType()->getIntegerBitWidth()))
	R->uge(LHS->getType()->getScalarSizeInBits()))

[ValueTracking] Add rotate idiom to haveNoCommonBitsSet special cases #122165

[ValueTracking] Add rotate idiom to haveNoCommonBitsSet special cases #122165

Conversation

AlexMaclean commented Jan 8, 2025 • edited by dtcxzyw Loading

llvmbot commented Jan 8, 2025 • edited Loading

goldsteinn Jan 8, 2025

Choose a reason for hiding this comment

AlexMaclean Jan 8, 2025

Choose a reason for hiding this comment

dtcxzyw Jan 9, 2025

Choose a reason for hiding this comment

AlexMaclean Jan 9, 2025

Choose a reason for hiding this comment

nikic Jan 9, 2025

Choose a reason for hiding this comment

AlexMaclean Jan 9, 2025

Choose a reason for hiding this comment

dtcxzyw left a comment

Choose a reason for hiding this comment

Bhramar-vatsa commented Feb 1, 2025 • edited Loading

AlexMaclean commented Feb 4, 2025

AlexMaclean commented Jan 8, 2025 •

edited by dtcxzyw

Loading

llvmbot commented Jan 8, 2025 •

edited

Loading

Bhramar-vatsa commented Feb 1, 2025 •

edited

Loading