[AggressiveInstCombine] Generalize foldGuardedRotateToFunnelShift to generic funnel...

author Simon Pilgrim <llvm-dev@redking.me.uk>

Tue, 3 Nov 2020 10:49:33 +0000 (10:49 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Tue, 3 Nov 2020 10:49:49 +0000 (10:49 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 3 Nov 2020 10:49:33 +0000 (10:49 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Tue, 3 Nov 2020 10:49:49 +0000 (10:49 +0000)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

index e7fb699..18dff9d 100644 (file)
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -39,6 +39,8 @@ using namespace PatternMatch;
  STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
  STATISTIC(NumGuardedRotates,
            "Number of guarded rotates transformed into funnel shifts");
+STATISTIC(NumGuardedFunnelShifts,
+          "Number of guarded funnel shifts transformed into funnel shifts");
  STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
  
  namespace {
@@ -67,17 +69,17 @@ public:
  };
  } // namespace
  
-/// Match a pattern for a bitwise rotate operation that partially guards
-/// against undefined behavior by branching around the rotation when the shift
-/// amount is 0.
-static bool foldGuardedRotateToFunnelShift(Instruction &I) {
+/// Match a pattern for a bitwise funnel/rotate operation that partially guards
+/// against undefined behavior by branching around the funnel-shift/rotation
+/// when the shift amount is 0.
+static bool foldGuardedFunnelShift(Instruction &I) {
    if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
      return false;
  
    // As with the one-use checks below, this is not strictly necessary, but we
    // are being cautious to avoid potential perf regressions on targets that
-  // do not actually have a rotate instruction (where the funnel shift would be
-  // expanded back into math/shift/logic ops).
+  // do not actually have a funnel/rotate instruction (where the funnel shift
+  // would be expanded back into math/shift/logic ops).
    if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
      return false;
  
@@ -111,27 +113,33 @@ static bool foldGuardedRotateToFunnelShift(Instruction &I) {
      return Intrinsic::not_intrinsic;
    };
  
-  // One phi operand must be a rotate operation, and the other phi operand must
-  // be the source value of that rotate operation:
+  // One phi operand must be a funnel/rotate operation, and the other phi
+  // operand must be the source value of that funnel/rotate operation:
    // phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
+  // phi [ fshl(ShlVal0, ShlVal1, ShAmt), FunnelBB ], [ ShlVal0, GuardBB ]
+  // phi [ fshr(ShlVal0, ShlVal1, ShAmt), FunnelBB ], [ ShlVal1, GuardBB ]
    PHINode &Phi = cast<PHINode>(I);
    unsigned FunnelOp = 0, GuardOp = 1;
    Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
    Value *ShVal0, *ShVal1, *ShAmt;
    Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
-  if (IID == Intrinsic::not_intrinsic || ShVal0 != ShVal1 || ShVal0 != P1) {
+  if (IID == Intrinsic::not_intrinsic ||
+      (IID == Intrinsic::fshl && ShVal0 != P1) ||
+      (IID == Intrinsic::fshr && ShVal1 != P1)) {
      IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
-    if (IID == Intrinsic::not_intrinsic || ShVal0 != ShVal1 || ShVal0 != P0)
+    if (IID == Intrinsic::not_intrinsic ||
+        (IID == Intrinsic::fshl && ShVal0 != P0) ||
+        (IID == Intrinsic::fshr && ShVal1 != P0))
        return false;
      assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
             "Pattern must match funnel shift left or right");
      std::swap(FunnelOp, GuardOp);
    }
-  assert(ShVal0 == ShVal1 && "Rotation funnel shift pattern expected");
  
    // The incoming block with our source operand must be the "guard" block.
-  // That must contain a cmp+branch to avoid the rotate when the shift amount
-  // is equal to 0. The other incoming block is the block with the rotate.
+  // That must contain a cmp+branch to avoid the funnel/rotate when the shift
+  // amount is equal to 0. The other incoming block is the block with the
+  // funnel/rotate.
    BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp);
    BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp);
    Instruction *TermI = GuardBB->getTerminator();
@@ -150,18 +158,21 @@ static bool foldGuardedRotateToFunnelShift(Instruction &I) {
    //   br i1 %cmp, label %PhiBB, label %FunnelBB
    // FunnelBB:
    //   %sub = sub i32 32, %ShAmt
-  //   %shr = lshr i32 %RotSrc, %sub
-  //   %shl = shl i32 %RotSrc, %ShAmt
-  //   %rot = or i32 %shr, %shl
+  //   %shr = lshr i32 %ShVal1, %sub
+  //   %shl = shl i32 %ShVal0, %ShAmt
+  //   %fsh = or i32 %shr, %shl
    //   br label %PhiBB
    // PhiBB:
-  //   %cond = phi i32 [ %RotSrc, %FunnelBB ], [ %RotSrc, %GuardBB ]
+  //   %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
    // -->
-  // llvm.fshl.i32(i32 %RotSrc, i32 %RotSrc, i32 %ShAmt)
+  // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
    IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
    Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
    Phi.replaceAllUsesWith(Builder.CreateCall(F, {ShVal0, ShVal1, ShAmt}));
-  ++NumGuardedRotates;
+  if (ShVal0 == ShVal1)
+    ++NumGuardedRotates;
+  else
+    ++NumGuardedFunnelShifts;
    return true;
  }
  
@@ -350,7 +361,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
      // iteratively in this loop rather than waiting until the end.
      for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
        MadeChange |= foldAnyOrAllBitsSet(I);
-      MadeChange |= foldGuardedRotateToFunnelShift(I);
+      MadeChange |= foldGuardedFunnelShift(I);
        MadeChange |= tryToRecognizePopCount(I); 
      }
    }
diff --git a/llvm/test/Transforms/AggressiveInstCombine/funnel.ll b/llvm/test/Transforms/AggressiveInstCombine/funnel.ll

index 545b8e1..d48798d 100644 (file)
--- a/llvm/test/Transforms/AggressiveInstCombine/funnel.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/funnel.ll
@@ -7,14 +7,10 @@ define i32 @fshl(i32 %a, i32 %b, i32 %c) {
  ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
  ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
  ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
  ; CHECK-NEXT:    br label [[END]]
  ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT:    ret i32 [[TMP0]]
  ;
  entry:
    %cmp = icmp eq i32 %c, 0
@@ -38,14 +34,10 @@ define i32 @fshl_commute_phi(i32 %a, i32 %b, i32 %c) {
  ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
  ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
  ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
  ; CHECK-NEXT:    br label [[END]]
  ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT:    ret i32 [[TMP0]]
  ;
  entry:
    %cmp = icmp eq i32 %c, 0
@@ -69,14 +61,10 @@ define i32 @fshl_commute_or(i32 %a, i32 %b, i32 %c) {
  ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
  ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
  ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
  ; CHECK-NEXT:    br label [[END]]
  ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT:    ret i32 [[TMP0]]
  ;
  entry:
    %cmp = icmp eq i32 %c, 0
@@ -102,15 +90,11 @@ define i32 @fshl_insert_valid_location(i32 %a, i32 %b, i32 %c) {
  ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
  ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
  ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
  ; CHECK-NEXT:    br label [[END]]
  ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[OTHER:%.*]] = phi i32 [ 1, [[FSHBB]] ], [ 2, [[ENTRY]] ]
-; CHECK-NEXT:    [[RES:%.*]] = or i32 [[COND]], [[OTHER]]
+; CHECK-NEXT:    [[OTHER:%.*]] = phi i32 [ 1, [[FSHBB]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT:    [[RES:%.*]] = or i32 [[TMP0]], [[OTHER]]
  ; CHECK-NEXT:    ret i32 [[RES]]
  ;
  entry:
@@ -137,14 +121,10 @@ define i32 @fshr(i32 %a, i32 %b, i32 %c) {
  ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
  ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
  ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
  ; CHECK-NEXT:    br label [[END]]
  ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[B]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT:    ret i32 [[TMP0]]
  ;
  entry:
    %cmp = icmp eq i32 %c, 0
@@ -168,14 +148,10 @@ define i32 @fshr_commute_phi(i32 %a, i32 %b, i32 %c) {
  ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
  ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
  ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
  ; CHECK-NEXT:    br label [[END]]
  ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT:    ret i32 [[TMP0]]
  ;
  entry:
    %cmp = icmp eq i32 %c, 0
@@ -199,14 +175,10 @@ define i32 @fshr_commute_or(i32 %a, i32 %b, i32 %c) {
  ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
  ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
  ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
  ; CHECK-NEXT:    br label [[END]]
  ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT:    ret i32 [[TMP0]]
  ;
  entry:
    %cmp = icmp eq i32 %c, 0
@@ -396,7 +368,7 @@ end:
    ret i32 %cond
  }
  
-; Negative test - wrong shift.
+; Negative test - wrong shift for rotate (but can be folded to a generic funnel shift).
  
  define i32 @not_fshr_5(i32 %a, i32 %b, i32 %c) {
  ; CHECK-LABEL: @not_fshr_5(
@@ -404,14 +376,10 @@ define i32 @not_fshr_5(i32 %a, i32 %b, i32 %c) {
  ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
  ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
  ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[C]], [[SUB]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
  ; CHECK-NEXT:    br label [[END]]
  ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[C]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT:    ret i32 [[TMP0]]
  ;
  entry:
    %cmp = icmp eq i32 %c, 0
diff --git a/llvm/test/Transforms/AggressiveInstCombine/rotate.ll b/llvm/test/Transforms/AggressiveInstCombine/rotate.ll

index e47fa9b..9e904e0 100644 (file)
--- a/llvm/test/Transforms/AggressiveInstCombine/rotate.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/rotate.ll
@@ -370,7 +370,7 @@ end:
    ret i32 %cond
  }
  
-; Negative test - wrong shift.
+; Negative test - wrong shift for rotate (but can be folded to a generic funnel shift).
  
  define i32 @not_rotr_5(i32 %a, i32 %b) {
  ; CHECK-LABEL: @not_rotr_5(
@@ -378,14 +378,10 @@ define i32 @not_rotr_5(i32 %a, i32 %b) {
  ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
  ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
  ; CHECK:       rotbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[B]], [[SUB]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[A:%.*]], [[B]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
  ; CHECK-NEXT:    br label [[END]]
  ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[B]], i32 [[A:%.*]], i32 [[B]])
+; CHECK-NEXT:    ret i32 [[TMP0]]
  ;
  entry:
    %cmp = icmp eq i32 %b, 0
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 3 Nov 2020 10:49:33 +0000 (10:49 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Tue, 3 Nov 2020 10:49:49 +0000 (10:49 +0000)
llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp		patch \| blob \| history
llvm/test/Transforms/AggressiveInstCombine/funnel.ll		patch \| blob \| history
llvm/test/Transforms/AggressiveInstCombine/rotate.ll		patch \| blob \| history