From 36cf1e7d0e0e8aaea1f33bfa12d7a285756b75ba Mon Sep 17 00:00:00 2001
From: =?utf8?q?Martin=20Storsj=C3=B6?=
Date: Wed, 4 Nov 2020 08:27:22 +0200
Subject: [PATCH] Revert "[AggressiveInstCombine] Generalize
 foldGuardedRotateToFunnelShift to generic funnel shifts"

This reverts commit 59b22e495c15d2830f41381a327f5d6bf49ff416.

That commit broke building for ARM and AArch64, reproducible like this:

$ cat apedec-reduced.c
a;
b(e) {
  int c;
  unsigned d = f();
  c = d >> 32 - e;
  return c;
}
g() {
  int h = i();
  if (a)
    h = h << a | b(a);
  return h;
}
$ clang -target aarch64-linux-gnu -w -c -O3 apedec-reduced.c
clang: ../lib/Transforms/InstCombine/InstructionCombining.cpp:3656:
bool llvm::InstCombinerImpl::run(): Assertion `DT.dominates(BB,
UserParent) && "Dominance relation broken?"' failed.

Same thing for e.g. an armv7-linux-gnueabihf target.
---
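Note for reviewers (this sits after the "---" separator, so it is dropped
by git am): a minimal C sketch of the idioms involved; the helper names
are illustrative, not from the patch. The rotate form is what the restored
foldGuardedRotateToFunnelShift still handles; the two-operand form is what
the reverted generalization additionally folded, and it stays un-folded
after this revert (see the restored phis in funnel.ll below).

/* Rotate-left guarded against the UB of "x >> (32 - n)" when n == 0.
 * This shape is still turned into llvm.fshl.i32(x, x, n). */
unsigned guarded_rotl(unsigned x, unsigned n) {
  if (n == 0)
    return x;
  return (x << n) | (x >> (32 - n));
}

/* Two-operand (generic) funnel shift. Only the reverted change folded
 * this guarded form into llvm.fshl.i32(a, b, n); with the revert it is
 * left alone. */
unsigned guarded_fshl(unsigned a, unsigned b, unsigned n) {
  if (n == 0)
    return a;
  return (a << n) | (b >> (32 - n));
}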
 .../AggressiveInstCombine.cpp                      | 51 +++++++---------
 .../Transforms/AggressiveInstCombine/funnel.ll     | 68 ++++++++++++++------
 .../Transforms/AggressiveInstCombine/rotate.ll     | 10 +++-
 3 files changed, 77 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 18dff9d..e7fb699 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -39,8 +39,6 @@ using namespace PatternMatch;
 STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
 STATISTIC(NumGuardedRotates,
           "Number of guarded rotates transformed into funnel shifts");
-STATISTIC(NumGuardedFunnelShifts,
-          "Number of guarded funnel shifts transformed into funnel shifts");
 STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
 
 namespace {
@@ -69,17 +67,17 @@ public:
 };
 } // namespace
 
-/// Match a pattern for a bitwise funnel/rotate operation that partially guards
-/// against undefined behavior by branching around the funnel-shift/rotation
-/// when the shift amount is 0.
-static bool foldGuardedFunnelShift(Instruction &I) {
+/// Match a pattern for a bitwise rotate operation that partially guards
+/// against undefined behavior by branching around the rotation when the shift
+/// amount is 0.
+static bool foldGuardedRotateToFunnelShift(Instruction &I) {
   if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
     return false;
 
   // As with the one-use checks below, this is not strictly necessary, but we
   // are being cautious to avoid potential perf regressions on targets that
-  // do not actually have a funnel/rotate instruction (where the funnel shift
-  // would be expanded back into math/shift/logic ops).
+  // do not actually have a rotate instruction (where the funnel shift would be
+  // expanded back into math/shift/logic ops).
   if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
     return false;
 
@@ -113,33 +111,27 @@ static bool foldGuardedFunnelShift(Instruction &I) {
     return Intrinsic::not_intrinsic;
   };
 
-  // One phi operand must be a funnel/rotate operation, and the other phi
-  // operand must be the source value of that funnel/rotate operation:
+  // One phi operand must be a rotate operation, and the other phi operand must
+  // be the source value of that rotate operation:
   // phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
-  // phi [ fshl(ShlVal0, ShlVal1, ShAmt), FunnelBB ], [ ShlVal0, GuardBB ]
-  // phi [ fshr(ShlVal0, ShlVal1, ShAmt), FunnelBB ], [ ShlVal1, GuardBB ]
   PHINode &Phi = cast<PHINode>(I);
   unsigned FunnelOp = 0, GuardOp = 1;
   Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
   Value *ShVal0, *ShVal1, *ShAmt;
   Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
-  if (IID == Intrinsic::not_intrinsic ||
-      (IID == Intrinsic::fshl && ShVal0 != P1) ||
-      (IID == Intrinsic::fshr && ShVal1 != P1)) {
+  if (IID == Intrinsic::not_intrinsic || ShVal0 != ShVal1 || ShVal0 != P1) {
     IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
-    if (IID == Intrinsic::not_intrinsic ||
-        (IID == Intrinsic::fshl && ShVal0 != P0) ||
-        (IID == Intrinsic::fshr && ShVal1 != P0))
+    if (IID == Intrinsic::not_intrinsic || ShVal0 != ShVal1 || ShVal0 != P0)
       return false;
     assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
            "Pattern must match funnel shift left or right");
     std::swap(FunnelOp, GuardOp);
   }
+  assert(ShVal0 == ShVal1 && "Rotation funnel shift pattern expected");
 
   // The incoming block with our source operand must be the "guard" block.
-  // That must contain a cmp+branch to avoid the funnel/rotate when the shift
-  // amount is equal to 0. The other incoming block is the block with the
-  // funnel/rotate.
+  // That must contain a cmp+branch to avoid the rotate when the shift amount
+  // is equal to 0. The other incoming block is the block with the rotate.
   BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp);
   BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp);
   Instruction *TermI = GuardBB->getTerminator();
@@ -158,21 +150,18 @@ static bool foldGuardedFunnelShift(Instruction &I) {
   //   br i1 %cmp, label %PhiBB, label %FunnelBB
   // FunnelBB:
   //   %sub = sub i32 32, %ShAmt
-  //   %shr = lshr i32 %ShVal1, %sub
-  //   %shl = shl i32 %ShVal0, %ShAmt
-  //   %fsh = or i32 %shr, %shl
+  //   %shr = lshr i32 %RotSrc, %sub
+  //   %shl = shl i32 %RotSrc, %ShAmt
+  //   %rot = or i32 %shr, %shl
   //   br label %PhiBB
   // PhiBB:
-  //   %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
+  //   %cond = phi i32 [ %RotSrc, %FunnelBB ], [ %RotSrc, %GuardBB ]
   // -->
-  // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
+  // llvm.fshl.i32(i32 %RotSrc, i32 %RotSrc, i32 %ShAmt)
   IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
   Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
   Phi.replaceAllUsesWith(Builder.CreateCall(F, {ShVal0, ShVal1, ShAmt}));
-  if (ShVal0 == ShVal1)
-    ++NumGuardedRotates;
-  else
-    ++NumGuardedFunnelShifts;
+  ++NumGuardedRotates;
   return true;
 }
 
@@ -361,7 +350,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
     // iteratively in this loop rather than waiting until the end.
     for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
       MadeChange |= foldAnyOrAllBitsSet(I);
-      MadeChange |= foldGuardedFunnelShift(I);
+      MadeChange |= foldGuardedRotateToFunnelShift(I);
       MadeChange |= tryToRecognizePopCount(I);
     }
   }
diff --git a/llvm/test/Transforms/AggressiveInstCombine/funnel.ll b/llvm/test/Transforms/AggressiveInstCombine/funnel.ll
index d48798d..545b8e1 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/funnel.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/funnel.ll
@@ -7,10 +7,14 @@ define i32 @fshl(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
-; CHECK-NEXT:    ret i32 [[TMP0]]
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[A]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
 ;
 entry:
   %cmp = icmp eq i32 %c, 0
@@ -34,10 +38,14 @@ define i32 @fshl_commute_phi(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
-; CHECK-NEXT:    ret i32 [[TMP0]]
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
 ;
 entry:
   %cmp = icmp eq i32 %c, 0
@@ -61,10 +69,14 @@ define i32 @fshl_commute_or(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
-; CHECK-NEXT:    ret i32 [[TMP0]]
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
 ;
 entry:
   %cmp = icmp eq i32 %c, 0
@@ -90,11 +102,15 @@ define i32 @fshl_insert_valid_location(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[OTHER:%.*]] = phi i32 [ 1, [[FSHBB]] ], [ 2, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
-; CHECK-NEXT:    [[RES:%.*]] = or i32 [[TMP0]], [[OTHER]]
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[A]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[OTHER:%.*]] = phi i32 [ 1, [[FSHBB]] ], [ 2, [[ENTRY]] ]
+; CHECK-NEXT:    [[RES:%.*]] = or i32 [[COND]], [[OTHER]]
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
 entry:
@@ -121,10 +137,14 @@ define i32 @fshr(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
-; CHECK-NEXT:    ret i32 [[TMP0]]
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[B]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
 ;
 entry:
   %cmp = icmp eq i32 %c, 0
@@ -148,10 +168,14 @@ define i32 @fshr_commute_phi(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
-; CHECK-NEXT:    ret i32 [[TMP0]]
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
 ;
 entry:
   %cmp = icmp eq i32 %c, 0
@@ -175,10 +199,14 @@ define i32 @fshr_commute_or(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
-; CHECK-NEXT:    ret i32 [[TMP0]]
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
 ;
 entry:
   %cmp = icmp eq i32 %c, 0
@@ -368,7 +396,7 @@ end:
   ret i32 %cond
 }
 
-; Negative test - wrong shift for rotate (but can be folded to a generic funnel shift).
+; Negative test - wrong shift.
 
 define i32 @not_fshr_5(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: @not_fshr_5(
@@ -376,10 +404,14 @@ define i32 @not_fshr_5(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[C]], [[SUB]]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[C]], i32 [[B:%.*]], i32 [[C]])
-; CHECK-NEXT:    ret i32 [[TMP0]]
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
 ;
 entry:
   %cmp = icmp eq i32 %c, 0
diff --git a/llvm/test/Transforms/AggressiveInstCombine/rotate.ll b/llvm/test/Transforms/AggressiveInstCombine/rotate.ll
index 9e904e0..e47fa9b 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/rotate.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/rotate.ll
@@ -370,7 +370,7 @@ end:
   ret i32 %cond
 }
 
-; Negative test - wrong shift for rotate (but can be folded to a generic funnel shift).
+; Negative test - wrong shift.
 
 define i32 @not_rotr_5(i32 %a, i32 %b) {
 ; CHECK-LABEL: @not_rotr_5(
@@ -378,10 +378,14 @@ define i32 @not_rotr_5(i32 %a, i32 %b) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
 ; CHECK:       rotbb:
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[B]], [[SUB]]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[A:%.*]], [[B]]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[B]], i32 [[A:%.*]], i32 [[B]])
-; CHECK-NEXT:    ret i32 [[TMP0]]
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
 ;
 entry:
   %cmp = icmp eq i32 %b, 0
-- 
2.7.4
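Appendix (a sketch added after the signature trailer, not part of the patch):
a self-contained C model of the llvm.fshl.i32/llvm.fshr.i32 semantics that
the restored CHECK lines rely on. The fshl32/fshr32 names are hypothetical;
the point is that the guarded, branchy form in the tests and the intrinsic
agree for every shift amount, since the intrinsic takes the amount modulo
the bit width.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Reference model of llvm.fshl.i32: shift amount taken modulo the bit
 * width, so a zero amount returns the first operand unchanged. */
static uint32_t fshl32(uint32_t a, uint32_t b, uint32_t s) {
  s &= 31;
  return s ? (a << s) | (b >> (32 - s)) : a;
}

/* Reference model of llvm.fshr.i32: a zero amount returns the second
 * operand unchanged. */
static uint32_t fshr32(uint32_t a, uint32_t b, uint32_t s) {
  s &= 31;
  return s ? (a << (32 - s)) | (b >> s) : b;
}

int main(void) {
  uint32_t a = 0x12345678u, b = 0x9abcdef0u;
  for (uint32_t s = 0; s < 32; s++) {
    /* The guarded forms from funnel.ll: branch around the shifts when
     * the amount is zero to dodge the UB of shifting by 32. */
    uint32_t guarded_l = s == 0 ? a : (a << s) | (b >> (32 - s));
    uint32_t guarded_r = s == 0 ? b : (a << (32 - s)) | (b >> s);
    assert(guarded_l == fshl32(a, b, s));
    assert(guarded_r == fshr32(a, b, s));
  }
  puts("guarded patterns match the intrinsic semantics");
  return 0;
}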