STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
STATISTIC(NumGuardedRotates,
"Number of guarded rotates transformed into funnel shifts");
+STATISTIC(NumGuardedFunnelShifts,
+ "Number of guarded funnel shifts transformed into funnel shifts");
STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
namespace {
};
} // namespace
-/// Match a pattern for a bitwise rotate operation that partially guards
-/// against undefined behavior by branching around the rotation when the shift
-/// amount is 0.
-static bool foldGuardedRotateToFunnelShift(Instruction &I) {
+/// Match a pattern for a bitwise funnel/rotate operation that partially guards
+/// against undefined behavior by branching around the funnel-shift/rotation
+/// when the shift amount is 0.
+static bool foldGuardedFunnelShift(Instruction &I) {
if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
return false;
// As with the one-use checks below, this is not strictly necessary, but we
// are being cautious to avoid potential perf regressions on targets that
- // do not actually have a rotate instruction (where the funnel shift would be
- // expanded back into math/shift/logic ops).
+ // do not actually have a funnel/rotate instruction (where the funnel shift
+ // would be expanded back into math/shift/logic ops).
if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
return false;
return Intrinsic::not_intrinsic;
};
- // One phi operand must be a rotate operation, and the other phi operand must
- // be the source value of that rotate operation:
+ // One phi operand must be a funnel/rotate operation, and the other phi
+ // operand must be the source value of that funnel/rotate operation:
// phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
+ // phi [ fshl(ShlVal0, ShlVal1, ShAmt), FunnelBB ], [ ShlVal0, GuardBB ]
+ // phi [ fshr(ShlVal0, ShlVal1, ShAmt), FunnelBB ], [ ShlVal1, GuardBB ]
PHINode &Phi = cast<PHINode>(I);
unsigned FunnelOp = 0, GuardOp = 1;
Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
Value *ShVal0, *ShVal1, *ShAmt;
Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
- if (IID == Intrinsic::not_intrinsic || ShVal0 != ShVal1 || ShVal0 != P1) {
+ if (IID == Intrinsic::not_intrinsic ||
+ (IID == Intrinsic::fshl && ShVal0 != P1) ||
+ (IID == Intrinsic::fshr && ShVal1 != P1)) {
IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
- if (IID == Intrinsic::not_intrinsic || ShVal0 != ShVal1 || ShVal0 != P0)
+ if (IID == Intrinsic::not_intrinsic ||
+ (IID == Intrinsic::fshl && ShVal0 != P0) ||
+ (IID == Intrinsic::fshr && ShVal1 != P0))
return false;
assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
"Pattern must match funnel shift left or right");
std::swap(FunnelOp, GuardOp);
}
- assert(ShVal0 == ShVal1 && "Rotation funnel shift pattern expected");
// The incoming block with our source operand must be the "guard" block.
- // That must contain a cmp+branch to avoid the rotate when the shift amount
- // is equal to 0. The other incoming block is the block with the rotate.
+ // That must contain a cmp+branch to avoid the funnel/rotate when the shift
+ // amount is equal to 0. The other incoming block is the block with the
+ // funnel/rotate.
BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp);
BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp);
Instruction *TermI = GuardBB->getTerminator();
// br i1 %cmp, label %PhiBB, label %FunnelBB
// FunnelBB:
// %sub = sub i32 32, %ShAmt
- // %shr = lshr i32 %RotSrc, %sub
- // %shl = shl i32 %RotSrc, %ShAmt
- // %rot = or i32 %shr, %shl
+ // %shr = lshr i32 %ShVal1, %sub
+ // %shl = shl i32 %ShVal0, %ShAmt
+ // %fsh = or i32 %shr, %shl
// br label %PhiBB
// PhiBB:
- // %cond = phi i32 [ %RotSrc, %FunnelBB ], [ %RotSrc, %GuardBB ]
+ // %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
// -->
- // llvm.fshl.i32(i32 %RotSrc, i32 %RotSrc, i32 %ShAmt)
+ // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
Phi.replaceAllUsesWith(Builder.CreateCall(F, {ShVal0, ShVal1, ShAmt}));
- ++NumGuardedRotates;
+ if (ShVal0 == ShVal1)
+ ++NumGuardedRotates;
+ else
+ ++NumGuardedFunnelShifts;
return true;
}
// iteratively in this loop rather than waiting until the end.
for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
MadeChange |= foldAnyOrAllBitsSet(I);
- MadeChange |= foldGuardedRotateToFunnelShift(I);
+ MadeChange |= foldGuardedFunnelShift(I);
MadeChange |= tryToRecognizePopCount(I);
}
}
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %c, 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %c, 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %c, 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ 1, [[FSHBB]] ], [ 2, [[ENTRY]] ]
-; CHECK-NEXT: [[RES:%.*]] = or i32 [[COND]], [[OTHER]]
+; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ 1, [[FSHBB]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT: [[RES:%.*]] = or i32 [[TMP0]], [[OTHER]]
; CHECK-NEXT: ret i32 [[RES]]
;
entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[B]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %c, 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %c, 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %c, 0
ret i32 %cond
}
-; Negative test - wrong shift.
+; Negative test - wrong shift for rotate (but can be folded to a generic funnel shift).
define i32 @not_fshr_5(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: @not_fshr_5(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
; CHECK: fshbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[C]], [[SUB]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[C]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %c, 0