}
/// Try to reduce a rotate pattern that includes a compare and select into a
-/// sequence of ALU ops only. Example:
+/// funnel shift intrinsic. Example:
/// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
-/// --> (a >> (-b & 31)) | (a << (b & 31))
+/// --> call llvm.fshl.i32(a, a, b)
static Instruction *foldSelectRotate(SelectInst &Sel,
InstCombiner::BuilderTy &Builder) {
// The false value of the select must be a rotate of the true value.
return nullptr;
// This is a rotate that avoids shift-by-bitwidth UB in a suboptimal way.
- // Convert to safely bitmasked shifts.
- // TODO: When we can canonicalize to funnel shift intrinsics without risk of
- // performance regressions, replace this sequence with that call.
- Value *NegShAmt = Builder.CreateNeg(ShAmt);
- Value *MaskedShAmt = Builder.CreateAnd(ShAmt, Width - 1);
- Value *MaskedNegShAmt = Builder.CreateAnd(NegShAmt, Width - 1);
- Value *NewSA0 = ShAmt == SA0 ? MaskedShAmt : MaskedNegShAmt;
- Value *NewSA1 = ShAmt == SA1 ? MaskedShAmt : MaskedNegShAmt;
- Value *NewSh0 = Builder.CreateBinOp(ShiftOpcode0, TVal, NewSA0);
- Value *NewSh1 = Builder.CreateBinOp(ShiftOpcode1, TVal, NewSA1);
- return BinaryOperator::CreateOr(NewSh0, NewSh1);
+ // Convert to funnel shift intrinsic.
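+ // If the original shift amount (the one compared against zero) feeds the
+ // left-shift, this is a rotate-left and maps to fshl; otherwise it is a
+ // rotate-right and maps to fshr.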
+ bool IsFshl = (ShAmt == SA0 && ShiftOpcode0 == BinaryOperator::Shl) ||
+ (ShAmt == SA1 && ShiftOpcode1 == BinaryOperator::Shl);
+ Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
+ Function *F = Intrinsic::getDeclaration(Sel.getModule(), IID, Sel.getType());
+ return IntrinsicInst::Create(F, { TVal, TVal, ShAmt });
}
Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
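; A minimal hand-written sketch (illustrative only, not taken from the patch
; or its tests) of the select-guarded rotate that foldSelectRotate matches,
; and the canonical form it now produces:
declare i32 @llvm.fshr.i32(i32, i32, i32)

; Before: rotate-right of %x by %shamt, with the select guarding against
; shift-by-bitwidth UB when %shamt is 0.
define i32 @rotr_sketch_before(i32 %x, i32 %shamt) {
  %cmp = icmp eq i32 %shamt, 0
  %sub = sub i32 32, %shamt
  %shr = lshr i32 %x, %shamt
  %shl = shl i32 %x, %sub
  %or = or i32 %shr, %shl
  %r = select i1 %cmp, i32 %x, i32 %or
  ret i32 %r
}

; After: the compare, both shifts, the or, and the select collapse into a
; single funnel-shift call.
define i32 @rotr_sketch_after(i32 %x, i32 %shamt) {
  %r = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %shamt)
  ret i32 %r
}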
define i32 @rotr_select(i32 %x, i32 %shamt) {
; CHECK-LABEL: @rotr_select(
-; CHECK-NEXT: [[TMP1:%.*]] = sub i32 0, [[SHAMT:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[SHAMT]], 31
-; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP1]], 31
-; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[X:%.*]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[X]], [[TMP3]]
-; CHECK-NEXT: [[R:%.*]] = or i32 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[SHAMT:%.*]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp eq i32 %shamt, 0
define i8 @rotr_select_commute(i8 %x, i8 %shamt) {
; CHECK-LABEL: @rotr_select_commute(
-; CHECK-NEXT: [[TMP1:%.*]] = sub i8 0, [[SHAMT:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[SHAMT]], 7
-; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[TMP1]], 7
-; CHECK-NEXT: [[TMP4:%.*]] = shl i8 [[X:%.*]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = lshr i8 [[X]], [[TMP2]]
-; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[SHAMT:%.*]])
; CHECK-NEXT: ret i8 [[R]]
;
%cmp = icmp eq i8 %shamt, 0
define i16 @rotl_select(i16 %x, i16 %shamt) {
; CHECK-LABEL: @rotl_select(
-; CHECK-NEXT: [[TMP1:%.*]] = sub i16 0, [[SHAMT:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[SHAMT]], 15
-; CHECK-NEXT: [[TMP3:%.*]] = and i16 [[TMP1]], 15
-; CHECK-NEXT: [[TMP4:%.*]] = lshr i16 [[X:%.*]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = shl i16 [[X]], [[TMP2]]
-; CHECK-NEXT: [[R:%.*]] = or i16 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[SHAMT:%.*]])
; CHECK-NEXT: ret i16 [[R]]
;
%cmp = icmp eq i16 %shamt, 0
define <2 x i64> @rotl_select_commute(<2 x i64> %x, <2 x i64> %shamt) {
; CHECK-LABEL: @rotl_select_commute(
-; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i64> zeroinitializer, [[SHAMT:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[SHAMT]], <i64 63, i64 63>
-; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP1]], <i64 63, i64 63>
-; CHECK-NEXT: [[TMP4:%.*]] = shl <2 x i64> [[X:%.*]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = lshr <2 x i64> [[X]], [[TMP3]]
-; CHECK-NEXT: [[R:%.*]] = or <2 x i64> [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[X:%.*]], <2 x i64> [[X]], <2 x i64> [[SHAMT:%.*]])
; CHECK-NEXT: ret <2 x i64> [[R]]
;
%cmp = icmp eq <2 x i64> %shamt, zeroinitializer
; This should become a single funnel shift through a combination
; of aggressive-instcombine, simplifycfg, and instcombine.
; https://bugs.llvm.org/show_bug.cgi?id=34924
+; These are equivalent, but the value name produced with the new PM exposes
+; a bug: this code should not have been converted to a speculative select
+; by an intermediate transform.
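; A hand-written sketch, not part of the test file: the IR below corresponds
; roughly to C source such as
;   if (b == 0) return a; return (a << b) | (a >> (32 - b));
; In the new-PM pipeline, SimplifyCFG speculates the shift block into a
; select (hence the SPEC_SELECT value name), and the select fold above can
; then form the fshl call.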
define i32 @rotl(i32 %a, i32 %b) {
; OLDPM-LABEL: @rotl(
;
; NEWPM-LABEL: @rotl(
; NEWPM-NEXT: entry:
-; NEWPM-NEXT: [[TMP0:%.*]] = sub i32 0, [[B:%.*]]
-; NEWPM-NEXT: [[TMP1:%.*]] = and i32 [[B]], 31
-; NEWPM-NEXT: [[TMP2:%.*]] = and i32 [[TMP0]], 31
-; NEWPM-NEXT: [[TMP3:%.*]] = lshr i32 [[A:%.*]], [[TMP2]]
-; NEWPM-NEXT: [[TMP4:%.*]] = shl i32 [[A]], [[TMP1]]
-; NEWPM-NEXT: [[SPEC_SELECT:%.*]] = or i32 [[TMP3]], [[TMP4]]
+; NEWPM-NEXT: [[SPEC_SELECT:%.*]] = tail call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B:%.*]])
; NEWPM-NEXT: ret i32 [[SPEC_SELECT]]
;
entry: