case Intrinsic::fshl:
case Intrinsic::fshr: {
+ // Canonicalize a shift amount constant operand to be modulo the bit-width.
+ unsigned BitWidth = II->getType()->getScalarSizeInBits(); // scalar (per-element) width; reused by the code below
+ Constant *ShAmtC;
+ if (match(II->getArgOperand(2), m_Constant(ShAmtC)) &&
+ !isa<ConstantExpr>(ShAmtC) && !ShAmtC->containsConstantExpression()) { // constant expressions (top-level or nested) are left alone
+ Constant *WidthC = ConstantInt::get(II->getType(), BitWidth); // bit-width as a constant of the call's own type
+ Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC);
+ if (ModuloC != ShAmtC) { // only rewrite when the reduction produced a different constant
+ II->setArgOperand(2, ModuloC);
+ return II;
+ }
+ }
+
const APInt *SA;
if (match(II->getArgOperand(2), m_APInt(SA))) {
Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
- unsigned BitWidth = SA->getBitWidth();
uint64_t ShiftAmt = SA->urem(BitWidth);
assert(ShiftAmt != 0 && "SimplifyCall should have handled zero shift");
// Normalize to funnel shift left.
// The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
// so only the low bits of the shift amount are demanded if the bitwidth is
// a power-of-2.
- unsigned BitWidth = II->getType()->getScalarSizeInBits();
if (!isPowerOf2_32(BitWidth))
break;
APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
define i32 @fshl_constant_shift_amount_modulo_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: @fshl_constant_shift_amount_modulo_bitwidth(
-; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 33)
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 1)
; CHECK-NEXT: ret i32 [[R]]
;
%r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 33)
+; A scalar constant shift amount is expected to be reduced modulo the bit width: 34 urem 33 == 1.
define i33 @fshr_constant_shift_amount_modulo_bitwidth(i33 %x, i33 %y) {
; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth(
-; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 34)
+; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 1)
; CHECK-NEXT: ret i33 [[R]]
;
%r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 34)
ret i33 %r
}
+; External global whose address is used to build constant-expression shift amounts in the tests below.
+@external_global = external global i8
+
+; A shift amount that is a constant expression (ptrtoint of a global) is expected to be
+; left unchanged: the CHECK lines require the ptrtoint operand to survive as-is.
+define i33 @fshr_constant_shift_amount_modulo_bitwidth_constexpr(i33 %x, i33 %y) {
+; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth_constexpr(
+; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 ptrtoint (i8* @external_global to i33))
+; CHECK-NEXT: ret i33 [[R]]
+;
+ %shamt = ptrtoint i8* @external_global to i33
+ %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 %shamt)
+ ret i33 %r
+}
+
define <2 x i32> @fshr_constant_shift_amount_modulo_bitwidth_vec(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth_vec(
-; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> <i32 34, i32 -1>)
+; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> <i32 2, i32 31>)
; CHECK-NEXT: ret <2 x i32> [[R]]
;
%r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 34, i32 -1>)
+; Each element of a constant vector shift amount is expected to be reduced modulo 31:
+; 34 urem 31 == 3, and i31 -1 (2^31 - 1 when read as unsigned) urem 31 == 1.
define <2 x i31> @fshl_constant_shift_amount_modulo_bitwidth_vec(<2 x i31> %x, <2 x i31> %y) {
; CHECK-LABEL: @fshl_constant_shift_amount_modulo_bitwidth_vec(
-; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> <i31 34, i31 -1>)
+; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> <i31 3, i31 1>)
; CHECK-NEXT: ret <2 x i31> [[R]]
;
%r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 34, i31 -1>)
ret <2 x i31> %r
}
-; The shift modulo bitwidth is the same for all vector elements, but this is not simplified yet.
+; A vector shift amount with a constant-expression element is expected to be left entirely
+; unchanged: the CHECK lines keep the plain element 34 as well as the ptrtoint element.
+; NOTE(review): %shamt below is defined but never used; the call embeds the ptrtoint
+; constant expression directly. Confirm whether the extra instruction is intentional.
+define <2 x i31> @fshl_constant_shift_amount_modulo_bitwidth_vec_const_expr(<2 x i31> %x, <2 x i31> %y) {
+; CHECK-LABEL: @fshl_constant_shift_amount_modulo_bitwidth_vec_const_expr(
+; CHECK-NEXT: [[R:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> <i31 34, i31 ptrtoint (i8* @external_global to i31)>)
+; CHECK-NEXT: ret <2 x i31> [[R]]
+;
+ %shamt = ptrtoint i8* @external_global to i31
+ %r = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> %x, <2 x i31> %y, <2 x i31> <i31 34, i31 ptrtoint (i8* @external_global to i31)>)
+ ret <2 x i31> %r
+}
+
+; The shift amount modulo the bitwidth is the same for all vector elements, so the funnel shift simplifies to a plain shift.
+
define <2 x i31> @fshl_only_op1_demanded_vec_nonsplat(<2 x i31> %x, <2 x i31> %y) {
; CHECK-LABEL: @fshl_only_op1_demanded_vec_nonsplat(
-; CHECK-NEXT: [[Z:%.*]] = call <2 x i31> @llvm.fshl.v2i31(<2 x i31> [[X:%.*]], <2 x i31> [[Y:%.*]], <2 x i31> <i31 7, i31 38>)
+; CHECK-NEXT: [[Z:%.*]] = lshr <2 x i31> [[Y:%.*]], <i31 24, i31 24>
; CHECK-NEXT: [[R:%.*]] = and <2 x i31> [[Z]], <i31 63, i31 31>
; CHECK-NEXT: ret <2 x i31> [[R]]
;