"The input must be 'shl'!");
Value *Masked, *ShiftShAmt;
- match(OuterShift, m_Shift(m_Value(Masked), m_Value(ShiftShAmt)));
+ match(OuterShift,
+ m_Shift(m_Value(Masked), m_ZExtOrSelf(m_Value(ShiftShAmt))));
+
+ // *If* there is a truncation between an outer shift and a possibly-mask,
+ // then said truncation *must* be one-use, else we can't perform the fold.
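+ // Illustratively (names hypothetical), the shape being handled is:
+ //   %masked = and i64 %mask, %x
+ //   %t = trunc i64 %masked to i32   ; this trunc must have no other users
+ //   %r = shl i32 %t, %shamt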
+ Value *Trunc;
+ if (match(Masked, m_CombineAnd(m_Trunc(m_Value(Masked)), m_Value(Trunc))) &&
+ !Trunc->hasOneUse())
+ return nullptr;
Type *NarrowestTy = OuterShift->getType();
Type *WidestTy = Masked->getType();
+ bool HadTrunc = WidestTy != NarrowestTy;
+
// The mask must be computed in a type twice as wide to ensure
// that no bits are lost if the sum-of-shifts is wider than the base type.
Type *ExtendedTy = WidestTy->getExtendedType();
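+ // E.g. with an i64 WidestTy the mask is built in i128, where the sum of two
+ // in-range shift amounts (each at most 63) can never reach the bit width.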
Constant *NewMask;
if (match(Masked, m_c_And(m_CombineOr(MaskA, MaskB), m_Value(X)))) {
+ // Peek through an optional zext of the shift amount.
+ match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));
+
+ // We have two shift amounts from two different shifts. The types of those
+ // shift amounts may not match. If that's the case, let's bail out now.
+ if (MaskShAmt->getType() != ShiftShAmt->getType())
+ return nullptr;
+
// Can we simplify (MaskShAmt+ShiftShAmt) ?
auto *SumOfShAmts = dyn_cast_or_null<Constant>(SimplifyAddInst(
    MaskShAmt, ShiftShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
if (!SumOfShAmts)
  return nullptr; // Did not simplify.
} else if (match(Masked, m_c_And(m_CombineOr(MaskC, MaskD), m_Value(X))) ||
match(Masked, m_Shr(m_Shl(m_Value(X), m_Value(MaskShAmt)),
m_Deferred(MaskShAmt)))) {
+ // Peek through an optional zext of the shift amount.
+ match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));
+
+ // We have two shift amounts from two different shifts. The types of those
+ // shift amounts may not match. If that's the case, let's bail out now.
+ if (MaskShAmt->getType() != ShiftShAmt->getType())
+ return nullptr;
+
// Can we simplify (ShiftShAmt-MaskShAmt) ?
auto *ShAmtsDiff = dyn_cast_or_null<Constant>(SimplifySubInst(
    ShiftShAmt, MaskShAmt, /*IsNSW=*/false, /*IsNUW=*/false, Q));
if (!ShAmtsDiff)
  return nullptr; // Did not simplify.
}
+ // If we need to apply truncation, let's do it first, since we can.
+ // We have already ensured that the old truncation will go away.
+ if (HadTrunc)
+ X = Builder.CreateTrunc(X, NarrowestTy);
+
// No 'NUW'/'NSW'! We no longer know that we won't shift-out non-0 bits.
+ // We didn't change the Type of this outermost shift, so we can just do it.
auto *NewShift = BinaryOperator::Create(OuterShift->getOpcode(), X,
OuterShift->getOperand(1));
-
if (!NeedMask)
return NewShift;
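
The FileCheck updates below exercise the new trunc-aware path. As a minimal
sketch of the fold (IR names hypothetical, modeled on the checks that follow),
a mask applied in the wide type and then truncated into the shift:

  %masked = and i64 %mask, %x
  %t = trunc i64 %masked to i32
  %r = shl i32 %t, %shamt

is rewritten to truncate %x directly, with the mask either dropped entirely
(when the shl discards all of the masked-off bits anyway) or re-applied as a
constant in the narrow type:

  %t = trunc i64 %x to i32
  %s = shl i32 %t, %shamt
  %r = and i32 %s, 2147483647 ; only emitted when the mask is still needed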
; CHECK-NEXT: call void @use64(i64 [[T2]])
; CHECK-NEXT: call void @use64(i64 [[T3]])
; CHECK-NEXT: call void @use32(i32 [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = and i64 [[T3]], [[X:%.*]]
-; CHECK-NEXT: [[T6:%.*]] = trunc i64 [[T5]] to i32
-; CHECK-NEXT: [[T7:%.*]] = shl i32 [[T6]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], [[T4]]
+; CHECK-NEXT: [[T7:%.*]] = and i32 [[TMP2]], 2147483647
; CHECK-NEXT: ret i32 [[T7]]
;
%t0 = add i32 %nbits, -1
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
-; CHECK-NEXT: [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
-; CHECK-NEXT: [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
+; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
; CHECK-NEXT: ret <8 x i32> [[T7]]
;
%t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
-; CHECK-NEXT: [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
-; CHECK-NEXT: [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
+; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
; CHECK-NEXT: ret <8 x i32> [[T7]]
;
%t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
-; CHECK-NEXT: [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
-; CHECK-NEXT: [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
+; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 undef, i32 0, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT: ret <8 x i32> [[T7]]
;
%t0 = add <8 x i32> %nbits, <i32 -33, i32 -32, i32 -31, i32 -1, i32 0, i32 1, i32 31, i32 32>
; CHECK-NEXT: call void @use64(i64 [[T2]])
; CHECK-NEXT: call void @use64(i64 [[T3]])
; CHECK-NEXT: call void @use32(i32 [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = and i64 [[T3]], [[X:%.*]]
-; CHECK-NEXT: [[T6:%.*]] = trunc i64 [[T5]] to i32
-; CHECK-NEXT: [[T7:%.*]] = shl i32 [[T6]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], [[T4]]
+; CHECK-NEXT: [[T7:%.*]] = and i32 [[TMP2]], 2147483647
; CHECK-NEXT: ret i32 [[T7]]
;
%t0 = add i32 %nbits, -1
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
-; CHECK-NEXT: [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
-; CHECK-NEXT: [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
+; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
; CHECK-NEXT: ret <8 x i32> [[T7]]
;
%t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
-; CHECK-NEXT: [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
-; CHECK-NEXT: [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
+; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
; CHECK-NEXT: ret <8 x i32> [[T7]]
;
%t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = and <8 x i64> [[T3]], [[X:%.*]]
-; CHECK-NEXT: [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
-; CHECK-NEXT: [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
+; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 undef, i32 0, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 -1>
; CHECK-NEXT: ret <8 x i32> [[T7]]
;
%t0 = add <8 x i32> %nbits, <i32 -33, i32 -32, i32 -31, i32 -1, i32 0, i32 1, i32 31, i32 32>
; CHECK-NEXT: call void @use64(i64 [[T0]])
; CHECK-NEXT: call void @use64(i64 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = and i64 [[T1]], [[X:%.*]]
-; CHECK-NEXT: [[T4:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT: [[T5:%.*]] = shl i32 [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], [[T2]]
+; CHECK-NEXT: [[T5:%.*]] = and i32 [[TMP2]], 2147483647
; CHECK-NEXT: ret i32 [[T5]]
;
%t0 = zext i32 %nbits to i64
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = and <8 x i64> [[T1]], [[X:%.*]]
-; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
+; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
; CHECK-NEXT: ret <8 x i32> [[T5]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = and <8 x i64> [[T1]], [[X:%.*]]
-; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
+; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
; CHECK-NEXT: ret <8 x i32> [[T5]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = and <8 x i64> [[T1]], [[X:%.*]]
-; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
+; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 undef, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 undef, i32 undef>
; CHECK-NEXT: ret <8 x i32> [[T5]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use64(i64 [[T1]])
; CHECK-NEXT: call void @use64(i64 [[T2]])
; CHECK-NEXT: call void @use32(i32 [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = and i64 [[T2]], [[X:%.*]]
-; CHECK-NEXT: [[T5:%.*]] = trunc i64 [[T4]] to i32
-; CHECK-NEXT: [[T6:%.*]] = shl i32 [[T5]], [[T3]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], [[T3]]
+; CHECK-NEXT: [[T6:%.*]] = and i32 [[TMP2]], 2147483647
; CHECK-NEXT: ret i32 [[T6]]
;
%t0 = zext i32 %nbits to i64
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]]
-; CHECK-NEXT: [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
+; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
; CHECK-NEXT: ret <8 x i32> [[T6]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]]
-; CHECK-NEXT: [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
+; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
; CHECK-NEXT: ret <8 x i32> [[T6]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]]
-; CHECK-NEXT: [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
+; CHECK-NEXT: [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 undef, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 undef, i32 undef>
; CHECK-NEXT: ret <8 x i32> [[T6]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use64(i64 [[T0]])
; CHECK-NEXT: call void @use64(i64 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = lshr i64 [[T1]], [[T0]]
-; CHECK-NEXT: [[T4:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT: [[T5:%.*]] = shl i32 [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP1]], [[T2]]
+; CHECK-NEXT: [[T5:%.*]] = and i32 [[TMP2]], 2147483647
; CHECK-NEXT: ret i32 [[T5]]
;
%t0 = zext i32 %nbits to i64
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = lshr <8 x i64> [[T1]], [[T0]]
-; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
+; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
; CHECK-NEXT: ret <8 x i32> [[T5]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = lshr <8 x i64> [[T1]], [[T0]]
-; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
+; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 undef, i32 2147483647>
; CHECK-NEXT: ret <8 x i32> [[T5]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
-; CHECK-NEXT: [[T3:%.*]] = lshr <8 x i64> [[T1]], [[T0]]
-; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
+; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 undef, i32 1, i32 2147483647, i32 -1, i32 -1, i32 -1, i32 undef, i32 undef>
; CHECK-NEXT: ret <8 x i32> [[T5]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use64(i64 [[T2]])
; CHECK-NEXT: call void @use32(i32 [[T3]])
; CHECK-NEXT: call void @use64(i64 [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = trunc i64 [[T4]] to i32
-; CHECK-NEXT: [[T6:%.*]] = shl i32 [[T5]], [[T3]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT: [[T6:%.*]] = shl i32 [[TMP1]], [[T3]]
; CHECK-NEXT: ret i32 [[T6]]
;
%t0 = zext i32 %nbits to i64
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
; CHECK-NEXT: ret <8 x i32> [[T6]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
; CHECK-NEXT: ret <8 x i32> [[T6]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
; CHECK-NEXT: ret <8 x i32> [[T6]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use64(i64 [[T2]])
; CHECK-NEXT: call void @use32(i32 [[T3]])
; CHECK-NEXT: call void @use64(i64 [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = trunc i64 [[T4]] to i32
-; CHECK-NEXT: [[T6:%.*]] = shl i32 [[T5]], [[T3]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT: [[T6:%.*]] = shl i32 [[TMP1]], [[T3]]
; CHECK-NEXT: ret i32 [[T6]]
;
%t0 = zext i32 %nbits to i64
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
; CHECK-NEXT: ret <8 x i32> [[T6]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
; CHECK-NEXT: ret <8 x i32> [[T6]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T4]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T5]])
-; CHECK-NEXT: [[T6:%.*]] = trunc <8 x i64> [[T5]] to <8 x i32>
-; CHECK-NEXT: [[T7:%.*]] = shl <8 x i32> [[T6]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T7:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
; CHECK-NEXT: ret <8 x i32> [[T7]]
;
%t0 = add <8 x i32> %nbits, <i32 -1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0>
; CHECK-NEXT: call void @use64(i64 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
; CHECK-NEXT: call void @use64(i64 [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT: [[T5:%.*]] = shl i32 [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT: [[T5:%.*]] = shl i32 [[TMP1]], [[T2]]
; CHECK-NEXT: ret i32 [[T5]]
;
%t0 = zext i32 %nbits to i64
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
; CHECK-NEXT: ret <8 x i32> [[T5]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
; CHECK-NEXT: ret <8 x i32> [[T5]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
; CHECK-NEXT: ret <8 x i32> [[T5]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use64(i64 [[T2]])
; CHECK-NEXT: call void @use32(i32 [[T3]])
; CHECK-NEXT: call void @use64(i64 [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = trunc i64 [[T4]] to i32
-; CHECK-NEXT: [[T6:%.*]] = shl i32 [[T5]], [[T3]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT: [[T6:%.*]] = shl i32 [[TMP1]], [[T3]]
; CHECK-NEXT: ret i32 [[T6]]
;
%t0 = zext i32 %nbits to i64
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
; CHECK-NEXT: ret <8 x i32> [[T6]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
; CHECK-NEXT: ret <8 x i32> [[T6]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = trunc <8 x i64> [[T4]] to <8 x i32>
-; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[T5]], [[T3]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T6:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
; CHECK-NEXT: ret <8 x i32> [[T6]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use64(i64 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
; CHECK-NEXT: call void @use64(i64 [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT: [[T5:%.*]] = shl i32 [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT: [[T5:%.*]] = shl i32 [[TMP1]], [[T2]]
; CHECK-NEXT: ret i32 [[T5]]
;
%t0 = zext i32 %nbits to i64
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
; CHECK-NEXT: ret <8 x i32> [[T5]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
; CHECK-NEXT: ret <8 x i32> [[T5]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
; CHECK-NEXT: ret <8 x i32> [[T5]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use64(i64 [[T1]])
; CHECK-NEXT: call void @use32(i32 [[T2]])
; CHECK-NEXT: call void @use64(i64 [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = trunc i64 [[T3]] to i32
-; CHECK-NEXT: [[T5:%.*]] = shl i32 [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT: [[T5:%.*]] = shl i32 [[TMP1]], [[T2]]
; CHECK-NEXT: ret i32 [[T5]]
;
%t0 = zext i32 %nbits to i64
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
; CHECK-NEXT: ret <8 x i32> [[T5]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
; CHECK-NEXT: ret <8 x i32> [[T5]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]])
; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]])
; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T3]])
-; CHECK-NEXT: [[T4:%.*]] = trunc <8 x i64> [[T3]] to <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[T4]], [[T2]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
+; CHECK-NEXT: [[T5:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
; CHECK-NEXT: ret <8 x i32> [[T5]]
;
%t0 = zext <8 x i32> %nbits to <8 x i64>