if (ShiftOpcode != Sh1->getOpcode())
return nullptr;
- // Did we match a pattern with truncation ?
- if (Trunc) {
- // For right-shifts we can't do any such simplifications. Leave as-is.
- if (ShiftOpcode != Instruction::BinaryOps::Shl)
- return nullptr; // FIXME: still could perform constant-folding.
- // If we saw truncation, we'll need to produce extra instruction,
- // and for that one of the operands of the shift must be one-use.
- if (!match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
- return nullptr;
- }
+ // If we saw truncation, we'll need to produce an extra instruction,
+ // and for that one of the operands of the shift must be one-use.
+ if (Trunc && !match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
+ return nullptr;
// Can we fold (ShAmt0+ShAmt1) ?
auto *NewShAmt = dyn_cast_or_null<Constant>(
SQ.getWithInstruction(Sh0)));
if (!NewShAmt)
return nullptr; // Did not simplify.
- // Is the new shift amount smaller than the bit width of inner shift?
- if (!match(NewShAmt, m_SpecificInt_ICMP(
- ICmpInst::Predicate::ICMP_ULT,
- APInt(NewShAmt->getType()->getScalarSizeInBits(),
- X->getType()->getScalarSizeInBits()))))
+ unsigned NewShAmtBitWidth = NewShAmt->getType()->getScalarSizeInBits();
+ unsigned XBitWidth = X->getType()->getScalarSizeInBits();
+ // Is the new shift amount smaller than the bit width of inner/new shift?
+ if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT,
+ APInt(NewShAmtBitWidth, XBitWidth))))
return nullptr; // FIXME: could perform constant-folding.
+ // If there was a truncation, and we have a right-shift, we can only fold if
+ // we are left with the original sign bit, i.e. if the new shift amount is
+ // exactly one less than the bit width of X.
+ // FIXME: zero shift amount is also legal here, but we can't *easily* check
+ // more than one predicate so it's not really worth it.
+ if (Trunc && ShiftOpcode != Instruction::BinaryOps::Shl &&
+ !match(NewShAmt,
+ m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
+ APInt(NewShAmtBitWidth, XBitWidth - 1))))
+ return nullptr;
+
// All good, we can do this fold.
NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, X->getType());
define i16 @t0(i32 %x, i16 %y) {
; CHECK-LABEL: @t0(
-; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32
-; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[X:%.*]], [[T1]]
-; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16
-; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1
-; CHECK-NEXT: [[T5:%.*]] = ashr i16 [[T3]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31
+; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT: ret i16 [[T5]]
;
%t0 = sub i16 32, %y
define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) {
; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT: [[T0:%.*]] = sub <2 x i16> <i16 32, i16 32>, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32>
-; CHECK-NEXT: [[T2:%.*]] = ashr <2 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT: [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16>
-; CHECK-NEXT: [[T4:%.*]] = add <2 x i16> [[Y]], <i16 -1, i16 -1>
-; CHECK-NEXT: [[T5:%.*]] = ashr <2 x i16> [[T3]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
; CHECK-NEXT: ret <2 x i16> [[T5]]
;
%t0 = sub <2 x i16> <i16 32, i16 32>, %y
define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) {
; CHECK-LABEL: @t3_vec_nonsplat_undef0(
-; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> <i16 32, i16 undef, i16 32>, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
-; CHECK-NEXT: [[T2:%.*]] = ashr <3 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
-; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -1, i16 -1, i16 -1>
-; CHECK-NEXT: [[T5:%.*]] = ashr <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 31, i32 0, i32 31>
+; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
; CHECK-NEXT: ret <3 x i16> [[T5]]
;
%t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y
define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
; CHECK-LABEL: @t4_vec_nonsplat_undef1(
-; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> <i16 32, i16 32, i16 32>, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
-; CHECK-NEXT: [[T2:%.*]] = ashr <3 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
-; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -1, i16 undef, i16 -1>
-; CHECK-NEXT: [[T5:%.*]] = ashr <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 31, i32 0, i32 31>
+; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
; CHECK-NEXT: ret <3 x i16> [[T5]]
;
%t0 = sub <3 x i16> <i16 32, i16 32, i16 32>, %y
define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
; CHECK-LABEL: @t5_vec_nonsplat_undef1(
-; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> <i16 32, i16 undef, i16 32>, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
-; CHECK-NEXT: [[T2:%.*]] = ashr <3 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
-; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -1, i16 undef, i16 -1>
-; CHECK-NEXT: [[T5:%.*]] = ashr <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 31, i32 0, i32 31>
+; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
; CHECK-NEXT: ret <3 x i16> [[T5]]
;
%t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y
; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32
; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[X:%.*]], [[T1]]
; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16
-; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1
; CHECK-NEXT: call void @use16(i16 [[T3]])
-; CHECK-NEXT: [[T5:%.*]] = ashr i16 [[T3]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X]], 31
+; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT: ret i16 [[T5]]
;
%t0 = sub i16 32, %y
define i16 @t7_extrause1(i32 %x, i16 %y) {
; CHECK-LABEL: @t7_extrause1(
-; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32
-; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[X:%.*]], [[T1]]
-; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16
-; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1
+; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y:%.*]], -1
; CHECK-NEXT: call void @use16(i16 [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = ashr i16 [[T3]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 31
+; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT: ret i16 [[T5]]
;
%t0 = sub i16 32, %y
define i16 @t0(i32 %x, i16 %y) {
; CHECK-LABEL: @t0(
-; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32
-; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[X:%.*]], [[T1]]
-; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16
-; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1
-; CHECK-NEXT: [[T5:%.*]] = lshr i16 [[T3]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 31
+; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT: ret i16 [[T5]]
;
%t0 = sub i16 32, %y
define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) {
; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT: [[T0:%.*]] = sub <2 x i16> <i16 32, i16 32>, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32>
-; CHECK-NEXT: [[T2:%.*]] = lshr <2 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT: [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16>
-; CHECK-NEXT: [[T4:%.*]] = add <2 x i16> [[Y]], <i16 -1, i16 -1>
-; CHECK-NEXT: [[T5:%.*]] = lshr <2 x i16> [[T3]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT: [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
; CHECK-NEXT: ret <2 x i16> [[T5]]
;
%t0 = sub <2 x i16> <i16 32, i16 32>, %y
define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) {
; CHECK-LABEL: @t3_vec_nonsplat_undef0(
-; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> <i16 32, i16 undef, i16 32>, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
-; CHECK-NEXT: [[T2:%.*]] = lshr <3 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
-; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -1, i16 -1, i16 -1>
-; CHECK-NEXT: [[T5:%.*]] = lshr <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 31, i32 0, i32 31>
+; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
; CHECK-NEXT: ret <3 x i16> [[T5]]
;
%t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y
define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
; CHECK-LABEL: @t4_vec_nonsplat_undef1(
-; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> <i16 32, i16 32, i16 32>, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
-; CHECK-NEXT: [[T2:%.*]] = lshr <3 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
-; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -1, i16 undef, i16 -1>
-; CHECK-NEXT: [[T5:%.*]] = lshr <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 31, i32 0, i32 31>
+; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
; CHECK-NEXT: ret <3 x i16> [[T5]]
;
%t0 = sub <3 x i16> <i16 32, i16 32, i16 32>, %y
define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
; CHECK-LABEL: @t5_vec_nonsplat_undef1(
-; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> <i16 32, i16 undef, i16 32>, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
-; CHECK-NEXT: [[T2:%.*]] = lshr <3 x i32> [[X:%.*]], [[T1]]
-; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
-; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -1, i16 undef, i16 -1>
-; CHECK-NEXT: [[T5:%.*]] = lshr <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 31, i32 0, i32 31>
+; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
; CHECK-NEXT: ret <3 x i16> [[T5]]
;
%t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y
; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32
; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[X:%.*]], [[T1]]
; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16
-; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1
; CHECK-NEXT: call void @use16(i16 [[T3]])
-; CHECK-NEXT: [[T5:%.*]] = lshr i16 [[T3]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 31
+; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT: ret i16 [[T5]]
;
%t0 = sub i16 32, %y
define i16 @t7_extrause1(i32 %x, i16 %y) {
; CHECK-LABEL: @t7_extrause1(
-; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]]
-; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32
-; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[X:%.*]], [[T1]]
-; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16
-; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -1
+; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y:%.*]], -1
; CHECK-NEXT: call void @use16(i16 [[T4]])
-; CHECK-NEXT: [[T5:%.*]] = lshr i16 [[T3]], [[T4]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 31
+; CHECK-NEXT: [[T5:%.*]] = trunc i32 [[TMP1]] to i16
; CHECK-NEXT: ret i16 [[T5]]
;
%t0 = sub i16 32, %y
define i1 @highest_bit_test_via_lshr_with_truncation(i64 %data, i32 %nbits) {
; CHECK-LABEL: @highest_bit_test_via_lshr_with_truncation(
-; CHECK-NEXT: [[NUM_LOW_BITS_TO_SKIP:%.*]] = sub i32 64, [[NBITS:%.*]]
-; CHECK-NEXT: [[NUM_LOW_BITS_TO_SKIP_WIDE:%.*]] = zext i32 [[NUM_LOW_BITS_TO_SKIP]] to i64
-; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = lshr i64 [[DATA:%.*]], [[NUM_LOW_BITS_TO_SKIP_WIDE]]
-; CHECK-NEXT: [[HIGH_BITS_EXTRACTED_NARROW:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED]] to i32
-; CHECK-NEXT: [[SKIP_ALL_BITS_TILL_SIGNBIT:%.*]] = add i32 [[NBITS]], -1
-; CHECK-NEXT: [[SIGNBIT:%.*]] = lshr i32 [[HIGH_BITS_EXTRACTED_NARROW]], [[SKIP_ALL_BITS_TILL_SIGNBIT]]
-; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT]], 0
+; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i64 [[DATA:%.*]], 0
; CHECK-NEXT: ret i1 [[ISNEG]]
;
%num_low_bits_to_skip = sub i32 64, %nbits
define i1 @highest_bit_test_via_ashr_with_truncation(i64 %data, i32 %nbits) {
; CHECK-LABEL: @highest_bit_test_via_ashr_with_truncation(
-; CHECK-NEXT: [[NUM_LOW_BITS_TO_SKIP:%.*]] = sub i32 64, [[NBITS:%.*]]
-; CHECK-NEXT: [[NUM_LOW_BITS_TO_SKIP_WIDE:%.*]] = zext i32 [[NUM_LOW_BITS_TO_SKIP]] to i64
-; CHECK-NEXT: [[HIGH_BITS_EXTRACTED:%.*]] = ashr i64 [[DATA:%.*]], [[NUM_LOW_BITS_TO_SKIP_WIDE]]
-; CHECK-NEXT: [[HIGH_BITS_EXTRACTED_NARROW:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED]] to i32
-; CHECK-NEXT: [[SKIP_ALL_BITS_TILL_SIGNBIT:%.*]] = add i32 [[NBITS]], -1
-; CHECK-NEXT: [[SIGNBIT:%.*]] = ashr i32 [[HIGH_BITS_EXTRACTED_NARROW]], [[SKIP_ALL_BITS_TILL_SIGNBIT]]
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i64 [[DATA:%.*]], 63
+; CHECK-NEXT: [[SIGNBIT:%.*]] = trunc i64 [[TMP1]] to i32
; CHECK-NEXT: [[ISNEG:%.*]] = icmp ne i32 [[SIGNBIT]], 0
; CHECK-NEXT: ret i1 [[ISNEG]]
;