}
}
+ // (X >>u C1) >>u C --> X >>u (C1 + C)
if (match(Op0, m_LShr(m_Value(X), m_APInt(C1)))) {
- unsigned AmtSum = ShAmtC + C1->getZExtValue();
// Oversized shifts are simplified to zero in InstSimplify.
+ unsigned AmtSum = ShAmtC + C1->getZExtValue();
if (AmtSum < BitWidth)
- // (X >>u C1) >>u C --> X >>u (C1 + C)
return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
}
+ // If the first shift covers the number of bits truncated and the combined
+ // shift fits in the source width:
+ // (trunc (X >>u C1)) >>u C --> trunc (X >>u (C1 + C))
+ if (match(Op0, m_OneUse(m_Trunc(m_LShr(m_Value(X), m_APInt(C1)))))) {
+ unsigned SrcWidth = X->getType()->getScalarSizeInBits();
+ unsigned AmtSum = ShAmtC + C1->getZExtValue();
+ if (C1->uge(SrcWidth - BitWidth) && AmtSum < SrcWidth) {
+ Value *SumShift = Builder.CreateLShr(X, AmtSum, "sum.shift");
+ return new TruncInst(SumShift, Ty);
+ }
+ }
+
// Look for a "splat" mul pattern - it replicates bits across each half of
// a value, so a right shift is just a mask of the low bits:
// lshr i32 (mul nuw X, Pow2+1), 16 --> and X, Pow2-1
; CHECK-LABEL: @trunc_sandwich_use1(
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 28
; CHECK-NEXT: call void @use(i32 [[SH]])
-; CHECK-NEXT: [[TR:%.*]] = trunc i32 [[SH]] to i12
-; CHECK-NEXT: [[R:%.*]] = lshr i12 [[TR]], 2
+; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X]], 30
+; CHECK-NEXT: [[R:%.*]] = trunc i32 [[SUM_SHIFT]] to i12
; CHECK-NEXT: ret i12 [[R]]
;
%sh = lshr i32 %x, 28
; CHECK-LABEL: @trunc_sandwich_splat_vec_use1(
; CHECK-NEXT: [[SH:%.*]] = lshr <3 x i14> [[X:%.*]], <i14 6, i14 6, i14 6>
; CHECK-NEXT: call void @usevec(<3 x i14> [[SH]])
-; CHECK-NEXT: [[TR:%.*]] = trunc <3 x i14> [[SH]] to <3 x i9>
-; CHECK-NEXT: [[R:%.*]] = lshr <3 x i9> [[TR]], <i9 5, i9 5, i9 5>
+; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr <3 x i14> [[X]], <i14 11, i14 11, i14 11>
+; CHECK-NEXT: [[R:%.*]] = trunc <3 x i14> [[SUM_SHIFT]] to <3 x i9>
; CHECK-NEXT: ret <3 x i9> [[R]]
;
%sh = lshr <3 x i14> %x, <i14 6, i14 6, i14 6>
; CHECK-LABEL: @trunc_sandwich_min_shift1_use1(
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 20
; CHECK-NEXT: call void @use(i32 [[SH]])
-; CHECK-NEXT: [[TR:%.*]] = trunc i32 [[SH]] to i12
-; CHECK-NEXT: [[R:%.*]] = lshr i12 [[TR]], 1
+; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X]], 21
+; CHECK-NEXT: [[R:%.*]] = trunc i32 [[SUM_SHIFT]] to i12
; CHECK-NEXT: ret i12 [[R]]
;
%sh = lshr i32 %x, 20
ret i12 %r
}
+; negative test - first shift amount is smaller than the number of truncated bits
+
define i12 @trunc_sandwich_small_shift1_use1(i32 %x) {
; CHECK-LABEL: @trunc_sandwich_small_shift1_use1(
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 19
; CHECK-LABEL: @trunc_sandwich_max_sum_shift_use1(
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 20
; CHECK-NEXT: call void @use(i32 [[SH]])
-; CHECK-NEXT: [[TR:%.*]] = trunc i32 [[SH]] to i12
-; CHECK-NEXT: [[R:%.*]] = lshr i12 [[TR]], 11
+; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X]], 31
+; CHECK-NEXT: [[R:%.*]] = trunc i32 [[SUM_SHIFT]] to i12
; CHECK-NEXT: ret i12 [[R]]
;
%sh = lshr i32 %x, 20
; CHECK-LABEL: @trunc_sandwich_max_sum_shift2_use1(
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 30
; CHECK-NEXT: call void @use(i32 [[SH]])
-; CHECK-NEXT: [[TR:%.*]] = trunc i32 [[SH]] to i12
-; CHECK-NEXT: [[R:%.*]] = lshr i12 [[TR]], 1
+; CHECK-NEXT: [[SUM_SHIFT:%.*]] = lshr i32 [[X]], 31
+; CHECK-NEXT: [[R:%.*]] = trunc i32 [[SUM_SHIFT]] to i12
; CHECK-NEXT: ret i12 [[R]]
;
%sh = lshr i32 %x, 30
ret i12 %r
}
+; negative test - combined shift amount does not fit in the source width, but the overshift is simplified to zero by another fold
+
define i12 @trunc_sandwich_big_sum_shift1_use1(i32 %x) {
; CHECK-LABEL: @trunc_sandwich_big_sum_shift1_use1(
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 21
ret i12 %r
}
+; negative test - combined shift amount does not fit in the source width, but the overshift is simplified to zero by another fold
+
define i12 @trunc_sandwich_big_sum_shift2_use1(i32 %x) {
; CHECK-LABEL: @trunc_sandwich_big_sum_shift2_use1(
; CHECK-NEXT: [[SH:%.*]] = lshr i32 [[X:%.*]], 31