}
}
+ // Similar to above, but look through an intermediate trunc instruction.
+ BinaryOperator *Shr;
+ if (match(Op0, m_OneUse(m_Trunc(m_OneUse(m_BinOp(Shr))))) &&
+ match(Shr, m_Shr(m_Value(X), m_APInt(C1)))) {
+ // The larger shift direction survives through the transform.
+ unsigned ShrAmtC = C1->getZExtValue();
+ unsigned ShDiff = ShrAmtC > ShAmtC ? ShrAmtC - ShAmtC : ShAmtC - ShrAmtC;
+ Constant *ShiftDiffC = ConstantInt::get(X->getType(), ShDiff);
+ auto ShiftOpc = ShrAmtC > ShAmtC ? Shr->getOpcode() : Instruction::Shl;
+
+ // If C1 > C:
+ // (trunc (X >> C1)) << C --> (trunc (X >> (C1 - C))) & (-1 << C)
+ // If C > C1:
+ // (trunc (X >> C1)) << C --> (trunc (X << (C - C1))) & (-1 << C)
+ Value *NewShift = Builder.CreateBinOp(ShiftOpc, X, ShiftDiffC, "sh.diff");
+ Value *Trunc = Builder.CreateTrunc(NewShift, Ty, "tr.sh.diff");
+ APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
+ return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Ty, Mask));
+ }
+
if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) {
unsigned AmtSum = ShAmtC + C1->getZExtValue();
// Oversized shifts are simplified to zero in InstSimplify.
return BinaryOperator::CreateShl(X, ConstantInt::get(Ty, AmtSum));
}
- // Fold shl(trunc(shr(x,c1)),c2) -> trunc(and(shl(shr(x,c1),c2),c2'))
- // Require that the input operand is a non-poison shift-by-constant so that
- // we have confidence that the shifts will get folded together.
- Instruction *TrOp;
- const APInt *TrShiftAmt;
- if (match(Op0, m_OneUse(m_Trunc(m_Instruction(TrOp)))) &&
- match(TrOp, m_OneUse(m_Shr(m_Value(), m_APInt(TrShiftAmt)))) &&
- TrShiftAmt->ult(TrOp->getType()->getScalarSizeInBits())) {
- Type *SrcTy = TrOp->getType();
-
- // Okay, we'll do this xform. Make the shift of shift.
- unsigned SrcSize = SrcTy->getScalarSizeInBits();
- Constant *ShAmt = ConstantInt::get(SrcTy, C->zext(SrcSize));
-
- // (shift2 (shift1 & 0x00FF), c2)
- Value *NSh = Builder.CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName());
-
- // For logical shifts, the truncation has the effect of making the high
- // part of the register be zeros. Emulate this by inserting an AND to
- // clear the top bits as needed. This 'and' will usually be zapped by
- // other xforms later if dead.
- Constant *MaskV =
- ConstantInt::get(SrcTy, APInt::getLowBitsSet(SrcSize, BitWidth));
-
- // The mask we constructed says what the trunc would do if occurring
- // between the shifts. We want to know the effect *after* the second
- // shift. We know that it is a logical shift by a constant, so adjust the
- // mask as appropriate.
- MaskV = ConstantExpr::get(I.getOpcode(), MaskV, ShAmt);
- // shift1 & 0x00FF
- Value *And = Builder.CreateAnd(NSh, MaskV, Op0->getName());
- // Return the value truncated to the interesting size.
- return new TruncInst(And, Ty);
- }
-
// If we have an opposite shift by the same amount, we may be able to
// reorder binops and shifts to eliminate math/logic.
auto isSuitableBinOpcode = [](Instruction::BinaryOps BinOpcode) {
; CHECK-NEXT: [[LSHR1:%.*]] = lshr i160 [[LOAD]], 136
; CHECK-NEXT: [[CAST1:%.*]] = trunc i160 [[LSHR1]] to i16
; CHECK-NEXT: [[AND1:%.*]] = and i16 [[CAST1]], 255
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i160 [[LOAD]], 120
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i160 [[TMP1]] to i16
-; CHECK-NEXT: [[SHL:%.*]] = and i16 [[TMP2]], -256
+; CHECK-NEXT: [[SH_DIFF:%.*]] = lshr i160 [[LOAD]], 120
+; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = trunc i160 [[SH_DIFF]] to i16
+; CHECK-NEXT: [[SHL:%.*]] = and i16 [[TR_SH_DIFF]], -256
; CHECK-NEXT: [[OR:%.*]] = or i16 [[AND1]], [[SHL]]
; CHECK-NEXT: ret i16 [[OR]]
;
define i8 @shl_trunc_bigger_lshr(i32 %x) {
; CHECK-LABEL: @shl_trunc_bigger_lshr(
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
-; CHECK-NEXT: [[LT:%.*]] = and i8 [[TMP2]], -8
+; CHECK-NEXT: [[SH_DIFF:%.*]] = lshr i32 [[X:%.*]], 2
+; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = trunc i32 [[SH_DIFF]] to i8
+; CHECK-NEXT: [[LT:%.*]] = and i8 [[TR_SH_DIFF]], -8
; CHECK-NEXT: ret i8 [[LT]]
;
%rt = lshr i32 %x, 5
define i8 @shl_trunc_smaller_lshr(i32 %x) {
; CHECK-LABEL: @shl_trunc_smaller_lshr(
; CHECK-NEXT: [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT: [[TMP1:%.*]] = shl i8 [[X_TR]], 2
-; CHECK-NEXT: [[LT:%.*]] = and i8 [[TMP1]], -32
+; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = shl i8 [[X_TR]], 2
+; CHECK-NEXT: [[LT:%.*]] = and i8 [[TR_SH_DIFF]], -32
; CHECK-NEXT: ret i8 [[LT]]
;
%rt = lshr i32 %x, 3
define i24 @shl_trunc_bigger_ashr(i32 %x) {
; CHECK-LABEL: @shl_trunc_bigger_ashr(
-; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 [[X:%.*]], 9
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
-; CHECK-NEXT: [[LT:%.*]] = and i24 [[TMP2]], -8
+; CHECK-NEXT: [[SH_DIFF:%.*]] = ashr i32 [[X:%.*]], 9
+; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = trunc i32 [[SH_DIFF]] to i24
+; CHECK-NEXT: [[LT:%.*]] = and i24 [[TR_SH_DIFF]], -8
; CHECK-NEXT: ret i24 [[LT]]
;
%rt = ashr i32 %x, 12
define i24 @shl_trunc_smaller_ashr(i32 %x) {
; CHECK-LABEL: @shl_trunc_smaller_ashr(
; CHECK-NEXT: [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i24
-; CHECK-NEXT: [[TMP1:%.*]] = shl i24 [[X_TR]], 3
-; CHECK-NEXT: [[LT:%.*]] = and i24 [[TMP1]], -8192
+; CHECK-NEXT: [[TR_SH_DIFF:%.*]] = shl i24 [[X_TR]], 3
+; CHECK-NEXT: [[LT:%.*]] = and i24 [[TR_SH_DIFF]], -8192
; CHECK-NEXT: ret i24 [[LT]]
;
%rt = ashr i32 %x, 10