Instruction *NarrowestShift = XShift;
Type *WidestTy = WidestShift->getType();
- assert(NarrowestShift->getType() == I.getOperand(0)->getType() &&
+ Type *NarrowestTy = NarrowestShift->getType();
+ assert(NarrowestTy == I.getOperand(0)->getType() &&
"We did not look past any shifts while matching XShift though.");
bool HadTrunc = WidestTy != I.getOperand(0)->getType();
if (XShAmt->getType() != YShAmt->getType())
return nullptr;
+ // As input, we have the following pattern:
+ // icmp eq/ne (and ((x shift Q), (y oppositeshift K))), 0
+ // We want to rewrite that as:
+ // icmp eq/ne (and (x shift (Q+K)), y), 0 iff (Q+K) u< bitwidth(x)
+ // While we know that originally (Q+K) would not overflow
+ // (because 2 * (N-1) u<= iN -1), we have looked past extensions of
+ // shift amounts, so it may now overflow in a smaller bit width.
+ // To ensure that does not happen, we need to ensure that the total maximal
+ // shift amount is still representable in that smaller bit width.
+ unsigned MaximalPossibleTotalShiftAmount =
+ (WidestTy->getScalarSizeInBits() - 1) +
+ (NarrowestTy->getScalarSizeInBits() - 1);
+ APInt MaximalRepresentableShiftAmount =
+ APInt::getAllOnesValue(XShAmt->getType()->getScalarSizeInBits());
+ if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount))
+ return nullptr;
+
// Can we fold (XShAmt+YShAmt) ?
auto *NewShAmt = dyn_cast_or_null<Constant>(
SimplifyAddInst(XShAmt, YShAmt, /*isNSW=*/false,
ret i1 %tobool
}
-; FIXME: this is a miscompile. We should not transform this.
; See https://bugs.llvm.org/show_bug.cgi?id=44802
define i1 @pr44802(i3 %a, i3 %x, i3 %y) {
; CHECK-LABEL: @pr44802(
-; CHECK-NEXT: [[TMP1:%.*]] = and i3 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i3 [[TMP1]], 0
-; CHECK-NEXT: ret i1 [[TMP2]]
+; CHECK-NEXT: [[T0:%.*]] = icmp ne i3 [[A:%.*]], 0
+; CHECK-NEXT: [[T1:%.*]] = zext i1 [[T0]] to i3
+; CHECK-NEXT: [[T2:%.*]] = lshr i3 [[X:%.*]], [[T1]]
+; CHECK-NEXT: [[T3:%.*]] = shl i3 [[Y:%.*]], [[T1]]
+; CHECK-NEXT: [[T4:%.*]] = and i3 [[T2]], [[T3]]
+; CHECK-NEXT: [[T5:%.*]] = icmp ne i3 [[T4]], 0
+; CHECK-NEXT: ret i1 [[T5]]
;
%t0 = icmp ne i3 %a, 0
%t1 = zext i1 %t0 to i3