I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
{Builder.CreateNot(X)}));
+ // Reduce multiplies for difference-of-squares by factoring:
+ // (X * X) - (Y * Y) --> (X + Y) * (X - Y)
+ if (match(Op0, m_OneUse(m_Mul(m_Value(X), m_Deferred(X)))) &&
+ match(Op1, m_OneUse(m_Mul(m_Value(Y), m_Deferred(Y))))) {
+ auto *OBO0 = cast<OverflowingBinaryOperator>(Op0);
+ auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
+ bool PropagateNSW = I.hasNoSignedWrap() && OBO0->hasNoSignedWrap() &&
+ OBO1->hasNoSignedWrap();
+ bool PropagateNUW = I.hasNoUnsignedWrap() && OBO0->hasNoUnsignedWrap() &&
+ OBO1->hasNoUnsignedWrap();
+ Value *Add = Builder.CreateAdd(X, Y, "add", PropagateNUW, PropagateNSW);
+ Value *Sub = Builder.CreateSub(X, Y, "sub", PropagateNUW, PropagateNSW);
+ Value *Mul = Builder.CreateMul(Add, Sub, "", PropagateNUW, PropagateNSW);
+ return replaceInstUsesWith(I, Mul);
+ }
+
return TryToNarrowDeduceFlags();
}
ret <2 x i8> %r
}
+; (X * X) - (Y * Y) --> (X + Y) * (X - Y)
+
define i8 @diff_of_squares(i8 %x, i8 %y) {
; CHECK-LABEL: @diff_of_squares(
-; CHECK-NEXT: [[X2:%.*]] = mul i8 [[X:%.*]], [[X]]
-; CHECK-NEXT: [[Y2:%.*]] = mul i8 [[Y:%.*]], [[Y]]
-; CHECK-NEXT: [[R:%.*]] = sub i8 [[X2]], [[Y2]]
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = mul i8 [[ADD]], [[SUB]]
; CHECK-NEXT: ret i8 [[R]]
;
%x2 = mul i8 %x, %x
ret i8 %r
}
+; nuw on both muls and the sub: propagate nuw to all new ops (all-or-nothing, possibly conservative)
+
define i5 @diff_of_squares_nuw(i5 %x, i5 %y) {
; CHECK-LABEL: @diff_of_squares_nuw(
-; CHECK-NEXT: [[X2:%.*]] = mul nuw i5 [[X:%.*]], [[X]]
-; CHECK-NEXT: [[Y2:%.*]] = mul nuw i5 [[Y:%.*]], [[Y]]
-; CHECK-NEXT: [[R:%.*]] = sub nuw i5 [[X2]], [[Y2]]
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i5 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SUB:%.*]] = sub nuw i5 [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = mul nuw i5 [[ADD]], [[SUB]]
; CHECK-NEXT: ret i5 [[R]]
;
%x2 = mul nuw i5 %x, %x
ret i5 %r
}
+; nuw is missing on the sub: drop nuw from all new ops (all-or-nothing, possibly conservative)
+
define i5 @diff_of_squares_partial_nuw(i5 %x, i5 %y) {
; CHECK-LABEL: @diff_of_squares_partial_nuw(
-; CHECK-NEXT: [[X2:%.*]] = mul nuw i5 [[X:%.*]], [[X]]
-; CHECK-NEXT: [[Y2:%.*]] = mul nuw i5 [[Y:%.*]], [[Y]]
-; CHECK-NEXT: [[R:%.*]] = sub i5 [[X2]], [[Y2]]
+; CHECK-NEXT: [[ADD:%.*]] = add i5 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SUB:%.*]] = sub i5 [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = mul i5 [[ADD]], [[SUB]]
; CHECK-NEXT: ret i5 [[R]]
;
%x2 = mul nuw i5 %x, %x
ret i5 %r
}
+; nsw on both muls and the sub: propagate nsw to all new ops (all-or-nothing, possibly conservative)
+
define <2 x i5> @diff_of_squares_nsw(<2 x i5> %x, <2 x i5> %y) {
; CHECK-LABEL: @diff_of_squares_nsw(
-; CHECK-NEXT: [[X2:%.*]] = mul nsw <2 x i5> [[X:%.*]], [[X]]
-; CHECK-NEXT: [[Y2:%.*]] = mul nsw <2 x i5> [[Y:%.*]], [[Y]]
-; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i5> [[X2]], [[Y2]]
+; CHECK-NEXT: [[ADD:%.*]] = add nsw <2 x i5> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i5> [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = mul nsw <2 x i5> [[ADD]], [[SUB]]
; CHECK-NEXT: ret <2 x i5> [[R]]
;
%x2 = mul nsw <2 x i5> %x, %x
ret <2 x i5> %r
}
+; nsw is missing on one mul: drop nsw from all new ops (all-or-nothing, possibly conservative)
+
define <2 x i5> @diff_of_squares_partial_nsw(<2 x i5> %x, <2 x i5> %y) {
; CHECK-LABEL: @diff_of_squares_partial_nsw(
-; CHECK-NEXT: [[X2:%.*]] = mul nsw <2 x i5> [[X:%.*]], [[X]]
-; CHECK-NEXT: [[Y2:%.*]] = mul <2 x i5> [[Y:%.*]], [[Y]]
-; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i5> [[X2]], [[Y2]]
+; CHECK-NEXT: [[ADD:%.*]] = add <2 x i5> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i5> [[X]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = mul <2 x i5> [[ADD]], [[SUB]]
; CHECK-NEXT: ret <2 x i5> [[R]]
;
%x2 = mul nsw <2 x i5> %x, %x
ret <2 x i5> %r
}
+; negative test
+
define i8 @diff_of_squares_use1(i8 %x, i8 %y) {
; CHECK-LABEL: @diff_of_squares_use1(
; CHECK-NEXT: [[X2:%.*]] = mul i8 [[X:%.*]], [[X]]
ret i8 %r
}
+; negative test
+
define i8 @diff_of_squares_use2(i8 %x, i8 %y) {
; CHECK-LABEL: @diff_of_squares_use2(
; CHECK-NEXT: [[X2:%.*]] = mul i8 [[X:%.*]], [[X]]