case ISD::MUL: {
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- Known = KnownBits::mul(Known, Known2);
+ bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1);
+ Known = KnownBits::mul(Known, Known2, SelfMultiply);
break;
}
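
Why a self-multiply yields an extra known bit: writing x = 2a + b with b the low bit gives x*x = 4(a*a + a*b) + b*b, and b*b == b, so bit 1 of any square is always zero. That is the fact the new SelfMultiply flag lets KnownBits::mul report. The standalone check below brute-forces it over all 16-bit values; it is only an illustration, not part of the patch.

// Illustrative only (not part of the patch): bit 1 of x*x is always clear,
// which is the extra bit the SelfMultiply flag exposes to KnownBits::mul.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X <= 0xFFFF; ++X) {
    uint32_t Square = (X * X) & 0xFFFF; // 16-bit wrapping self-multiply
    assert((Square & 2u) == 0 && "bit 1 of a square must be zero");
  }
  return 0;
}
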
case ISD::UMUL_LOHI: {
assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1);
if (Op.getResNo() == 0)
- Known = KnownBits::mul(Known, Known2);
+ Known = KnownBits::mul(Known, Known2, SelfMultiply);
else
Known = KnownBits::mulhu(Known, Known2);
break;
}
case ISD::SMUL_LOHI: {
assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result");
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1);
if (Op.getResNo() == 0)
- Known = KnownBits::mul(Known, Known2);
+ Known = KnownBits::mul(Known, Known2, SelfMultiply);
else
Known = KnownBits::mulhs(Known, Known2);
break;
}
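
For the *MUL_LOHI nodes only the ResNo == 0 (low-half) result can use the SelfMultiply flag: bit 1 of the low half of x*x is always zero, but no corresponding bit of the high half is fixed, so the mulhu/mulhs paths are left unchanged. The following standalone sketch illustrates that asymmetry using an assumed 16x16 -> 32 bit split; it is not part of the patch.

// Illustrative only: the low half of a square always has bit 1 clear,
// while the high half does not, so only result 0 gets the extra known bit.
#include <cassert>
#include <cstdint>

int main() {
  bool HighBit1AlwaysZero = true;
  for (uint32_t X = 0; X <= 0xFFFF; ++X) {
    uint32_t Wide = X * X;               // full 32-bit product
    uint32_t Lo = Wide & 0xFFFF;         // result 0 of UMUL_LOHI
    uint32_t Hi = (Wide >> 16) & 0xFFFF; // result 1 of UMUL_LOHI
    assert((Lo & 2u) == 0 && "low half of a square has bit 1 clear");
    if (Hi & 2u)
      HighBit1AlwaysZero = false;        // counterexample for the high half
  }
  assert(!HighBit1AlwaysZero && "bit 1 of the high half is not known zero");
  return 0;
}
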
define i64 @combine_mul_self_knownbits(i64 %x) {
; SSE-LABEL: combine_mul_self_knownbits:
; SSE: # %bb.0:
-; SSE-NEXT: movq %rdi, %rax
-; SSE-NEXT: imull %eax, %eax
-; SSE-NEXT: andl $2, %eax
+; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: combine_mul_self_knownbits:
; AVX: # %bb.0:
-; AVX-NEXT: movq %rdi, %rax
-; AVX-NEXT: imull %eax, %eax
-; AVX-NEXT: andl $2, %eax
+; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: retq
%1 = mul i64 %x, %x
%2 = and i64 %1, 2
ret i64 %2
}

define <4 x i32> @combine_mul_self_knownbits_vector(<4 x i32> %x) {
; SSE-LABEL: combine_mul_self_knownbits_vector:
; SSE: # %bb.0:
-; SSE-NEXT: pmulld %xmm0, %xmm0
-; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_mul_self_knownbits_vector:
; AVX: # %bb.0:
-; AVX-NEXT: vpmulld %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = mul <4 x i32> %x, %x
%2 = and <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %2
}