From d6fee6c3b0e7449d7550f25889702f6e8fd0c3c2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 19 Jan 2022 17:16:31 +0000 Subject: [PATCH] [DAG] SelectionDAG::computeKnownBits - add mul(x,x) self-multiply handling (PR48683) Pass the SelfMultiply flag to KnownBits::mul() - added at D108992 https://alive2.llvm.org/ce/z/NN_eaR --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 9 ++++++--- llvm/test/CodeGen/X86/combine-mul.ll | 15 ++++----------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 71fb1e3..52c69fe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3081,7 +3081,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::MUL: { Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known = KnownBits::mul(Known, Known2); + bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1); + Known = KnownBits::mul(Known, Known2, SelfMultiply); break; } case ISD::MULHU: { @@ -3100,8 +3101,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1); if (Op.getResNo() == 0) - Known = KnownBits::mul(Known, Known2); + Known = KnownBits::mul(Known, Known2, SelfMultiply); else Known = KnownBits::mulhu(Known, Known2); break; @@ -3110,8 +3112,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1); if (Op.getResNo() == 0) - Known = KnownBits::mul(Known, Known2); + Known = KnownBits::mul(Known, Known2, SelfMultiply); else Known = KnownBits::mulhs(Known, Known2); break; diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll index bfa9da5..4370b22 100644 --- a/llvm/test/CodeGen/X86/combine-mul.ll +++ b/llvm/test/CodeGen/X86/combine-mul.ll @@ -366,16 +366,12 @@ define <2 x i64> @combine_mul_to_abs_v2i64(<2 x i64> %x) { define i64 @combine_mul_self_knownbits(i64 %x) { ; SSE-LABEL: combine_mul_self_knownbits: ; SSE: # %bb.0: -; SSE-NEXT: movq %rdi, %rax -; SSE-NEXT: imull %eax, %eax -; SSE-NEXT: andl $2, %eax +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: combine_mul_self_knownbits: ; AVX: # %bb.0: -; AVX-NEXT: movq %rdi, %rax -; AVX-NEXT: imull %eax, %eax -; AVX-NEXT: andl $2, %eax +; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: retq %1 = mul i64 %x, %x %2 = and i64 %1, 2 @@ -385,15 +381,12 @@ define i64 @combine_mul_self_knownbits(i64 %x) { define <4 x i32> @combine_mul_self_knownbits_vector(<4 x i32> %x) { ; SSE-LABEL: combine_mul_self_knownbits_vector: ; SSE: # %bb.0: -; SSE-NEXT: pmulld %xmm0, %xmm0 -; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_mul_self_knownbits_vector: ; AVX: # %bb.0: -; AVX-NEXT: vpmulld %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2] -; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %1 = mul <4 x i32> %x, %x %2 = and <4 x i32> %1, -- 2.7.4