From c05a11108b9a9deb266c3c1758677462df61e05e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 14 Jan 2020 11:51:09 +0000 Subject: [PATCH] [SelectionDAG] ComputeKnownBits - merge getValidMinimumShiftAmountConstant() and generic ISD::SHL handling. As mentioned by @nikic on rGef5debac4302, we can merge the guaranteed bottom zero bits from the shifted value, and then, if a min shift amount is known, zero out the bottom bits as well. --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 23 ++++++++++++----------- llvm/test/CodeGen/X86/combine-shl.ll | 4 ++-- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 64d439f..313e07b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2865,24 +2865,25 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::SHL: + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { - Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); unsigned Shift = ShAmt->getZExtValue(); Known.Zero <<= Shift; Known.One <<= Shift; // Low bits are known zero. Known.Zero.setLowBits(Shift); - } else if (const APInt *ShMinAmt = - getValidMinimumShiftAmountConstant(Op, DemandedElts)) { - // Minimum shift low bits are known zero. - Known.Zero.setLowBits(ShMinAmt->getZExtValue()); - } else { - // No matter the shift amount, the trailing zeros will stay zero. - Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known.Zero = - APInt::getLowBitsSet(BitWidth, Known.countMinTrailingZeros()); - Known.One.clearAllBits(); + break; } + + // No matter the shift amount, the trailing zeros will stay zero. + Known.Zero = APInt::getLowBitsSet(BitWidth, Known.countMinTrailingZeros()); + Known.One.clearAllBits(); + + // Minimum shift low bits are known zero. + if (const APInt *ShMinAmt = + getValidMinimumShiftAmountConstant(Op, DemandedElts)) + Known.Zero.setLowBits(ShMinAmt->getZExtValue()); break; case ISD::SRL: Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll index ae31dc4..4d37f32 100644 --- a/llvm/test/CodeGen/X86/combine-shl.ll +++ b/llvm/test/CodeGen/X86/combine-shl.ll @@ -877,7 +877,7 @@ define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) { ; SSE2-NEXT: pmuludq %xmm2, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: paddd {{.*}}(%rip), %xmm0 +; SSE2-NEXT: por {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: combine_vec_add_shl_and_nonsplat: @@ -894,7 +894,7 @@ define <4 x i32> @combine_vec_add_shl_and_nonsplat(<4 x i32> %a0) { ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7] ; AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15] -; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %1 = and <4 x i32> %a0, %2 = shl <4 x i32> %1, -- 2.7.4