From: Simon Pilgrim
Date: Fri, 4 Mar 2022 17:41:28 +0000 (+0000)
Subject: [X86] getTargetVShiftNode - peek through any zext node
X-Git-Tag: upstream/15.0.7~14597
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=588d97e2461964ac5b821b946276c5600e0139a3;p=platform%2Fupstream%2Fllvm.git

[X86] getTargetVShiftNode - peek through any zext node

If the shift amount has been zero-extended, peek through as this might
help us further canonicalize the shift amount.

Fixes regression mentioned in rG147cfcbef1255ba2b4875b76708dab1a685085f5
---

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 12b9195..c8371d0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25744,6 +25744,16 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
     ShAmt = DAG.getVectorShuffle(AmtVT, dl, ShAmt, DAG.getUNDEF(AmtVT), Mask);
   }
 
+  // Peek through any zext node if we can get back to a 128-bit source.
+  if (AmtVT.getScalarSizeInBits() == 64 &&
+      (ShAmt.getOpcode() == ISD::ZERO_EXTEND ||
+       ShAmt.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+      ShAmt.getOperand(0).getValueType().isSimple() &&
+      ShAmt.getOperand(0).getValueType().is128BitVector()) {
+    ShAmt = ShAmt.getOperand(0);
+    AmtVT = ShAmt.getSimpleValueType();
+  }
+
   // See if we can mask off the upper elements using the existing source node.
   // The shift uses the entire lower 64-bits of the amount vector, so no need to
   // do this for vXi64 types.
@@ -25784,10 +25794,13 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
   // Zero-extend bottom element to v2i64 vector type, either by extension or
   // shuffle masking.
   if (!IsMasked && AmtVT.getScalarSizeInBits() < 64) {
-    if (Subtarget.hasSSE41())
+    if (AmtVT == MVT::v4i32 && (ShAmt.getOpcode() == X86ISD::VBROADCAST ||
+                                ShAmt.getOpcode() == X86ISD::VBROADCAST_LOAD)) {
+      ShAmt = DAG.getNode(X86ISD::VZEXT_MOVL, SDLoc(ShAmt), MVT::v4i32, ShAmt);
+    } else if (Subtarget.hasSSE41()) {
       ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
                           MVT::v2i64, ShAmt);
-    else {
+    } else {
       SDValue ByteShift = DAG.getTargetConstant(
           (128 - AmtVT.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
       ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 53e5fa4..e9a9701 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -2105,18 +2105,16 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
 ; AVX1-LABEL: PR52719:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vmovd %edi, %xmm1
-; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT:    vpsrlq %xmm1, %xmm3, %xmm4
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
-; AVX1-NEXT:    vpsrlq %xmm1, %xmm5, %xmm1
-; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vpsubq %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vpsrlq %xmm2, %xmm3, %xmm3
-; AVX1-NEXT:    vpsrlq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpsubq %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpsrlq %xmm1, %xmm3, %xmm3
+; AVX1-NEXT:    vpxor %xmm2, %xmm3, %xmm3
+; AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm3
+; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: PR52719:
@@ -2170,28 +2168,23 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
 ;
 ; X86-AVX1-LABEL: PR52719:
 ; X86-AVX1:       # %bb.0:
-; X86-AVX1-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm1
-; X86-AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X86-AVX1-NEXT:    vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; X86-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X86-AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; X86-AVX1-NEXT:    # xmm3 = mem[0,0]
-; X86-AVX1-NEXT:    vpsrlq %xmm2, %xmm3, %xmm4
-; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
-; X86-AVX1-NEXT:    vpsrlq %xmm2, %xmm5, %xmm2
-; X86-AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
-; X86-AVX1-NEXT:    vpsubq %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; X86-AVX1-NEXT:    # xmm2 = mem[0,0]
+; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
 ; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpxor %xmm2, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm3
 ; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpsubq %xmm3, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: PR52719:
 ; X86-AVX2:       # %bb.0:
-; X86-AVX2-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %xmm1
-; X86-AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX2-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X86-AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
 ; X86-AVX2-NEXT:    vpsrlq %xmm1, %ymm2, %ymm2
 ; X86-AVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
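
For context, a minimal LLVM IR sketch of the pattern this change targets (illustrative only, not part of the patch; the function and value names are made up, modelled on the PR52719 test above): a 32-bit scalar shift amount that is zero-extended to i64 and splatted as the vXi64 shift amount. Peeking through the zext lets getTargetVShiftNode reuse the original 128-bit amount vector instead of emitting an extra vpmovzxdq, which is the regression visible in the removed CHECK lines.

; Illustrative reduced example (assumed shape, not taken from the test file).
define <4 x i64> @ashr_splat_i32_amt(<4 x i64> %a0, i32 %a1) {
  %zext = zext i32 %a1 to i64
  %ins = insertelement <4 x i64> poison, i64 %zext, i64 0
  %splat = shufflevector <4 x i64> %ins, <4 x i64> poison, <4 x i32> zeroinitializer
  %res = ashr <4 x i64> %a0, %splat
  ret <4 x i64> %res
}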