From 74b98ab1dbd55f33a9e8e8215884e8712326ab3b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sun, 6 Feb 2022 12:53:11 +0000
Subject: [PATCH] [X86] Fold ZERO_EXTEND_VECTOR_INREG(BUILD_VECTOR(X,Y,?,?))
 -> BUILD_VECTOR(X,0,Y,0)

Helps avoid some unnecessary shift-by-splat-amount extensions before
shuffle combining gets limited by one-use checks.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp        | 26 ++++++++++++++----
 llvm/test/CodeGen/X86/vector-shift-ashr-128.ll | 38 ++++++++------------------
 llvm/test/CodeGen/X86/vector-shift-ashr-256.ll |  9 +-----
 llvm/test/CodeGen/X86/vector-shift-ashr-512.ll |  2 --
 4 files changed, 32 insertions(+), 43 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 04b5984..a7736b9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53526,6 +53526,7 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
   unsigned Opcode = N->getOpcode();
   unsigned InOpcode = In.getOpcode();
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDLoc DL(N);
 
   // Try to merge vector loads and extend_inreg to an extload.
   if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) &&
@@ -53538,10 +53539,9 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
                                  : ISD::ZEXTLOAD;
       EVT MemVT = VT.changeVectorElementType(SVT);
       if (TLI.isLoadExtLegal(Ext, VT, MemVT)) {
-        SDValue Load =
-            DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(),
-                           Ld->getPointerInfo(), MemVT, Ld->getOriginalAlign(),
-                           Ld->getMemOperand()->getFlags());
+        SDValue Load = DAG.getExtLoad(
+            Ext, DL, VT, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
+            MemVT, Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags());
         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
         return Load;
       }
@@ -53550,7 +53550,7 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
 
   // Fold EXTEND_VECTOR_INREG(EXTEND_VECTOR_INREG(X)) -> EXTEND_VECTOR_INREG(X).
   if (Opcode == InOpcode)
-    return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0));
+    return DAG.getNode(Opcode, DL, VT, In.getOperand(0));
 
   // Fold EXTEND_VECTOR_INREG(EXTRACT_SUBVECTOR(EXTEND(X),0))
   // -> EXTEND_VECTOR_INREG(X).
@@ -53559,7 +53559,21 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
       In.getOperand(0).getOpcode() == getOpcode_EXTEND(Opcode) &&
       In.getOperand(0).getOperand(0).getValueSizeInBits() ==
           In.getValueSizeInBits())
-    return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0).getOperand(0));
+    return DAG.getNode(Opcode, DL, VT, In.getOperand(0).getOperand(0));
+
+  // Fold EXTEND_VECTOR_INREG(BUILD_VECTOR(X,Y,?,?)) -> BUILD_VECTOR(X,0,Y,0).
+  // TODO: Move to DAGCombine?
+  if (!DCI.isBeforeLegalizeOps() && Opcode == ISD::ZERO_EXTEND_VECTOR_INREG &&
+      In.getOpcode() == ISD::BUILD_VECTOR && In.hasOneUse() &&
+      In.getValueSizeInBits() == VT.getSizeInBits()) {
+    unsigned NumElts = VT.getVectorNumElements();
+    unsigned Scale = VT.getScalarSizeInBits() / In.getScalarValueSizeInBits();
+    EVT EltVT = In.getOperand(0).getValueType();
+    SmallVector<SDValue> Elts(Scale * NumElts, DAG.getConstant(0, DL, EltVT));
+    for (unsigned I = 0; I != NumElts; ++I)
+      Elts[I * Scale] = In.getOperand(I);
+    return DAG.getBitcast(VT, DAG.getBuildVector(In.getValueType(), DL, Elts));
+  }
 
   // Attempt to combine as a shuffle.
   // TODO: General ZERO_EXTEND_VECTOR_INREG support.
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index 3dda3df..27f26e9 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -1778,31 +1778,19 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
 }
 
 define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
-; SSE2-LABEL: PR52719:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    movd %edi, %xmm1
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; SSE2-NEXT:    psrlq %xmm1, %xmm2
-; SSE2-NEXT:    psrlq %xmm1, %xmm0
-; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    psubq %xmm2, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: PR52719:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    movd %edi, %xmm1
-; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; SSE41-NEXT:    psrlq %xmm1, %xmm2
-; SSE41-NEXT:    psrlq %xmm1, %xmm0
-; SSE41-NEXT:    pxor %xmm2, %xmm0
-; SSE41-NEXT:    psubq %xmm2, %xmm0
-; SSE41-NEXT:    retq
+; SSE-LABEL: PR52719:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movd %edi, %xmm1
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; SSE-NEXT:    psrlq %xmm1, %xmm2
+; SSE-NEXT:    psrlq %xmm1, %xmm0
+; SSE-NEXT:    pxor %xmm2, %xmm0
+; SSE-NEXT:    psubq %xmm2, %xmm0
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: PR52719:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vmovd %edi, %xmm1
-; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
 ; AVX-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
 ; AVX-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
@@ -1813,8 +1801,7 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
 ; XOPAVX1-LABEL: PR52719:
 ; XOPAVX1:       # %bb.0:
 ; XOPAVX1-NEXT:    vmovd %edi, %xmm1
-; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; XOPAVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
 ; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; XOPAVX1-NEXT:    vpsubq %xmm1, %xmm2, %xmm1
 ; XOPAVX1-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
@@ -1823,8 +1810,7 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
 ; XOPAVX2-LABEL: PR52719:
 ; XOPAVX2:       # %bb.0:
 ; XOPAVX2-NEXT:    vmovd %edi, %xmm1
-; XOPAVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
-; XOPAVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; XOPAVX2-NEXT:    vpbroadcastq %xmm1, %xmm1
 ; XOPAVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; XOPAVX2-NEXT:    vpsubq %xmm1, %xmm2, %xmm1
 ; XOPAVX2-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
@@ -1834,7 +1820,6 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512-NEXT:    vmovd %edi, %xmm1
-; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX512-NEXT:    vpsraq %xmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT:    vzeroupper
@@ -1843,7 +1828,6 @@ define <2 x i64> @PR52719(<2 x i64> %a0, i32 %a1) {
 ; AVX512VL-LABEL: PR52719:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vmovd %edi, %xmm1
-; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX512VL-NEXT:    vpsraq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index e6c802e..25355a06 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -2148,8 +2148,6 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
 ; AVX2-LABEL: PR52719:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vmovd %edi, %xmm1
-; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
-; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
 ; AVX2-NEXT:    vpsrlq %xmm1, %ymm2, %ymm2
 ; AVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
@@ -2175,8 +2173,6 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
 ; XOPAVX2-LABEL: PR52719:
 ; XOPAVX2:       # %bb.0:
 ; XOPAVX2-NEXT:    vmovd %edi, %xmm1
-; XOPAVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
-; XOPAVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; XOPAVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
 ; XOPAVX2-NEXT:    vpsrlq %xmm1, %ymm2, %ymm2
 ; XOPAVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
@@ -2188,16 +2184,13 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512-NEXT:    vmovd %edi, %xmm1
-; AVX512-NEXT:    vpbroadcastd %xmm1, %xmm1
-; AVX512-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; AVX512-NEXT:    vpsraq %xmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512-NEXT:    retq
 ;
 ; AVX512VL-LABEL: PR52719:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpbroadcastd %edi, %xmm1
-; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX512VL-NEXT:    vmovd %edi, %xmm1
 ; AVX512VL-NEXT:    vpsraq %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index f477de7..832a613 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -501,8 +501,6 @@ define <8 x i64> @PR52719(<8 x i64> %a0, i32 %a1) {
 ; ALL-LABEL: PR52719:
 ; ALL:       # %bb.0:
 ; ALL-NEXT:    vmovd %edi, %xmm1
-; ALL-NEXT:    vpbroadcastd %xmm1, %xmm1
-; ALL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
 ; ALL-NEXT:    vpsraq %xmm1, %zmm0, %zmm0
 ; ALL-NEXT:    retq
   %vec = insertelement <8 x i32> poison, i32 %a1, i64 0
-- 
2.7.4
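
Note: below is a minimal LLVM IR sketch of the pattern this fold targets, in
the shape of the PR52719 tests above. It is a reconstruction for illustration
only, not copied from the test files (the diff elides the full IR bodies, and
the function name here is hypothetical). A scalar i32 shift amount is splatted
and zero-extended to the shifted element type, so type legalization hands the
shift amount to the DAG as ZERO_EXTEND_VECTOR_INREG(BUILD_VECTOR(...)); the
new fold rewrites that into a BUILD_VECTOR with interleaved zero elements,
which subsequent build-vector/shuffle combining can fold away, eliminating the
standalone pmovzxdq/vpmovzxdq seen in the removed CHECK lines.

define <2 x i64> @ashr_by_splatted_amount(<2 x i64> %a0, i32 %a1) {
  ; Splat the scalar shift amount into a <2 x i32> vector.
  %vec = insertelement <2 x i32> poison, i32 %a1, i64 0
  %splat = shufflevector <2 x i32> %vec, <2 x i32> poison, <2 x i32> zeroinitializer
  ; Widen the amount to the shifted element type; this zext is what
  ; previously survived as a separate pmovzxdq before the variable shift.
  %amt = zext <2 x i32> %splat to <2 x i64>
  %ashr = ashr <2 x i64> %a0, %amt
  ret <2 x i64> %ashr
}

Running llc -mtriple=x86_64-unknown-unknown over something like this should
now keep the amount in the low 64 bits of an xmm register via movd alone
(psrlq/vpsraq only read the low 64 bits of their shift-amount operand, and
movd already zeroes the upper bits), matching the updated CHECK lines.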