From 51c2efc104d14f763dbcaf5cd95fe4a3eb9561ac Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 31 Jan 2019 11:15:05 +0000 Subject: [PATCH] [X86][AVX] Fold vt1 concat_vectors(vt2 undef, vt2 broadcast(x)) --> vt1 broadcast(x) If we're not inserting the broadcast into the lowest subvector then we can avoid the insertion by just performing a larger broadcast. Avoids a regression when we enable AVX1 broadcasts in shuffle combining llvm-svn: 352742 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +++++- llvm/test/CodeGen/X86/insert-into-constant-vector.ll | 12 ++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 39fdafe..8412c95 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -41593,6 +41593,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, // If this is subv_broadcast insert into both halves, use a larger // subv_broadcast. + // TODO - handle X86ISD::VBROADCAST as well? if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec.getOperand(0)); @@ -41614,11 +41615,14 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, SubVec2, Vec.getOperand(2)); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec, N->getOperand(2)); - } } } + // If this is a broadcast insert into an upper undef, use a larger broadcast. + if (Vec.isUndef() && IdxVal != 0 && SubVec.getOpcode() == X86ISD::VBROADCAST) + return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0)); + return SDValue(); } diff --git a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll index 9d95f98..ea4c4f1 100644 --- a/llvm/test/CodeGen/X86/insert-into-constant-vector.ll +++ b/llvm/test/CodeGen/X86/insert-into-constant-vector.ll @@ -289,31 +289,27 @@ define <8 x i32> @elt7_v8i32(i32 %x) { ; ; X32AVX2-LABEL: elt7_v8i32: ; X32AVX2: # %bb.0: -; X32AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0 -; X32AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; X32AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0 ; X32AVX2-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7] ; X32AVX2-NEXT: retl ; ; X64AVX2-LABEL: elt7_v8i32: ; X64AVX2: # %bb.0: ; X64AVX2-NEXT: vmovd %edi, %xmm0 -; X64AVX2-NEXT: vpbroadcastd %xmm0, %xmm0 -; X64AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X64AVX2-NEXT: vpbroadcastd %xmm0, %ymm0 ; X64AVX2-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7] ; X64AVX2-NEXT: retq ; ; X32AVX512F-LABEL: elt7_v8i32: ; X32AVX512F: # %bb.0: -; X32AVX512F-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0 -; X32AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; X32AVX512F-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0 ; X32AVX512F-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7] ; X32AVX512F-NEXT: retl ; ; X64AVX512F-LABEL: elt7_v8i32: ; X64AVX512F: # %bb.0: ; X64AVX512F-NEXT: vmovd %edi, %xmm0 -; X64AVX512F-NEXT: vpbroadcastd %xmm0, %xmm0 -; X64AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X64AVX512F-NEXT: vpbroadcastd %xmm0, %ymm0 ; X64AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7] ; X64AVX512F-NEXT: retq %ins = insertelement <8 x i32> , i32 %x, i32 7 -- 2.7.4