From 24ad2b5e7d8ceecb045bfc499107b9ed90439adb Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 31 Jul 2019 12:55:39 +0000
Subject: [PATCH] [X86][AVX] Ensure chained subvector insertions are the same
 size (PR42833)

Before combining insert_subvector(insert_subvector(vec, sub0, c0), sub1, c1)
patterns, ensure that the subvectors are all the same type.

On AVX512 targets especially we might have a mixture of 128/256 subvector
insertions.

llvm-svn: 367429
---
 llvm/lib/Target/X86/X86ISelLowering.cpp |  2 ++
 llvm/test/CodeGen/X86/oddsubvector.ll   | 20 +++++++++++---------
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 300fc3e..352f2ed 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5538,6 +5538,7 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
     if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
         Idx == (VT.getVectorNumElements() / 2) &&
         Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+        Src.getOperand(1).getValueType() == SubVT &&
         isNullConstant(Src.getOperand(2))) {
       Ops.push_back(Src.getOperand(1));
       Ops.push_back(Sub);
@@ -43978,6 +43979,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
       Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
       OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2 &&
       isNullConstant(Vec.getOperand(2)) && !Vec.getOperand(0).isUndef() &&
+      Vec.getOperand(1).getValueSizeInBits() == SubVecVT.getSizeInBits() &&
       Vec.hasOneUse()) {
     Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT),
                       Vec.getOperand(1), Vec.getOperand(2));
diff --git a/llvm/test/CodeGen/X86/oddsubvector.ll b/llvm/test/CodeGen/X86/oddsubvector.ll
index d38f21e..5bcfbe6 100644
--- a/llvm/test/CodeGen/X86/oddsubvector.ll
+++ b/llvm/test/CodeGen/X86/oddsubvector.ll
@@ -349,19 +349,21 @@ define void @PR42833() {
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    movl {{.*}}(%rip), %eax
 ; AVX512-NEXT:    vmovdqu c+{{.*}}(%rip), %ymm0
+; AVX512-NEXT:    vmovdqu64 c+{{.*}}(%rip), %zmm1
 ; AVX512-NEXT:    addl c+{{.*}}(%rip), %eax
-; AVX512-NEXT:    vmovd %eax, %xmm1
-; AVX512-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2,3,4,5,6,7]
-; AVX512-NEXT:    vpaddd %ymm1, %ymm0, %ymm2
-; AVX512-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0],ymm0[1,2,3,4,5,6,7]
-; AVX512-NEXT:    vmovdqa c+{{.*}}(%rip), %xmm1
+; AVX512-NEXT:    vmovd %eax, %xmm2
+; AVX512-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0],mem[1,2,3,4,5,6,7]
+; AVX512-NEXT:    vpaddd %ymm2, %ymm0, %ymm3
+; AVX512-NEXT:    vpsllvd %ymm2, %ymm0, %ymm0
+; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm3[0],ymm0[1,2,3,4,5,6,7]
+; AVX512-NEXT:    vmovdqa c+{{.*}}(%rip), %xmm2
 ; AVX512-NEXT:    vmovdqu %ymm0, c+{{.*}}(%rip)
 ; AVX512-NEXT:    vmovdqu c+{{.*}}(%rip), %ymm0
-; AVX512-NEXT:    vmovdqu64 d+{{.*}}(%rip), %zmm2
-; AVX512-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm1
+; AVX512-NEXT:    vmovdqu64 d+{{.*}}(%rip), %zmm3
+; AVX512-NEXT:    vpinsrd $0, %eax, %xmm2, %xmm2
+; AVX512-NEXT:    vinserti32x4 $0, %xmm2, %zmm1, %zmm1
 ; AVX512-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm1
-; AVX512-NEXT:    vpsubd %zmm1, %zmm2, %zmm1
+; AVX512-NEXT:    vpsubd %zmm1, %zmm3, %zmm1
 ; AVX512-NEXT:    vmovdqu64 %zmm1, d+{{.*}}(%rip)
 ; AVX512-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
 ; AVX512-NEXT:    vmovdqu %ymm0, c+{{.*}}(%rip)
--
2.7.4
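
Note: a minimal standalone sketch of why the size check matters (this models
insert_subvector over plain 32-bit lanes; the Lanes/insertSub helpers below
are illustrative, not LLVM's SelectionDAG API). Folding
insert_subvector(insert_subvector(vec, sub0, 0), sub1, half) into
concat(sub0, sub1) only preserves the value when both subvectors are the same
size, each covering exactly half of the result; with a mixed 128/256-bit
chain, part of the lower half still comes from the original vector and must
not be dropped:

#include <cassert>
#include <vector>

// Illustrative model: a vector value is a sequence of 32-bit lanes.
using Lanes = std::vector<unsigned>;

// insert_subvector(Dst, Sub, Idx): overwrite Sub.size() lanes of Dst at Idx.
static Lanes insertSub(Lanes Dst, const Lanes &Sub, size_t Idx) {
  for (size_t I = 0; I != Sub.size(); ++I)
    Dst[Idx + I] = Sub[I];
  return Dst;
}

// concat(Lo, Hi): the value the pre-patch fold assumed the chain produced.
static Lanes concat(Lanes Lo, const Lanes &Hi) {
  Lo.insert(Lo.end(), Hi.begin(), Hi.end());
  return Lo;
}

int main() {
  Lanes Base(16, 7);  // a fully defined v16i32
  Lanes Sub256(8, 1); // 256-bit subvector (8 x i32)
  Lanes Sub128(4, 2); // 128-bit subvector (4 x i32)

  // Same-size chain: 256-bit insert at element 0, then 256-bit insert at 8.
  // Here the chain really is concat(inner, outer).
  Lanes Chain = insertSub(insertSub(Base, Sub256, 0), Lanes(8, 3), 8);
  assert(Chain == concat(Sub256, Lanes(8, 3)));

  // Mixed-size chain of the kind the commit message describes: a 128-bit
  // insert at element 0 followed by a 256-bit insert at 8. Elements 4..7
  // still come from Base, so no concat of just the two inserted subvectors
  // can reproduce the chain's value; the fold must be skipped.
  Lanes Mixed = insertSub(insertSub(Base, Sub128, 0), Sub256, 8);
  assert(Mixed[4] == 7 && Mixed[7] == 7);
  return 0;
}

Both new checks enforce exactly this invariant: collectConcatOps now requires
the inner inserted operand to have type SubVT before emitting {inner, outer}
as concat operands, and combineInsertSubvector requires it to be
SubVecVT-sized before discarding the chain's base vector.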
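
On the test delta: the fixed AVX512 output keeps the full 512-bit load of c
live and rebuilds the value with two explicit insertions into the same
destination (vinserti32x4 $0 for the patched low 128 bits, then
vinserti64x4 $1 for the high 256 bits) instead of folding the mixed chain.
Roughly, in intrinsics (the function name and arguments here are
illustrative, not taken from the test):

#include <immintrin.h>

// Shape of the corrected sequence: patch the low 128 bits of a full
// 512-bit value, then overwrite the upper 256 bits, leaving elements
// 4..7 of the original load intact.
__m512i patch_low_and_high(const void *c, __m128i low4, __m256i high8) {
  __m512i v = _mm512_loadu_si512(c);   // vmovdqu64 c(%rip), %zmm1
  v = _mm512_inserti32x4(v, low4, 0);  // vinserti32x4 $0, %xmm2, %zmm1, %zmm1
  v = _mm512_inserti64x4(v, high8, 1); // vinserti64x4 $1, %ymm0, %zmm1, %zmm1
  return v;
}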