From 24e4e8087f3af2f3df1d5447006d0bb87c0ebd8b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 31 Jul 2019 11:35:01 +0000
Subject: [PATCH] [X86][AVX] Add reduced test case for PR42833

llvm-svn: 367412
---
 llvm/test/CodeGen/X86/oddsubvector.ll | 234 ++++++++++++++++++++++++++++++++++
 1 file changed, 234 insertions(+)

diff --git a/llvm/test/CodeGen/X86/oddsubvector.ll b/llvm/test/CodeGen/X86/oddsubvector.ll
index d11bf71..d38f21e 100644
--- a/llvm/test/CodeGen/X86/oddsubvector.ll
+++ b/llvm/test/CodeGen/X86/oddsubvector.ll
@@ -190,3 +190,237 @@ define <16 x i32> @PR42819(<8 x i32>* %a0) {
   %3 = shufflevector <16 x i32> zeroinitializer, <16 x i32> %2, <16 x i32>
   ret <16 x i32> %3
 }
+
+@b = dso_local local_unnamed_addr global i32 0, align 4
+@c = dso_local local_unnamed_addr global [49 x i32] zeroinitializer, align 16
+@d = dso_local local_unnamed_addr global [49 x i32] zeroinitializer, align 16
+
+define void @PR42833() {
+; SSE2-LABEL: PR42833:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa c+{{.*}}(%rip), %xmm1
+; SSE2-NEXT: movdqa c+{{.*}}(%rip), %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: addl {{.*}}(%rip), %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: movaps {{.*#+}} xmm3 =
+; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm2[0],xmm3[1,2,3]
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: paddd %xmm3, %xmm4
+; SSE2-NEXT: pslld $23, %xmm3
+; SSE2-NEXT: paddd {{.*}}(%rip), %xmm3
+; SSE2-NEXT: cvttps2dq %xmm3, %xmm3
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pmuludq %xmm3, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm3, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; SSE2-NEXT: movss {{.*#+}} xmm5 = xmm4[0],xmm5[1,2,3]
+; SSE2-NEXT: movdqa d+{{.*}}(%rip), %xmm3
+; SSE2-NEXT: psubd %xmm1, %xmm3
+; SSE2-NEXT: paddd %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm1, c+{{.*}}(%rip)
+; SSE2-NEXT: movaps %xmm5, c+{{.*}}(%rip)
+; SSE2-NEXT: movdqa c+{{.*}}(%rip), %xmm1
+; SSE2-NEXT: movdqa c+{{.*}}(%rip), %xmm4
+; SSE2-NEXT: movdqa d+{{.*}}(%rip), %xmm5
+; SSE2-NEXT: movdqa d+{{.*}}(%rip), %xmm6
+; SSE2-NEXT: movdqa d+{{.*}}(%rip), %xmm7
+; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
+; SSE2-NEXT: psubd %xmm0, %xmm7
+; SSE2-NEXT: psubd %xmm4, %xmm6
+; SSE2-NEXT: psubd %xmm1, %xmm5
+; SSE2-NEXT: movdqa %xmm5, d+{{.*}}(%rip)
+; SSE2-NEXT: movdqa %xmm6, d+{{.*}}(%rip)
+; SSE2-NEXT: movdqa %xmm3, d+{{.*}}(%rip)
+; SSE2-NEXT: movdqa %xmm7, d+{{.*}}(%rip)
+; SSE2-NEXT: paddd %xmm4, %xmm4
+; SSE2-NEXT: paddd %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm1, c+{{.*}}(%rip)
+; SSE2-NEXT: movdqa %xmm4, c+{{.*}}(%rip)
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: PR42833:
+; SSE42: # %bb.0:
+; SSE42-NEXT: movdqa c+{{.*}}(%rip), %xmm1
+; SSE42-NEXT: movdqa c+{{.*}}(%rip), %xmm0
+; SSE42-NEXT: movd %xmm0, %eax
+; SSE42-NEXT: addl {{.*}}(%rip), %eax
+; SSE42-NEXT: movdqa {{.*#+}} xmm2 =
+; SSE42-NEXT: pinsrd $0, %eax, %xmm2
+; SSE42-NEXT: movdqa %xmm0, %xmm3
+; SSE42-NEXT: paddd %xmm2, %xmm3
+; SSE42-NEXT: pslld $23, %xmm2
+; SSE42-NEXT: paddd {{.*}}(%rip), %xmm2
+; SSE42-NEXT: cvttps2dq %xmm2, %xmm2
+; SSE42-NEXT: pmulld %xmm0, %xmm2
+; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3,4,5,6,7]
+; SSE42-NEXT: movdqa d+{{.*}}(%rip), %xmm3
+; SSE42-NEXT: psubd %xmm1, %xmm3
+; SSE42-NEXT: paddd %xmm1, %xmm1
+; SSE42-NEXT: movdqa %xmm1, c+{{.*}}(%rip)
+; SSE42-NEXT: movdqa %xmm2, c+{{.*}}(%rip)
+; SSE42-NEXT: movdqa c+{{.*}}(%rip), %xmm1
+; SSE42-NEXT: movdqa c+{{.*}}(%rip), %xmm2
+; SSE42-NEXT: movdqa d+{{.*}}(%rip), %xmm4
+; SSE42-NEXT: movdqa d+{{.*}}(%rip), %xmm5
+; SSE42-NEXT: movdqa d+{{.*}}(%rip), %xmm6
+; SSE42-NEXT: pinsrd $0, %eax, %xmm0
+; SSE42-NEXT: psubd %xmm0, %xmm6
+; SSE42-NEXT: psubd %xmm2, %xmm5
+; SSE42-NEXT: psubd %xmm1, %xmm4
+; SSE42-NEXT: movdqa %xmm4, d+{{.*}}(%rip)
+; SSE42-NEXT: movdqa %xmm5, d+{{.*}}(%rip)
+; SSE42-NEXT: movdqa %xmm3, d+{{.*}}(%rip)
+; SSE42-NEXT: movdqa %xmm6, d+{{.*}}(%rip)
+; SSE42-NEXT: paddd %xmm2, %xmm2
+; SSE42-NEXT: paddd %xmm1, %xmm1
+; SSE42-NEXT: movdqa %xmm1, c+{{.*}}(%rip)
+; SSE42-NEXT: movdqa %xmm2, c+{{.*}}(%rip)
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: PR42833:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqa c+{{.*}}(%rip), %xmm0
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: addl {{.*}}(%rip), %eax
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 =
+; AVX1-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
+; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vmovdqa c+{{.*}}(%rip), %xmm3
+; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
+; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
+; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpslld $1, %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3,4,5,6,7]
+; AVX1-NEXT: vmovdqa d+{{.*}}(%rip), %xmm2
+; AVX1-NEXT: vpsubd c+{{.*}}(%rip), %xmm2, %xmm2
+; AVX1-NEXT: vmovups %ymm1, c+{{.*}}(%rip)
+; AVX1-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa d+{{.*}}(%rip), %xmm1
+; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vmovdqa d+{{.*}}(%rip), %xmm1
+; AVX1-NEXT: vmovdqa c+{{.*}}(%rip), %xmm3
+; AVX1-NEXT: vpsubd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa d+{{.*}}(%rip), %xmm4
+; AVX1-NEXT: vmovdqa c+{{.*}}(%rip), %xmm5
+; AVX1-NEXT: vpsubd %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vmovdqa %xmm2, d+{{.*}}(%rip)
+; AVX1-NEXT: vmovdqa %xmm4, d+{{.*}}(%rip)
+; AVX1-NEXT: vmovdqa %xmm1, d+{{.*}}(%rip)
+; AVX1-NEXT: vmovdqa %xmm0, d+{{.*}}(%rip)
+; AVX1-NEXT: vpaddd %xmm3, %xmm3, %xmm0
+; AVX1-NEXT: vpaddd %xmm5, %xmm5, %xmm1
+; AVX1-NEXT: vmovdqa %xmm1, c+{{.*}}(%rip)
+; AVX1-NEXT: vmovdqa %xmm0, c+{{.*}}(%rip)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: PR42833:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl {{.*}}(%rip), %eax
+; AVX2-NEXT: vmovdqu c+{{.*}}(%rip), %ymm0
+; AVX2-NEXT: addl c+{{.*}}(%rip), %eax
+; AVX2-NEXT: vmovd %eax, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0],mem[1,2,3,4,5,6,7]
+; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vpsllvd %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0],ymm2[1,2,3,4,5,6,7]
+; AVX2-NEXT: vmovdqu %ymm2, c+{{.*}}(%rip)
+; AVX2-NEXT: vmovdqu c+{{.*}}(%rip), %ymm2
+; AVX2-NEXT: vmovdqu d+{{.*}}(%rip), %ymm3
+; AVX2-NEXT: vmovdqu d+{{.*}}(%rip), %ymm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
+; AVX2-NEXT: vpsubd %ymm0, %ymm4, %ymm0
+; AVX2-NEXT: vpsubd %ymm2, %ymm3, %ymm1
+; AVX2-NEXT: vmovdqu %ymm1, d+{{.*}}(%rip)
+; AVX2-NEXT: vmovdqu %ymm0, d+{{.*}}(%rip)
+; AVX2-NEXT: vpaddd %ymm2, %ymm2, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, c+{{.*}}(%rip)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: PR42833:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl {{.*}}(%rip), %eax
+; AVX512-NEXT: vmovdqu c+{{.*}}(%rip), %ymm0
+; AVX512-NEXT: addl c+{{.*}}(%rip), %eax
+; AVX512-NEXT: vmovd %eax, %xmm1
+; AVX512-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2,3,4,5,6,7]
+; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0],ymm0[1,2,3,4,5,6,7]
+; AVX512-NEXT: vmovdqa c+{{.*}}(%rip), %xmm1
+; AVX512-NEXT: vmovdqu %ymm0, c+{{.*}}(%rip)
+; AVX512-NEXT: vmovdqu c+{{.*}}(%rip), %ymm0
+; AVX512-NEXT: vmovdqu64 d+{{.*}}(%rip), %zmm2
+; AVX512-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm1
+; AVX512-NEXT: vpsubd %zmm1, %zmm2, %zmm1
+; AVX512-NEXT: vmovdqu64 %zmm1, d+{{.*}}(%rip)
+; AVX512-NEXT: vpaddd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: vmovdqu %ymm0, c+{{.*}}(%rip)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+;
+; XOP-LABEL: PR42833:
+; XOP: # %bb.0:
+; XOP-NEXT: vmovdqa c+{{.*}}(%rip), %xmm0
+; XOP-NEXT: vmovd %xmm0, %eax
+; XOP-NEXT: addl {{.*}}(%rip), %eax
+; XOP-NEXT: vmovdqa {{.*#+}} xmm1 =
+; XOP-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
+; XOP-NEXT: vpaddd %xmm1, %xmm0, %xmm2
+; XOP-NEXT: vmovdqa c+{{.*}}(%rip), %xmm3
+; XOP-NEXT: vpshld %xmm1, %xmm0, %xmm1
+; XOP-NEXT: vpslld $1, %xmm3, %xmm3
+; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; XOP-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3,4,5,6,7]
+; XOP-NEXT: vmovdqa d+{{.*}}(%rip), %xmm2
+; XOP-NEXT: vpsubd c+{{.*}}(%rip), %xmm2, %xmm2
+; XOP-NEXT: vmovups %ymm1, c+{{.*}}(%rip)
+; XOP-NEXT: vpinsrd $0, %eax, %xmm0, %xmm0
+; XOP-NEXT: vmovdqa d+{{.*}}(%rip), %xmm1
+; XOP-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; XOP-NEXT: vmovdqa d+{{.*}}(%rip), %xmm1
+; XOP-NEXT: vmovdqa c+{{.*}}(%rip), %xmm3
+; XOP-NEXT: vpsubd %xmm3, %xmm1, %xmm1
+; XOP-NEXT: vmovdqa d+{{.*}}(%rip), %xmm4
+; XOP-NEXT: vmovdqa c+{{.*}}(%rip), %xmm5
+; XOP-NEXT: vpsubd %xmm5, %xmm4, %xmm4
+; XOP-NEXT: vmovdqa %xmm2, d+{{.*}}(%rip)
+; XOP-NEXT: vmovdqa %xmm4, d+{{.*}}(%rip)
+; XOP-NEXT: vmovdqa %xmm1, d+{{.*}}(%rip)
+; XOP-NEXT: vmovdqa %xmm0, d+{{.*}}(%rip)
+; XOP-NEXT: vpaddd %xmm3, %xmm3, %xmm0
+; XOP-NEXT: vpaddd %xmm5, %xmm5, %xmm1
+; XOP-NEXT: vmovdqa %xmm1, c+{{.*}}(%rip)
+; XOP-NEXT: vmovdqa %xmm0, c+{{.*}}(%rip)
+; XOP-NEXT: vzeroupper
+; XOP-NEXT: retq
+  %1 = load i32, i32* @b, align 4
+  %2 = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([49 x i32], [49 x i32]* @c, i64 0, i64 32) to <8 x i32>*), align 16
+  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <16 x i32>
+  %4 = extractelement <8 x i32> %2, i32 0
+  %5 = add i32 %1, %4
+  %6 = insertelement <8 x i32> , i32 %5, i32 0
+  %7 = add <8 x i32> %2, %6
+  %8 = shl <8 x i32> %2, %6
+  %9 = shufflevector <8 x i32> %7, <8 x i32> %8, <8 x i32>
+  store <8 x i32> %9, <8 x i32>* bitcast (i32* getelementptr inbounds ([49 x i32], [49 x i32]* @c, i64 0, i64 32) to <8 x i32>*), align 16
+  %10 = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr inbounds ([49 x i32], [49 x i32]* @c, i64 0, i64 40) to <8 x i32>*), align 16
+  %11 = shufflevector <8 x i32> %10, <8 x i32> undef, <16 x i32>
+  %12 = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([49 x i32], [49 x i32]* @d, i64 0, i64 32) to <16 x i32>*), align 16
+  %13 = insertelement <16 x i32> %3, i32 %5, i32 0
+  %14 = shufflevector <16 x i32> %13, <16 x i32> %11, <16 x i32>
+  %15 = sub <16 x i32> %12, %14
+  store <16 x i32> %15, <16 x i32>* bitcast (i32* getelementptr inbounds ([49 x i32], [49 x i32]* @d, i64 0, i64 32) to <16 x i32>*), align 16
+  %16 = shl <8 x i32> %10,
+  store <8 x i32> %16, <8 x i32>* bitcast (i32* getelementptr inbounds ([49 x i32], [49 x i32]* @c, i64 0, i64 40) to <8 x i32>*), align 16
+  ret void
+}
--
2.7.4