From 32ed3031fa48c49ead3b5288cbea6937c557a470 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 19 Jul 2023 14:04:30 +0100
Subject: [PATCH] [X86] Add test coverage for Issue #63946

---
 llvm/test/CodeGen/X86/sext-vsetcc.ll | 157 +++++++++++++++++++++++++++++++++++
 1 file changed, 157 insertions(+)

diff --git a/llvm/test/CodeGen/X86/sext-vsetcc.ll b/llvm/test/CodeGen/X86/sext-vsetcc.ll
index 91451d4..f42bac9 100644
--- a/llvm/test/CodeGen/X86/sext-vsetcc.ll
+++ b/llvm/test/CodeGen/X86/sext-vsetcc.ll
@@ -566,3 +566,160 @@ define <4 x i64> @PR50055_signed(ptr %src, ptr %dst) {
   store <8 x i32> %sext, ptr %dst, align 32
   ret <4 x i64> %t2
 }
+
+define <8 x i32> @PR63946(<8 x i32> %a0, <8 x i32> %b0) nounwind {
+; SSE-LABEL: PR63946:
+; SSE:         # %bb.0: # %entry
+; SSE-NEXT:    movdqa %xmm1, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm2[1,2,3,0]
+; SSE-NEXT:    pshufd {{.*#+}} xmm8 = xmm3[1,2,3,0]
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
+; SSE-NEXT:    pshufd {{.*#+}} xmm9 = xmm2[2,3,0,1]
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[3,0,1,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm10 = xmm2[3,0,1,2]
+; SSE-NEXT:    movdqa %xmm0, %xmm7
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm7
+; SSE-NEXT:    movdqa %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movdqa %xmm8, %xmm11
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm11
+; SSE-NEXT:    movdqa %xmm6, %xmm12
+; SSE-NEXT:    movdqa %xmm9, %xmm13
+; SSE-NEXT:    movdqa %xmm5, %xmm14
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm14
+; SSE-NEXT:    movdqa %xmm1, %xmm15
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm15
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm2
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm6
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm9
+; SSE-NEXT:    movdqa %xmm10, %xmm7
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm10
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm4
+; SSE-NEXT:    por %xmm4, %xmm11
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm12
+; SSE-NEXT:    por {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Folded Reload
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm13
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm7
+; SSE-NEXT:    por %xmm14, %xmm2
+; SSE-NEXT:    por %xmm11, %xmm2
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm3
+; SSE-NEXT:    por %xmm3, %xmm13
+; SSE-NEXT:    por %xmm12, %xmm13
+; SSE-NEXT:    por %xmm15, %xmm6
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm8
+; SSE-NEXT:    por %xmm7, %xmm8
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm5
+; SSE-NEXT:    por %xmm8, %xmm5
+; SSE-NEXT:    por %xmm13, %xmm5
+; SSE-NEXT:    por %xmm6, %xmm9
+; SSE-NEXT:    por %xmm2, %xmm9
+; SSE-NEXT:    packssdw %xmm9, %xmm5
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
+; SSE-NEXT:    packssdw %xmm10, %xmm1
+; SSE-NEXT:    por %xmm5, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE-NEXT:    pslld $31, %xmm0
+; SSE-NEXT:    psrad $31, %xmm0
+; SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; SSE-NEXT:    pslld $31, %xmm1
+; SSE-NEXT:    psrad $31, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: PR63946:
+; AVX2:         # %bb.0: # %entry
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,2,3,0,5,6,7,4]
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm3 = ymm1[2,3,0,1,6,7,4,5]
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm4 = ymm1[3,0,1,2,7,4,5,6]
+; AVX2-NEXT:    vpermq {{.*#+}} ymm5 = ymm1[2,3,0,1]
+; AVX2-NEXT:    vpermq {{.*#+}} ymm6 = ymm2[2,3,0,1]
+; AVX2-NEXT:    vpermq {{.*#+}} ymm7 = ymm3[2,3,0,1]
+; AVX2-NEXT:    vpermq {{.*#+}} ymm8 = ymm4[2,3,0,1]
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm1
+; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm2
+; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm3, %ymm2
+; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm4, %ymm4
+; AVX2-NEXT:    vextracti128 $1, %ymm4, %xmm9
+; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm5, %ymm5
+; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm10
+; AVX2-NEXT:    vpor %xmm3, %xmm10, %xmm3
+; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm6, %ymm6
+; AVX2-NEXT:    vextracti128 $1, %ymm6, %xmm10
+; AVX2-NEXT:    vpor %xmm10, %xmm9, %xmm9
+; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm7, %ymm7
+; AVX2-NEXT:    vextracti128 $1, %ymm7, %xmm10
+; AVX2-NEXT:    vpackssdw %xmm10, %xmm7, %xmm7
+; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm8, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm8
+; AVX2-NEXT:    vpackssdw %xmm8, %xmm0, %xmm0
+; AVX2-NEXT:    vpor %xmm0, %xmm7, %xmm0
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm7
+; AVX2-NEXT:    vpor %xmm3, %xmm7, %xmm3
+; AVX2-NEXT:    vpor %xmm3, %xmm9, %xmm3
+; AVX2-NEXT:    vpor %xmm5, %xmm2, %xmm2
+; AVX2-NEXT:    vpor %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vpor %xmm6, %xmm4, %xmm2
+; AVX2-NEXT:    vpor %xmm2, %xmm1, %xmm1
+; AVX2-NEXT:    vpackssdw %xmm3, %xmm1, %xmm1
+; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: PR63946:
+; AVX512:         # %bb.0: # %entry
+; AVX512-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,2,3,0,5,6,7,4]
+; AVX512-NEXT:    vpshufd {{.*#+}} ymm3 = ymm1[2,3,0,1,6,7,4,5]
+; AVX512-NEXT:    vpshufd {{.*#+}} ymm4 = ymm1[3,0,1,2,7,4,5,6]
+; AVX512-NEXT:    vpermq {{.*#+}} ymm5 = ymm1[2,3,0,1]
+; AVX512-NEXT:    vpermq {{.*#+}} ymm6 = ymm2[2,3,0,1]
+; AVX512-NEXT:    vpermq {{.*#+}} ymm7 = ymm3[2,3,0,1]
+; AVX512-NEXT:    vpermq {{.*#+}} ymm8 = ymm4[2,3,0,1]
+; AVX512-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512-NEXT:    vpcmpeqd %zmm0, %zmm2, %k1
+; AVX512-NEXT:    vpcmpeqd %zmm0, %zmm3, %k3
+; AVX512-NEXT:    vpcmpeqd %zmm0, %zmm4, %k2
+; AVX512-NEXT:    vpcmpeqd %zmm0, %zmm5, %k4
+; AVX512-NEXT:    vpcmpeqd %zmm0, %zmm6, %k5
+; AVX512-NEXT:    vpcmpeqd %zmm0, %zmm7, %k6
+; AVX512-NEXT:    vpcmpeqd %zmm0, %zmm8, %k7
+; AVX512-NEXT:    korw %k0, %k1, %k0
+; AVX512-NEXT:    korw %k3, %k0, %k0
+; AVX512-NEXT:    korw %k4, %k0, %k0
+; AVX512-NEXT:    korw %k2, %k0, %k0
+; AVX512-NEXT:    korw %k5, %k0, %k0
+; AVX512-NEXT:    korw %k6, %k0, %k0
+; AVX512-NEXT:    korw %k7, %k0, %k1
+; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512-NEXT:    retq
+entry:
+  %shuffle = shufflevector <8 x i32> %b0, <8 x i32> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
+  %shuffle1 = shufflevector <8 x i32> %b0, <8 x i32> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
+  %shuffle2 = shufflevector <8 x i32> %shuffle, <8 x i32> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
+  %shuffle3 = shufflevector <8 x i32> %b0, <8 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+  %shuffle4 = shufflevector <8 x i32> %shuffle, <8 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+  %shuffle5 = shufflevector <8 x i32> %shuffle1, <8 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+  %shuffle6 = shufflevector <8 x i32> %shuffle2, <8 x i32> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+  %cmp = icmp eq <8 x i32> %a0, %b0
+  %cmp7 = icmp eq <8 x i32> %shuffle, %a0
+  %cmp9 = icmp eq <8 x i32> %shuffle1, %a0
+  %cmp11 = icmp eq <8 x i32> %shuffle2, %a0
+  %cmp13 = icmp eq <8 x i32> %shuffle3, %a0
+  %cmp15 = icmp eq <8 x i32> %shuffle4, %a0
+  %cmp17 = icmp eq <8 x i32> %shuffle5, %a0
+  %cmp19 = icmp eq <8 x i32> %shuffle6, %a0
+  %or2365 = or <8 x i1> %cmp7, %cmp
+  %or2264 = or <8 x i1> %or2365, %cmp9
+  %or2567 = or <8 x i1> %or2264, %cmp13
+  %or2163 = or <8 x i1> %or2567, %cmp11
+  %or62 = or <8 x i1> %or2163, %cmp15
+  %or2466 = or <8 x i1> %or62, %cmp17
+  %or2668 = or <8 x i1> %or2466, %cmp19
+  %or26 = sext <8 x i1> %or2668 to <8 x i32>
+  ret <8 x i32> %or26
+}
-- 
2.7.4