From: Simon Pilgrim
Date: Wed, 12 May 2021 14:46:52 +0000 (+0100)
Subject: [X86][AVX] Add v4i64 shift-by-32 tests
X-Git-Tag: llvmorg-14-init~6910
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=778562ada39f5353b735c4ac204eddedb072a94b;p=platform%2Fupstream%2Fllvm.git

[X86][AVX] Add v4i64 shift-by-32 tests

AVX1 could perform this as a v8f32 shuffle instead of splitting - based off PR46621
---

diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 81cadf7..0f1f23e 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -1659,3 +1659,77 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
   %shift = ashr <32 x i8> %a,
   ret <32 x i8> %shift
 }
+
+;
+; Special Cases
+;
+
+define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
+; AVX1-LABEL: shift32_v4i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shift32_v4i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm1
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2-NEXT:    retq
+;
+; XOPAVX1-LABEL: shift32_v4i64:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; XOPAVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [18446744073709551584,18446744073709551584]
+; XOPAVX1-NEXT:    vpshaq %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT:    vpshaq %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: shift32_v4i64:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vpsrad $31, %ymm0, %ymm1
+; XOPAVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; XOPAVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; XOPAVX2-NEXT:    retq
+;
+; AVX512-LABEL: shift32_v4i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT:    vpsraq $32, %zmm0, %zmm0
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512-NEXT:    retq
+;
+; AVX512VL-LABEL: shift32_v4i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpsraq $32, %ymm0, %ymm0
+; AVX512VL-NEXT:    retq
+;
+; X86-AVX1-LABEL: shift32_v4i64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
+; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; X86-AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
+; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: shift32_v4i64:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsrad $31, %ymm0, %ymm1
+; X86-AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; X86-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; X86-AVX2-NEXT:    retl
+  %shift = ashr <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
+  ret <4 x i64> %shift
+}
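The AVX1/AVX2 checks above show why a 64-bit arithmetic shift is expensive before AVX512: vpsraq only exists with AVX512 (XOP gets by with vpshaq), so the lowering rebuilds each lane's sign fill from 32-bit operations - vpsrad $31 materialises the fill, vpshufd moves each high dword into the low position, and vpblendw/vpblendd stitch the two together. As a hedged illustration (not part of the commit; the function name is invented), the same decomposition written as LLVM IR:

define <4 x i64> @ashr32_v4i64_expanded(<4 x i64> %a) {
  ; Reinterpret as eight 32-bit lanes (little-endian: even lane = low half, odd lane = high half).
  %v = bitcast <4 x i64> %a to <8 x i32>
  ; Arithmetic shift of every dword by 31: each odd lane now holds the sign fill
  ; for its 64-bit element (all-ones if negative, zero otherwise).
  %sign = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ; Result low dword = old high dword; result high dword = that dword's sign fill
  ; (indices 8-15 select from %sign).
  %r32 = shufflevector <8 x i32> %v, <8 x i32> %sign, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %r = bitcast <8 x i32> %r32 to <4 x i64>
  ret <4 x i64> %r
}

Fed through llc with plain AVX enabled, this should lower to roughly the vpsrad/vpshufd/vpblendw sequence in the AVX1 checks above.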
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index 36d2470..9fd0960 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -1390,6 +1390,63 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
   ret <32 x i8> %shift
 }
 
+;
+; Special Cases
+;
+
+define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
+; AVX1-LABEL: shift32_v4i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shift32_v4i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; XOPAVX1-LABEL: shift32_v4i64:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vpsrlq $32, %xmm0, %xmm1
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT:    vpsrlq $32, %xmm0, %xmm0
+; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: shift32_v4i64:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
+; XOPAVX2-NEXT:    retq
+;
+; AVX512-LABEL: shift32_v4i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsrlq $32, %ymm0, %ymm0
+; AVX512-NEXT:    retq
+;
+; AVX512VL-LABEL: shift32_v4i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpsrlq $32, %ymm0, %ymm0
+; AVX512VL-NEXT:    retq
+;
+; X86-AVX1-LABEL: shift32_v4i64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: shift32_v4i64:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
+  %shift = lshr <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
+  ret <4 x i64> %shift
+}
+
 define <4 x i32> @sh_trunc_sh_vec(<4 x i64> %x) {
 ; AVX1-LABEL: sh_trunc_sh_vec:
 ; AVX1:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index da3cebc..0af2398 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -1298,3 +1298,60 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
   %shift = shl <32 x i8> %a,
   ret <32 x i8> %shift
 }
+
+;
+; Special Cases
+;
+
+define <4 x i64> @shift32_v4i64(<4 x i64> %a) nounwind {
+; AVX1-LABEL: shift32_v4i64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpsllq $32, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpsllq $32, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shift32_v4i64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpsllq $32, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; XOPAVX1-LABEL: shift32_v4i64:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vpsllq $32, %xmm0, %xmm1
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT:    vpsllq $32, %xmm0, %xmm0
+; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: shift32_v4i64:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vpsllq $32, %ymm0, %ymm0
+; XOPAVX2-NEXT:    retq
+;
+; AVX512-LABEL: shift32_v4i64:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpsllq $32, %ymm0, %ymm0
+; AVX512-NEXT:    retq
+;
+; AVX512VL-LABEL: shift32_v4i64:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpsllq $32, %ymm0, %ymm0
+; AVX512VL-NEXT:    retq
+;
+; X86-AVX1-LABEL: shift32_v4i64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpsllq $32, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpsllq $32, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: shift32_v4i64:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsllq $32, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
+  %shift = shl <4 x i64> %a, <i64 32, i64 32, i64 32, i64 32>
+  ret <4 x i64> %shift
+}
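The commit message's suggestion - that AVX1 could perform this as a v8f32 shuffle instead of splitting - follows from the lshr/shl cases above: a logical shift by exactly 32 only moves one 32-bit half of each i64 lane and zeroes the other, which is a single 8-lane shuffle against a zero vector, and AVX1 does have full-width 256-bit float-domain shuffles (vshufps/vblendps) even though its 256-bit integer shifts must be split into two 128-bit halves. A hedged LLVM IR sketch of the equivalence (illustrative only, not from the commit; function names are invented):

define <4 x i64> @lshr32_v4i64_as_shuffle(<4 x i64> %a) {
  %v = bitcast <4 x i64> %a to <8 x i32>
  ; lshr by 32: each low dword takes the old high dword, each high dword
  ; becomes zero (indices 8 and up select from the zero vector).
  %s = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 5, i32 12, i32 7, i32 14>
  %r = bitcast <8 x i32> %s to <4 x i64>
  ret <4 x i64> %r
}

define <4 x i64> @shl32_v4i64_as_shuffle(<4 x i64> %a) {
  %v = bitcast <4 x i64> %a to <8 x i32>
  ; shl by 32: each high dword takes the old low dword, each low dword becomes zero.
  %s = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
  %r = bitcast <8 x i32> %s to <4 x i64>
  ret <4 x i64> %r
}

On AVX1 targets this shuffle form could in principle stay in the float domain as one ymm operation, avoiding the vextractf128/vpsrlq/vpsllq/vinsertf128 split seen in the AVX1 and X86-AVX1 checks; PR46621 tracks that idea.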