From 240198fe0812402e6085033e43d34decdec3c0cf Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 27 May 2020 16:32:10 +0200 Subject: [PATCH] i386: Implement V2SF shuffles MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit 2020-05-27 Uroš Bizjak gcc/ChangeLog: * config/i386/mmx.md (mmx_pswapdv2sf2): Add SSE alternatives. Enable insn pattern for TARGET_MMX_WITH_SSE. (*mmx_movshdup): New insn pattern. (*mmx_movsldup): Ditto. (*mmx_movss): Ditto. * config/i386/i386-expand.c (ix86_vectorize_vec_perm_const): Handle E_V2SFmode. (expand_vec_perm_movs): Handle E_V2SFmode. (expand_vec_perm_even_odd_1): Ditto. (expand_vec_perm_broadcast_1): Assert that E_V2SFmode is already handled by standard shuffle patterns. gcc/testsuite/ChangeLog: * gcc.target/i386/vperm-v2sf.c: New test. --- gcc/config/i386/i386-expand.c | 14 ++++- gcc/config/i386/mmx.md | 89 ++++++++++++++++++++++++------ gcc/testsuite/gcc.target/i386/vperm-v2sf.c | 41 ++++++++++++++ 3 files changed, 124 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/vperm-v2sf.c diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 338b4f7..96f70ae 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -16319,6 +16319,7 @@ expand_vec_perm_movs (struct expand_vec_perm_d *d) return false; if (!(TARGET_SSE && vmode == V4SFmode) + && !(TARGET_MMX_WITH_SSE && vmode == V2SFmode) && !(TARGET_SSE2 && vmode == V2DFmode)) return false; @@ -18639,6 +18640,13 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) /* These are always directly implementable by expand_vec_perm_1. */ gcc_unreachable (); + case E_V2SFmode: + gcc_assert (TARGET_MMX_WITH_SSE); + /* We have no suitable instructions. 
*/ + if (d->testing_p) + return false; + break; + case E_V4HImode: if (d->testing_p) break; @@ -18834,8 +18842,9 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d) gcc_unreachable (); case E_V2DFmode: - case E_V2DImode: + case E_V2SFmode: case E_V4SFmode: + case E_V2DImode: case E_V2SImode: case E_V4SImode: /* These are always implementable using standard shuffle patterns. */ @@ -19329,6 +19338,7 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, if (d.testing_p && TARGET_SSSE3) return true; break; + case E_V2SFmode: case E_V2SImode: case E_V4HImode: if (!TARGET_MMX_WITH_SSE) @@ -19367,7 +19377,7 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, /* Implementable with shufps or pshufd. */ if (d.one_operand_p - && (d.vmode == V4SFmode + && (d.vmode == V4SFmode || d.vmode == V2SFmode || d.vmode == V4SImode || d.vmode == V2SImode)) return true; diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 215162d..271c1c2 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -938,32 +938,85 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "mmx_pswapdv2sf2" - [(set (match_operand:V2SF 0 "register_operand" "=y") - (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym") - (parallel [(const_int 1) (const_int 0)])))] - "TARGET_3DNOW_A" - "pswapd\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "prefix_extra" "1") - (set_attr "mode" "V2SF")]) + [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv") + (vec_select:V2SF + (match_operand:V2SF 1 "register_mmxmem_operand" "ym,0,Yv") + (parallel [(const_int 1) (const_int 0)])))] + "TARGET_3DNOW_A || TARGET_MMX_WITH_SSE" + "@ + pswapd\t{%1, %0|%0, %1} + shufps\t{$0xe1, %1, %0|%0, %1, 0xe1} + vshufps\t{$0xe1, %1, %1, %0|%0, %1, %1, 0xe1}" + [(set_attr "isa" "*,sse_noavx,avx") + (set_attr "mmx_isa" "native,*,*") + (set_attr "type" "mmxcvt,ssemov,ssemov") + (set_attr "prefix_extra" "1,*,*") + 
(set_attr "mode" "V2SF,V4SF,V4SF")]) + +(define_insn "*mmx_movshdup" + [(set (match_operand:V2SF 0 "register_operand" "=v,x") + (vec_select:V2SF + (match_operand:V2SF 1 "register_operand" "v,0") + (parallel [(const_int 1) (const_int 1)])))] + "TARGET_MMX_WITH_SSE" + "@ + %vmovshdup\t{%1, %0|%0, %1} + shufps\t{$0xe5, %0, %0|%0, %0, 0xe5}" + [(set_attr "isa" "sse3,*") + (set_attr "type" "sse,sseshuf1") + (set_attr "length_immediate" "*,1") + (set_attr "prefix_rep" "1,*") + (set_attr "prefix" "maybe_vex,orig") + (set_attr "mode" "V4SF")]) + +(define_insn "*mmx_movsldup" + [(set (match_operand:V2SF 0 "register_operand" "=v,x") + (vec_select:V2SF + (match_operand:V2SF 1 "register_operand" "v,0") + (parallel [(const_int 0) (const_int 0)])))] + "TARGET_MMX_WITH_SSE" + "@ + %vmovsldup\t{%1, %0|%0, %1} + shufps\t{$0xe0, %0, %0|%0, %0, 0xe0}" + [(set_attr "isa" "sse3,*") + (set_attr "type" "sse,sseshuf1") + (set_attr "length_immediate" "*,1") + (set_attr "prefix_rep" "1,*") + (set_attr "prefix" "maybe_vex,orig") + (set_attr "mode" "V4SF")]) (define_insn "*vec_dupv2sf" - [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V2SF 0 "register_operand" "=y,Yv,x") (vec_duplicate:V2SF - (match_operand:SF 1 "register_operand" "0,0,Yv")))] + (match_operand:SF 1 "register_operand" "0,Yv,0")))] "TARGET_MMX || TARGET_MMX_WITH_SSE" "@ punpckldq\t%0, %0 - shufps\t{$0xe0, %0, %0|%0, %0, 0xe0} - %vmovsldup\t{%1, %0|%0, %1}" - [(set_attr "isa" "*,sse_noavx,sse3") + %vmovsldup\t{%1, %0|%0, %1} + shufps\t{$0xe0, %0, %0|%0, %0, 0xe0}" + [(set_attr "isa" "*,sse3,sse_noavx") (set_attr "mmx_isa" "native,*,*") - (set_attr "type" "mmxcvt,sseshuf1,sse") - (set_attr "length_immediate" "*,1,*") - (set_attr "prefix_rep" "*,*,1") - (set_attr "prefix" "*,orig,maybe_vex") + (set_attr "type" "mmxcvt,sse,sseshuf1") + (set_attr "length_immediate" "*,*,1") + (set_attr "prefix_rep" "*,1,*") + (set_attr "prefix" "*,maybe_vex,orig") (set_attr "mode" "DI,V4SF,V4SF")]) +(define_insn 
"*mmx_movss" + [(set (match_operand:V2SF 0 "register_operand" "=x,v") + (vec_merge:V2SF + (match_operand:V2SF 2 "register_operand" " x,v") + (match_operand:V2SF 1 "register_operand" " 0,v") + (const_int 1)))] + "TARGET_MMX_WITH_SSE" + "@ + movss\t{%2, %0|%0, %2} + vmovss\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx") + (set_attr "type" "ssemov") + (set_attr "prefix" "orig,maybe_evex") + (set_attr "mode" "SF")]) + (define_insn "*mmx_concatv2sf" [(set (match_operand:V2SF 0 "register_operand" "=y,y") (vec_concat:V2SF @@ -2015,7 +2068,7 @@ (vec_select:V2SI (match_operand:V2SI 1 "register_mmxmem_operand" "ym,Yv") (parallel [(const_int 1) (const_int 0)])))] - "TARGET_3DNOW_A || TARGET_MMX_WITH_SSE" + "TARGET_3DNOW_A" "@ pswapd\t{%1, %0|%0, %1} %vpshufd\t{$0xe1, %1, %0|%0, %1, 0xe1}"; diff --git a/gcc/testsuite/gcc.target/i386/vperm-v2sf.c b/gcc/testsuite/gcc.target/i386/vperm-v2sf.c new file mode 100644 index 0000000..7bf6def --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vperm-v2sf.c @@ -0,0 +1,41 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-options "-O -msse2" } */ +/* { dg-require-effective-target sse2 } */ + +#include "isa-check.h" +#include "sse-os-support.h" + +typedef float S; +typedef float V __attribute__((vector_size(8))); +typedef int IV __attribute__((vector_size(8))); +typedef union { S s[2]; V v; } U; + +static U i[2], b, c; + +extern int memcmp (const void *, const void *, __SIZE_TYPE__); +#define assert(T) ((T) || (__builtin_trap (), 0)) + +#define TEST(E0, E1) \ + b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1}); \ + c.s[0] = i[0].s[E0]; \ + c.s[1] = i[0].s[E1]; \ + __asm__("" : : : "memory"); \ + assert (memcmp (&b, &c, sizeof(c)) == 0); + +#include "vperm-2-2.inc" + +int main() +{ + check_isa (); + + if (!sse_os_support ()) + exit (0); + + i[0].s[0] = 0; + i[0].s[1] = 1; + i[0].s[2] = 2; + i[0].s[3] = 3; + + check(); + return 0; +} -- 2.7.4