From: liuhongt Date: Thu, 3 Mar 2022 05:31:39 +0000 (+0800) Subject: Optimize v4si broadcast for noavx512vl. X-Git-Tag: upstream/12.2.0~1167 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b1a741a03041782b34effcb820fbe19ba41a3b8f;p=platform%2Fupstream%2Fgcc.git Optimize v4si broadcast for noavx512vl. This will enable the code-generation change below: - vbroadcastss .LC1(%rip), %xmm0 + movl $-45, %edx + vmovd %edx, %xmm0 + vpshufd $0, %xmm0, %xmm0 According to a microbenchmark, it's faster than a broadcast from memory for TARGET_INTER_UNIT_MOVES_TO_VEC. gcc/ChangeLog: * config/i386/sse.md (*vec_dupv4si): Disable memory operand for TARGET_INTER_UNIT_MOVES_TO_VEC when optimizing for speed (via preferred_for_speed). gcc/testsuite/ChangeLog: * gcc.target/i386/pr100865-8a.c: Adjust testcase. * gcc.target/i386/pr100865-8c.c: Ditto. * gcc.target/i386/pr100865-9c.c: Ditto. --- diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 0076475..e9292e6 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -25153,7 +25153,12 @@ (set_attr "length_immediate" "1,0,1") (set_attr "prefix_extra" "0,1,*") (set_attr "prefix" "maybe_vex,maybe_evex,orig") - (set_attr "mode" "TI,V4SF,V4SF")]) + (set_attr "mode" "TI,V4SF,V4SF") + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "1") + (symbol_ref "!TARGET_INTER_UNIT_MOVES_TO_VEC") + ] + (symbol_ref "true")))]) (define_insn "*vec_dupv2di" [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x") diff --git a/gcc/testsuite/gcc.target/i386/pr100865-8a.c b/gcc/testsuite/gcc.target/i386/pr100865-8a.c index 911b14d..544a14d 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-8a.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-8a.c @@ -20,5 +20,5 @@ foo (void) array[i] = MK_CONST128_BROADCAST_SIGNED (-45); } -/* { dg-final { scan-assembler-times "(?:vpbroadcastd|vpshufd)\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "(?:vpbroadcastd|vpshufd)\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 } } */ /* { dg-final { 
scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100865-8c.c b/gcc/testsuite/gcc.target/i386/pr100865-8c.c index 00682ed..efee048 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-8c.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-8c.c @@ -3,5 +3,5 @@ #include "pr100865-8a.c" -/* { dg-final { scan-assembler-times "vpshufd\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpshufd\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100865-9c.c b/gcc/testsuite/gcc.target/i386/pr100865-9c.c index 8ffcdc1..e6f2590 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-9c.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-9c.c @@ -3,5 +3,5 @@ #include "pr100865-9a.c" -/* { dg-final { scan-assembler-times "vpshufd\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "vpshufd\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */