Optimize v4si broadcast for noavx512vl.
authorliuhongt <hongtao.liu@intel.com>
Thu, 3 Mar 2022 05:31:39 +0000 (13:31 +0800)
committerliuhongt <hongtao.liu@intel.com>
Tue, 8 Mar 2022 01:02:05 +0000 (09:02 +0800)
This will enable below

-       vbroadcastss    .LC1(%rip), %xmm0
+       movl    $-45, %edx
+       vmovd   %edx, %xmm0
+       vpshufd $0, %xmm0, %xmm0

According to microbenchmark, it's faster than broadcast from memory
for TARGET_INTER_UNIT_MOVES_TO_VEC.

gcc/ChangeLog:

* config/i386/sse.md (*vec_dupv4si): Disable memory operand
for !TARGET_INTER_UNIT_MOVES_TO_VEC when prefer_for_speed.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr100865-8a.c: Adjust testcase.
* gcc.target/i386/pr100865-8c.c: Ditto.
* gcc.target/i386/pr100865-9c.c: Ditto.

gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/pr100865-8a.c
gcc/testsuite/gcc.target/i386/pr100865-8c.c
gcc/testsuite/gcc.target/i386/pr100865-9c.c

index 0076475..e9292e6 100644 (file)
    (set_attr "length_immediate" "1,0,1")
    (set_attr "prefix_extra" "0,1,*")
    (set_attr "prefix" "maybe_vex,maybe_evex,orig")
-   (set_attr "mode" "TI,V4SF,V4SF")])
+   (set_attr "mode" "TI,V4SF,V4SF")
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "1")
+             (symbol_ref "!TARGET_INTER_UNIT_MOVES_TO_VEC")
+          ]
+          (symbol_ref "true")))])
 
 (define_insn "*vec_dupv2di"
   [(set (match_operand:V2DI 0 "register_operand"     "=x,v,v,x")
index 911b14d..544a14d 100644 (file)
@@ -20,5 +20,5 @@ foo (void)
     array[i] = MK_CONST128_BROADCAST_SIGNED (-45);
 }
 
-/* { dg-final { scan-assembler-times "(?:vpbroadcastd|vpshufd)\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "(?:vpbroadcastd|vpshufd)\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */
index 00682ed..efee048 100644 (file)
@@ -3,5 +3,5 @@
 
 #include "pr100865-8a.c"
 
-/* { dg-final { scan-assembler-times "vpshufd\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpshufd\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */
index 8ffcdc1..e6f2590 100644 (file)
@@ -3,5 +3,5 @@
 
 #include "pr100865-9a.c"
 
-/* { dg-final { scan-assembler-times "vpshufd\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpshufd\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */