AVX512FP16: Enable _Float16 autovectorization

author H.J. Lu <hjl.tools@gmail.com>

Mon, 28 Jan 2019 03:38:02 +0000 (19:38 -0800)

committer liuhongt <hongtao.liu@intel.com>

Fri, 10 Sep 2021 06:59:30 +0000 (14:59 +0800)
author H.J. Lu <hjl.tools@gmail.com>
Mon, 28 Jan 2019 03:38:02 +0000 (19:38 -0800)
committer liuhongt <hongtao.liu@intel.com>
Fri, 10 Sep 2021 06:59:30 +0000 (14:59 +0800)
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c

index 0c1aec5..cac8354 100644 (file)
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -678,6 +678,10 @@ ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
        extract = gen_avx_vextractf128v32qi;
        mode = V16QImode;
        break;
+    case E_V16HFmode:
+      extract = gen_avx_vextractf128v16hf;
+      mode = V8HFmode;
+      break;
      case E_V8SFmode:
        extract = gen_avx_vextractf128v8sf;
        mode = V4SFmode;
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c

index dc649f9..7b173bc 100644 (file)
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -22641,6 +22641,20 @@ ix86_preferred_simd_mode (scalar_mode mode)
        else
         return V2DImode;
  
+    case E_HFmode:
+      if (TARGET_AVX512FP16)
+       {
+         if (TARGET_AVX512VL)
+           {
+             if (TARGET_PREFER_AVX128)
+               return V8HFmode;
+             else if (TARGET_PREFER_AVX256)
+               return V16HFmode;
+           }
+         return V32HFmode;
+       }
+      return word_mode;
+
      case E_SFmode:
        if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
         return V16SFmode;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md

index 0633916..2602460 100644 (file)
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -279,6 +279,10 @@
  (define_mode_iterator V_256
    [V32QI V16HI V8SI V4DI V8SF V4DF])
  
+;; All 256bit vector modes including HF vector mode
+(define_mode_iterator V_256H
+  [V32QI V16HI V8SI V4DI V8SF V4DF V16HF])
+
  ;; All 128bit and 256bit vector modes
  (define_mode_iterator V_128_256
    [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
@@ -406,8 +410,7 @@
     (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
     (V8SI "TARGET_AVX") V4SI
     (V4DI "TARGET_AVX") V2DI
-   (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
-   (V8HF "TARGET_AVX512FP16")])
+   (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF])
  
  (define_mode_iterator VI_AVX2
    [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
@@ -752,7 +755,7 @@
    [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
  (define_mode_iterator VIHF_AVX512BW
    [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")
-  (V32HF "TARGET_AVX512FP16")])
+  (V32HF "TARGET_AVX512BW")])
  
  ;; Int-float size matches
  (define_mode_iterator VI4F_128 [V4SI V4SF])
@@ -9381,7 +9384,7 @@
  
  (define_expand "avx_vextractf128<mode>"
    [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
-   (match_operand:V_256 1 "register_operand")
+   (match_operand:V_256H 1 "register_operand")
     (match_operand:SI 2 "const_0_to_1_operand")]
    "TARGET_AVX"
  {
@@ -9868,7 +9871,7 @@
           (match_operand:V8HF 1 "register_operand" "v,v")
           (parallel
             [(match_operand:SI 2 "const_0_to_7_operand")])))]
-  "TARGET_AVX512FP16"
+  "TARGET_SSE2"
    "@
     vpextrw\t{%2, %1, %k0|%k0, %1, %2}
     vpextrw\t{%2, %1, %0|%0, %1, %2}"
@@ -9882,8 +9885,7 @@
     (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
     (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
     (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
-   (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
-   (V8HF "TARGET_AVX512FP16")
+   (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF
     (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
     (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
     (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
@@ -15615,7 +15617,7 @@
  
  ;; Modes handled by pinsr patterns.
  (define_mode_iterator PINSR_MODE
-  [(V16QI "TARGET_SSE4_1") V8HI (V8HF "TARGET_AVX512FP16")
+  [(V16QI "TARGET_SSE4_1") V8HI V8HF
     (V4SI "TARGET_SSE4_1")
     (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
  
@@ -23723,8 +23725,7 @@
     (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
     (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
     (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
-   (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
-   (V8HF "TARGET_AVX512FP16")
+   (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
     (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
     (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
     (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
@@ -23736,8 +23737,7 @@
     (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
     (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
     (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
-   (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
-   (V8HF "TARGET_AVX512FP16")
+   (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
     (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
     (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
     (V4TI "TARGET_AVX512F")])
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-1.c b/gcc/testsuite/gcc.target/i386/vect-float16-1.c

new file mode 100644 (file)

index 0000000..0f82cf9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times "vaddph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-10.c b/gcc/testsuite/gcc.target/i386/vect-float16-10.c

new file mode 100644 (file)

index 0000000..2176456
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-10.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] / c[i];
+}
+
+/* { dg-final { scan-assembler-times "vdivph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-11.c b/gcc/testsuite/gcc.target/i386/vect-float16-11.c

new file mode 100644 (file)

index 0000000..e0409ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-11.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] / c[i];
+}
+
+/* { dg-final { scan-assembler-times "vdivph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-12.c b/gcc/testsuite/gcc.target/i386/vect-float16-12.c

new file mode 100644 (file)

index 0000000..d92a25d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-12.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] / c[i];
+}
+
+/* { dg-final { scan-assembler-times "vdivph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-2.c b/gcc/testsuite/gcc.target/i386/vect-float16-2.c

new file mode 100644 (file)

index 0000000..974fca4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times "vaddph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-3.c b/gcc/testsuite/gcc.target/i386/vect-float16-3.c

new file mode 100644 (file)

index 0000000..9bca914
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-3.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times "vaddph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-4.c b/gcc/testsuite/gcc.target/i386/vect-float16-4.c

new file mode 100644 (file)

index 0000000..e6f26f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler-times "vsubph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-5.c b/gcc/testsuite/gcc.target/i386/vect-float16-5.c

new file mode 100644 (file)

index 0000000..38f287b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-5.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler-times "vsubph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-6.c b/gcc/testsuite/gcc.target/i386/vect-float16-6.c

new file mode 100644 (file)

index 0000000..bc9f787
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-6.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] - c[i];
+}
+
+/* { dg-final { scan-assembler-times "vsubph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-7.c b/gcc/testsuite/gcc.target/i386/vect-float16-7.c

new file mode 100644 (file)

index 0000000..b4849cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-7.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mno-avx512vl" } */
+
+/* Check that we vectorize to a full 512-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] * c[i];
+}
+
+/* { dg-final { scan-assembler-times "vmulph" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-8.c b/gcc/testsuite/gcc.target/i386/vect-float16-8.c

new file mode 100644 (file)

index 0000000..71631b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-8.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=128" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 128; i++)
+    a[i] = b[i] * c[i];
+}
+
+/* { dg-final { scan-assembler-times "vmulph" 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-float16-9.c b/gcc/testsuite/gcc.target/i386/vect-float16-9.c

new file mode 100644 (file)

index 0000000..1be5c7f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-float16-9.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512fp16 -mavx512vl -mprefer-vector-width=256" } */
+
+/* Check that we vectorize to a full 256-bit vector for _Float16 types.  */
+
+void
+foo (_Float16 *__restrict__ a, _Float16 *__restrict__ b,
+     _Float16 *__restrict__ c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] * c[i];
+}
+
+/* { dg-final { scan-assembler-times "vmulph" 16 } } */
author	H.J. Lu <hjl.tools@gmail.com>
	Mon, 28 Jan 2019 03:38:02 +0000 (19:38 -0800)
committer	liuhongt <hongtao.liu@intel.com>
	Fri, 10 Sep 2021 06:59:30 +0000 (14:59 +0800)
gcc/config/i386/i386-expand.c		patch \| blob \| history
gcc/config/i386/i386.c		patch \| blob \| history
gcc/config/i386/sse.md		patch \| blob \| history
gcc/testsuite/gcc.target/i386/vect-float16-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-float16-10.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-float16-11.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-float16-12.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-float16-2.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-float16-3.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-float16-4.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-float16-5.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-float16-6.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-float16-7.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-float16-8.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/i386/vect-float16-9.c	[new file with mode: 0644]	patch \| blob