AVX512FP16: Add expander for ceil/floor/trunc/roundeven.
author: liuhongt <hongtao.liu@intel.com>
Mon, 13 Jul 2020 06:19:21 +0000 (14:19 +0800)
committer: liuhongt <hongtao.liu@intel.com>
Wed, 22 Sep 2021 04:56:31 +0000 (12:56 +0800)
gcc/ChangeLog:

* config/i386/i386.md (<rounding_insn>hf2): New expander.
(sse4_1_round<mode>2): Extend from MODEF to MODEFH.
* config/i386/sse.md (*sse4_1_round<ssescalarmodesuffix>):
Extend from VF_128 to VFH_128.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx512fp16-builtin-round-1.c: New test.

gcc/config/i386/i386.md
gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/avx512fp16-builtin-round-1.c [new file with mode: 0644]

index ae1a81c..0b17414 100644 (file)
 \f
 
 (define_insn "sse4_1_round<mode>2"
-  [(set (match_operand:MODEF 0 "register_operand" "=x,x,x,v,v")
-       (unspec:MODEF
-         [(match_operand:MODEF 1 "nonimmediate_operand" "0,x,m,v,m")
+  [(set (match_operand:MODEFH 0 "register_operand" "=x,x,x,v,v")
+       (unspec:MODEFH
+         [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,m,v,m")
           (match_operand:SI 2 "const_0_to_15_operand" "n,n,n,n,n")]
          UNSPEC_ROUND))]
   "TARGET_SSE4_1"
   "TARGET_USE_FANCY_MATH_387
    && (flag_fp_int_builtin_inexact || !flag_trapping_math)")
 
+;; HFmode expander for the <rounding_insn> named patterns (per the commit
+;; title: ceil/floor/trunc/roundeven), enabled only with TARGET_AVX512FP16.
+;; The preparation code emits sse4_1_roundhf2 itself and finishes with DONE,
+;; so the parallel/clobber template below only describes the operands.
+(define_expand "<rounding_insn>hf2"
+  [(parallel [(set (match_operand:HF 0 "register_operand")
+                  (unspec:HF [(match_operand:HF 1 "register_operand")]
+                    FRNDINT_ROUNDING))
+             (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_AVX512FP16"
+{
+  /* Operand 2 of sse4_1_roundhf2 is the ROUND_* constant selected by the
+     iterator, OR'ed with ROUND_NO_EXC.
+     NOTE(review): ROUND_NO_EXC presumably suppresses the inexact
+     exception -- confirm against its definition.  */
+  emit_insn (gen_sse4_1_roundhf2 (operands[0], operands[1],
+                                 GEN_INT (ROUND_<ROUNDING> | ROUND_NO_EXC)));
+  DONE;
+})
+
 (define_expand "<rounding_insn><mode>2"
   [(parallel [(set (match_operand:MODEF 0 "register_operand")
                   (unspec:MODEF [(match_operand:MODEF 1 "register_operand")]
index e8aef0d..9079613 100644 (file)
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*sse4_1_round<ssescalarmodesuffix>"
-  [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
-       (vec_merge:VF_128
-         (vec_duplicate:VF_128
+  [(set (match_operand:VFH_128 0 "register_operand" "=Yr,*x,x,v")
+       (vec_merge:VFH_128
+         (vec_duplicate:VFH_128
            (unspec:<ssescalarmode>
              [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
               (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
              UNSPEC_ROUND))
-         (match_operand:VF_128 1 "register_operand" "0,0,x,v")
+         (match_operand:VFH_128 1 "register_operand" "0,0,x,v")
          (const_int 1)))]
   "TARGET_SSE4_1"
   "@
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-round-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-builtin-round-1.c
new file mode 100644 (file)
index 0000000..3cab152
--- /dev/null
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx512fp16" } */
+
+_Float16
+f1 (_Float16 x)
+{
+  return __builtin_truncf16 (x);  /* Should become one vrndscalesh; see dg-final.  */
+}
+
+_Float16
+f2 (_Float16 x)
+{
+  return __builtin_floorf16 (x);  /* Should become one vrndscalesh; see dg-final.  */
+}
+
+_Float16
+f3 (_Float16 x)
+{
+  return __builtin_ceilf16 (x);  /* Should become one vrndscalesh; see dg-final.  */
+}
+
+_Float16
+f4 (_Float16 x)
+{
+  return __builtin_roundevenf16 (x);  /* Should become one vrndscalesh; see dg-final.  */
+}
+
+/* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\\\$11\[^\n\r\]*xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\\\$10\[^\n\r\]*xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\\\$9\[^\n\r\]*xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vrndscalesh\[ \\t\]+\\\$8\[^\n\r\]*xmm\[0-9\]" 1 } } */