i386: Roundeven expansion for SSE4.1+
authorTejas Joshi <tejasjoshi9673@gmail.com>
Mon, 26 Aug 2019 12:41:59 +0000 (12:41 +0000)
committerMartin Jambor <jamborm@gcc.gnu.org>
Mon, 26 Aug 2019 12:41:59 +0000 (14:41 +0200)
gcc/ChangeLog:

2019-08-26  Tejas Joshi  <tejasjoshi9673@gmail.com>
            Uros Bizjak  <ubizjak@gmail.com>

* builtins.c (mathfn_built_in_2): Change CASE_MATHFN to
CASE_MATHFN_FLOATN for roundeven.
* config/i386/i386.c (ix86_i387_mode_needed): Add case
I387_ROUNDEVEN.
(ix86_mode_needed): Likewise.
(ix86_mode_after): Likewise.
(ix86_mode_entry): Likewise.
(ix86_mode_exit): Likewise.
(ix86_emit_mode_set): Likewise.
(emit_i387_cw_initialization): Add case I387_CW_ROUNDEVEN.
* config/i386/i386.h (ix86_stack_slot): Add SLOT_CW_ROUNDEVEN.
(ix86_entity): Add I387_ROUNDEVEN.
(NUM_MODES_FOR_MODE_SWITCHING): Add I387_CW_ANY.
* config/i386/i386.md: Define UNSPEC_FRNDINT_ROUNDEVEN.
(define_int_iterator): Likewise.
(define_int_attr): Likewise for rounding_insn, rounding and ROUNDING.
(define_constant): Define ROUND_ROUNDEVEN mode.
(define_attr): Add roundeven mode for i387_cw.
(<rounding_insn><mode>2): Add condition for ROUND_ROUNDEVEN.
* internal-fn.def (ROUNDEVEN): New builtin function.
* optabs.def (roundeven_optab): New optab.

gcc/testsuite/ChangeLog:

2019-08-26  Tejas Joshi  <tejasjoshi9673@gmail.com>

* gcc.target/i386/sse4_1-round-roundeven-1.c: New test.
* gcc.target/i386/sse4_1-round-roundeven-2.c: New test.

Co-Authored-By: Uros Bizjak <ubizjak@gmail.com>
From-SVN: r274928

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/i386.h
gcc/config/i386/i386.md
gcc/internal-fn.def
gcc/optabs.def
gcc/reg-stack.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/sse4_1-round-roundeven-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/sse4_1-round-roundeven-2.c [new file with mode: 0644]

index c67cbe3..aa2bec6 100644 (file)
@@ -1,4 +1,29 @@
 2019-08-26  Tejas Joshi  <tejasjoshi9673@gmail.com>
+            Uros Bizjak  <ubizjak@gmail.com>
+
+       * builtins.c (mathfn_built_in_2): Change CASE_MATHFN to
+       CASE_MATHFN_FLOATN for roundeven.
+       * config/i386/i386.c (ix86_i387_mode_needed): Add case
+       I387_ROUNDEVEN.
+       (ix86_mode_needed): Likewise.
+       (ix86_mode_after): Likewise.
+       (ix86_mode_entry): Likewise.
+       (ix86_mode_exit): Likewise.
+       (ix86_emit_mode_set): Likewise.
+       (emit_i387_cw_initialization): Add case I387_CW_ROUNDEVEN.
+       * config/i386/i386.h (ix86_stack_slot): Add SLOT_CW_ROUNDEVEN.
+       (ix86_entity): Add I387_ROUNDEVEN.
+       (NUM_MODES_FOR_MODE_SWITCHING): Add I387_CW_ANY.
+       * config/i386/i386.md: Define UNSPEC_FRNDINT_ROUNDEVEN.
+       (define_int_iterator): Likewise.
+       (define_int_attr): Likewise for rounding_insn, rounding and ROUNDING.
+       (define_constant): Define ROUND_ROUNDEVEN mode.
+       (define_attr): Add roundeven mode for i387_cw.
+       (<rounding_insn><mode>2): Add condition for ROUND_ROUNDEVEN.
+       * internal-fn.def (ROUNDEVEN): New builtin function.
+       * optabs.def (roundeven_optab): New optab.
+
+2019-08-26  Tejas Joshi  <tejasjoshi9673@gmail.com>
 
         * builtins.c (mathfn_built_in_2): Added CASE_MATHFN_FLOATN
         for ROUNDEVEN.
index 49ab50e..c712c03 100644 (file)
@@ -13557,6 +13557,11 @@ ix86_i387_mode_needed (int entity, rtx_insn *insn)
 
   switch (entity)
     {
+    case I387_ROUNDEVEN:
+      if (mode == I387_CW_ROUNDEVEN)
+       return mode;
+      break;
+
     case I387_TRUNC:
       if (mode == I387_CW_TRUNC)
        return mode;
@@ -13591,6 +13596,7 @@ ix86_mode_needed (int entity, rtx_insn *insn)
       return ix86_dirflag_mode_needed (insn);
     case AVX_U128:
       return ix86_avx_u128_mode_needed (insn);
+    case I387_ROUNDEVEN:
     case I387_TRUNC:
     case I387_FLOOR:
     case I387_CEIL:
@@ -13651,6 +13657,7 @@ ix86_mode_after (int entity, int mode, rtx_insn *insn)
       return mode;
     case AVX_U128:
       return ix86_avx_u128_mode_after (mode, insn);
+    case I387_ROUNDEVEN:
     case I387_TRUNC:
     case I387_FLOOR:
     case I387_CEIL:
@@ -13703,6 +13710,7 @@ ix86_mode_entry (int entity)
       return ix86_dirflag_mode_entry ();
     case AVX_U128:
       return ix86_avx_u128_mode_entry ();
+    case I387_ROUNDEVEN:
     case I387_TRUNC:
     case I387_FLOOR:
     case I387_CEIL:
@@ -13740,6 +13748,7 @@ ix86_mode_exit (int entity)
       return X86_DIRFLAG_ANY;
     case AVX_U128:
       return ix86_avx_u128_mode_exit ();
+    case I387_ROUNDEVEN:
     case I387_TRUNC:
     case I387_FLOOR:
     case I387_CEIL:
@@ -13774,6 +13783,12 @@ emit_i387_cw_initialization (int mode)
 
   switch (mode)
     {
+    case I387_CW_ROUNDEVEN:
+      /* round to nearest: clear the RC field, keep all other bits */
+      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
+      slot = SLOT_CW_ROUNDEVEN;
+      break;
+
     case I387_CW_TRUNC:
       /* round toward zero (truncate) */
       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
@@ -13820,6 +13835,7 @@ ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
       if (mode == AVX_U128_CLEAN)
        emit_insn (gen_avx_vzeroupper ());
       break;
+    case I387_ROUNDEVEN:
     case I387_TRUNC:
     case I387_FLOOR:
     case I387_CEIL:
index 167b73e..a1d0484 100644 (file)
@@ -2511,6 +2511,7 @@ enum ix86_stack_slot
 {
   SLOT_TEMP = 0,
   SLOT_CW_STORED,
+  SLOT_CW_ROUNDEVEN,
   SLOT_CW_TRUNC,
   SLOT_CW_FLOOR,
   SLOT_CW_CEIL,
@@ -2522,6 +2523,7 @@ enum ix86_entity
 {
   X86_DIRFLAG = 0,
   AVX_U128,
+  I387_ROUNDEVEN,
   I387_TRUNC,
   I387_FLOOR,
   I387_CEIL,
@@ -2557,7 +2559,7 @@ enum avx_u128_state
 
 #define NUM_MODES_FOR_MODE_SWITCHING                   \
   { X86_DIRFLAG_ANY, AVX_U128_ANY,                     \
-    I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
+    I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY  }
 
 \f
 /* Avoid renaming of stack registers, as doing so in combination with
index 9951d46..7ad9788 100644 (file)
   UNSPEC_FXAM
 
   ;; x87 Rounding
+  UNSPEC_FRNDINT_ROUNDEVEN
   UNSPEC_FRNDINT_FLOOR
   UNSPEC_FRNDINT_CEIL
   UNSPEC_FRNDINT_TRUNC
 
 ;; Constants to represent rounding modes in the ROUND instruction
 (define_constants
-  [(ROUND_FLOOR                        0x1)
+  [(ROUND_ROUNDEVEN            0x0)
+   (ROUND_FLOOR                        0x1)
    (ROUND_CEIL                 0x2)
    (ROUND_TRUNC                        0x3)
    (ROUND_MXCSR                        0x4)
 
 ;; Defines rounding mode of an FP operation.
 
-(define_attr "i387_cw" "trunc,floor,ceil,uninitialized,any"
+(define_attr "i387_cw" "roundeven,floor,ceil,trunc,uninitialized,any"
   (const_string "any"))
 
 ;; Define attribute to indicate AVX insns with partial XMM register update.
 })
 
 (define_int_iterator FRNDINT_ROUNDING
-       [UNSPEC_FRNDINT_FLOOR
+       [UNSPEC_FRNDINT_ROUNDEVEN
+        UNSPEC_FRNDINT_FLOOR
         UNSPEC_FRNDINT_CEIL
         UNSPEC_FRNDINT_TRUNC])
 
 
 ;; Base name for define_insn
 (define_int_attr rounding_insn
-       [(UNSPEC_FRNDINT_FLOOR "floor")
+       [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
+        (UNSPEC_FRNDINT_FLOOR "floor")
         (UNSPEC_FRNDINT_CEIL "ceil")
         (UNSPEC_FRNDINT_TRUNC "btrunc")
         (UNSPEC_FIST_FLOOR "floor")
         (UNSPEC_FIST_CEIL "ceil")])
 
 (define_int_attr rounding
-       [(UNSPEC_FRNDINT_FLOOR "floor")
+       [(UNSPEC_FRNDINT_ROUNDEVEN "roundeven")
+        (UNSPEC_FRNDINT_FLOOR "floor")
         (UNSPEC_FRNDINT_CEIL "ceil")
         (UNSPEC_FRNDINT_TRUNC "trunc")
         (UNSPEC_FIST_FLOOR "floor")
         (UNSPEC_FIST_CEIL "ceil")])
 
 (define_int_attr ROUNDING
-       [(UNSPEC_FRNDINT_FLOOR "FLOOR")
+       [(UNSPEC_FRNDINT_ROUNDEVEN "ROUNDEVEN")
+        (UNSPEC_FRNDINT_FLOOR "FLOOR")
         (UNSPEC_FRNDINT_CEIL "CEIL")
         (UNSPEC_FRNDINT_TRUNC "TRUNC")
         (UNSPEC_FIST_FLOOR "FLOOR")
        || TARGET_MIX_SSE_I387)
     && (flag_fp_int_builtin_inexact || !flag_trapping_math))
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
-       && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact
-          || !flag_trapping_math))"
+       && (TARGET_SSE4_1
+         || (ROUND_<ROUNDING> != ROUND_ROUNDEVEN
+             && (flag_fp_int_builtin_inexact || !flag_trapping_math))))"
 {
   if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
       && (TARGET_SSE4_1 || flag_fp_int_builtin_inexact || !flag_trapping_math))
index 9461693..b5a6ca3 100644 (file)
@@ -238,6 +238,7 @@ DEF_INTERNAL_FLT_FLOATN_FN (FLOOR, ECF_CONST, floor, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (NEARBYINT, ECF_CONST, nearbyint, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (RINT, ECF_CONST, rint, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (ROUND, ECF_CONST, round, unary)
+DEF_INTERNAL_FLT_FLOATN_FN (ROUNDEVEN, ECF_CONST, roundeven, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (TRUNC, ECF_CONST, btrunc, unary)
 
 /* Binary math functions.  */
index 5283e67..0860b38 100644 (file)
@@ -271,6 +271,7 @@ OPTAB_D (fnms_optab, "fnms$a4")
 
 OPTAB_D (rint_optab, "rint$a2")
 OPTAB_D (round_optab, "round$a2")
+OPTAB_D (roundeven_optab, "roundeven$a2")
 OPTAB_D (floor_optab, "floor$a2")
 OPTAB_D (ceil_optab, "ceil$a2")
 OPTAB_D (btrunc_optab, "btrunc$a2")
index 710f14a..0f0089a 100644 (file)
@@ -1817,6 +1817,7 @@ subst_stack_regs_pat (rtx_insn *insn, stack_ptr regstack, rtx pat)
              case UNSPEC_FRNDINT:
              case UNSPEC_F2XM1:
 
+             case UNSPEC_FRNDINT_ROUNDEVEN:
              case UNSPEC_FRNDINT_FLOOR:
              case UNSPEC_FRNDINT_CEIL:
              case UNSPEC_FRNDINT_TRUNC:
index c68c823..b5a2d7b 100644 (file)
@@ -1,5 +1,10 @@
 2019-08-26  Tejas Joshi  <tejasjoshi9673@gmail.com>
 
+       * gcc.target/i386/sse4_1-round-roundeven-1.c: New test.
+       * gcc.target/i386/sse4_1-round-roundeven-2.c: New test.
+
+2019-08-26  Tejas Joshi  <tejasjoshi9673@gmail.com>
+
         * gcc.dg/torture/builtin-round-roundeven.c: New test.
         * gcc.dg/torture/builtin-round-roundevenf128.c: Likewise.
 
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-round-roundeven-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-round-roundeven-1.c
new file mode 100644 (file)
index 0000000..3633263
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+
+__attribute__((noinline, noclone)) double
+f1 (double x)
+{
+  return __builtin_roundeven (x);
+}
+
+__attribute__((noinline, noclone)) float
+f2 (float x)
+{
+  return __builtin_roundevenf (x);
+}
+
+/* { dg-final { scan-assembler-times "roundsd\[^\n\r\]*xmm" 1 } } */
+/* { dg-final { scan-assembler-times "roundss\[^\n\r\]*xmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-round-roundeven-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-round-roundeven-2.c
new file mode 100644 (file)
index 0000000..9505796
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+#include "sse4_1-check.h"
+#include "sse4_1-round-roundeven-1.c"
+
+static void
+sse4_1_test (void)
+{
+  if (f1 (0.5) != 0.0 || f1 (1.5) != 2.0 || f1 (-0.5) != 0.0 || f1 (-1.5) != -2.0)
+    abort ();
+  if (f2 (0.5f) != 0.0f || f2 (1.5f) != 2.0f || f2 (-0.5f) != 0.0f || f2 (-1.5f) != -2.0f)
+    abort ();
+}