Optimize sparc {ceil,floor}{,f} using vis2 'siam' instruction.

author David S. Miller <davem@davemloft.net>

Tue, 15 Jan 2013 05:47:29 +0000 (21:47 -0800)

committer David S. Miller <davem@davemloft.net>

Tue, 15 Jan 2013 05:47:29 +0000 (21:47 -0800)
author David S. Miller <davem@davemloft.net>
Tue, 15 Jan 2013 05:47:29 +0000 (21:47 -0800)
committer David S. Miller <davem@davemloft.net>
Tue, 15 Jan 2013 05:47:29 +0000 (21:47 -0800)
diff --git a/ChangeLog b/ChangeLog

index 9fa68b8..0f5a017 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,46 @@
  2013-01-14  David S. Miller  <davem@davemloft.net>
  
+       * sysdeps/sparc/sparc-ifunc.h (SPARC_ASM_IFUNC2): New macro.
+       (SPARC_ASM_VIS2_IFUNC): Likewise.
+       (SPARC_ASM_VIS3_VIS2_IFUNC): Likewise.
+       * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S: Make
+       use of 'siam' instruction.
+       * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S:
+       Likewise.
+       * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S:
+       Likewise.
+       * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S:
+       Likewise.
+       * sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S: Likewise.
+       * sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S: Likewise.
+       * sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S: Likewise.
+       * sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S: Likewise.
+       * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis2.S: New
+       file.
+       * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis2.S: New
+       file.
+       * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis2.S: New
+       file.
+       * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis2.S: New
+       file.
+       * sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis2.S: New file.
+       * sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis2.S: New file.
+       * sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis2.S: New file.
+       * sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis2.S: New file.
+       * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S: Hook in
+       new VIS2 routines.
+       * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S: Likewise.
+       * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S: Likewise.
+       * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S:
+       Likewise.
+       * sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S: Likewise.
+       * sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S: Likewise.
+       * sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S: Likewise.
+       * sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S: Likewise.
+       * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile: Add new VIS2
+       routines to libm-sysdep_routines.
+       * sysdeps/sparc/sparc64/fpu/multiarch/Makefile: Likewise.
+
         * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile: Add vis3
         fdim/fdimf to libm-sysdep_routines.
         * sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdim-vis3.S: New
diff --git a/sysdeps/sparc/sparc-ifunc.h b/sysdeps/sparc/sparc-ifunc.h

index edff5c8..f68161f 100644 (file)
--- a/sysdeps/sparc/sparc-ifunc.h
+++ b/sysdeps/sparc/sparc-ifunc.h
@@ -51,6 +51,33 @@ ENTRY (__##name)                                     \
          mov    %o1, %o0;                               \
  END (__##name)
  
+#  define SPARC_ASM_IFUNC2(name, m1, f1, m2, f2, dflt) \
+ENTRY (__##name)       /* picks f1 if (%o0 & m1), else f2 if (%o0 & m2), else dflt */ \
+       .type   __##name, @gnu_indirect_function;       \
+       SETUP_PIC_REG_LEAF(o3, o5);     /* presumably leaves the PIC base in %o3 - confirm */ \
+       set     m1, %o1;                                \
+       andcc   %o0, %o1, %g0;          /* any m1 bit set in %o0? */ \
+       be      8f;                     /* no: go test m2 */ \
+        nop;                                           \
+       sethi   %gdop_hix22(f1), %o1;   /* yes: build the gdop value for f1 */ \
+       xor     %o1, %gdop_lox10(f1), %o1;              \
+       ba      10f;                                    \
+        nop;                                           \
+8:     set     m2, %o1;                                \
+       andcc   %o0, %o1, %g0;          /* any m2 bit set in %o0? */ \
+       be      9f;                     /* no: fall back to dflt */ \
+        nop;                                           \
+       sethi   %gdop_hix22(f2), %o1;   /* yes: build the gdop value for f2 */ \
+       xor     %o1, %gdop_lox10(f2), %o1;              \
+       ba      10f;                                    \
+        nop;                                           \
+9:     sethi   %gdop_hix22(dflt), %o1; /* build the gdop value for dflt */ \
+       xor     %o1, %gdop_lox10(dflt), %o1;            \
+10:    add     %o3, %o1, %o1;          /* add %o3 to the selected value */ \
+       retl;                                           \
+        mov    %o1, %o0;               /* delay slot: result is returned in %o0 */ \
+END (__##name)
+
  # else /* SHARED */
  
  # ifdef __arch64__
@@ -82,19 +109,54 @@ ENTRY (__##name)                                   \
          mov    %o1, %o0;                               \
  END (__##name)
  
+#  define SPARC_ASM_IFUNC2(name, m1, f1, m2, f2, dflt) \
+ENTRY (__##name)       /* picks f1 if (%o0 & m1), else f2 if (%o0 & m2), else dflt */ \
+       .type   __##name, @gnu_indirect_function;       \
+       set     m1, %o1;                                \
+       andcc   %o0, %o1, %g0;          /* any m1 bit set in %o0? */ \
+       be      8f;                     /* no: go test m2 */ \
+        nop;                                           \
+       SET(f1, %g1, %o1);              /* yes: %o1 = f1 (SET is defined elsewhere; %g1 presumably scratch) */ \
+       ba      10f;                                    \
+        nop;                                           \
+8:     set     m2, %o1;                                \
+       andcc   %o0, %o1, %g0;          /* any m2 bit set in %o0? */ \
+       be      9f;                     /* no: fall back to dflt */ \
+        nop;                                           \
+       SET(f2, %g1, %o1);              /* yes: %o1 = f2 */ \
+       ba      10f;                                    \
+        nop;                                           \
+9:     SET(dflt, %g1, %o1);            /* %o1 = dflt */ \
+10:    retl;                                           \
+        mov    %o1, %o0;               /* delay slot: result is returned in %o0 */ \
+END (__##name)
+
  # endif /* SHARED */
  
+#define SPARC_ASM_VIS2_IFUNC(name)                     \
+       SPARC_ASM_IFUNC1(name, HWCAP_SPARC_VIS2,        \
+                        __##name##_vis2, __##name##_generic) /* one mask: __name_vis2, else __name_generic (per SPARC_ASM_IFUNC1, defined above this view) */
+
  # ifdef HAVE_AS_VIS3_SUPPORT
  
  #define SPARC_ASM_VIS3_IFUNC(name)                     \
         SPARC_ASM_IFUNC1(name, HWCAP_SPARC_VIS3,        \
                          __##name##_vis3, __##name##_generic)
  
+#define SPARC_ASM_VIS3_VIS2_IFUNC(name)                        \
+       SPARC_ASM_IFUNC2(name, HWCAP_SPARC_VIS3,        \
+                        __##name##_vis3,               \
+                        HWCAP_SPARC_VIS2,              \
+                        __##name##_vis2, __##name##_generic) /* VIS3 is tested first, then VIS2, else generic */
+
  # else /* HAVE_AS_VIS3_SUPPORT */
  
  #define SPARC_ASM_VIS3_IFUNC(name)                     \
         SPARC_ASM_IFUNC_DFLT(name, __##name##_generic)
  
+#define SPARC_ASM_VIS3_VIS2_IFUNC(name)                        \
+       SPARC_ASM_VIS2_IFUNC(name) /* no VIS3 assembler support: only the VIS2 variant is offered */
+
  # endif /* HAVE_AS_VIS3_SUPPORT */
  
  
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile

index 6349681..561b0ee 100644 (file)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile
@@ -1,4 +1,6 @@
  ifeq ($(subdir),math)
+libm-sysdep_routines += s_ceil-vis2 s_ceilf-vis2 \
+                       s_floor-vis2 s_floorf-vis2
  ifeq ($(have-as-vis3),yes)
  libm-sysdep_routines += m_copysignf-vis3 m_copysign-vis3 s_ceilf-vis3 \
                         s_ceil-vis3 s_fabs-vis3 s_fabsf-vis3 s_floor-vis3 \
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis2.S b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis2.S

new file mode 100644 (file)

index 0000000..9438800
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis2.S
@@ -0,0 +1,61 @@
+/* ceil function, sparc32 v9 vis2 version.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by David S. Miller <davem@davemloft.net>, 2013.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
+
+          We add then subtract (or subtract then add if the initial
+          value was negative) 2**52 to the value, then subtract it
+          back out.
+
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.  */
+
+#define TWO_FIFTYTWO   0x43300000              /* 2**52 */
+
+#define ZERO           %f10                    /* 0.0 */
+#define SIGN_BIT       %f12                    /* -0.0 */
+
+ENTRY (__ceil_vis2)
+       sethi   %hi(TWO_FIFTYTWO), %o2          /* upper 32 bits of the 2**52 bit pattern */
+       sllx    %o0, 32, %o0                    /* x arrives as two 32-bit halves: %o0 is the upper one */
+       or      %o0, %o1, %o0                   /* %o0 = 64-bit image of x */
+       stx     %o0, [%sp + 72]                 /* bounce x through the stack ... */
+       sllx    %o2, 32, %o2                    /* %o2 = 64-bit image of 2**52 */
+       fzero   ZERO
+       ldd     [%sp + 72], %f0                 /* ... into %f0 */
+       fnegd   ZERO, SIGN_BIT                  /* SIGN_BIT = -0.0 */
+       stx     %o2, [%sp + 72]                 /* same slot is reused for the constant */
+       fabsd   %f0, %f14                       /* %f14 = |x| */
+       ldd     [%sp + 72], %f16                /* %f16 = 2**52 */
+       fcmpd   %fcc3, %f14, %f16
+       fmovduge %fcc3, ZERO, %f16              /* |x| >= 2**52 or unordered: add 0.0 instead */
+       fand    %f0, SIGN_BIT, SIGN_BIT         /* SIGN_BIT = sign bit of x only */
+       for     %f16, SIGN_BIT, %f16            /* give the addend the sign of x */
+       siam    (1 << 2) | 2                    /* rounding override on; mode 2 is presumably toward +inf - confirm against the VIS2 spec */
+       faddd   %f0, %f16, %f18
+       siam    (1 << 2) | 0                    /* override on; mode 0 is presumably round-to-nearest */
+       fsubd   %f18, %f16, %f18
+       siam    (0 << 2)                        /* override off again before returning */
+       retl
+        for    %f18, SIGN_BIT, %f0             /* delay slot: OR the sign of x into the result */
+END (__ceil_vis2)
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S

index 0c2140d..aebff5c 100644 (file)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S
@@ -19,27 +19,21 @@
  
  #include <sysdep.h>
  
-       /* Since changing the rounding mode is extremely expensive, we
-          try to round up using a method that is rounding mode
-          agnostic.
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
  
            We add then subtract (or subtract than add if the initial
            value was negative) 2**23 to the value, then subtract it
            back out.
  
-          This will clear out the fractional portion of the value.
-          One of two things will happen for non-whole initial values.
-          Either the rounding mode will round it up, or it will be
-          rounded down.  If the value started out whole, it will be
-          equal after the addition and subtraction.  This means we
-          can accurately detect with one test whether we need to add
-          another 1.0 to round it up properly.
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.
  
-          VIS instructions are used to facilitate the formation of
-          easier constants, and the propagation of the sign bit.  */
+          We also use VIS3 moves to avoid using the stack to transfer
+          values between float and integer registers.  */
  
  #define TWO_FIFTYTWO   0x43300000              /* 2**52 */
-#define ONE_DOT_ZERO   0x3ff00000              /* 1.0 */
  
  #define ZERO           %f10                    /* 0.0 */
  #define SIGN_BIT       %f12                    /* -0.0 */
@@ -47,32 +41,22 @@
  ENTRY (__ceil_vis3)
         sethi   %hi(TWO_FIFTYTWO), %o2
         sllx    %o0, 32, %o0
-       sethi   %hi(ONE_DOT_ZERO), %o3
+       sllx    %o2, 32, %o2
         or      %o0, %o1, %o0
         movxtod %o0, %f0
-       sllx    %o2, 32, %o2
         fzero   ZERO
-       sllx    %o3, 32, %o3
-
         fnegd   ZERO, SIGN_BIT
-
         movxtod %o2, %f16
         fabsd   %f0, %f14
-
         fcmpd   %fcc3, %f14, %f16
-
         fmovduge %fcc3, ZERO, %f16
         fand    %f0, SIGN_BIT, SIGN_BIT
-
         for     %f16, SIGN_BIT, %f16
+       siam    (1 << 2) | 2
         faddd   %f0, %f16, %f18
+       siam    (1 << 2) | 0
         fsubd   %f18, %f16, %f18
-       fcmpd   %fcc2, %f18, %f0
-       movxtod %o3, %f20
-
-       fmovduge %fcc2, ZERO, %f20
-       faddd   %f18, %f20, %f0
-       fabsd   %f0, %f0
+       siam    (0 << 2)
         retl
-        for    %f0, SIGN_BIT, %f0
+        for    %f18, SIGN_BIT, %f0
  END (__ceil_vis3)
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S

index 835703f..efc8d49 100644 (file)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S
@@ -1,7 +1,7 @@
  #include <sparc-ifunc.h>
  #include <math_ldbl_opt.h>
  
-SPARC_ASM_VIS3_IFUNC(ceil)
+SPARC_ASM_VIS3_VIS2_IFUNC(ceil)
  
  weak_alias (__ceil, ceil)
  
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis2.S b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis2.S

new file mode 100644 (file)

index 0000000..bc51676
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis2.S
@@ -0,0 +1,58 @@
+/* Float ceil function, sparc32 v9 vis2 version.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by David S. Miller <davem@davemloft.net>, 2013.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
+
+          We add then subtract (or subtract then add if the initial
+          value was negative) 2**23 to the value, then subtract it
+          back out.
+
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.  */
+
+#define TWO_TWENTYTHREE        0x4b000000              /* 2**23 */
+
+#define ZERO           %f10                    /* 0.0 */
+#define SIGN_BIT       %f12                    /* -0.0 */
+
+ENTRY (__ceilf_vis2)
+       st      %o0, [%sp + 68]                 /* bounce x (in %o0) through the stack ... */
+       sethi   %hi(TWO_TWENTYTHREE), %o2       /* %o2 = bit pattern of 2**23 */
+       fzeros  ZERO
+       ld      [%sp + 68], %f0                 /* ... into %f0 */
+       fnegs   ZERO, SIGN_BIT                  /* SIGN_BIT = -0.0 */
+       st      %o2, [%sp + 68]                 /* same slot is reused for the constant */
+       fabss   %f0, %f14                       /* %f14 = |x| */
+       ld      [%sp + 68], %f16                /* %f16 = 2**23 */
+       fcmps   %fcc3, %f14, %f16
+       fmovsuge %fcc3, ZERO, %f16              /* |x| >= 2**23 or unordered: add 0.0 instead */
+       fands   %f0, SIGN_BIT, SIGN_BIT         /* SIGN_BIT = sign bit of x only */
+       fors    %f16, SIGN_BIT, %f16            /* give the addend the sign of x */
+       siam    (1 << 2) | 2                    /* rounding override on; mode 2 is presumably toward +inf - confirm against the VIS2 spec */
+       fadds   %f0, %f16, %f1
+       siam    (1 << 2) | 0                    /* override on; mode 0 is presumably round-to-nearest */
+       fsubs   %f1, %f16, %f1
+       siam    (0 << 2)                        /* override off again before returning */
+       retl
+        fors   %f1, SIGN_BIT, %f0              /* delay slot: OR the sign of x into the result */
+END (__ceilf_vis2)
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S

index 7d30c0b..0a6768c 100644 (file)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S
@@ -19,27 +19,21 @@
  
  #include <sysdep.h>
  
-       /* Since changing the rounding mode is extremely expensive, we
-          try to round up using a method that is rounding mode
-          agnostic.
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
  
            We add then subtract (or subtract than add if the initial
            value was negative) 2**23 to the value, then subtract it
            back out.
  
-          This will clear out the fractional portion of the value.
-          One of two things will happen for non-whole initial values.
-          Either the rounding mode will round it up, or it will be
-          rounded down.  If the value started out whole, it will be
-          equal after the addition and subtraction.  This means we
-          can accurately detect with one test whether we need to add
-          another 1.0 to round it up properly.
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.
  
-          VIS instructions are used to facilitate the formation of
-          easier constants, and the propagation of the sign bit.  */
+          We also use VIS3 moves to avoid using the stack to transfer
+          values between float and integer registers.  */
  
  #define TWO_TWENTYTHREE        0x4b000000              /* 2**23 */
-#define ONE_DOT_ZERO   0x3f800000              /* 1.0 */
  
  #define ZERO           %f10                    /* 0.0 */
  #define SIGN_BIT       %f12                    /* -0.0 */
@@ -47,28 +41,19 @@
  ENTRY (__ceilf_vis3)
         movwtos %o0, %f0
         sethi   %hi(TWO_TWENTYTHREE), %o2
-       sethi   %hi(ONE_DOT_ZERO), %o3
         fzeros  ZERO
-
         fnegs   ZERO, SIGN_BIT
-
         movwtos %o2, %f16
         fabss   %f0, %f14
-
         fcmps   %fcc3, %f14, %f16
-
         fmovsuge %fcc3, ZERO, %f16
         fands   %f0, SIGN_BIT, SIGN_BIT
-
         fors    %f16, SIGN_BIT, %f16
+       siam    (1 << 2) | 2
         fadds   %f0, %f16, %f1
+       siam    (1 << 2) | 0
         fsubs   %f1, %f16, %f1
-       fcmps   %fcc2, %f1, %f0
-       movwtos %o3, %f9
-
-       fmovsuge %fcc2, ZERO, %f9
-       fadds   %f1, %f9, %f0
-       fabss   %f0, %f0
+       siam    (0 << 2)
         retl
-        fors   %f0, SIGN_BIT, %f0
+        fors   %f1, SIGN_BIT, %f0
  END (__ceilf_vis3)
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S

index 3047dd8..1c72a57 100644 (file)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S
@@ -1,6 +1,6 @@
  #include <sparc-ifunc.h>
  
-SPARC_ASM_VIS3_IFUNC(ceilf)
+SPARC_ASM_VIS3_VIS2_IFUNC(ceilf)
  
  weak_alias (__ceilf, ceilf)
  
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis2.S b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis2.S

new file mode 100644 (file)

index 0000000..3b5e8fd
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis2.S
@@ -0,0 +1,61 @@
+/* floor function, sparc32 v9 vis2 version.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by David S. Miller <davem@davemloft.net>, 2013.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
+
+          We add then subtract (or subtract then add if the initial
+          value was negative) 2**52 to the value, then subtract it
+          back out.
+
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.  */
+
+#define TWO_FIFTYTWO   0x43300000              /* 2**52 */
+
+#define ZERO           %f10                    /* 0.0 */
+#define SIGN_BIT       %f12                    /* -0.0 */
+
+ENTRY (__floor_vis2)
+       sethi   %hi(TWO_FIFTYTWO), %o2          /* upper 32 bits of the 2**52 bit pattern */
+       sllx    %o0, 32, %o0                    /* x arrives as two 32-bit halves: %o0 is the upper one */
+       or      %o0, %o1, %o0                   /* %o0 = 64-bit image of x */
+       stx     %o0, [%sp + 72]                 /* bounce x through the stack ... */
+       sllx    %o2, 32, %o2                    /* %o2 = 64-bit image of 2**52 */
+       fzero   ZERO
+       ldd     [%sp + 72], %f0                 /* ... into %f0 */
+       fnegd   ZERO, SIGN_BIT                  /* SIGN_BIT = -0.0 */
+       stx     %o2, [%sp + 72]                 /* same slot is reused for the constant */
+       fabsd   %f0, %f14                       /* %f14 = |x| */
+       ldd     [%sp + 72], %f16                /* %f16 = 2**52 */
+       fcmpd   %fcc3, %f14, %f16
+       fmovduge %fcc3, ZERO, %f16              /* |x| >= 2**52 or unordered: add 0.0 instead */
+       fand    %f0, SIGN_BIT, SIGN_BIT         /* SIGN_BIT = sign bit of x only */
+       for     %f16, SIGN_BIT, %f16            /* give the addend the sign of x */
+       siam    (1 << 2) | 3                    /* rounding override on; mode 3 is presumably toward -inf - confirm against the VIS2 spec */
+       faddd   %f0, %f16, %f18
+       siam    (1 << 2) | 0                    /* override on; mode 0 is presumably round-to-nearest */
+       fsubd   %f18, %f16, %f18
+       siam    (0 << 2)                        /* override off again before returning */
+       retl
+        for    %f18, SIGN_BIT, %f0             /* delay slot: OR the sign of x into the result */
+END (__floor_vis2)
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S

index 8445f1d..41fdfac 100644 (file)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S
@@ -19,27 +19,21 @@
  
  #include <sysdep.h>
  
-       /* Since changing the rounding mode is extremely expensive, we
-          try to round up using a method that is rounding mode
-          agnostic.
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
  
            We add then subtract (or subtract than add if the initial
            value was negative) 2**23 to the value, then subtract it
            back out.
  
-          This will clear out the fractional portion of the value.
-          One of two things will happen for non-whole initial values.
-          Either the rounding mode will round it up, or it will be
-          rounded down.  If the value started out whole, it will be
-          equal after the addition and subtraction.  This means we
-          can accurately detect with one test whether we need to add
-          another 1.0 to round it up properly.
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.
  
-          VIS instructions are used to facilitate the formation of
-          easier constants, and the propagation of the sign bit.  */
+          We also use VIS3 moves to avoid using the stack to transfer
+          values between float and integer registers.  */
  
  #define TWO_FIFTYTWO   0x43300000              /* 2**52 */
-#define ONE_DOT_ZERO   0x3ff00000              /* 1.0 */
  
  #define ZERO           %f10                    /* 0.0 */
  #define SIGN_BIT       %f12                    /* -0.0 */
@@ -47,32 +41,22 @@
  ENTRY (__floor_vis3)
         sethi   %hi(TWO_FIFTYTWO), %o2
         sllx    %o0, 32, %o0
-       sethi   %hi(ONE_DOT_ZERO), %o3
+       sllx    %o2, 32, %o2
         or      %o0, %o1, %o0
         movxtod %o0, %f0
-       sllx    %o2, 32, %o2
         fzero   ZERO
-       sllx    %o3, 32, %o3
-
         fnegd   ZERO, SIGN_BIT
-
         movxtod %o2, %f16
         fabsd   %f0, %f14
-
         fcmpd   %fcc3, %f14, %f16
-
         fmovduge %fcc3, ZERO, %f16
         fand    %f0, SIGN_BIT, SIGN_BIT
-
         for     %f16, SIGN_BIT, %f16
+       siam    (1 << 2) | 3
         faddd   %f0, %f16, %f18
+       siam    (1 << 2) | 0
         fsubd   %f18, %f16, %f18
-       fcmpd   %fcc2, %f18, %f0
-       movxtod %o3, %f20
-
-       fmovdule %fcc2, ZERO, %f20
-       fsubd   %f18, %f20, %f0
-       fabsd   %f0, %f0
+       siam    (0 << 2)
         retl
-        for    %f0, SIGN_BIT, %f0
+        for    %f18, SIGN_BIT, %f0
  END (__floor_vis3)
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S

index 37aeb43..1fe4b95 100644 (file)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S
@@ -1,7 +1,7 @@
  #include <sparc-ifunc.h>
  #include <math_ldbl_opt.h>
  
-SPARC_ASM_VIS3_IFUNC(floor)
+SPARC_ASM_VIS3_VIS2_IFUNC(floor)
  
  weak_alias (__floor, floor)
  
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis2.S b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis2.S

new file mode 100644 (file)

index 0000000..4f73121
--- /dev/null
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis2.S
@@ -0,0 +1,58 @@
+/* Float floor function, sparc32 v9 vis2 version.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by David S. Miller <davem@davemloft.net>, 2013.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
+
+          We add then subtract (or subtract then add if the initial
+          value was negative) 2**23 to the value, then subtract it
+          back out.
+
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.  */
+
+#define TWO_TWENTYTHREE        0x4b000000              /* 2**23 */
+
+#define ZERO           %f10                    /* 0.0 */
+#define SIGN_BIT       %f12                    /* -0.0 */
+
+ENTRY (__floorf_vis2)
+       st      %o0, [%sp + 68]                 /* bounce x (in %o0) through the stack ... */
+       sethi   %hi(TWO_TWENTYTHREE), %o2       /* %o2 = bit pattern of 2**23 */
+       fzeros  ZERO
+       ld      [%sp + 68], %f0                 /* ... into %f0 */
+       fnegs   ZERO, SIGN_BIT                  /* SIGN_BIT = -0.0 */
+       st      %o2, [%sp + 68]                 /* same slot is reused for the constant */
+       fabss   %f0, %f14                       /* %f14 = |x| */
+       ld      [%sp + 68], %f16                /* %f16 = 2**23 */
+       fcmps   %fcc3, %f14, %f16
+       fmovsuge %fcc3, ZERO, %f16              /* |x| >= 2**23 or unordered: add 0.0 instead */
+       fands   %f0, SIGN_BIT, SIGN_BIT         /* SIGN_BIT = sign bit of x only */
+       fors    %f16, SIGN_BIT, %f16            /* give the addend the sign of x */
+       siam    (1 << 2) | 3                    /* rounding override on; mode 3 is presumably toward -inf - confirm against the VIS2 spec */
+       fadds   %f0, %f16, %f1
+       siam    (1 << 2) | 0                    /* override on; mode 0 is presumably round-to-nearest */
+       fsubs   %f1, %f16, %f1
+       siam    (0 << 2)                        /* override off again before returning */
+       retl
+        fors   %f1, SIGN_BIT, %f0              /* delay slot: OR the sign of x into the result */
+END (__floorf_vis2)
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S

index 133a0a4..fe2d2da 100644 (file)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S
@@ -19,27 +19,21 @@
  
  #include <sysdep.h>
  
-       /* Since changing the rounding mode is extremely expensive, we
-          try to round up using a method that is rounding mode
-          agnostic.
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
  
            We add then subtract (or subtract than add if the initial
            value was negative) 2**23 to the value, then subtract it
            back out.
  
-          This will clear out the fractional portion of the value.
-          One of two things will happen for non-whole initial values.
-          Either the rounding mode will round it up, or it will be
-          rounded down.  If the value started out whole, it will be
-          equal after the addition and subtraction.  This means we
-          can accurately detect with one test whether we need to add
-          another 1.0 to round it up properly.
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.
  
-          VIS instructions are used to facilitate the formation of
-          easier constants, and the propagation of the sign bit.  */
+          We also use VIS3 moves to avoid using the stack to transfer
+          values between float and integer registers.  */
  
  #define TWO_TWENTYTHREE        0x4b000000              /* 2**23 */
-#define ONE_DOT_ZERO   0x3f800000              /* 1.0 */
  
  #define ZERO           %f10                    /* 0.0 */
  #define SIGN_BIT       %f12                    /* -0.0 */
@@ -47,28 +41,19 @@
  ENTRY (__floorf_vis3)
         movwtos %o0, %f0
         sethi   %hi(TWO_TWENTYTHREE), %o2
-       sethi   %hi(ONE_DOT_ZERO), %o3
         fzeros  ZERO
-
         fnegs   ZERO, SIGN_BIT
-
         movwtos %o2, %f16
         fabss   %f0, %f14
-
         fcmps   %fcc3, %f14, %f16
-
         fmovsuge %fcc3, ZERO, %f16
         fands   %f0, SIGN_BIT, SIGN_BIT
-
         fors    %f16, SIGN_BIT, %f16
+       siam    (1 << 2) | 3
         fadds   %f0, %f16, %f1
+       siam    (1 << 2) | 0
         fsubs   %f1, %f16, %f1
-       fcmps   %fcc2, %f1, %f0
-       movwtos %o3, %f9
-
-       fmovsule %fcc2, ZERO, %f9
-       fsubs   %f1, %f9, %f0
-       fabss   %f0, %f0
+       siam    (0 << 2)
         retl
-        fors   %f0, SIGN_BIT, %f0
+        fors   %f1, SIGN_BIT, %f0
  END (__floorf_vis3)
diff --git a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S

index 31cda38..d2a83cb 100644 (file)
--- a/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S
+++ b/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S
@@ -1,6 +1,6 @@
  #include <sparc-ifunc.h>
  
-SPARC_ASM_VIS3_IFUNC(floorf)
+SPARC_ASM_VIS3_VIS2_IFUNC(floorf)
  
  weak_alias (__floorf, floorf)
  
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/Makefile b/sysdeps/sparc/sparc64/fpu/multiarch/Makefile

index 8345848..7a5a9dd 100644 (file)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/Makefile
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/Makefile
@@ -1,4 +1,6 @@
  ifeq ($(subdir),math)
+libm-sysdep_routines += s_ceil-vis2 s_ceilf-vis2 \
+                       s_floor-vis2 s_floorf-vis2
  ifeq ($(have-as-vis3),yes)
  libm-sysdep_routines += m_signbitf-vis3 m_signbit-vis3 s_ceilf-vis3 \
                         s_ceil-vis3 m_finitef-vis3 m_finite-vis3 \
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis2.S b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis2.S

new file mode 100644 (file)

index 0000000..50d96a6
--- /dev/null
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis2.S
@@ -0,0 +1,57 @@
+/* ceil function, sparc64 vis2 version.
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by David S. Miller <davem@davemloft.net>, 2012.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
+
+          We add 2**52 to the value and then subtract it back out
+          (for a negative value the constant is given the same sign,
+          so this becomes a subtract followed by an add).
+
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.  */
+
+#define TWO_FIFTYTWO   0x43300000              /* 2**52 */
+
+#define ZERO           %f10                    /* 0.0 */
+#define SIGN_BIT       %f12                    /* -0.0 */
+
+ENTRY (__ceil_vis2)                            /* in: x in %f0; out: %f0 */
+       sethi   %hi(TWO_FIFTYTWO), %o2          /* upper 32 bits of 2**52 */
+       fzero   ZERO
+       sllx    %o2, 32, %o2                    /* shift into the high word */
+       fnegd   ZERO, SIGN_BIT                  /* SIGN_BIT = -0.0 */
+       stx     %o2, [%sp + STACK_BIAS + 128]   /* int -> fp reg via the stack */
+       fabsd   %f0, %f14                       /* %f14 = |x| */
+       ldd     [%sp + STACK_BIAS + 128], %f16  /* %f16 = 2**52 */
+       fcmpd   %fcc3, %f14, %f16
+       fmovduge %fcc3, ZERO, %f16              /* |x| >= 2**52 or NaN: use 0.0 */
+       fand    %f0, SIGN_BIT, SIGN_BIT         /* SIGN_BIT = sign bit of x */
+       for     %f16, SIGN_BIT, %f16            /* constant takes x's sign */
+       siam    (1 << 2) | 2                    /* override on, round to +inf */
+       faddd   %f0, %f16, %f18                 /* fraction is rounded away */
+       siam    (1 << 2) | 0                    /* override on, round nearest */
+       fsubd   %f18, %f16, %f18                /* take the constant back out */
+       siam    (0 << 2)                        /* override off again */
+       retl
+        for    %f18, SIGN_BIT, %f0             /* delay slot: OR in x's sign */
+END (__ceil_vis2)
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S

index 9a598ea..6acff09 100644 (file)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S
@@ -19,57 +19,41 @@
  
  #include <sysdep.h>
  
-       /* Since changing the rounding mode is extremely expensive, we
-          try to round up using a method that is rounding mode
-          agnostic.
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
  
            We add then subtract (or subtract than add if the initial
            value was negative) 2**23 to the value, then subtract it
            back out.
  
-          This will clear out the fractional portion of the value.
-          One of two things will happen for non-whole initial values.
-          Either the rounding mode will round it up, or it will be
-          rounded down.  If the value started out whole, it will be
-          equal after the addition and subtraction.  This means we
-          can accurately detect with one test whether we need to add
-          another 1.0 to round it up properly.
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.
  
-          VIS instructions are used to facilitate the formation of
-          easier constants, and the propagation of the sign bit.  */
+          We also use VIS3 moves to avoid using the stack to transfer
+          values between float and integer registers.  */
  
  #define TWO_FIFTYTWO   0x43300000              /* 2**52 */
-#define ONE_DOT_ZERO   0x3ff00000              /* 1.0 */
  
  #define ZERO           %f10                    /* 0.0 */
  #define SIGN_BIT       %f12                    /* -0.0 */
  
  ENTRY (__ceil_vis3)
         sethi   %hi(TWO_FIFTYTWO), %o2
-       sethi   %hi(ONE_DOT_ZERO), %o3
         fzero   ZERO
-
         sllx    %o2, 32, %o2
         fnegd   ZERO, SIGN_BIT
-
-       sllx    %o3, 32, %o3
         movxtod %o2, %f16
         fabsd   %f0, %f14
-
         fcmpd   %fcc3, %f14, %f16
-
         fmovduge %fcc3, ZERO, %f16
         fand    %f0, SIGN_BIT, SIGN_BIT
-
         for     %f16, SIGN_BIT, %f16
+       siam    (1 << 2) | 2
         faddd   %f0, %f16, %f18
+       siam    (1 << 2) | 0
         fsubd   %f18, %f16, %f18
-       fcmpd   %fcc2, %f18, %f0
-       movxtod %o3, %f20
-
-       fmovduge %fcc2, ZERO, %f20
-       faddd   %f18, %f20, %f0
-       fabsd   %f0, %f0
+       siam    (0 << 2)
         retl
-        for    %f0, SIGN_BIT, %f0
+        for    %f18, SIGN_BIT, %f0
  END (__ceil_vis3)
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S

index fa4c7c3..e7822bc 100644 (file)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S
@@ -1,6 +1,6 @@
  #include <sparc-ifunc.h>
  
-SPARC_ASM_VIS3_IFUNC(ceil)
+SPARC_ASM_VIS3_VIS2_IFUNC(ceil)
  
  weak_alias (__ceil, ceil)
  
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis2.S b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis2.S

new file mode 100644 (file)

index 0000000..cd5937b
--- /dev/null
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis2.S
@@ -0,0 +1,56 @@
+/* Float ceil function, sparc64 vis2 version.
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by David S. Miller <davem@davemloft.net>, 2012.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
+
+          We add 2**23 to the value and then subtract it back out
+          (for a negative value the constant is given the same sign,
+          so this becomes a subtract followed by an add).
+
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.  */
+
+#define TWO_TWENTYTHREE        0x4b000000              /* 2**23 */
+
+#define ZERO           %f10                    /* 0.0 */
+#define SIGN_BIT       %f12                    /* -0.0 */
+
+ENTRY (__ceilf_vis2)                           /* in: x in %f1; out: %f0 */
+       sethi   %hi(TWO_TWENTYTHREE), %o2       /* bit pattern of 2**23 */
+       fzeros  ZERO
+       fnegs   ZERO, SIGN_BIT                  /* SIGN_BIT = -0.0 */
+       st      %o2, [%sp + STACK_BIAS + 128]   /* int -> fp reg via the stack */
+       fabss   %f1, %f14                       /* %f14 = |x| */
+       ld      [%sp + STACK_BIAS + 128], %f16  /* %f16 = 2**23 */
+       fcmps   %fcc3, %f14, %f16
+       fmovsuge %fcc3, ZERO, %f16              /* |x| >= 2**23 or NaN: use 0.0 */
+       fands   %f1, SIGN_BIT, SIGN_BIT         /* SIGN_BIT = sign bit of x */
+       fors    %f16, SIGN_BIT, %f16            /* constant takes x's sign */
+       siam    (1 << 2) | 2                    /* override on, round to +inf */
+       fadds   %f1, %f16, %f5                  /* fraction is rounded away */
+       siam    (1 << 2) | 0                    /* override on, round nearest */
+       fsubs   %f5, %f16, %f5                  /* take the constant back out */
+       siam    (0 << 2)                        /* override off again */
+       retl
+        fors   %f5, SIGN_BIT, %f0              /* delay slot: OR in x's sign */
+END (__ceilf_vis2)
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S

index 8c63590..b3ec348 100644 (file)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S
@@ -19,55 +19,40 @@
  
  #include <sysdep.h>
  
-       /* Since changing the rounding mode is extremely expensive, we
-          try to round up using a method that is rounding mode
-          agnostic.
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
  
            We add then subtract (or subtract than add if the initial
            value was negative) 2**23 to the value, then subtract it
            back out.
  
-          This will clear out the fractional portion of the value.
-          One of two things will happen for non-whole initial values.
-          Either the rounding mode will round it up, or it will be
-          rounded down.  If the value started out whole, it will be
-          equal after the addition and subtraction.  This means we
-          can accurately detect with one test whether we need to add
-          another 1.0 to round it up properly.
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.
  
-          VIS instructions are used to facilitate the formation of
-          easier constants, and the propagation of the sign bit.  */
+          We also use VIS3 moves to avoid using the stack to transfer
+          values between float and integer registers.  */
  
  #define TWO_TWENTYTHREE        0x4b000000              /* 2**23 */
-#define ONE_DOT_ZERO   0x3f800000              /* 1.0 */
  
  #define ZERO           %f10                    /* 0.0 */
  #define SIGN_BIT       %f12                    /* -0.0 */
  
  ENTRY (__ceilf_vis3)
         sethi   %hi(TWO_TWENTYTHREE), %o2
-       sethi   %hi(ONE_DOT_ZERO), %o3
         fzeros  ZERO
-
         fnegs   ZERO, SIGN_BIT
-
         movwtos %o2, %f16
         fabss   %f1, %f14
-
         fcmps   %fcc3, %f14, %f16
-
         fmovsuge %fcc3, ZERO, %f16
         fands   %f1, SIGN_BIT, SIGN_BIT
-
         fors    %f16, SIGN_BIT, %f16
+       siam    (1 << 2) | 2
         fadds   %f1, %f16, %f5
+       siam    (1 << 2) | 0
         fsubs   %f5, %f16, %f5
-       fcmps   %fcc2, %f5, %f1
-       movwtos %o3, %f9
-
-       fmovsuge %fcc2, ZERO, %f9
-       fadds   %f5, %f9, %f0
-       fabss   %f0, %f0
+       siam    (0 << 2)
         retl
-        fors   %f0, SIGN_BIT, %f0
+        fors   %f5, SIGN_BIT, %f0
  END (__ceilf_vis3)
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S

index 3047dd8..1c72a57 100644 (file)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S
@@ -1,6 +1,6 @@
  #include <sparc-ifunc.h>
  
-SPARC_ASM_VIS3_IFUNC(ceilf)
+SPARC_ASM_VIS3_VIS2_IFUNC(ceilf)
  
  weak_alias (__ceilf, ceilf)
  
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis2.S b/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis2.S

new file mode 100644 (file)

index 0000000..5479ced
--- /dev/null
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis2.S
@@ -0,0 +1,57 @@
+/* floor function, sparc64 vis2 version.
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by David S. Miller <davem@davemloft.net>, 2012.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
+
+          We add 2**52 to the value and then subtract it back out
+          (for a negative value the constant is given the same sign,
+          so this becomes a subtract followed by an add).
+
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.  */
+
+#define TWO_FIFTYTWO   0x43300000              /* 2**52 */
+
+#define ZERO           %f10                    /* 0.0 */
+#define SIGN_BIT       %f12                    /* -0.0 */
+
+ENTRY (__floor_vis2)                           /* in: x in %f0; out: %f0 */
+       sethi   %hi(TWO_FIFTYTWO), %o2          /* upper 32 bits of 2**52 */
+       fzero   ZERO
+       sllx    %o2, 32, %o2                    /* shift into the high word */
+       fnegd   ZERO, SIGN_BIT                  /* SIGN_BIT = -0.0 */
+       stx     %o2, [%sp + STACK_BIAS + 128]   /* int -> fp reg via the stack */
+       fabsd   %f0, %f14                       /* %f14 = |x| */
+       ldd     [%sp + STACK_BIAS + 128], %f16  /* %f16 = 2**52 */
+       fcmpd   %fcc3, %f14, %f16
+       fmovduge %fcc3, ZERO, %f16              /* |x| >= 2**52 or NaN: use 0.0 */
+       fand    %f0, SIGN_BIT, SIGN_BIT         /* SIGN_BIT = sign bit of x */
+       for     %f16, SIGN_BIT, %f16            /* constant takes x's sign */
+       siam    (1 << 2) | 3                    /* override on, round to -inf */
+       faddd   %f0, %f16, %f18                 /* fraction is rounded away */
+       siam    (1 << 2) | 0                    /* override on, round nearest */
+       fsubd   %f18, %f16, %f18                /* take the constant back out */
+       siam    (0 << 2)                        /* override off again */
+       retl
+        for    %f18, SIGN_BIT, %f0             /* delay slot: OR in x's sign */
+END (__floor_vis2)
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S b/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S

index 169d206..c2ffe9f 100644 (file)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S
@@ -19,57 +19,41 @@
  
  #include <sysdep.h>
  
-       /* Since changing the rounding mode is extremely expensive, we
-          try to round up using a method that is rounding mode
-          agnostic.
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
  
            We add then subtract (or subtract than add if the initial
            value was negative) 2**23 to the value, then subtract it
            back out.
  
-          This will clear out the fractional portion of the value.
-          One of two things will happen for non-whole initial values.
-          Either the rounding mode will round it up, or it will be
-          rounded down.  If the value started out whole, it will be
-          equal after the addition and subtraction.  This means we
-          can accurately detect with one test whether we need to add
-          another 1.0 to round it up properly.
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.
  
-          VIS instructions are used to facilitate the formation of
-          easier constants, and the propagation of the sign bit.  */
+          We also use VIS3 moves to avoid using the stack to transfer
+          values between float and integer registers.  */
  
  #define TWO_FIFTYTWO   0x43300000              /* 2**52 */
-#define ONE_DOT_ZERO   0x3ff00000              /* 1.0 */
  
  #define ZERO           %f10                    /* 0.0 */
  #define SIGN_BIT       %f12                    /* -0.0 */
  
  ENTRY (__floor_vis3)
         sethi   %hi(TWO_FIFTYTWO), %o2
-       sethi   %hi(ONE_DOT_ZERO), %o3
         fzero   ZERO
-
         sllx    %o2, 32, %o2
         fnegd   ZERO, SIGN_BIT
-
-       sllx    %o3, 32, %o3
         movxtod %o2, %f16
         fabsd   %f0, %f14
-
         fcmpd   %fcc3, %f14, %f16
-
         fmovduge %fcc3, ZERO, %f16
         fand    %f0, SIGN_BIT, SIGN_BIT
-
         for     %f16, SIGN_BIT, %f16
+       siam    (1 << 2) | 3
         faddd   %f0, %f16, %f18
+       siam    (1 << 2) | 0
         fsubd   %f18, %f16, %f18
-       fcmpd   %fcc2, %f18, %f0
-       movxtod %o3, %f20
-
-       fmovdule %fcc2, ZERO, %f20
-       fsubd   %f18, %f20, %f0
-       fabsd   %f0, %f0
+       siam    (0 << 2)
         retl
-        for    %f0, SIGN_BIT, %f0
+        for    %f18, SIGN_BIT, %f0
  END (__floor_vis3)
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S b/sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S

index 6ae9947..989ccab 100644 (file)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S
@@ -1,6 +1,6 @@
  #include <sparc-ifunc.h>
  
-SPARC_ASM_VIS3_IFUNC(floor)
+SPARC_ASM_VIS3_VIS2_IFUNC(floor)
  
  weak_alias (__floor, floor)
  
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis2.S b/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis2.S

new file mode 100644 (file)

index 0000000..935fa85
--- /dev/null
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis2.S
@@ -0,0 +1,56 @@
+/* Float floor function, sparc64 vis2 version.
+   Copyright (C) 2012-2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by David S. Miller <davem@davemloft.net>, 2012.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
+
+          We add 2**23 to the value and then subtract it back out
+          (for a negative value the constant is given the same sign,
+          so this becomes a subtract followed by an add).
+
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.  */
+
+#define TWO_TWENTYTHREE        0x4b000000              /* 2**23 */
+
+#define ZERO           %f10                    /* 0.0 */
+#define SIGN_BIT       %f12                    /* -0.0 */
+
+ENTRY (__floorf_vis2)                          /* in: x in %f1; out: %f0 */
+       sethi   %hi(TWO_TWENTYTHREE), %o2       /* bit pattern of 2**23 */
+       fzeros  ZERO
+       fnegs   ZERO, SIGN_BIT                  /* SIGN_BIT = -0.0 */
+       st      %o2, [%sp + STACK_BIAS + 128]   /* int -> fp reg via the stack */
+       fabss   %f1, %f14                       /* %f14 = |x| */
+       ld      [%sp + STACK_BIAS + 128], %f16  /* %f16 = 2**23 */
+       fcmps   %fcc3, %f14, %f16
+       fmovsuge %fcc3, ZERO, %f16              /* |x| >= 2**23 or NaN: use 0.0 */
+       fands   %f1, SIGN_BIT, SIGN_BIT         /* SIGN_BIT = sign bit of x */
+       fors    %f16, SIGN_BIT, %f16            /* constant takes x's sign */
+       siam    (1 << 2) | 3                    /* override on, round to -inf */
+       fadds   %f1, %f16, %f5                  /* fraction is rounded away */
+       siam    (1 << 2) | 0                    /* override on, round nearest */
+       fsubs   %f5, %f16, %f5                  /* take the constant back out */
+       siam    (0 << 2)                        /* override off again */
+       retl
+        fors   %f5, SIGN_BIT, %f0              /* delay slot: OR in x's sign */
+END (__floorf_vis2)
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S b/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S

index 65be297..225e17e 100644 (file)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S
@@ -19,55 +19,40 @@
  
  #include <sysdep.h>
  
-       /* Since changing the rounding mode is extremely expensive, we
-          try to round up using a method that is rounding mode
-          agnostic.
+       /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override
+          the rounding mode during this routine.
  
            We add then subtract (or subtract than add if the initial
            value was negative) 2**23 to the value, then subtract it
            back out.
  
-          This will clear out the fractional portion of the value.
-          One of two things will happen for non-whole initial values.
-          Either the rounding mode will round it up, or it will be
-          rounded down.  If the value started out whole, it will be
-          equal after the addition and subtraction.  This means we
-          can accurately detect with one test whether we need to add
-          another 1.0 to round it up properly.
+          This will clear out the fractional portion of the value and,
+          with suitable 'siam' initiated rounding mode settings, round
+          the final result in the proper direction.
  
-          VIS instructions are used to facilitate the formation of
-          easier constants, and the propagation of the sign bit.  */
+          We also use VIS3 moves to avoid using the stack to transfer
+          values between float and integer registers.  */
  
  #define TWO_TWENTYTHREE        0x4b000000              /* 2**23 */
-#define ONE_DOT_ZERO   0x3f800000              /* 1.0 */
  
  #define ZERO           %f10                    /* 0.0 */
  #define SIGN_BIT       %f12                    /* -0.0 */
  
  ENTRY (__floorf_vis3)
         sethi   %hi(TWO_TWENTYTHREE), %o2
-       sethi   %hi(ONE_DOT_ZERO), %o3
         fzeros  ZERO
-
         fnegs   ZERO, SIGN_BIT
-
-       movwtos %o2, %f16
+       movwtos %o2, %f16
         fabss   %f1, %f14
-
         fcmps   %fcc3, %f14, %f16
-
         fmovsuge %fcc3, ZERO, %f16
         fands   %f1, SIGN_BIT, SIGN_BIT
-
         fors    %f16, SIGN_BIT, %f16
+       siam    (1 << 2) | 3
         fadds   %f1, %f16, %f5
+       siam    (1 << 2) | 0
         fsubs   %f5, %f16, %f5
-       fcmps   %fcc2, %f5, %f1
-       movwtos %o3, %f9
-
-       fmovsule %fcc2, ZERO, %f9
-       fsubs   %f5, %f9, %f0
-       fabss   %f0, %f0
+       siam    (0 << 2)
         retl
-        fors   %f0, SIGN_BIT, %f0
+        fors   %f5, SIGN_BIT, %f0
  END (__floorf_vis3)
diff --git a/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S b/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S

index 31cda38..d2a83cb 100644 (file)
--- a/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S
+++ b/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S
@@ -1,6 +1,6 @@
  #include <sparc-ifunc.h>
  
-SPARC_ASM_VIS3_IFUNC(floorf)
+SPARC_ASM_VIS3_VIS2_IFUNC(floorf)
  
  weak_alias (__floorf, floorf)
author	David S. Miller <davem@davemloft.net>
	Tue, 15 Jan 2013 05:47:29 +0000 (21:47 -0800)
committer	David S. Miller <davem@davemloft.net>
	Tue, 15 Jan 2013 05:47:29 +0000 (21:47 -0800)
ChangeLog		patch \| blob \| history
sysdeps/sparc/sparc-ifunc.h		patch \| blob \| history
sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile		patch \| blob \| history
sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis2.S	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S		patch \| blob \| history
sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S		patch \| blob \| history
sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis2.S	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S		patch \| blob \| history
sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S		patch \| blob \| history
sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis2.S	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S		patch \| blob \| history
sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S		patch \| blob \| history
sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis2.S	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S		patch \| blob \| history
sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S		patch \| blob \| history
sysdeps/sparc/sparc64/fpu/multiarch/Makefile		patch \| blob \| history
sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis2.S	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S		patch \| blob \| history
sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S		patch \| blob \| history
sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis2.S	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S		patch \| blob \| history
sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S		patch \| blob \| history
sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis2.S	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S		patch \| blob \| history
sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S		patch \| blob \| history
sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis2.S	[new file with mode: 0644]	patch \| blob
sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S		patch \| blob \| history
sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S		patch \| blob \| history