Fix x86 SSE cosf, sinf issues

author Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
Mon, 10 Sep 2012 09:44:49 +0000 (11:44 +0200)
committer Andreas Jaeger <aj@suse.de>
Mon, 10 Sep 2012 09:44:49 +0000 (11:44 +0200)
diff --git a/ChangeLog b/ChangeLog

index 30a0727..e87c0a3 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2012-09-10  Liubov Dmitrieva  <liubov.dmitrieva@gmail.com>
+
+       * sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S: Fix
+       unwind info if defined PIC. Fix special cases description.
+       * sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S: Likewise.
+
+       * sysdeps/x86_64/fpu/s_sinf.S: Fix special cases description, fix
+       DP_HI_MASK entry.
+       * sysdeps/x86_64/fpu/s_cosf.S: Likewise.
+
  2012-09-07  H.J. Lu  <hongjiu.lu@intel.com>
  
         * scripts/check-local-headers.sh: Add "shopt -s nullglob".
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S

index 2b5a2a5..405c6ea 100644 (file)
--- a/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
+++ b/sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S
@@ -50,25 +50,29 @@
   *  9) if x is NaN, return x-x.
   *
   * Special cases:
- *  cos(+-0)==+-0 not raising inexact/underflow,
- *  cos(subnormal) raises inexact/underflow
- *  cos(min_normalized) raises inexact/underflow
- *  cos(normalized) raises inexact
- *  cos(Inf) = NaN, raises invalid, sets errno to EDOM
- *  cos(NaN) = NaN
+ *  cos(+-0) = 1 not raising inexact,
+ *  cos(subnormal) raises inexact,
+ *  cos(min_normalized) raises inexact,
+ *  cos(normalized) raises inexact,
+ *  cos(Inf) = NaN, raises invalid, sets errno to EDOM,
+ *  cos(NaN) = NaN.
   */
  
  #ifdef PIC
  # define MO1(symbol)                   L(symbol)##@GOTOFF(%ebx)
  # define MO2(symbol,reg2,_scale)       L(symbol)##@GOTOFF(%ebx,reg2,_scale)
-# define SAVE_BX                       pushl   %ebx
-# define RESTORE_BX                    popl    %ebx
+# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
+# define CFI_POP(REG)  cfi_adjust_cfa_offset(-4); cfi_restore(REG)
+# define PUSH(REG)                     pushl REG; CFI_PUSH(REG)
+# define POP(REG)                      popl REG; CFI_POP(REG)
+# define ENTRANCE                      PUSH(%ebx); LOAD_PIC_REG(bx)
+# define RETURN                                POP(%ebx); ret; CFI_PUSH(%ebx)
  # define ARG_X                         8(%esp)
  #else
  # define MO1(symbol)                   L(symbol)
  # define MO2(symbol,reg2,_scale)       L(symbol)(,reg2,_scale)
-# define SAVE_BX
-# define RESTORE_BX
+# define ENTRANCE
+# define RETURN                                ret
  # define ARG_X                         4(%esp)
  #endif
  
@@ -76,11 +80,7 @@
  ENTRY(__cosf_sse2)
         /* Input: single precision x on stack at address ARG_X */
  
-#ifdef PIC
-       SAVE_BX
-       LOAD_PIC_REG(bx)
-#endif
-
+       ENTRANCE
         movl    ARG_X, %eax             /* Bits of x */
         cvtss2sd ARG_X, %xmm0           /* DP x */
         andl    $0x7fffffff, %eax       /* |x| */
@@ -143,8 +143,7 @@ L(reconstruction):
         fldl    0(%esp)                 /* ...to FPU.  */
         /* Return back 4 bytes of stack frame */
         lea     8(%esp), %esp
-       RESTORE_BX
-       ret
+       RETURN
  
         .p2align        4
  L(sin_poly):
@@ -183,9 +182,7 @@ L(sin_poly):
         fldl    0(%esp)                 /* ...to FPU.  */
         /* Return back 4 bytes of stack frame */
         lea     8(%esp), %esp
-       RESTORE_BX
-       ret
-
+       RETURN
  
         .p2align        4
  L(large_args):
@@ -275,7 +272,6 @@ L(very_large_skip2):
  
         jmp     L(reconstruction)       /* end of very_large_args peth */
  
-
         .p2align        4
  L(arg_less_pio4):
         /* Here if |x|<Pi/4 */
@@ -307,8 +303,7 @@ L(epilogue):
         flds    0(%esp)                 /* ...to FPU.  */
         /* Return back 4 bytes of stack frame */
         lea     4(%esp), %esp
-       RESTORE_BX
-       ret
+       RETURN
  
         .p2align        4
  L(arg_less_2pn5):
@@ -353,7 +348,6 @@ L(skip_errno_setting):
         jmp     L(epilogue)
  END(__cosf_sse2)
  
-
         .section .rodata, "a"
         .p2align 3
  L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -540,8 +534,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
         .p2align 3
  L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
         .long   0x00000000,0xffffffff
-       .type L(DP_ABS_MASK), @object
-       ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+       .type L(DP_HI_MASK), @object
+       ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
  
         .p2align 4
  L(SP_ABS_MASK): /* Mask for getting SP absolute value */
diff --git a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S b/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S

index cda1750..49d59b5 100644 (file)
--- a/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
+++ b/sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S
@@ -50,25 +50,29 @@
   *  9) if x is NaN, return x-x.
   *
   * Special cases:
- *  sin(+-0)==+-0 not raising inexact/underflow,
- *  sin(subnormal) raises inexact/underflow
- *  sin(min_normalized) raises inexact/underflow
- *  sin(normalized) raises inexact
- *  sin(Inf) = NaN, raises invalid, sets errno to EDOM
- *  sin(NaN) = NaN
+ *  sin(+-0) = +-0 not raising inexact/underflow,
+ *  sin(subnormal) raises inexact/underflow,
+ *  sin(min_normalized) raises inexact/underflow,
+ *  sin(normalized) raises inexact,
+ *  sin(Inf) = NaN, raises invalid, sets errno to EDOM,
+ *  sin(NaN) = NaN.
   */
  
  #ifdef PIC
  # define MO1(symbol)                   L(symbol)##@GOTOFF(%ebx)
  # define MO2(symbol,reg2,_scale)       L(symbol)##@GOTOFF(%ebx,reg2,_scale)
-# define SAVE_BX                       pushl   %ebx
-# define RESTORE_BX                    popl    %ebx
+# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
+# define CFI_POP(REG)  cfi_adjust_cfa_offset(-4); cfi_restore(REG)
+# define PUSH(REG)                     pushl REG; CFI_PUSH(REG)
+# define POP(REG)                      popl REG; CFI_POP(REG)
+# define ENTRANCE                      PUSH(%ebx); LOAD_PIC_REG(bx)
+# define RETURN                                POP(%ebx); ret; CFI_PUSH(%ebx)
  # define ARG_X                         8(%esp)
  #else
  # define MO1(symbol)                   L(symbol)
  # define MO2(symbol,reg2,_scale)       L(symbol)(,reg2,_scale)
-# define SAVE_BX
-# define RESTORE_BX
+# define ENTRANCE
+# define RETURN                                ret
  # define ARG_X                         4(%esp)
  #endif
  
@@ -76,11 +80,7 @@
  ENTRY(__sinf_sse2)
         /* Input: single precision x on stack at address ARG_X */
  
-#ifdef PIC
-       SAVE_BX
-       LOAD_PIC_REG(bx)
-#endif
-
+       ENTRANCE
         movl    ARG_X, %eax             /* Bits of x */
         cvtss2sd ARG_X, %xmm0           /* DP x */
         andl    $0x7fffffff, %eax       /* |x| */
@@ -145,8 +145,7 @@ L(reconstruction):
         fldl    0(%esp)                 /* ...to FPU.  */
         /* Return back 4 bytes of stack frame */
         lea     8(%esp), %esp
-       RESTORE_BX
-       ret
+       RETURN
  
         .p2align        4
  L(sin_poly):
@@ -186,9 +185,7 @@ L(sin_poly):
         fldl    0(%esp)                 /* ...to FPU.  */
         /* Return back 4 bytes of stack frame */
         lea     8(%esp), %esp
-       RESTORE_BX
-       ret
-
+       RETURN
  
         .p2align        4
  L(large_args):
@@ -281,10 +278,6 @@ L(very_large_skip2):
  
         jmp     L(reconstruction)       /* end of very_large_args peth */
  
-
-
-
-
         .p2align        4
  L(arg_less_pio4):
         /* Here if |x|<Pi/4 */
@@ -320,8 +313,7 @@ L(epilogue):
         flds    0(%esp)                 /* ...to FPU.  */
         /* Return back 4 bytes of stack frame */
         lea     4(%esp), %esp
-       RESTORE_BX
-       ret
+       RETURN
  
         .p2align        4
  L(arg_less_2pn5):
@@ -376,7 +368,6 @@ L(skip_errno_setting):
         jmp     L(epilogue)
  END(__sinf_sse2)
  
-
         .section .rodata, "a"
         .p2align 3
  L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -569,7 +560,7 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
         .p2align 3
  L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
         .long   0x00000000,0xffffffff
-       .type L(DP_ABS_MASK), @object
-       ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+       .type L(DP_HI_MASK), @object
+       ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
  
  weak_alias (__sinf, sinf)
diff --git a/sysdeps/x86_64/fpu/s_cosf.S b/sysdeps/x86_64/fpu/s_cosf.S

index 7eeefe8..dc8c76a 100644 (file)
--- a/sysdeps/x86_64/fpu/s_cosf.S
+++ b/sysdeps/x86_64/fpu/s_cosf.S
@@ -50,12 +50,12 @@
   *  9) if x is NaN, return x-x.
   *
   * Special cases:
- *  cos(+-0)==+-0 not raising inexact/underflow,
- *  cos(subnormal) raises inexact/underflow
- *  cos(min_normalized) raises inexact/underflow
- *  cos(normalized) raises inexact
- *  cos(Inf) = NaN, raises invalid, sets errno to EDOM
- *  cos(NaN) = NaN
+ *  cos(+-0) = 1 not raising inexact,
+ *  cos(subnormal) raises inexact,
+ *  cos(min_normalized) raises inexact,
+ *  cos(normalized) raises inexact,
+ *  cos(Inf) = NaN, raises invalid, sets errno to EDOM,
+ *  cos(NaN) = NaN.
   */
  
         .text
@@ -163,10 +163,6 @@ L(sin_poly):
         cvtsd2ss %xmm3, %xmm0           /* SP result */
         ret
  
-
-
-
-
         .p2align        4
  L(large_args):
         /* Here if |x|>=9*Pi/4 */
@@ -257,7 +253,6 @@ L(very_large_skip2):
  
         jmp     L(reconstruction)       /* end of very_large_args peth */
  
-
         .p2align        4
  L(arg_less_pio4):
         /* Here if |x|<Pi/4 */
@@ -317,7 +312,6 @@ L(arg_inf_or_nan):
  
         /* Here if x is Inf. Set errno to EDOM.  */
         call    JUMPTARGET(__errno_location)
-       lea     (%rax), %rax
         movl    $EDOM, (%rax)
  
         .p2align        4
@@ -328,8 +322,6 @@ L(skip_errno_setting):
         ret
  END(__cosf)
  
-
-
         .section .rodata, "a"
         .p2align 3
  L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -516,8 +508,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
         .p2align 3
  L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
         .long   0x00000000,0xffffffff
-       .type L(DP_ABS_MASK), @object
-       ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+       .type L(DP_HI_MASK), @object
+       ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
  
         .p2align 4
  L(SP_ABS_MASK): /* Mask for getting SP absolute value */
diff --git a/sysdeps/x86_64/fpu/s_sinf.S b/sysdeps/x86_64/fpu/s_sinf.S

index 295ba3d..9a6c87f 100644 (file)
--- a/sysdeps/x86_64/fpu/s_sinf.S
+++ b/sysdeps/x86_64/fpu/s_sinf.S
@@ -50,12 +50,12 @@
   *  9) if x is NaN, return x-x.
   *
   * Special cases:
- *  sin(+-0)==+-0 not raising inexact/underflow,
- *  sin(subnormal) raises inexact/underflow
- *  sin(min_normalized) raises inexact/underflow
- *  sin(normalized) raises inexact
- *  sin(Inf) = NaN, raises invalid, sets errno to EDOM
- *  sin(NaN) = NaN
+ *  sin(+-0) = +-0 not raising inexact/underflow,
+ *  sin(subnormal) raises inexact/underflow,
+ *  sin(min_normalized) raises inexact/underflow,
+ *  sin(normalized) raises inexact,
+ *  sin(Inf) = NaN, raises invalid, sets errno to EDOM,
+ *  sin(NaN) = NaN.
   */
  
         .text
@@ -168,7 +168,6 @@ L(sin_poly):
         cvtsd2ss %xmm3, %xmm0           /* SP result */
         ret
  
-
         .p2align        4
  L(large_args):
         /* Here if |x|>=9*Pi/4 */
@@ -262,7 +261,6 @@ L(very_large_skip2):
  
         jmp     L(reconstruction)       /* end of very_large_args peth */
  
-
         .p2align        4
  L(arg_less_pio4):
         /* Here if |x|<Pi/4 */
@@ -340,7 +338,6 @@ L(arg_inf_or_nan):
  
         /* Here if x is Inf. Set errno to EDOM.  */
         call    JUMPTARGET(__errno_location)
-       lea     (%rax), %rax
         movl    $EDOM, (%rax)
  
         .p2align        4
@@ -351,8 +348,6 @@ L(skip_errno_setting):
         ret
  END(__sinf)
  
-
-
         .section .rodata, "a"
         .p2align 3
  L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
@@ -545,8 +540,8 @@ L(DP_ABS_MASK): /* Mask for getting DP absolute value */
         .p2align 3
  L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
         .long   0x00000000,0xffffffff
-       .type L(DP_ABS_MASK),@object
-       ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
+       .type L(DP_HI_MASK),@object
+       ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
  
         .p2align 4
  L(SP_ABS_MASK): /* Mask for getting SP absolute value */
author	Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
	Mon, 10 Sep 2012 09:44:49 +0000 (11:44 +0200)
committer	Andreas Jaeger <aj@suse.de>
	Mon, 10 Sep 2012 09:44:49 +0000 (11:44 +0200)
ChangeLog		patch | blob | history
sysdeps/i386/i686/fpu/multiarch/s_cosf-sse2.S		patch | blob | history
sysdeps/i386/i686/fpu/multiarch/s_sinf-sse2.S		patch | blob | history
sysdeps/x86_64/fpu/s_cosf.S		patch | blob | history
sysdeps/x86_64/fpu/s_sinf.S		patch | blob | history