From 48dcf1c597b5e90d40020319758467fe6d35b15f Mon Sep 17 00:00:00 2001 From: Carlos O'Donell Date: Fri, 21 Apr 2006 00:27:20 +0000 Subject: [PATCH] 2006-04-20 Carlos O'Donell * sysdeps/hppa/fpu/fclrexcpt.c (feclearexcept): Use union to align parameters. Specify memory clobbers. * sysdeps/hppa/fpu/fedisblxcpt.c (fedisableexcept): Likewise. * sysdeps/hppa/fpu/feenablxcpt.c (feenableexcept): Likewise. * sysdeps/hppa/fpu/fegetenv.c (fegetenv): Do not save exception register. Use memcpy to align buffer. * sysdeps/hppa/fpu/fegetexcept.c (fegetexcept): Store and reload fr0. Use union to align parameters. * sysdeps/hppa/fpu/fegetround.c (fegetround): Likewise. * sysdeps/hppa/fpu/feholdexcpt.c (feholdexcept): Do not save exception registers. Define libm_hidden_def. * sysdeps/hppa/fpu/fesetenv.c (fesetenv): Do not save exception registers. * sysdeps/hppa/fpu/fesetround.c (fesetround): Use union to align parameters, specify memory clobbers. Define libm_hidden_def. * sysdeps/hppa/fpu/feupdateenv.c (feupdateenv): Use union to align parameters. Use memcpy to align buffer. * sysdeps/hppa/fpu/fgetexcptflg.c (fegetexceptflag): Likewise. * sysdeps/hppa/fpu/fsetexcptflg.c (fesetexceptflag): Likewise. * sysdeps/hppa/fpu/ftestexcept.c (fetestexcept): Likewise. * sysdeps/hppa/fpu/libm-test-ulps: Update. * sysdeps/hppa/fpu/bits/fenv.h: Add ABI comments. 
--- ChangeLog.hppa | 25 +++++++++++++++++++++++++ sysdeps/hppa/fpu/bits/fenv.h | 7 ++++++- sysdeps/hppa/fpu/fclrexcpt.c | 9 ++++----- sysdeps/hppa/fpu/fedisblxcpt.c | 11 ++++++----- sysdeps/hppa/fpu/feenablxcpt.c | 11 ++++++----- sysdeps/hppa/fpu/fegetenv.c | 12 +++++++----- sysdeps/hppa/fpu/fegetexcept.c | 8 +++++--- sysdeps/hppa/fpu/fegetround.c | 8 +++++--- sysdeps/hppa/fpu/feholdexcpt.c | 37 ++++++++++++++++++------------------- sysdeps/hppa/fpu/fesetenv.c | 38 ++++++++++++++++++-------------------- sysdeps/hppa/fpu/fesetround.c | 16 +++++++++------- sysdeps/hppa/fpu/feupdateenv.c | 19 +++++++++++-------- sysdeps/hppa/fpu/fgetexcptflg.c | 8 +++++--- sysdeps/hppa/fpu/fsetexcptflg.c | 12 +++++------- sysdeps/hppa/fpu/ftestexcept.c | 8 +++++--- sysdeps/hppa/fpu/libm-test-ulps | 19 ++++++++++--------- 16 files changed, 145 insertions(+), 103 deletions(-) diff --git a/ChangeLog.hppa b/ChangeLog.hppa index 401a2a2..e937d9b 100644 --- a/ChangeLog.hppa +++ b/ChangeLog.hppa @@ -1,3 +1,28 @@ +2006-04-20 Carlos O'Donell + + * sysdeps/hppa/fpu/fclrexcpt.c (feclearexcept): Use union to + align parameters. Specify memory clobbers. + * sysdeps/hppa/fpu/fedisblxcpt.c (fedisableexcept): Likewise. + * sysdeps/hppa/fpu/feenablxcpt.c (feenableexcept): Likewise. + * sysdeps/hppa/fpu/fegetenv.c (fegetenv): Do not save exception + register. Use memcpy to align buffer. + * sysdeps/hppa/fpu/fegetexcept.c (fegetexcept): Store and reload + fr0. Use union to align parameters. + * sysdeps/hppa/fpu/fegetround.c (fegetround): Likewise. + * sysdeps/hppa/fpu/feholdexcpt.c (feholdexcept): Do not save + exception registers. Define libm_hidden_def. + * sysdeps/hppa/fpu/fesetenv.c (fesetenv): Do not save exception + registers. + * sysdeps/hppa/fpu/fesetround.c (fesetround): Use union to + align parameters, specify memory clobbers. Define libm_hidden_def. + * sysdeps/hppa/fpu/feupdateenv.c (feupdateenv): Use union to align + parameters. Use memcpy to align buffer. 
+ * sysdeps/hppa/fpu/fgetexcptflg.c (fegetexceptflag): Likewise. + * sysdeps/hppa/fpu/fsetexcptflg.c (fesetexceptflag): Likewise. + * sysdeps/hppa/fpu/ftestexcept.c (fetestexcept): Likewise. + * sysdeps/hppa/fpu/libm-test-ulps: Update. + * sysdeps/hppa/fpu/bits/fenv.h: Add ABI comments. + 2006-04-19 Carlos O'Donell * sysdeps/unix/sysv/linux/hppa/bits/mman.h [__USE_GNU]: diff --git a/sysdeps/hppa/fpu/bits/fenv.h b/sysdeps/hppa/fpu/bits/fenv.h index c5f8c43..6d83b14 100644 --- a/sysdeps/hppa/fpu/bits/fenv.h +++ b/sysdeps/hppa/fpu/bits/fenv.h @@ -62,7 +62,12 @@ typedef unsigned int fexcept_t; /* Type representing floating-point environment. This structure corresponds to the layout of the status and exception words in the - register file. */ + register file. The exception registers are never saved/stored by + userspace. This structure is also not correctly aligned ever, in + an ABI error we left out __aligned(8) and subsequently all of our + fenv functions must accept unaligned input, align the input, and + then use assembly to store fr0. This is a performance hit, but + means the ABI is stable. */ typedef struct { unsigned int __status_word; diff --git a/sysdeps/hppa/fpu/fclrexcpt.c b/sysdeps/hppa/fpu/fclrexcpt.c index a7c6982..d74a449 100644 --- a/sysdeps/hppa/fpu/fclrexcpt.c +++ b/sysdeps/hppa/fpu/fclrexcpt.c @@ -23,14 +23,13 @@ int feclearexcept (int excepts) { - unsigned int sw[2]; + union { unsigned long long l; unsigned int sw[2]; } s; /* Get the current status word. */ - __asm__ ("fstd %%fr0,0(%1)" : "=m" (*sw) : "r" (sw)); - + __asm__ ("fstd %%fr0,0(%1)" : "=m" (s.l) : "r" (&s.l) : "%r0"); /* Clear all the relevant bits. */ - sw[0] &= ~((excepts & FE_ALL_EXCEPT) << 27); - __asm__ ("fldd 0(%0),%%fr0" : : "r" (sw)); + s.sw[0] &= ~((excepts & FE_ALL_EXCEPT) << 27); + __asm__ ("fldd 0(%0),%%fr0" : : "r" (&s.l), "m" (s.l) : "%r0"); /* Success. 
*/ return 0; diff --git a/sysdeps/hppa/fpu/fedisblxcpt.c b/sysdeps/hppa/fpu/fedisblxcpt.c index aac6bbf..8d2e664 100644 --- a/sysdeps/hppa/fpu/fedisblxcpt.c +++ b/sysdeps/hppa/fpu/fedisblxcpt.c @@ -23,15 +23,16 @@ int fedisableexcept (int excepts) { - unsigned int sw[2], old_exc; + union { unsigned long long l; unsigned int sw[2]; } s; + unsigned int old_exc; /* Get the current status word. */ - __asm__ ("fstd %%fr0,0(%1)" : "=m" (*sw) : "r" (sw)); + __asm__ ("fstd %%fr0,0(%1)" : "=m" (s.l) : "r" (&s.l) : "%r0"); - old_exc = sw[0] & FE_ALL_EXCEPT; + old_exc = s.sw[0] & FE_ALL_EXCEPT; - sw[0] &= ~(excepts & FE_ALL_EXCEPT); - __asm__ ("fldd 0(%0),%%fr0" : : "r" (sw)); + s.sw[0] &= ~(excepts & FE_ALL_EXCEPT); + __asm__ ("fldd 0(%0),%%fr0" : : "r" (&s.l), "m" (s.l) : "%r0"); return old_exc; } diff --git a/sysdeps/hppa/fpu/feenablxcpt.c b/sysdeps/hppa/fpu/feenablxcpt.c index 9ce3ca8..4b17a60 100644 --- a/sysdeps/hppa/fpu/feenablxcpt.c +++ b/sysdeps/hppa/fpu/feenablxcpt.c @@ -23,15 +23,16 @@ int feenableexcept (int excepts) { - unsigned int sw[2], old_exc; + union { unsigned long long l; unsigned int sw[2]; } s; + unsigned int old_exc; /* Get the current status word. */ - __asm__ ("fstd %%fr0,0(%1)" : "=m" (*sw) : "r" (sw)); + __asm__ ("fstd %%fr0,0(%1)" : "=m" (s.l) : "r" (&s.l) : "%r0"); - old_exc = sw[0] & FE_ALL_EXCEPT; + old_exc = s.sw[0] & FE_ALL_EXCEPT; - sw[0] |= (excepts & FE_ALL_EXCEPT); - __asm__ ("fldd 0(%0),%%fr0" : : "r" (sw)); + s.sw[0] |= (excepts & FE_ALL_EXCEPT); + __asm__ ("fldd 0(%0),%%fr0" : : "r" (&s.l), "m" (s.l) : "%r0"); return old_exc; } diff --git a/sysdeps/hppa/fpu/fegetenv.c b/sysdeps/hppa/fpu/fegetenv.c index b87317b..fcf5d2d 100644 --- a/sysdeps/hppa/fpu/fegetenv.c +++ b/sysdeps/hppa/fpu/fegetenv.c @@ -19,15 +19,17 @@ 02111-1307 USA. 
*/ #include +#include int fegetenv (fenv_t *envp) { + unsigned long long buf[4], *bufptr = buf; + __asm__ ( - "fstd,ma %%fr0,8(%1)\n" - "fstd,ma %%fr1,8(%1)\n" - "fstd,ma %%fr2,8(%1)\n" - "fstd %%fr3,0(%1)\n" - : "=m" (*envp), "+r" (envp)); + "fstd,ma %%fr0,8(%1) \n\t" + "fldd -8(%1),%%fr0 \n\t" + : "=m" (buf), "+r" (bufptr) : : "%r0"); + memcpy(envp, buf, sizeof (*envp)); return 0; } diff --git a/sysdeps/hppa/fpu/fegetexcept.c b/sysdeps/hppa/fpu/fegetexcept.c index efd1d7d..d249dc6 100644 --- a/sysdeps/hppa/fpu/fegetexcept.c +++ b/sysdeps/hppa/fpu/fegetexcept.c @@ -23,10 +23,12 @@ int fegetexcept (void) { - unsigned int sw[2]; + union { unsigned long long l; unsigned int sw[2] } s; /* Get the current status word. */ - __asm__ ("fstd %%fr0,0(%1)" : "=m" (*sw) : "r" (sw)); + __asm__ ("fstd %%fr0,0(%1) \n\t" + "fldd 0(%1),%%fr0 \n\t" + : "=m" (s.l) : "r" (&s.l) : "%r0"); - return sw[0] & FE_ALL_EXCEPT; + return (s.sw[0] & FE_ALL_EXCEPT); } diff --git a/sysdeps/hppa/fpu/fegetround.c b/sysdeps/hppa/fpu/fegetround.c index aefedbc..1e606c9 100644 --- a/sysdeps/hppa/fpu/fegetround.c +++ b/sysdeps/hppa/fpu/fegetround.c @@ -23,10 +23,12 @@ int fegetround (void) { - unsigned int sw[2]; + union { unsigned long long l; unsigned int sw[2] } s; /* Get the current status word. */ - __asm__ ("fstd %%fr0,0(%1)" : "=m" (*sw) : "r" (sw)); + __asm__ ("fstd %%fr0,0(%1) \n\t" + "fldd 0(%1),%%fr0 \n\t" + : "=m" (s.l) : "r" (&s.l)); - return sw[0] & FE_DOWNWARD; + return (s.sw[0] & FE_DOWNWARD); } diff --git a/sysdeps/hppa/fpu/feholdexcpt.c b/sysdeps/hppa/fpu/feholdexcpt.c index 5aec015..ac6eb58 100644 --- a/sysdeps/hppa/fpu/feholdexcpt.c +++ b/sysdeps/hppa/fpu/feholdexcpt.c @@ -24,33 +24,32 @@ int feholdexcept (fenv_t *envp) { - fenv_t clear; - fenv_t * _regs = envp; + union { unsigned long long buf[4]; fenv_t env; } clear; + unsigned long long *bufptr; /* Store the environment. 
*/ + bufptr = clear.buf; __asm__ ( "fstd,ma %%fr0,8(%1)\n" - "fstd,ma %%fr1,8(%1)\n" - "fstd,ma %%fr2,8(%1)\n" - "fstd %%fr3,0(%1)\n" - : "=m" (*_regs), "+r" (_regs)); - memcpy (&clear, envp, sizeof (clear)); - - /* Now clear all exceptions. */ - clear.__status_word &= ~(FE_ALL_EXCEPT << 27); - memset (clear.__exception, 0, sizeof (clear.__exception)); + : "=m" (clear), "+r" (bufptr) : : "%r0"); + memcpy (envp, &clear.env, sizeof (fenv_t)); + /* Clear exception queues */ + memset (clear.env.__exception, 0, sizeof (clear.env.__exception)); /* And set all exceptions to non-stop. */ - clear.__status_word &= ~FE_ALL_EXCEPT; + clear.env.__status_word &= ~FE_ALL_EXCEPT; + /* Now clear all flags */ + clear.env.__status_word &= ~(FE_ALL_EXCEPT << 27); - /* Load the new environment. */ - _regs = &clear; + /* Load the new environment. Note: fr0 must load last to enable T-bit + Thus we start bufptr at the end and work backwards */ + bufptr = (unsigned int)(clear.buf) + sizeof(unsigned int)*4; __asm__ ( - "fldd,ma 8(%0),%%fr0\n" - "fldd,ma 8(%0),%%fr1\n" - "fldd,ma 8(%0),%%fr2\n" - "fldd 0(%0),%%fr3\n" - : : "r" (_regs)); + "fldd,mb -8(%0),%%fr0\n" + : : "r" (bufptr), "m" (clear) : "%r0"); return 0; } + +libm_hidden_def (feholdexcept) + diff --git a/sysdeps/hppa/fpu/fesetenv.c b/sysdeps/hppa/fpu/fesetenv.c index 5267732..b5753ef 100644 --- a/sysdeps/hppa/fpu/fesetenv.c +++ b/sysdeps/hppa/fpu/fesetenv.c @@ -25,40 +25,38 @@ int fesetenv (const fenv_t *envp) { - fenv_t temp; - fenv_t * _regs = &temp; + union { unsigned long long buf[4]; fenv_t env; } temp; + unsigned long long *bufptr; /* Install the environment specified by ENVP. But there are a few values which we do not want to come from the saved environment. Therefore, we get the current environment and replace the values we want to use from the environment specified by the parameter. 
*/ + bufptr = temp.buf; __asm__ ( "fstd,ma %%fr0,8(%1)\n" - "fstd,ma %%fr1,8(%1)\n" - "fstd,ma %%fr2,8(%1)\n" - "fstd %%fr3,0(%1)\n" - : "=m" (*_regs), "+r" (_regs)); + : "=m" (temp), "+r" (bufptr) : : "%r0"); - temp.__status_word &= ~(FE_ALL_EXCEPT - | (FE_ALL_EXCEPT << 27) - | FE_DOWNWARD); + temp.env.__status_word &= ~(FE_ALL_EXCEPT + | (FE_ALL_EXCEPT << 27) + | FE_DOWNWARD); if (envp == FE_DFL_ENV) ; else if (envp == FE_NOMASK_ENV) - temp.__status_word |= FE_ALL_EXCEPT; + temp.env.__status_word |= FE_ALL_EXCEPT; else - temp.__status_word |= (envp->__status_word - & (FE_ALL_EXCEPT - | FE_DOWNWARD - | (FE_ALL_EXCEPT << 27))); + temp.env.__status_word |= (envp->__status_word + & (FE_ALL_EXCEPT + | FE_DOWNWARD + | (FE_ALL_EXCEPT << 27))); - /* Load the new environment. */ + /* Load the new environment. We use bufptr again since the + initial asm has modified the value of the register and here + we take advantage of that to load in reverse order so fr0 + is loaded last and T-Bit is enabled. */ __asm__ ( - "fldd,ma -8(%1),%%fr3\n" - "fldd,ma -8(%1),%%fr2\n" - "fldd,ma -8(%1),%%fr1\n" - "fldd 0(%1),%%fr0\n" - : "=m" (*_regs), "+r" (_regs)); + "fldd,mb -8(%1),%%fr0\n" + : "=m" (temp), "+r" (bufptr) : : "%r0" ); /* Success. */ return 0; diff --git a/sysdeps/hppa/fpu/fesetround.c b/sysdeps/hppa/fpu/fesetround.c index 3687624..9f30c24 100644 --- a/sysdeps/hppa/fpu/fesetround.c +++ b/sysdeps/hppa/fpu/fesetround.c @@ -23,17 +23,19 @@ int fesetround (int round) { - unsigned int sw[2]; + union { unsigned long long l; unsigned int sw[2]; } s; if (round & ~FE_DOWNWARD) - /* ROUND is not a valid rounding mode. */ + /* round is not a valid rounding mode. */ return 1; - + /* Get the current status word. 
*/ - __asm__ ("fstd %%fr0,0(%1)" : "=m" (*sw) : "r" (sw)); - sw[0] &= ~FE_DOWNWARD; - sw[0] |= round; - __asm__ ("fldd 0(%0),%%fr0" : : "r" (sw)); + __asm__ ("fstd %%fr0,0(%1)" : "=m" (s.l) : "r" (&s.l) : "%r0"); + s.sw[0] &= ~FE_DOWNWARD; + s.sw[0] |= round & FE_DOWNWARD; + __asm__ ("fldd 0(%0),%%fr0" : : "r" (&s.l), "m" (s.l) : "%r0"); return 0; } + +libm_hidden_def (fesetround) diff --git a/sysdeps/hppa/fpu/feupdateenv.c b/sysdeps/hppa/fpu/feupdateenv.c index 7d50282..1714006 100644 --- a/sysdeps/hppa/fpu/feupdateenv.c +++ b/sysdeps/hppa/fpu/feupdateenv.c @@ -19,19 +19,22 @@ 02111-1307 USA. */ #include +#include int feupdateenv (const fenv_t *envp) { - unsigned int sw[2]; - - /* Get the current exception status. */ - __asm__ ("fstd %%fr0,0(%1)" : "=m" (*sw) : "r" (sw)); + union { unsigned long long l; unsigned int sw[2]; } s; + fenv_t temp; + /* Get the current exception status */ + __asm__ ("fstd %%fr0,0(%1) \n\t" + "fldd 0(%1),%%fr0 \n\t" + : "=m" (s.l) : "r" (&s.l)); + memcpy(&temp, envp, sizeof(fenv_t)); + /* Currently raised exceptions not cleared */ + temp.__status_word |= s.sw[0] & (FE_ALL_EXCEPT << 27); /* Install new environment. */ - fesetenv (envp); - /* Raise the saved exceptions */ - feraiseexcept(sw[0] & FE_ALL_EXCEPT); - + fesetenv (&temp); /* Success. */ return 0; } diff --git a/sysdeps/hppa/fpu/fgetexcptflg.c b/sysdeps/hppa/fpu/fgetexcptflg.c index 27766ec..d5bcfe3 100644 --- a/sysdeps/hppa/fpu/fgetexcptflg.c +++ b/sysdeps/hppa/fpu/fgetexcptflg.c @@ -23,12 +23,14 @@ int fegetexceptflag (fexcept_t *flagp, int excepts) { - unsigned int sw[2]; + union { unsigned long long l; unsigned int sw[2]; } s; /* Get the current status word. */ - __asm__ ("fstd %%fr0,0(%1)" : "=m" (*sw) : "r" (sw)); + __asm__ ("fstd %%fr0,0(%1) \n\t" + "fldd 0(%1),%%fr0 \n\t" + : "=m" (s.l) : "r" (&s.l) : "%r0"); - *flagp = (sw[0] >> 27) & excepts & FE_ALL_EXCEPT; + *flagp = (s.sw[0] >> 27) & excepts & FE_ALL_EXCEPT; /* Success. 
*/ return 0; diff --git a/sysdeps/hppa/fpu/fsetexcptflg.c b/sysdeps/hppa/fpu/fsetexcptflg.c index af35f5a..4ec3a92 100644 --- a/sysdeps/hppa/fpu/fsetexcptflg.c +++ b/sysdeps/hppa/fpu/fsetexcptflg.c @@ -24,16 +24,14 @@ int fesetexceptflag (const fexcept_t *flagp, int excepts) { - unsigned int sw[2]; + union { unsigned long long l; unsigned int sw[2]; } s; /* Get the current status word. */ - __asm__ ("fstd %%fr0,0(%1)" : "=m" (*sw) : "r" (sw)); - - /* Install new enable trap bits */ - sw[0] |= (*flagp & excepts & FE_ALL_EXCEPT) << 27; - + __asm__ ("fstd %%fr0,0(%1)" : "=m" (s.l) : "r" (&s.l) : "%r0"); + /* Install new raised trap bits */ + s.sw[0] |= (*flagp & excepts & FE_ALL_EXCEPT) << 27; /* Store the new status word. */ - __asm__ ("fldd 0(%0),%%fr0" : : "r" (sw)); + __asm__ ("fldd 0(%0),%%fr0" : : "r" (&s.l), "m" (s.l) : "%r0"); /* Success. */ return 0; diff --git a/sysdeps/hppa/fpu/ftestexcept.c b/sysdeps/hppa/fpu/ftestexcept.c index d08d4d6..ac6d4b2 100644 --- a/sysdeps/hppa/fpu/ftestexcept.c +++ b/sysdeps/hppa/fpu/ftestexcept.c @@ -23,10 +23,12 @@ int fetestexcept (int excepts) { - unsigned int sw[2]; + union { unsigned long long l; unsigned int sw[2] } s; /* Get the current status word. 
*/ - __asm__ ("fstd %%fr0,0(%1)" : "=m" (*sw) : "r" (sw)); + __asm__ ("fstd %%fr0,0(%1) \n\t" + "fldd 0(%1),%%fr0 \n\t" + : "=m" (s.l) : "r" (&s.l)); - return (sw[0] >> 27) & excepts & FE_ALL_EXCEPT; + return (s.sw[0] >> 27) & excepts & FE_ALL_EXCEPT; } diff --git a/sysdeps/hppa/fpu/libm-test-ulps b/sysdeps/hppa/fpu/libm-test-ulps index b514496..c4ffefa 100644 --- a/sysdeps/hppa/fpu/libm-test-ulps +++ b/sysdeps/hppa/fpu/libm-test-ulps @@ -1,6 +1,9 @@ # Begin of automatic generation # atan2 +Test "atan2 (-0.00756827042671106339, -.001792735857538728036) == -1.80338464113663849327153994380": +float: 6 +ifloat: 6 Test "atan2 (-0.75, -1.0) == -2.49809154479650885165983415456218025": float: 3 ifloat: 3 @@ -258,9 +261,6 @@ float: 1 ifloat: 1 # ctan -Test "Real part of: ctan (-2 - 3 i) == 0.376402564150424829275122113032269084e-2 - 1.00323862735360980144635859782192726 i": -double: 1 -idouble: 1 Test "Imaginary part of: ctan (0.75 + 1.25 i) == 0.160807785916206426725166058173438663 + 0.975363285031235646193581759755216379 i": double: 1 idouble: 1 @@ -479,6 +479,11 @@ Test "log1p (-0.25) == -0.287682072451780927439219005993827432": float: 1 ifloat: 1 +# lround +Test "lround (1071930.0008) == 1071930": +double: -214511494 +idouble: -214511494 + # sincos Test "sincos (M_PI_6l*2.0, &sin_res, &cos_res) puts 0.5 in cos_res": double: 1 @@ -640,8 +645,8 @@ idouble: 1 # Maximal error of functions: Function: "atan2": -float: 3 -ifloat: 3 +float: 6 +ifloat: 6 Function: "atanh": float: 1 @@ -777,10 +782,6 @@ Function: Real part of "csqrt": float: 1 ifloat: 1 -Function: Real part of "ctan": -double: 1 -idouble: 1 - Function: Imaginary part of "ctan": double: 1 idouble: 1 -- 2.7.4