From 0347518d6326846cd5fdbe4b472dfc85a2dfc78c Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 16 Apr 2012 22:08:04 -0400 Subject: [PATCH] ia64: strip trailing whitespace Many ia64 files have trailing whitespace which gets in the way and annoys me. So strip it away: find `find sysdeps/ -name ia64` -type f -exec sed -i 's:[[:space:]]*$::' {} + Signed-off-by: Mike Frysinger --- ChangeLog.ia64 | 107 + sysdeps/ia64/fpu/e_acosf.S | 238 +- sysdeps/ia64/fpu/e_acoshl.S | 482 ++-- sysdeps/ia64/fpu/e_acosl.S | 126 +- sysdeps/ia64/fpu/e_asinf.S | 254 +-- sysdeps/ia64/fpu/e_asinl.S | 126 +- sysdeps/ia64/fpu/e_atan2f.S | 256 +-- sysdeps/ia64/fpu/e_atanhl.S | 130 +- sysdeps/ia64/fpu/e_coshl.S | 108 +- sysdeps/ia64/fpu/e_exp.S | 2 +- sysdeps/ia64/fpu/e_expf.S | 4 +- sysdeps/ia64/fpu/e_fmodl.S | 4 +- sysdeps/ia64/fpu/e_hypot.S | 46 +- sysdeps/ia64/fpu/e_hypotf.S | 52 +- sysdeps/ia64/fpu/e_hypotl.S | 44 +- sysdeps/ia64/fpu/e_log.S | 6 +- sysdeps/ia64/fpu/e_log2.S | 78 +- sysdeps/ia64/fpu/e_log2f.S | 78 +- sysdeps/ia64/fpu/e_log2l.S | 92 +- sysdeps/ia64/fpu/e_logl.S | 226 +- sysdeps/ia64/fpu/e_powf.S | 6 +- sysdeps/ia64/fpu/e_remainder.S | 134 +- sysdeps/ia64/fpu/e_remainderf.S | 148 +- sysdeps/ia64/fpu/e_remainderl.S | 140 +- sysdeps/ia64/fpu/e_scalb.S | 2 +- sysdeps/ia64/fpu/e_scalbf.S | 2 +- sysdeps/ia64/fpu/e_scalbl.S | 2 +- sysdeps/ia64/fpu/e_sinhl.S | 110 +- sysdeps/ia64/fpu/e_sqrt.S | 32 +- sysdeps/ia64/fpu/e_sqrtf.S | 38 +- sysdeps/ia64/fpu/e_sqrtl.S | 26 +- sysdeps/ia64/fpu/libm_cpu_defs.h | 4 +- sysdeps/ia64/fpu/libm_error_codes.h | 10 +- sysdeps/ia64/fpu/libm_frexp.S | 34 +- sysdeps/ia64/fpu/libm_frexpf.S | 34 +- sysdeps/ia64/fpu/libm_frexpl.S | 34 +- sysdeps/ia64/fpu/libm_scalblnf.S | 4 +- sysdeps/ia64/fpu/libm_tan.S | 94 +- sysdeps/ia64/fpu/s_asinhl.S | 194 +- sysdeps/ia64/fpu/s_atanf.S | 248 +-- sysdeps/ia64/fpu/s_atanl.S | 150 +- sysdeps/ia64/fpu/s_cbrtl.S | 14 +- sysdeps/ia64/fpu/s_cos.S | 46 +- sysdeps/ia64/fpu/s_cosf.S | 36 +- sysdeps/ia64/fpu/s_erf.S | 114 
+- sysdeps/ia64/fpu/s_erfc.S | 256 +-- sysdeps/ia64/fpu/s_erfcf.S | 186 +- sysdeps/ia64/fpu/s_erfcl.S | 448 ++-- sysdeps/ia64/fpu/s_erff.S | 66 +- sysdeps/ia64/fpu/s_erfl.S | 236 +- sysdeps/ia64/fpu/s_expm1.S | 2 +- sysdeps/ia64/fpu/s_expm1f.S | 6 +- sysdeps/ia64/fpu/s_expm1l.S | 186 +- sysdeps/ia64/fpu/s_fabs.S | 30 +- sysdeps/ia64/fpu/s_fabsf.S | 30 +- sysdeps/ia64/fpu/s_fabsl.S | 30 +- sysdeps/ia64/fpu/s_finite.S | 2 +- sysdeps/ia64/fpu/s_fma.S | 24 +- sysdeps/ia64/fpu/s_fmaf.S | 24 +- sysdeps/ia64/fpu/s_fmal.S | 24 +- sysdeps/ia64/fpu/s_fmax.S | 28 +- sysdeps/ia64/fpu/s_fmaxf.S | 28 +- sysdeps/ia64/fpu/s_fmaxl.S | 28 +- sysdeps/ia64/fpu/s_fpclassify.S | 2 +- sysdeps/ia64/fpu/s_frexp.c | 10 +- sysdeps/ia64/fpu/s_frexpf.c | 10 +- sysdeps/ia64/fpu/s_frexpl.c | 10 +- sysdeps/ia64/fpu/s_ldexp.c | 12 +- sysdeps/ia64/fpu/s_ldexpf.c | 12 +- sysdeps/ia64/fpu/s_ldexpl.c | 12 +- sysdeps/ia64/fpu/s_log1pl.S | 236 +- sysdeps/ia64/fpu/s_modf.S | 42 +- sysdeps/ia64/fpu/s_modff.S | 42 +- sysdeps/ia64/fpu/s_modfl.S | 42 +- sysdeps/ia64/fpu/s_nextafter.S | 66 +- sysdeps/ia64/fpu/s_nextafterf.S | 66 +- sysdeps/ia64/fpu/s_nextafterl.S | 66 +- sysdeps/ia64/fpu/s_nexttoward.S | 66 +- sysdeps/ia64/fpu/s_nexttowardf.S | 66 +- sysdeps/ia64/fpu/s_nexttowardl.S | 64 +- sysdeps/ia64/fpu/s_round.S | 2 +- sysdeps/ia64/fpu/s_roundf.S | 2 +- sysdeps/ia64/fpu/s_roundl.S | 2 +- sysdeps/ia64/fpu/s_scalblnf.c | 12 +- sysdeps/ia64/fpu/s_scalbn.c | 12 +- sysdeps/ia64/fpu/s_scalbnf.c | 12 +- sysdeps/ia64/fpu/s_scalbnl.c | 12 +- sysdeps/ia64/fpu/s_signbit.S | 2 +- sysdeps/ia64/fpu/s_significand.S | 34 +- sysdeps/ia64/fpu/s_significandf.S | 32 +- sysdeps/ia64/fpu/s_significandl.S | 38 +- sysdeps/ia64/fpu/s_tan.S | 18 +- sysdeps/ia64/fpu/s_tanf.S | 18 +- sysdeps/ia64/fpu/s_tanh.S | 130 +- sysdeps/ia64/fpu/s_tanhf.S | 70 +- sysdeps/ia64/fpu/s_tanhl.S | 234 +- sysdeps/ia64/fpu/s_tanl.S | 22 +- sysdeps/ia64/fpu/w_tgamma.S | 196 +- sysdeps/ia64/fpu/w_tgammaf.S | 112 +- sysdeps/ia64/fpu/w_tgammal.S | 3094 
+++++++++++++------------- sysdeps/ia64/softpipe.h | 2 +- sysdeps/ia64/strchr.S | 8 +- sysdeps/ia64/strlen.S | 6 +- sysdeps/ia64/strncmp.S | 2 +- sysdeps/unix/sysv/linux/ia64/register-dump.h | 2 +- 105 files changed, 5377 insertions(+), 5270 deletions(-) diff --git a/ChangeLog.ia64 b/ChangeLog.ia64 index 198dd0b..4c9b9f1 100644 --- a/ChangeLog.ia64 +++ b/ChangeLog.ia64 @@ -1,5 +1,112 @@ 2012-04-22 Mike Frysinger + * sysdeps/ia64/fpu/e_acosf.S: Trim trailing whitespace. + * sysdeps/ia64/fpu/e_acoshl.S: Likewise. + * sysdeps/ia64/fpu/e_acosl.S: Likewise. + * sysdeps/ia64/fpu/e_asinf.S: Likewise. + * sysdeps/ia64/fpu/e_asinl.S: Likewise. + * sysdeps/ia64/fpu/e_atan2f.S: Likewise. + * sysdeps/ia64/fpu/e_atanhl.S: Likewise. + * sysdeps/ia64/fpu/e_coshl.S: Likewise. + * sysdeps/ia64/fpu/e_exp.S: Likewise. + * sysdeps/ia64/fpu/e_expf.S: Likewise. + * sysdeps/ia64/fpu/e_fmodl.S: Likewise. + * sysdeps/ia64/fpu/e_hypot.S: Likewise. + * sysdeps/ia64/fpu/e_hypotf.S: Likewise. + * sysdeps/ia64/fpu/e_hypotl.S: Likewise. + * sysdeps/ia64/fpu/e_log.S: Likewise. + * sysdeps/ia64/fpu/e_log2.S: Likewise. + * sysdeps/ia64/fpu/e_log2f.S: Likewise. + * sysdeps/ia64/fpu/e_log2l.S: Likewise. + * sysdeps/ia64/fpu/e_logl.S: Likewise. + * sysdeps/ia64/fpu/e_powf.S: Likewise. + * sysdeps/ia64/fpu/e_remainder.S: Likewise. + * sysdeps/ia64/fpu/e_remainderf.S: Likewise. + * sysdeps/ia64/fpu/e_remainderl.S: Likewise. + * sysdeps/ia64/fpu/e_scalb.S: Likewise. + * sysdeps/ia64/fpu/e_scalbf.S: Likewise. + * sysdeps/ia64/fpu/e_scalbl.S: Likewise. + * sysdeps/ia64/fpu/e_sinhl.S: Likewise. + * sysdeps/ia64/fpu/e_sqrt.S: Likewise. + * sysdeps/ia64/fpu/e_sqrtf.S: Likewise. + * sysdeps/ia64/fpu/e_sqrtl.S: Likewise. + * sysdeps/ia64/fpu/libm_cpu_defs.h: Likewise. + * sysdeps/ia64/fpu/libm_error_codes.h: Likewise. + * sysdeps/ia64/fpu/libm_frexp.S: Likewise. + * sysdeps/ia64/fpu/libm_frexpf.S: Likewise. + * sysdeps/ia64/fpu/libm_frexpl.S: Likewise. + * sysdeps/ia64/fpu/libm_scalblnf.S: Likewise. 
+ * sysdeps/ia64/fpu/libm_tan.S: Likewise. + * sysdeps/ia64/fpu/s_asinhl.S: Likewise. + * sysdeps/ia64/fpu/s_atanf.S: Likewise. + * sysdeps/ia64/fpu/s_atanl.S: Likewise. + * sysdeps/ia64/fpu/s_cbrtl.S: Likewise. + * sysdeps/ia64/fpu/s_cos.S: Likewise. + * sysdeps/ia64/fpu/s_cosf.S: Likewise. + * sysdeps/ia64/fpu/s_erf.S: Likewise. + * sysdeps/ia64/fpu/s_erfc.S: Likewise. + * sysdeps/ia64/fpu/s_erfcf.S: Likewise. + * sysdeps/ia64/fpu/s_erfcl.S: Likewise. + * sysdeps/ia64/fpu/s_erff.S: Likewise. + * sysdeps/ia64/fpu/s_erfl.S: Likewise. + * sysdeps/ia64/fpu/s_expm1.S: Likewise. + * sysdeps/ia64/fpu/s_expm1f.S: Likewise. + * sysdeps/ia64/fpu/s_expm1l.S: Likewise. + * sysdeps/ia64/fpu/s_fabs.S: Likewise. + * sysdeps/ia64/fpu/s_fabsf.S: Likewise. + * sysdeps/ia64/fpu/s_fabsl.S: Likewise. + * sysdeps/ia64/fpu/s_finite.S: Likewise. + * sysdeps/ia64/fpu/s_fma.S: Likewise. + * sysdeps/ia64/fpu/s_fmaf.S: Likewise. + * sysdeps/ia64/fpu/s_fmal.S: Likewise. + * sysdeps/ia64/fpu/s_fmax.S: Likewise. + * sysdeps/ia64/fpu/s_fmaxf.S: Likewise. + * sysdeps/ia64/fpu/s_fmaxl.S: Likewise. + * sysdeps/ia64/fpu/s_fpclassify.S: Likewise. + * sysdeps/ia64/fpu/s_frexp.c: Likewise. + * sysdeps/ia64/fpu/s_frexpf.c: Likewise. + * sysdeps/ia64/fpu/s_frexpl.c: Likewise. + * sysdeps/ia64/fpu/s_ldexp.c: Likewise. + * sysdeps/ia64/fpu/s_ldexpf.c: Likewise. + * sysdeps/ia64/fpu/s_ldexpl.c: Likewise. + * sysdeps/ia64/fpu/s_log1pl.S: Likewise. + * sysdeps/ia64/fpu/s_modf.S: Likewise. + * sysdeps/ia64/fpu/s_modff.S: Likewise. + * sysdeps/ia64/fpu/s_modfl.S: Likewise. + * sysdeps/ia64/fpu/s_nextafter.S: Likewise. + * sysdeps/ia64/fpu/s_nextafterf.S: Likewise. + * sysdeps/ia64/fpu/s_nextafterl.S: Likewise. + * sysdeps/ia64/fpu/s_nexttoward.S: Likewise. + * sysdeps/ia64/fpu/s_nexttowardf.S: Likewise. + * sysdeps/ia64/fpu/s_nexttowardl.S: Likewise. + * sysdeps/ia64/fpu/s_round.S: Likewise. + * sysdeps/ia64/fpu/s_roundf.S: Likewise. + * sysdeps/ia64/fpu/s_roundl.S: Likewise. 
+ * sysdeps/ia64/fpu/s_scalblnf.c: Likewise. + * sysdeps/ia64/fpu/s_scalbn.c: Likewise. + * sysdeps/ia64/fpu/s_scalbnf.c: Likewise. + * sysdeps/ia64/fpu/s_scalbnl.c: Likewise. + * sysdeps/ia64/fpu/s_signbit.S: Likewise. + * sysdeps/ia64/fpu/s_significand.S: Likewise. + * sysdeps/ia64/fpu/s_significandf.S: Likewise. + * sysdeps/ia64/fpu/s_significandl.S: Likewise. + * sysdeps/ia64/fpu/s_tan.S: Likewise. + * sysdeps/ia64/fpu/s_tanf.S: Likewise. + * sysdeps/ia64/fpu/s_tanh.S: Likewise. + * sysdeps/ia64/fpu/s_tanhf.S: Likewise. + * sysdeps/ia64/fpu/s_tanhl.S: Likewise. + * sysdeps/ia64/fpu/s_tanl.S: Likewise. + * sysdeps/ia64/fpu/w_tgamma.S: Likewise. + * sysdeps/ia64/fpu/w_tgammaf.S: Likewise. + * sysdeps/ia64/fpu/w_tgammal.S: Likewise. + * sysdeps/ia64/softpipe.h: Likewise. + * sysdeps/ia64/strchr.S: Likewise. + * sysdeps/ia64/strlen.S: Likewise. + * sysdeps/ia64/strncmp.S: Likewise. + * sysdeps/unix/sysv/linux/ia64/register-dump.h: Likewise. + +2012-04-22 Mike Frysinger + * sysdeps/ia64/Implies: Copied from the main tree. * sysdeps/ia64/Makefile: Likewise. * sysdeps/ia64/Versions: Likewise. diff --git a/sysdeps/ia64/fpu/e_acosf.S b/sysdeps/ia64/fpu/e_acosf.S index 68b0b2e..bdcac59 100644 --- a/sysdeps/ia64/fpu/e_acosf.S +++ b/sysdeps/ia64/fpu/e_acosf.S @@ -61,7 +61,7 @@ // The acosf function returns the arc cosine in the range [0, +pi] radians. // acos(1) returns +0 -// acos(x) returns a Nan and raises the invalid exception for |x| >1 +// acos(x) returns a Nan and raises the invalid exception for |x| >1 // |x| <= sqrt(2)/2. get Ax and Bx @@ -249,355 +249,355 @@ LOCAL_OBJECT_END(acosf_coeff_2_table) .section .text GLOBAL_LIBM_ENTRY(acosf) - + // Load the addresses of the two tables. // Then, load the coefficients and other constants. 
-{ .mfi +{ .mfi alloc r32 = ar.pfs,1,8,4,0 fnma.s1 acosf_t = f8,f8,f1 dep.z ACOSF_GR_1by2 = 0x3f,24,8 // 0x3f000000 -} -{ .mfi +} +{ .mfi addl ACOSF_Addr1 = @ltoff(acosf_coeff_1_table),gp fma.s1 acosf_x2 = f8,f8,f0 addl ACOSF_Addr2 = @ltoff(acosf_coeff_2_table),gp ;; } - -{ .mfi + +{ .mfi ld8 ACOSF_Addr1 = [ACOSF_Addr1] fmerge.s acosf_abs_x = f1,f8 dep ACOSF_GR_3by2 = -1,r0,22,8 // 0x3fc00000 -} -{ .mlx +} +{ .mlx nop.m 999 movl ACOSF_GR_5by2 = 0x40200000;; } - -{ .mfi + +{ .mfi setf.s acosf_1by2 = ACOSF_GR_1by2 fmerge.s acosf_sgn_x = f8,f1 nop.i 999 -} -{ .mfi +} +{ .mfi ld8 ACOSF_Addr2 = [ACOSF_Addr2] nop.f 0 nop.i 999;; } - -{ .mfi + +{ .mfi setf.s acosf_5by2 = ACOSF_GR_5by2 fcmp.lt.s1 p11,p12 = f8,f0 nop.i 999;; } -{ .mmf +{ .mmf ldfpd acosf_coeff_P1,acosf_coeff_P4 = [ACOSF_Addr1],16 setf.s acosf_3by2 = ACOSF_GR_3by2 fclass.m.unc p8,p0 = f8, 0xc3 ;; //@qnan | @snan } - -{ .mfi + +{ .mfi ldfpd acosf_coeff_P7,acosf_coeff_P6 = [ACOSF_Addr1],16 fma.s1 acosf_t2 = acosf_t,acosf_t,f0 nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd acosf_coeff_P3,acosf_coeff_P8 = [ACOSF_Addr2],16 fma.s1 acosf_x4 = acosf_x2,acosf_x2,f0 nop.i 999;; } - -{ .mfi + +{ .mfi ldfpd acosf_coeff_P9,acosf_const_sqrt2by2 = [ACOSF_Addr1] fclass.m.unc p10,p0 = f8, 0x07 //@zero nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd acosf_coeff_P5,acosf_coeff_P2 = [ACOSF_Addr2],16 fma.s1 acosf_x3 = f8,acosf_x2,f0 nop.i 999;; } - -{ .mfi + +{ .mfi ldfd acosf_const_piby2 = [ACOSF_Addr2] frsqrta.s1 acosf_B,p0 = acosf_t nop.i 999 -} -{ .mfb +} +{ .mfb nop.m 999 (p8) fma.s.s0 f8 = f8,f1,f0 (p8) br.ret.spnt b0 ;; // Exit if x=nan } - -{ .mfb + +{ .mfb nop.m 999 fcmp.eq.s1 p6,p0 = acosf_abs_x,f1 (p10) br.cond.spnt ACOSF_ZERO ;; // Branch if x=0 -} - -{ .mfi +} + +{ .mfi nop.m 999 fcmp.gt.s1 p9,p0 = acosf_abs_x,f1 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 fma.s1 acosf_x8 = acosf_x4,acosf_x4,f0 nop.i 999 -} -{ .mfb +} +{ .mfb nop.m 999 fma.s1 acosf_t4 = acosf_t2,acosf_t2,f0 (p6) br.cond.spnt ACOSF_ABS_ONE ;; // Branch if |x|=1 -} 
+} -{ .mfi +{ .mfi nop.m 999 fma.s1 acosf_x5 = acosf_x2,acosf_x3,f0 nop.i 999 } -{ .mfb +{ .mfb (p9) mov GR_Parameter_TAG = 59 fma.s1 acosf_yby2 = acosf_t,acosf_1by2,f0 (p9) br.cond.spnt __libm_error_region ;; // Branch if |x|>1 } -{ .mfi +{ .mfi nop.m 999 fma.s1 acosf_Az = acosf_t,acosf_B,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 acosf_B2 = acosf_B,acosf_B,f0 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 acosf_poly_p1 = f8,acosf_coeff_P1,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 acosf_2poly_p1 = acosf_coeff_P1,acosf_t,f1 nop.i 999;; } -{ .mfi +{ .mfi nop.m 999 fma.s1 acosf_poly_p3 = acosf_coeff_P4,acosf_x2,acosf_coeff_P3 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 acosf_2poly_p6 = acosf_coeff_P7,acosf_t,acosf_coeff_P6 nop.i 999;; -} +} -{ .mfi +{ .mfi nop.m 999 fma.s1 acosf_poly_p7 = acosf_x2,acosf_coeff_P8,acosf_coeff_P7 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 acosf_2poly_p2 = acosf_coeff_P3,acosf_t,acosf_coeff_P2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 acosf_poly_p5 = acosf_x2,acosf_coeff_P6,acosf_coeff_P5 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 acosf_2poly_p4 = acosf_coeff_P5,acosf_t,acosf_coeff_P4 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 acosf_x11 = acosf_x8,acosf_x3,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fnma.s1 acosf_dz = acosf_B2,acosf_yby2,acosf_1by2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 acosf_poly_p1a = acosf_x2,acosf_poly_p1,f8 nop.i 999 } -{ .mfi +{ .mfi nop.m 999 fma.s1 acosf_2poly_p8 = acosf_coeff_P9,acosf_t,acosf_coeff_P8 nop.i 999;; } - + // Get the absolute value of x and determine the region in which x lies -{ .mfi +{ .mfi nop.m 999 fcmp.le.s1 p7,p8 = acosf_abs_x,acosf_const_sqrt2by2 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 acosf_poly_p2 = acosf_x2,acosf_poly_p3,acosf_coeff_P2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 acosf_poly_p7a = acosf_x4,acosf_coeff_P9,acosf_poly_p7 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 acosf_2poly_p2a = 
acosf_2poly_p2,acosf_t2,acosf_2poly_p1 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.s1 acosf_sgnx_t4 = acosf_sgn_x,acosf_t4,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p8) fma.s1 acosf_2poly_p4a = acosf_2poly_p6,acosf_t2,acosf_2poly_p4 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.s1 acosf_Sz = acosf_5by2,acosf_dz,acosf_3by2 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p8) fma.s1 acosf_d2z = acosf_dz,acosf_dz,f0 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fnma.d.s1 acosf_sgn_x_piby2 = acosf_sgn_x,acosf_const_piby2,acosf_const_piby2 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p7) fma.s1 acosf_poly_Ax = acosf_x5,acosf_poly_p2,acosf_poly_p1a nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 (p7) fma.s1 acosf_poly_Bx = acosf_x4,acosf_poly_p7a,acosf_poly_p5 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p8) fma.s1 acosf_sgnx_2poly_p2 = acosf_sgn_x,acosf_2poly_p2a,f0 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 fcmp.eq.s0 p6,p0 = f8,f0 // Only purpose is to set D if x denormal nop.i 999 } -{ .mfi +{ .mfi nop.m 999 (p8) fma.s1 acosf_2poly_p4b = acosf_2poly_p8,acosf_t4,acosf_2poly_p4a nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.s1 acosf_Fz = acosf_d2z,acosf_Sz,acosf_dz nop.i 999;; -} +} - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.d.s1 acosf_Pt = acosf_2poly_p4b,acosf_sgnx_t4,acosf_sgnx_2poly_p2 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 (p8) fma.d.s1 acosf_z = acosf_Az,acosf_Fz,acosf_Az nop.i 999 ;; -} - -{ .mfi +} + +{ .mfi nop.m 999 (p7) fma.d.s1 acosf_sinf1 = acosf_x11,acosf_poly_Bx,acosf_poly_Ax nop.i 999;; -} - +} + .pred.rel "mutex",p8,p7 //acosf_pred_GTsqrt2by2,acosf_pred_LEsqrt2by2 -{ .mfi +{ .mfi nop.m 999 (p8) fma.s.s0 f8 = acosf_z,acosf_Pt,acosf_sgn_x_piby2 nop.i 999 -} - -{ .mfb +} + +{ .mfb nop.m 999 (p7) fms.s.s0 f8 = acosf_const_piby2,f1,acosf_sinf1 br.ret.sptk b0 ;; -} +} ACOSF_ZERO: // Here if x=0 -{ .mfb +{ .mfb nop.m 999 fma.s.s0 f8 = acosf_const_piby2,f1,f0 // acosf(0)=pi/2 br.ret.sptk b0 ;; -} +} ACOSF_ABS_ONE: .pred.rel 
"mutex",p11,p12 // Here if |x|=1 -{ .mfi +{ .mfi nop.m 999 (p11) fma.s.s0 f8 = acosf_const_piby2,f1,acosf_const_piby2 // acosf(-1)=pi nop.i 999 -} -{ .mfb +} +{ .mfb nop.m 999 (p12) fma.s.s0 f8 = f1,f0,f0 // acosf(1)=0 br.ret.sptk b0 ;; -} +} GLOBAL_LIBM_END(acosf) diff --git a/sysdeps/ia64/fpu/e_acoshl.S b/sysdeps/ia64/fpu/e_acoshl.S index 42e1f39..1ce292c 100644 --- a/sysdeps/ia64/fpu/e_acoshl.S +++ b/sysdeps/ia64/fpu/e_acoshl.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // //********************************************************************* // -// History: +// History: // 10/01/01 Initial version // 10/10/01 Performance inproved // 12/11/01 Changed huges_logp to not be global @@ -57,7 +57,7 @@ // // Overview of operation //============================================================== -// +// // There are 6 paths: // 1. x = 1 // Return acoshl(x) = 0; @@ -67,37 +67,37 @@ // // 3. x = [S,Q]Nan or +INF // Return acoshl(x) = x + x; -// +// // 4. 'Near 1': 1 < x < 1+1/8 -// Return acoshl(x) = sqrtl(2*y)*(1-P(y)/Q(y)), +// Return acoshl(x) = sqrtl(2*y)*(1-P(y)/Q(y)), // where y = 1, P(y)/Q(y) - rational approximation // // 5. 'Huges': x > 0.5*2^64 // Return acoshl(x) = (logl(2*x-1)); -// +// // 6. 'Main path': 1+1/8 < x < 0.5*2^64 // b_hi + b_lo = x + sqrt(x^2 - 1); // acoshl(x) = logl_special(b_hi, b_lo); -// -// Algorithm description +// +// Algorithm description //============================================================== // // I. 
Near 1 path algorithm // ************************************************************** -// The formula is acoshl(x) = sqrtl(2*y)*(1-P(y)/Q(y)), +// The formula is acoshl(x) = sqrtl(2*y)*(1-P(y)/Q(y)), // where y = 1, P(y)/Q(y) - rational approximation // // 1) y = x - 1, y2 = 2 * y // // 2) Compute in parallel sqrtl(2*y) and P(y)/Q(y) // a) sqrtl computation method described below (main path algorithm, item 2)) -// As result we obtain (gg+gl) - multiprecision result +// As result we obtain (gg+gl) - multiprecision result // as pair of double extended values // b) P(y) and Q(y) calculated without any extra precision manipulations // c) P/Q division: // y = frcpa(Q) initial approximation of 1/Q // z = P*y initial approximation of P/Q -// +// // e = 1 - b*y // e2 = e + e^2 // e1 = e^2 @@ -121,7 +121,7 @@ // b) res = ((((gl + ll) + lh) + hl) + hh) + gg; // (exactly in this order) // -// II. Main path algorithm +// II. Main path algorithm // ( thanks to Peter Markstein for the idea of sqrt(x^2+1) computation! 
) // ********************************************************************** // @@ -130,19 +130,19 @@ // 1) m2 = (m2_hi+m2_lo) = x^2-1 obtaining // ------------------------------------ // m2_hi = x2_hi - 1, where x2_hi = x * x; -// m2_lo = x2_lo + p1_lo, where -// x2_lo = FMS(x*x-x2_hi), +// m2_lo = x2_lo + p1_lo, where +// x2_lo = FMS(x*x-x2_hi), // p1_lo = (1 + m2_hi) - x2_hi; // // 2) g = (g_hi+g_lo) = sqrt(m2) = sqrt(m2_hi+m2_lo) // ---------------------------------------------- // r = invsqrt(m2_hi) (8-bit reciprocal square root approximation); // g = m2_hi * r (first 8 bit-approximation of sqrt); -// +// // h = 0.5 * r; // e = 0.5 - g * h; // g = g * e + g (second 16 bit-approximation of sqrt); -// +// // h = h * e + h; // e = 0.5 - g * h; // g = g * e + g (third 32 bit-approximation of sqrt); @@ -150,7 +150,7 @@ // h = h * e + h; // e = 0.5 - g * h; // g_hi = g * e + g (fourth 64 bit-approximation of sqrt); -// +// // Remainder computation: // h = h * e + h; // d = (m2_hi - g_hi * g_hi) + m2_lo; @@ -160,15 +160,15 @@ // ------------------------------------------------------------------- // b_hi = (g_hi + x) + gl; // b_lo = (x - b_hi) + g_hi + gl; -// +// // Now we pass b presented as sum b_hi + b_lo to special version // of logl function which accept a pair of arguments as -// mutiprecision value. -// +// mutiprecision value. +// // Special log algorithm overview // ================================ // Here we use a table lookup method. The basic idea is that in -// order to compute logl(Arg) for an argument Arg in [1,2), +// order to compute logl(Arg) for an argument Arg in [1,2), // we construct a value G such that G*Arg is close to 1 and that // logl(1/G) is obtainable easily from a table of values calculated // beforehand. 
Thus @@ -198,7 +198,7 @@ // G := G_1 * G_2 * G_3 // r := (G * S_hi - 1) + G * S_lo // -// These G_j's have the property that the product is exactly +// These G_j's have the property that the product is exactly // representable and that |r| < 2^(-12) as a result. // // Step 2: Approximation @@ -217,11 +217,11 @@ // // Registers used //============================================================== -// Floating Point registers used: +// Floating Point registers used: // f8, input // f32 -> f95 (64 registers) -// General registers used: +// General registers used: // r32 -> r67 (36 registers) // Predicate registers used: @@ -229,15 +229,15 @@ // p7 for 'NaNs, Inf' path // p8 for 'near 1' path // p9 for 'huges' path -// p10 for x = 1 +// p10 for x = 1 // p11 for x < 1 // //********************************************************************* // IEEE Special Conditions: // // acoshl(+inf) = +inf -// acoshl(-inf) = QNaN -// acoshl(1) = 0 +// acoshl(-inf) = QNaN +// acoshl(1) = 0 // acoshl(x<1) = QNaN // acoshl(SNaN) = QNaN // acoshl(QNaN) = QNaN @@ -245,38 +245,38 @@ // Data tables //============================================================== - + RODATA .align 64 // Near 1 path rational aproximation coefficients LOCAL_OBJECT_START(Poly_P) -data8 0xB0978143F695D40F, 0x3FF1 // .84205539791447100108478906277453574946e-4 -data8 0xB9800D841A8CAD29, 0x3FF6 // .28305085180397409672905983082168721069e-2 -data8 0xC889F455758C1725, 0x3FF9 // .24479844297887530847660233111267222945e-1 -data8 0x9BE1DFF006F45F12, 0x3FFB // .76114415657565879842941751209926938306e-1 -data8 0x9E34AF4D372861E0, 0x3FFB // .77248925727776366270605984806795850504e-1 -data8 0xF3DC502AEE14C4AE, 0x3FA6 // .3077953476682583606615438814166025592e-26 +data8 0xB0978143F695D40F, 0x3FF1 // .84205539791447100108478906277453574946e-4 +data8 0xB9800D841A8CAD29, 0x3FF6 // .28305085180397409672905983082168721069e-2 +data8 0xC889F455758C1725, 0x3FF9 // .24479844297887530847660233111267222945e-1 +data8 
0x9BE1DFF006F45F12, 0x3FFB // .76114415657565879842941751209926938306e-1 +data8 0x9E34AF4D372861E0, 0x3FFB // .77248925727776366270605984806795850504e-1 +data8 0xF3DC502AEE14C4AE, 0x3FA6 // .3077953476682583606615438814166025592e-26 LOCAL_OBJECT_END(Poly_P) // LOCAL_OBJECT_START(Poly_Q) -data8 0xF76E3FD3C7680357, 0x3FF1 // .11798413344703621030038719253730708525e-3 -data8 0xD107D2E7273263AE, 0x3FF7 // .63791065024872525660782716786703188820e-2 -data8 0xB609BE5CDE206AEF, 0x3FFB // .88885771950814004376363335821980079985e-1 -data8 0xF7DEACAC28067C8A, 0x3FFD // .48412074662702495416825113623936037072302 -data8 0x8F9BE5890CEC7E38, 0x3FFF // 1.1219450873557867470217771071068369729526 -data8 0xED4F06F3D2BC92D1, 0x3FFE // .92698710873331639524734537734804056798748 +data8 0xF76E3FD3C7680357, 0x3FF1 // .11798413344703621030038719253730708525e-3 +data8 0xD107D2E7273263AE, 0x3FF7 // .63791065024872525660782716786703188820e-2 +data8 0xB609BE5CDE206AEF, 0x3FFB // .88885771950814004376363335821980079985e-1 +data8 0xF7DEACAC28067C8A, 0x3FFD // .48412074662702495416825113623936037072302 +data8 0x8F9BE5890CEC7E38, 0x3FFF // 1.1219450873557867470217771071068369729526 +data8 0xED4F06F3D2BC92D1, 0x3FFE // .92698710873331639524734537734804056798748 LOCAL_OBJECT_END(Poly_Q) -// Q coeffs +// Q coeffs LOCAL_OBJECT_START(Constants_Q) -data4 0x00000000,0xB1721800,0x00003FFE,0x00000000 +data4 0x00000000,0xB1721800,0x00003FFE,0x00000000 data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000 data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000 data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000 data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000 -data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000 +data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000 LOCAL_OBJECT_END(Constants_Q) // Z1 - 16 bit fixed @@ -391,7 +391,7 @@ data4 0x3F71D488,0x3D693B9D data8 0xBE049391B6B7C239 LOCAL_OBJECT_END(Constants_G_H_h2) -// G3 and H3 - IEEE single and h3 - IEEE double +// G3 and H3 - IEEE single and h3 - IEEE double 
LOCAL_OBJECT_START(Constants_G_H_h3) data4 0x3F7FFC00,0x38800100 data8 0x3D355595562224CD @@ -481,16 +481,16 @@ FR_QQ3 = f41 FR_QQ4 = f42 FR_QQ5 = f43 -FR_Q1 = f44 -FR_Q2 = f45 -FR_Q3 = f46 -FR_Q4 = f47 +FR_Q1 = f44 +FR_Q2 = f45 +FR_Q3 = f46 +FR_Q4 = f47 FR_Half = f48 FR_Two = f49 -FR_log2_hi = f50 -FR_log2_lo = f51 +FR_log2_hi = f50 +FR_log2_lo = f51 FR_X2 = f52 @@ -512,14 +512,14 @@ FR_XM12 = f64 // Special logl registers -FR_XLog_Hi = f65 -FR_XLog_Lo = f66 +FR_XLog_Hi = f65 +FR_XLog_Lo = f66 -FR_Y_hi = f67 +FR_Y_hi = f67 FR_Y_lo = f68 -FR_S_hi = f69 -FR_S_lo = f70 +FR_S_hi = f69 +FR_S_lo = f70 FR_poly_lo = f71 FR_poly_hi = f72 @@ -530,19 +530,19 @@ FR_h = f75 FR_G2 = f76 FR_H2 = f77 -FR_h2 = f78 +FR_h2 = f78 -FR_r = f79 -FR_rsq = f80 -FR_rcub = f81 +FR_r = f79 +FR_rsq = f80 +FR_rcub = f81 -FR_float_N = f82 +FR_float_N = f82 -FR_G3 = f83 -FR_H3 = f84 -FR_h3 = f85 +FR_G3 = f83 +FR_H3 = f84 +FR_h3 = f85 -FR_2_to_minus_N = f86 +FR_2_to_minus_N = f86 // Near 1 registers @@ -561,7 +561,7 @@ FR_QV3 = f75 FR_QV2 = f76 FR_Y0 = f77 -FR_Q0 = f78 +FR_Q0 = f78 FR_E0 = f79 FR_E2 = f80 FR_E1 = f81 @@ -601,23 +601,23 @@ GR_Poly_P = r37 GR_Poly_Q = r38 // Special logl registers -GR_Index1 = r39 -GR_Index2 = r40 -GR_signif = r41 -GR_X_0 = r42 -GR_X_1 = r43 -GR_X_2 = r44 +GR_Index1 = r39 +GR_Index2 = r40 +GR_signif = r41 +GR_X_0 = r42 +GR_X_1 = r43 +GR_X_2 = r44 GR_minus_N = r45 -GR_Z_1 = r46 -GR_Z_2 = r47 -GR_N = r48 -GR_Bias = r49 -GR_M = r50 -GR_Index3 = r51 -GR_exp_2tom80 = r52 -GR_exp_mask = r53 -GR_exp_2tom7 = r54 -GR_ad_ln10 = r55 +GR_Z_1 = r46 +GR_Z_2 = r47 +GR_N = r48 +GR_Bias = r49 +GR_M = r50 +GR_Index3 = r51 +GR_exp_2tom80 = r52 +GR_exp_mask = r53 +GR_exp_2tom7 = r54 +GR_ad_ln10 = r55 GR_ad_tbl_1 = r56 GR_ad_tbl_2 = r57 GR_ad_tbl_3 = r58 @@ -652,29 +652,29 @@ GLOBAL_LIBM_ENTRY(acoshl) addl GR_Poly_Q = @ltoff(Poly_Q), gp // Address of Q-coeff table fma.s1 FR_X2 = FR_Arg, FR_Arg, f0 // Obtain x^2 addl GR_Poly_P = @ltoff(Poly_P), gp // Address of P-coeff table -};; +};; 
-{ .mfi +{ .mfi getf.d GR_Arg = FR_Arg // get arument as double (int64) fma.s0 FR_Two = f1, f1, f1 // construct 2.0 addl GR_ad_z_1 = @ltoff(Constants_Z_1#),gp // logl tables } -{ .mlx - nop.m 0 +{ .mlx + nop.m 0 movl GR_TwoP63 = 0x43E8000000000000 // 0.5*2^63 (huge arguments) -};; +};; -{ .mfi +{ .mfi ld8 GR_Poly_P = [GR_Poly_P] // get actual P-coeff table address fcmp.eq.s1 p10, p0 = FR_Arg, f1 // if arg == 1 (return 0) nop.i 0 } -{ .mlx +{ .mlx ld8 GR_Poly_Q = [GR_Poly_Q] // get actual Q-coeff table address movl GR_OneP125 = 0x3FF2000000000000 // 1.125 (near 1 path bound) };; -{ .mfi +{ .mfi ld8 GR_ad_z_1 = [GR_ad_z_1] // Get pointer to Constants_Z_1 fclass.m p7,p0 = FR_Arg, 0xe3 // if arg NaN inf cmp.le p9, p0 = GR_TwoP63, GR_Arg // if arg > 0.5*2^63 ('huges') @@ -683,31 +683,31 @@ GLOBAL_LIBM_ENTRY(acoshl) cmp.ge p8, p0 = GR_OneP125, GR_Arg // if arg<1.125 -near 1 path fms.s1 FR_XM1 = FR_Arg, f1, f1 // X0 = X-1 (for near 1 path) (p11) br.cond.spnt acoshl_lt_pone // error branch (less than 1) -};; +};; -{ .mmi +{ .mmi setf.exp FR_Half = GR_Half // construct 0.5 (p9) setf.s FR_XLog_Lo = r0 // Low of logl arg=0 (Huges path) mov GR_exp_mask = 0x1FFFF // Create exponent mask -};; +};; -{ .mmf +{ .mmf (p8) ldfe FR_PP5 = [GR_Poly_P],16 // Load P5 (p8) ldfe FR_QQ5 = [GR_Poly_Q],16 // Load Q5 fms.s1 FR_M2 = FR_X2, f1, f1 // m2 = x^2 - 1 };; -{ .mfi +{ .mfi (p8) ldfe FR_QQ4 = [GR_Poly_Q],16 // Load Q4 - fms.s1 FR_M2L = FR_Arg, FR_Arg, FR_X2 // low part of + fms.s1 FR_M2L = FR_Arg, FR_Arg, FR_X2 // low part of // m2 = fma(X*X - m2) add GR_ad_tbl_1 = 0x040, GR_ad_z_1 // Point to Constants_G_H_h1 } { .mfb -(p8) ldfe FR_PP4 = [GR_Poly_P],16 // Load P4 +(p8) ldfe FR_PP4 = [GR_Poly_P],16 // Load P4 (p7) fma.s0 FR_Res = FR_Arg,f1,FR_Arg // r = a + a (Nan, Inf) (p7) br.ret.spnt b0 // return (Nan, Inf) -};; +};; { .mfi (p8) ldfe FR_PP3 = [GR_Poly_P],16 // Load P3 @@ -719,9 +719,9 @@ GLOBAL_LIBM_ENTRY(acoshl) (p9) fms.s1 FR_XLog_Hi = FR_Two, FR_Arg, f1 // Hi of log arg = 2*X-1 
(p9) br.cond.spnt huges_logl // special version of log } -;; +;; -{ .mfi +{ .mfi (p8) ldfe FR_PP2 = [GR_Poly_P],16 // Load P2 (p8) fma.s1 FR_2XM1 = FR_Two, FR_XM1, f0 // 2X0 = 2 * X0 add GR_ad_z_2 = 0x140, GR_ad_z_1 // Point to Constants_Z_2 @@ -729,18 +729,18 @@ GLOBAL_LIBM_ENTRY(acoshl) { .mfb (p8) ldfe FR_QQ2 = [GR_Poly_Q],16 // Load Q2 (p10) fma.s0 FR_Res = f0,f1,f0 // r = 0 (arg = 1) -(p10) br.ret.spnt b0 // return (arg = 1) -};; +(p10) br.ret.spnt b0 // return (arg = 1) +};; -{ .mmi +{ .mmi (p8) ldfe FR_PP1 = [GR_Poly_P],16 // Load P1 (p8) ldfe FR_QQ1 = [GR_Poly_Q],16 // Load Q1 add GR_ad_tbl_2 = 0x180, GR_ad_z_1 // Point to Constants_G_H_h2 } ;; -{ .mfi -(p8) ldfe FR_PP0 = [GR_Poly_P] // Load P0 +{ .mfi +(p8) ldfe FR_PP0 = [GR_Poly_P] // Load P0 fma.s1 FR_Tmp = f1, f1, FR_M2 // Tmp = 1 + m2 add GR_ad_tbl_3 = 0x280, GR_ad_z_1 // Point to Constants_G_H_h3 } @@ -748,17 +748,17 @@ GLOBAL_LIBM_ENTRY(acoshl) (p8) ldfe FR_QQ0 = [GR_Poly_Q] nop.f 0 (p8) br.cond.spnt near_1 // near 1 path -};; -{ .mfi +};; +{ .mfi ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi nop.f 0 mov GR_Bias = 0x0FFFF // Create exponent bias };; -{ .mfi +{ .mfi nop.m 0 frsqrta.s1 FR_Rcp, p0 = FR_M2 // Rcp = 1/m2 reciprocal appr. 
nop.i 0 -};; +};; { .mfi ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo @@ -773,7 +773,7 @@ GLOBAL_LIBM_ENTRY(acoshl) nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_HH = FR_Half, FR_Rcp, f0 // h = 0.5 * Rcp nop.i 0 };; @@ -783,14 +783,14 @@ GLOBAL_LIBM_ENTRY(acoshl) nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_M2L = FR_Tmp, f1, FR_M2L // low part of m2 = Tmp+m2l nop.i 0 };; { .mfi ldfe FR_Q2 = [GR_ad_q],16 // Load Q2 - fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g + fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g // 16 bit Newton Raphson iteration nop.i 0 } @@ -807,7 +807,7 @@ GLOBAL_LIBM_ENTRY(acoshl) };; { .mfi nop.m 0 - fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g + fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g // 32 bit Newton Raphson iteration nop.i 0 } @@ -825,7 +825,7 @@ GLOBAL_LIBM_ENTRY(acoshl) { .mfi nop.m 0 - fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g + fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g // 64 bit Newton Raphson iteration nop.i 0 } @@ -920,7 +920,7 @@ GLOBAL_LIBM_ENTRY(acoshl) { .mfi nop.m 0 nop.f 0 - extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 + extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 };; { .mfi @@ -952,7 +952,7 @@ GLOBAL_LIBM_ENTRY(acoshl) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // Get bits 30-15 of X_1 * Z_2 };; -// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!) +// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!) // BECAUSE OF POSSIBLE 10 CLOCKS STALL! 
// (Just nops added - nothing to do here) @@ -1093,7 +1093,7 @@ GLOBAL_LIBM_ENTRY(acoshl) { .mfi nop.m 0 - fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo + fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo=poly_hi+poly_lo nop.i 0 };; @@ -1166,7 +1166,7 @@ huges_logl: { .mmi ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo - sub GR_N = GR_N, GR_Bias + sub GR_N = GR_N, GR_Bias mov GR_exp_2tom80 = 0x0ffaf // Exponent of 2^-80 };; @@ -1185,7 +1185,7 @@ huges_logl: { .mmi ldfe FR_Q2 = [GR_ad_q],16 // Load Q2 nop.m 0 - extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 + extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 };; { .mmi @@ -1218,7 +1218,7 @@ huges_logl: pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // Get bits 30-15 of X_1*Z_2 };; -// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!) +// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!) // BECAUSE OF POSSIBLE 10 CLOCKS STALL! // (Just nops added - nothing to do here) @@ -1344,7 +1344,7 @@ huges_logl: };; { .mfi nop.m 0 - fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo=poly_hi+poly_lo + fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo=poly_hi+poly_lo nop.i 0 };; { .mfb @@ -1356,279 +1356,279 @@ huges_logl: // NEAR ONE INTERVAL near_1: -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 frsqrta.s1 FR_Rcp, p0 = FR_2XM1 // Rcp = 1/x reciprocal appr. 
&SQRT& - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_PV6 = FR_PP5, FR_XM1, FR_PP4 // pv6 = P5*xm1+P4 $POLY$ - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_QV6 = FR_QQ5, FR_XM1, FR_QQ4 // qv6 = Q5*xm1+Q4 $POLY$ - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_PV4 = FR_PP3, FR_XM1, FR_PP2 // pv4 = P3*xm1+P2 $POLY$ - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_QV4 = FR_QQ3, FR_XM1, FR_QQ2 // qv4 = Q3*xm1+Q2 $POLY$ - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_XM12 = FR_XM1, FR_XM1, f0 // xm1^2 = xm1 * xm1 $POLY$ - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_PV2 = FR_PP1, FR_XM1, FR_PP0 // pv2 = P1*xm1+P0 $POLY$ - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_QV2 = FR_QQ1, FR_XM1, FR_QQ0 // qv2 = Q1*xm1+Q0 $POLY$ - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 - fma.s1 FR_GG = FR_Rcp, FR_2XM1, f0 // g = Rcp * x &SQRT& - nop.i 0 +{ .mfi + nop.m 0 + fma.s1 FR_GG = FR_Rcp, FR_2XM1, f0 // g = Rcp * x &SQRT& + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_HH = FR_Half, FR_Rcp, f0 // h = 0.5 * Rcp &SQRT& - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_PV3 = FR_XM12, FR_PV6, FR_PV4//pv3=pv6*xm1^2+pv4 $POLY$ - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_QV3 = FR_XM12, FR_QV6, FR_QV4//qv3=qv6*xm1^2+qv4 $POLY$ - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g * h &SQRT& - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_PP = FR_XM12, FR_PV3, FR_PV2 //pp=pv3*xm1^2+pv2 $POLY$ - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_QQ = FR_XM12, FR_QV3, FR_QV2 //qq=qv3*xm1^2+qv2 $POLY$ - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g &SQRT& - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h &SQRT& - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + 
nop.m 0 frcpa.s1 FR_Y0,p0 = f1,FR_QQ // y = frcpa(b) #DIV# - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g*h &SQRT& - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_Q0 = FR_PP,FR_Y0,f0 // q = a*y #DIV# - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fnma.s1 FR_E0 = FR_Y0,FR_QQ,f1 // e = 1 - b*y #DIV# - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 - fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g &SQRT& - nop.i 0 + nop.m 0 + fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g &SQRT& + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h &SQRT& - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_E2 = FR_E0,FR_E0,FR_E0 // e2 = e+e^2 #DIV# - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_E1 = FR_E0,FR_E0,f0 // e1 = e^2 #DIV# - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g * h &SQRT& - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fnma.s1 FR_DD = FR_GG, FR_GG, FR_2XM1 // d = x - g * g &SQRT& - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_Y1 = FR_Y0,FR_E2,FR_Y0 // y1 = y+y*e2 #DIV# - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_E3 = FR_E1,FR_E1,FR_E0 // e3 = e+e1^2 #DIV# - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_GG = FR_DD, FR_HH, FR_GG // g = d * h + g &SQRT& - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h &SQRT& - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_Y2 = FR_Y1,FR_E3,FR_Y0 // y2 = y+y1*e3 #DIV# - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fnma.s1 FR_R0 = FR_QQ,FR_Q0,FR_PP // r = a-b*q #DIV# - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 - fnma.s1 FR_DD = FR_GG, FR_GG, FR_2XM1 // d = x - g * g &SQRT& - nop.i 0 + nop.m 0 + fnma.s1 FR_DD = FR_GG, FR_GG, FR_2XM1 // d = x - g * g &SQRT& + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fnma.s1 FR_E4 = FR_QQ,FR_Y2,f1 // e4 = 1-b*y2 #DIV# - 
nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_X_Hi = FR_R0,FR_Y2,FR_Q0 // x = q+r*y2 #DIV# - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_GL = FR_DD, FR_HH, f0 // gl = d * h &SQRT& - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_Y3 = FR_Y2,FR_E4,FR_Y2 // y3 = y2+y2*e4 #DIV# - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fnma.s1 FR_R1 = FR_QQ,FR_X_Hi,FR_PP // r1 = a-b*x #DIV# - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_HH = FR_GG, FR_X_Hi, f0 // hh = gg * x_hi - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_LH = FR_GL, FR_X_Hi, f0 // lh = gl * x_hi - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_X_lo = FR_R1,FR_Y3,f0 // x_lo = r1*y3 #DIV# - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_LL = FR_GL, FR_X_lo, f0 // ll = gl*x_lo - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_HL = FR_GG, FR_X_lo, f0 // hl = gg * x_lo - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fms.s1 FR_Res = FR_GL, f1, FR_LL // res = gl + ll - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fms.s1 FR_Res = FR_Res, f1, FR_LH // res = res + lh - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fms.s1 FR_Res = FR_Res, f1, FR_HL // res = res + hl - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fms.s1 FR_Res = FR_Res, f1, FR_HH // res = res + hh - nop.i 0 + nop.i 0 };; { .mfb - nop.m 0 + nop.m 0 fma.s0 FR_Res = FR_Res, f1, FR_GG // result = res + gg br.ret.sptk b0 // Exit for near 1 path };; @@ -1639,9 +1639,9 @@ near_1: acoshl_lt_pone: { .mfi - nop.m 0 + nop.m 0 fmerge.s FR_Arg_X = FR_Arg, FR_Arg - nop.i 0 + nop.i 0 };; { .mfb mov GR_Parameter_TAG = 135 @@ -1679,7 +1679,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mib stfe [GR_Parameter_X] = FR_Arg_X // Parameter 1 to stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + nop.b 0 } { .mib stfe [GR_Parameter_Y] = FR_Res // Parameter 3 to stack diff --git a/sysdeps/ia64/fpu/e_acosl.S b/sysdeps/ia64/fpu/e_acosl.S 
index 4fd345b..0983bc4 100644 --- a/sysdeps/ia64/fpu/e_acosl.S +++ b/sysdeps/ia64/fpu/e_acosl.S @@ -690,70 +690,70 @@ F_CS6 = f36 F_CS7 = f37 F_CS8 = f38 F_CS9 = f39 -F_S23 = f40 -F_S45 = f41 -F_S67 = f42 -F_S89 = f43 -F_S25 = f44 -F_S69 = f45 -F_S29 = f46 -F_X2 = f47 -F_X4 = f48 -F_TSQRT = f49 -F_DTX = f50 -F_R = f51 -F_R2 = f52 -F_R3 = f53 -F_R4 = f54 - -F_C3 = f55 -F_C5 = f56 -F_C7 = f57 -F_C9 = f58 -F_P79 = f59 -F_P35 = f60 -F_P39 = f61 - -F_ATHI = f62 -F_ATLO = f63 - -F_T1 = f64 -F_Y = f65 -F_Y2 = f66 -F_ANDMASK = f67 -F_ORMASK = f68 -F_S = f69 -F_05 = f70 -F_SQRT_1S2 = f71 -F_DS = f72 -F_Z = f73 -F_1T2 = f74 -F_DZ = f75 -F_ZE = f76 -F_YZ = f77 -F_Y1S2 = f78 -F_Y1S2X = f79 -F_1X = f80 -F_ST = f81 -F_1T2_ST = f82 -F_TSS = f83 -F_Y1S2X2 = f84 -F_DZ_TERM = f85 -F_DTS = f86 -F_DS2X = f87 -F_T2 = f88 -F_ZY1S2S = f89 -F_Y1S2_1X = f90 +F_S23 = f40 +F_S45 = f41 +F_S67 = f42 +F_S89 = f43 +F_S25 = f44 +F_S69 = f45 +F_S29 = f46 +F_X2 = f47 +F_X4 = f48 +F_TSQRT = f49 +F_DTX = f50 +F_R = f51 +F_R2 = f52 +F_R3 = f53 +F_R4 = f54 + +F_C3 = f55 +F_C5 = f56 +F_C7 = f57 +F_C9 = f58 +F_P79 = f59 +F_P35 = f60 +F_P39 = f61 + +F_ATHI = f62 +F_ATLO = f63 + +F_T1 = f64 +F_Y = f65 +F_Y2 = f66 +F_ANDMASK = f67 +F_ORMASK = f68 +F_S = f69 +F_05 = f70 +F_SQRT_1S2 = f71 +F_DS = f72 +F_Z = f73 +F_1T2 = f74 +F_DZ = f75 +F_ZE = f76 +F_YZ = f77 +F_Y1S2 = f78 +F_Y1S2X = f79 +F_1X = f80 +F_ST = f81 +F_1T2_ST = f82 +F_TSS = f83 +F_Y1S2X2 = f84 +F_DZ_TERM = f85 +F_DTS = f86 +F_DS2X = f87 +F_T2 = f88 +F_ZY1S2S = f89 +F_Y1S2_1X = f90 F_TS = f91 -F_PI2_LO = f92 -F_PI2_HI = f93 -F_S19 = f94 -F_INV1T2_2 = f95 -F_CORR = f96 -F_DZ0 = f97 - -F_C11 = f98 -F_C13 = f99 +F_PI2_LO = f92 +F_PI2_HI = f93 +F_S19 = f94 +F_INV1T2_2 = f95 +F_CORR = f96 +F_DZ0 = f97 + +F_C11 = f98 +F_C13 = f99 F_C15 = f100 F_C17 = f101 F_P1113 = f102 diff --git a/sysdeps/ia64/fpu/e_asinf.S b/sysdeps/ia64/fpu/e_asinf.S index af24165..74a18dd 100644 --- a/sysdeps/ia64/fpu/e_asinf.S +++ b/sysdeps/ia64/fpu/e_asinf.S @@ -40,9 +40,9 @@ // 
History //============================================================== // 02/02/00 Initial version -// 06/28/00 Improved speed +// 06/28/00 Improved speed // 06/31/00 Changed register allocation because of some duplicate macros -// moved nan exit bundle up to gain a cycle. +// moved nan exit bundle up to gain a cycle. // 08/08/00 Improved speed by avoiding SIR flush. // 08/15/00 Bundle added after call to __libm_error_support to properly // set [the previously overwritten] GR_Parameter_RESULT. @@ -53,13 +53,13 @@ // 05/20/02 Cleaned up namespace and sf0 syntax // 02/06/03 Reordered header: .section, .global, .proc, .align - + // Description //========================================= // The asinf function computes the arc sine of x in the range [-pi,+pi]. // A doman error occurs for arguments not in the range [-1,+1]. // asinf(+-0) returns +-0 -// asinf(x) returns a Nan and raises the invalid exception for |x| >1 +// asinf(x) returns a Nan and raises the invalid exception for |x| >1 // The acosf function returns the arc cosine in the range [0, +pi] radians. // A doman error occurs for arguments not in the range [-1,+1]. @@ -252,351 +252,351 @@ LOCAL_OBJECT_END(asinf_coeff_2_table) .section .text GLOBAL_LIBM_ENTRY(asinf) - + // Load the addresses of the two tables. // Then, load the coefficients and other constants. 
-{ .mfi +{ .mfi alloc r32 = ar.pfs,1,8,4,0 fnma.s1 asinf_t = f8,f8,f1 dep.z ASINF_GR_1by2 = 0x3f,24,8 // 0x3f000000 -} -{ .mfi +} +{ .mfi addl ASINF_Addr1 = @ltoff(asinf_coeff_1_table),gp fma.s1 asinf_x2 = f8,f8,f0 addl ASINF_Addr2 = @ltoff(asinf_coeff_2_table),gp ;; } - -{ .mfi + +{ .mfi ld8 ASINF_Addr1 = [ASINF_Addr1] fmerge.s asinf_abs_x = f1,f8 dep ASINF_GR_3by2 = -1,r0,22,8 // 0x3fc00000 -} -{ .mlx +} +{ .mlx nop.m 999 movl ASINF_GR_5by2 = 0x40200000;; } - -{ .mfi + +{ .mfi setf.s asinf_1by2 = ASINF_GR_1by2 fmerge.s asinf_sgn_x = f8,f1 nop.i 999 -} -{ .mfi +} +{ .mfi ld8 ASINF_Addr2 = [ASINF_Addr2] nop.f 0 nop.i 999;; } - -{ .mfi + +{ .mfi setf.s asinf_5by2 = ASINF_GR_5by2 fcmp.lt.s1 p11,p12 = f8,f0 nop.i 999;; } -{ .mmf +{ .mmf ldfpd asinf_coeff_P1,asinf_coeff_P4 = [ASINF_Addr1],16 setf.s asinf_3by2 = ASINF_GR_3by2 fclass.m.unc p8,p0 = f8, 0xc3 ;; //@qnan | @snan } - -{ .mfi + +{ .mfi ldfpd asinf_coeff_P7,asinf_coeff_P6 = [ASINF_Addr1],16 fma.s1 asinf_t2 = asinf_t,asinf_t,f0 nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd asinf_coeff_P3,asinf_coeff_P8 = [ASINF_Addr2],16 fma.s1 asinf_x4 = asinf_x2,asinf_x2,f0 nop.i 999;; } - -{ .mfi + +{ .mfi ldfpd asinf_coeff_P9,asinf_const_sqrt2by2 = [ASINF_Addr1] fclass.m.unc p10,p0 = f8, 0x07 //@zero nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd asinf_coeff_P5,asinf_coeff_P2 = [ASINF_Addr2],16 fma.s1 asinf_x3 = f8,asinf_x2,f0 nop.i 999;; } - -{ .mfi + +{ .mfi ldfd asinf_const_piby2 = [ASINF_Addr2] frsqrta.s1 asinf_B,p0 = asinf_t nop.i 999 -} -{ .mfb +} +{ .mfb nop.m 999 (p8) fma.s.s0 f8 = f8,f1,f0 (p8) br.ret.spnt b0 ;; // Exit if x=nan } - -{ .mfb + +{ .mfb nop.m 999 fcmp.eq.s1 p6,p0 = asinf_abs_x,f1 (p10) br.ret.spnt b0 ;; // Exit if x=0 -} - -{ .mfi +} + +{ .mfi nop.m 999 fcmp.gt.s1 p9,p0 = asinf_abs_x,f1 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 fma.s1 asinf_x8 = asinf_x4,asinf_x4,f0 nop.i 999 -} -{ .mfb +} +{ .mfb nop.m 999 fma.s1 asinf_t4 = asinf_t2,asinf_t2,f0 (p6) br.cond.spnt ASINF_ABS_ONE ;; // Branch if |x|=1 -} +} -{ .mfi 
+{ .mfi nop.m 999 fma.s1 asinf_x5 = asinf_x2,asinf_x3,f0 nop.i 999 } -{ .mfb +{ .mfb (p9) mov GR_Parameter_TAG = 62 fma.s1 asinf_yby2 = asinf_t,asinf_1by2,f0 (p9) br.cond.spnt __libm_error_region ;; // Branch if |x|>1 } -{ .mfi +{ .mfi nop.m 999 fma.s1 asinf_Az = asinf_t,asinf_B,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 asinf_B2 = asinf_B,asinf_B,f0 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 asinf_poly_p1 = f8,asinf_coeff_P1,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 asinf_2poly_p1 = asinf_coeff_P1,asinf_t,f1 nop.i 999;; } -{ .mfi +{ .mfi nop.m 999 fma.s1 asinf_poly_p3 = asinf_coeff_P4,asinf_x2,asinf_coeff_P3 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 asinf_2poly_p6 = asinf_coeff_P7,asinf_t,asinf_coeff_P6 nop.i 999;; -} +} -{ .mfi +{ .mfi nop.m 999 fma.s1 asinf_poly_p7 = asinf_x2,asinf_coeff_P8,asinf_coeff_P7 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 asinf_2poly_p2 = asinf_coeff_P3,asinf_t,asinf_coeff_P2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 asinf_poly_p5 = asinf_x2,asinf_coeff_P6,asinf_coeff_P5 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 asinf_2poly_p4 = asinf_coeff_P5,asinf_t,asinf_coeff_P4 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.d.s1 asinf_x11 = asinf_x8,asinf_x3,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fnma.s1 asinf_dz = asinf_B2,asinf_yby2,asinf_1by2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 asinf_poly_p1a = asinf_x2,asinf_poly_p1,f8 nop.i 999 } -{ .mfi +{ .mfi nop.m 999 fma.s1 asinf_2poly_p8 = asinf_coeff_P9,asinf_t,asinf_coeff_P8 nop.i 999;; } - + // Get the absolute value of x and determine the region in which x lies -{ .mfi +{ .mfi nop.m 999 fcmp.le.s1 p7,p8 = asinf_abs_x,asinf_const_sqrt2by2 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 asinf_poly_p2 = asinf_x2,asinf_poly_p3,asinf_coeff_P2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 asinf_poly_p7a = asinf_x4,asinf_coeff_P9,asinf_poly_p7 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 asinf_2poly_p2a = 
asinf_2poly_p2,asinf_t2,asinf_2poly_p1 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.s1 asinf_sgnx_t4 = asinf_sgn_x,asinf_t4,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p8) fma.s1 asinf_2poly_p4a = asinf_2poly_p6,asinf_t2,asinf_2poly_p4 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.s1 asinf_Sz = asinf_5by2,asinf_dz,asinf_3by2 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p8) fma.s1 asinf_d2z = asinf_dz,asinf_dz,f0 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.s1 asinf_sgn_x_piby2 = asinf_sgn_x,asinf_const_piby2,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p7) fma.d.s1 asinf_poly_Ax = asinf_x5,asinf_poly_p2,asinf_poly_p1a nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 (p7) fma.d.s1 asinf_poly_Bx = asinf_x4,asinf_poly_p7a,asinf_poly_p5 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p8) fma.s1 asinf_sgnx_2poly_p2 = asinf_sgn_x,asinf_2poly_p2a,f0 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 fcmp.eq.s0 p6,p0 = f8,f0 // Only purpose is to set D if x denormal nop.i 999 } -{ .mfi +{ .mfi nop.m 999 (p8) fma.s1 asinf_2poly_p4b = asinf_2poly_p8,asinf_t4,asinf_2poly_p4a nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.s1 asinf_Fz = asinf_d2z,asinf_Sz,asinf_dz nop.i 999;; -} +} + - -{ .mfi +{ .mfi nop.m 999 (p8) fma.d.s1 asinf_Pt = asinf_2poly_p4b,asinf_sgnx_t4,asinf_sgnx_2poly_p2 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 (p8) fma.d.s1 asinf_z = asinf_Az,asinf_Fz,asinf_Az nop.i 999;; -} - +} + .pred.rel "mutex",p8,p7 //asinf_pred_GTsqrt2by2,asinf_pred_LEsqrt2by2 -{ .mfi +{ .mfi nop.m 999 (p8) fnma.s.s0 f8 = asinf_z,asinf_Pt,asinf_sgn_x_piby2 nop.i 999 -} - -{ .mfb +} + +{ .mfb nop.m 999 (p7) fma.s.s0 f8 = asinf_x11,asinf_poly_Bx,asinf_poly_Ax br.ret.sptk b0 ;; -} +} ASINF_ABS_ONE: // Here for short exit if |x|=1 -{ .mfb +{ .mfb nop.m 999 fma.s.s0 f8 = asinf_sgn_x,asinf_const_piby2,f0 br.ret.sptk b0 -} +} ;; GLOBAL_LIBM_END(asinf) // Stack operations when calling error support. 
-// (1) (2) -// sp -> + psp -> + -// | | -// | | <- GR_Y -// | | -// | <-GR_Y Y2->| -// | | -// | | <- GR_X -// | | -// sp-64 -> + sp -> + -// save ar.pfs save b0 -// save gp +// (1) (2) +// sp -> + psp -> + +// | | +// | | <- GR_Y +// | | +// | <-GR_Y Y2->| +// | | +// | | <- GR_X +// | | +// sp-64 -> + sp -> + +// save ar.pfs save b0 +// save gp // Stack operations when calling error support. diff --git a/sysdeps/ia64/fpu/e_asinl.S b/sysdeps/ia64/fpu/e_asinl.S index ad65a73..50e03e3 100644 --- a/sysdeps/ia64/fpu/e_asinl.S +++ b/sysdeps/ia64/fpu/e_asinl.S @@ -687,70 +687,70 @@ F_CS6 = f36 F_CS7 = f37 F_CS8 = f38 F_CS9 = f39 -F_S23 = f40 -F_S45 = f41 -F_S67 = f42 -F_S89 = f43 -F_S25 = f44 -F_S69 = f45 -F_S29 = f46 -F_X2 = f47 -F_X4 = f48 -F_TSQRT = f49 -F_DTX = f50 -F_R = f51 -F_R2 = f52 -F_R3 = f53 -F_R4 = f54 - -F_C3 = f55 -F_C5 = f56 -F_C7 = f57 -F_C9 = f58 -F_P79 = f59 -F_P35 = f60 -F_P39 = f61 - -F_ATHI = f62 -F_ATLO = f63 - -F_T1 = f64 -F_Y = f65 -F_Y2 = f66 -F_ANDMASK = f67 -F_ORMASK = f68 -F_S = f69 -F_05 = f70 -F_SQRT_1S2 = f71 -F_DS = f72 -F_Z = f73 -F_1T2 = f74 -F_DZ = f75 -F_ZE = f76 -F_YZ = f77 -F_Y1S2 = f78 -F_Y1S2X = f79 -F_1X = f80 -F_ST = f81 -F_1T2_ST = f82 -F_TSS = f83 -F_Y1S2X2 = f84 -F_DZ_TERM = f85 -F_DTS = f86 -F_DS2X = f87 -F_T2 = f88 -F_ZY1S2S = f89 -F_Y1S2_1X = f90 +F_S23 = f40 +F_S45 = f41 +F_S67 = f42 +F_S89 = f43 +F_S25 = f44 +F_S69 = f45 +F_S29 = f46 +F_X2 = f47 +F_X4 = f48 +F_TSQRT = f49 +F_DTX = f50 +F_R = f51 +F_R2 = f52 +F_R3 = f53 +F_R4 = f54 + +F_C3 = f55 +F_C5 = f56 +F_C7 = f57 +F_C9 = f58 +F_P79 = f59 +F_P35 = f60 +F_P39 = f61 + +F_ATHI = f62 +F_ATLO = f63 + +F_T1 = f64 +F_Y = f65 +F_Y2 = f66 +F_ANDMASK = f67 +F_ORMASK = f68 +F_S = f69 +F_05 = f70 +F_SQRT_1S2 = f71 +F_DS = f72 +F_Z = f73 +F_1T2 = f74 +F_DZ = f75 +F_ZE = f76 +F_YZ = f77 +F_Y1S2 = f78 +F_Y1S2X = f79 +F_1X = f80 +F_ST = f81 +F_1T2_ST = f82 +F_TSS = f83 +F_Y1S2X2 = f84 +F_DZ_TERM = f85 +F_DTS = f86 +F_DS2X = f87 +F_T2 = f88 +F_ZY1S2S = f89 +F_Y1S2_1X = f90 F_TS = 
f91 -F_PI2_LO = f92 -F_PI2_HI = f93 -F_S19 = f94 -F_INV1T2_2 = f95 -F_CORR = f96 -F_DZ0 = f97 - -F_C11 = f98 -F_C13 = f99 +F_PI2_LO = f92 +F_PI2_HI = f93 +F_S19 = f94 +F_INV1T2_2 = f95 +F_CORR = f96 +F_DZ0 = f97 + +F_C11 = f98 +F_C13 = f99 F_C15 = f100 F_C17 = f101 F_P1113 = f102 diff --git a/sysdeps/ia64/fpu/e_atan2f.S b/sysdeps/ia64/fpu/e_atan2f.S index 67618f0..5ff561d 100644 --- a/sysdeps/ia64/fpu/e_atan2f.S +++ b/sysdeps/ia64/fpu/e_atan2f.S @@ -80,9 +80,9 @@ //.. //..Suppose (v,u) = (y,x), we calculate atan(v/u) as follows: //..A = y * frcpa(x) (so A = (y/x)(1 - beta)) -//..atan(y/x) = atan(A) + atan( ((y/x)-A))/(1 + (y/x)A) ), the second term is +//..atan(y/x) = atan(A) + atan( ((y/x)-A))/(1 + (y/x)A) ), the second term is //..a correction. -//..atan(A) is approximated by a polynomial +//..atan(A) is approximated by a polynomial //..A + p1 A^3 + p2 A^5 + ... + p10 A^21, //..atan(G) is approximated as follows: //..Let G = (y - Ax)/(x + Ay), atan(G) can be approximated by G + g * p1 @@ -90,9 +90,9 @@ //.. //..Suppose (v,u) = (x,y), we calculate atan(v/u) as follows: //..Z = x * frcpa(y) (so Z = (x/y)(1 - beta)) -//..atan(x/y) = atan(Z) + atan( ((x/y)-Z))/(1 + (x/y)Z) ), the second term is +//..atan(x/y) = atan(Z) + atan( ((x/y)-Z))/(1 + (x/y)Z) ), the second term is //..a correction. -//..atan(Z) is approximated by a polynomial +//..atan(Z) is approximated by a polynomial //..Z + p1 Z^3 + p2 Z^5 + ... + p10 Z^21, //..atan(T) is approximated as follows: //..Let T = (x - Ay)/(y + Ax), atan(T) can be approximated by T + t * p1 @@ -103,7 +103,7 @@ //..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21 //.. //..This polynomial is computed as follows: -//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq +//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq //..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6 //.. //..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6 @@ -112,7 +112,7 @@ //.. 
//..poly_A4 = p1 * A //,,poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4 -//..poly_A5 = p2 + Asq * poly_A5 +//..poly_A5 = p2 + Asq * poly_A5 //..poly_A4 = poly_A4 + A5 * poly_A5 //.. //..atan_A = poly_A4 + A11 * poly_A1 @@ -132,7 +132,7 @@ //..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21 //.. //..This polynomial is computed as follows: -//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq +//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq //..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6 //.. //..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6 @@ -141,7 +141,7 @@ //.. //..poly_A4 = p1 * A //,,poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4 -//..poly_A5 = p2 + Asq * poly_A5 +//..poly_A5 = p2 + Asq * poly_A5 //..poly_A4 = poly_A4 + A5 * poly_A5 //.. //..atan_A = poly_A4 + A11 * poly_A1 @@ -154,34 +154,34 @@ //coef_pj, j = 1,2,...,10; atan(A) ~=~ A + p1 A^3 + p2 A^5 + ... + p10 A^21 // // coef_p1 = -.3333332707155439167401311806315789E+00 -// coef_p1 in dbl = BFD5 5555 1219 1621 +// coef_p1 in dbl = BFD5 5555 1219 1621 // // coef_p2 = .1999967670926658391827857030875748E+00 -// coef_p2 in dbl = 3FC9 997E 7AFB FF4E +// coef_p2 in dbl = 3FC9 997E 7AFB FF4E // // coef_p3 = -.1427989384500152360161563301087296E+00 -// coef_p3 in dbl = BFC2 473C 5145 EE38 +// coef_p3 in dbl = BFC2 473C 5145 EE38 // // coef_p4 = .1105852823460720770079031213661163E+00 -// coef_p4 in dbl = 3FBC 4F51 2B18 65F5 +// coef_p4 in dbl = 3FBC 4F51 2B18 65F5 // // coef_p5 = -.8811839915595312348625710228448363E-01 -// coef_p5 in dbl = BFB6 8EED 6A8C FA32 +// coef_p5 in dbl = BFB6 8EED 6A8C FA32 // // coef_p6 = .6742329836955067042153645159059714E-01 -// coef_p6 in dbl = 3FB1 42A7 3D7C 54E3 +// coef_p6 in dbl = 3FB1 42A7 3D7C 54E3 // // coef_p7 = -.4468571068774672908561591262231909E-01 -// coef_p7 in dbl = BFA6 E10B A401 393F +// coef_p7 in dbl = BFA6 E10B A401 393F // // coef_p8 = .2252333246746511135532726960586493E-01 -// coef_p8 in dbl = 3F97 105B 4160 F86B +// coef_p8 in dbl = 3F97 105B 4160 F86B 
// // coef_p9 = -.7303884867007574742501716845542314E-02 -// coef_p9 in dbl = BF7D EAAD AA33 6451 +// coef_p9 in dbl = BF7D EAAD AA33 6451 // // coef_p10 = .1109686868355312093949039454619058E-02 -// coef_p10 in dbl = 3F52 2E5D 33BC 9BAA +// coef_p10 in dbl = 3F52 2E5D 33BC 9BAA // // Special values @@ -354,333 +354,333 @@ LOCAL_OBJECT_END(atan2f_coef_table2) .section .text GLOBAL_IEEE754_ENTRY(atan2f) - -{ .mfi + +{ .mfi alloc r32 = ar.pfs,1,5,4,0 frcpa.s1 atan2f_Z0,p0 = f1,f8 // Approx to 1/y nop.i 999 -} -{ .mfi +} +{ .mfi addl atan2f_GR_Addr_1 = @ltoff(atan2f_coef_table1),gp fma.s1 atan2f_xsq = f9,f9,f0 nop.i 999 ;; } - -{ .mfi + +{ .mfi ld8 atan2f_GR_Addr_1 = [atan2f_GR_Addr_1] frcpa.s1 atan2f_A0,p0 = f1,f9 // Approx to 1/x nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atan2f_ysq = f8,f8,f0 nop.i 999 ;; } - -{ .mfi + +{ .mfi nop.m 999 fcmp.ge.s1 p8,p9 = f9,f0 // Set p8 if x>=0, p9 if x<0 nop.i 999 } -{ .mfi +{ .mfi nop.m 999 fma.s1 atan2f_xy = f9,f8,f0 nop.i 999 ;; } - - -{ .mfi + + +{ .mfi add atan2f_GR_Addr_2 = 0x30, atan2f_GR_Addr_1 fmerge.s atan2f_sgn_Y = f8,f1 nop.i 999 ;; -} - -{ .mmf +} + +{ .mmf ldfpd atan2f_coef_p1,atan2f_coef_p10 = [atan2f_GR_Addr_1],16 ldfpd atan2f_coef_p9,atan2f_coef_p8 = [atan2f_GR_Addr_2],16 fclass.m p10,p0 = f9,0xe7 // Test x @inf|@snan|@qnan|@zero -} +} ;; - -{ .mfi + +{ .mfi ldfpd atan2f_coef_p7,atan2f_coef_p6 = [atan2f_GR_Addr_1],16 fma.s1 atan2f_T_denom = atan2f_Z0,atan2f_xsq,f8 nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd atan2f_coef_p5,atan2f_coef_p4 = [atan2f_GR_Addr_2],16 fma.s1 atan2f_Z = atan2f_Z0,f9,f0 nop.i 999 ;; } - -{ .mfi + +{ .mfi ldfpd atan2f_coef_p3,atan2f_coef_p2 = [atan2f_GR_Addr_1],16 fma.s1 atan2f_G_denom = atan2f_A0,atan2f_ysq,f9 nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd atan2f_const_piby2,atan2f_const_pi = [atan2f_GR_Addr_2],16 fma.s1 atan2f_A = atan2f_A0,f8,f0 nop.i 999 ;; } -{ .mfi +{ .mfi ldfpd atan2f_const_piby4,atan2f_const_3piby4 = [atan2f_GR_Addr_2] fclass.m p11,p0 = f8,0xe7 // Test y 
@inf|@snan|@qnan|@zero nop.i 999 -} -{ .mfb +} +{ .mfb nop.m 999 fnma.s1 atan2f_T_numer = atan2f_Z0,atan2f_xy,f9 (p10) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on x nan,inf,zero -} +} // p6 if |y|>|x|, p7 if |x|>=|y| , use xsq and ysq for test -{ .mfi +{ .mfi nop.m 999 fcmp.gt.s1 p6,p7 = atan2f_ysq,atan2f_xsq nop.i 999 } -{ .mfb +{ .mfb nop.m 999 fnma.s1 atan2f_G_numer = atan2f_A0,atan2f_xy,f8 (p11) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on y nan,inf,zero } - -{ .mfi + +{ .mfi nop.m 999 (p8) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f0,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p9) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f1,f0 nop.i 999 ;; } - -{ .mfi + +{ .mfi nop.m 999 (p6) fnma.s1 atan2f_U = atan2f_Z,f1,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p6) fma.s1 atan2f_Usq = atan2f_Z,atan2f_Z,f0 nop.i 999 ;; -} +} + - -{ .mfi +{ .mfi nop.m 999 (p7) fma.s1 atan2f_U = atan2f_A,f1,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p7) fma.s1 atan2f_Usq = atan2f_A,atan2f_A,f0 nop.i 999 ;; } -{ .mfi +{ .mfi nop.m 999 (p6) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_T_denom nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p6) fma.s1 atan2f_R_denom = atan2f_T_denom,f1,f0 nop.i 999 ;; -} +} -{ .mfi +{ .mfi nop.m 999 (p7) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_G_denom nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p7) fma.s1 atan2f_R_denom = atan2f_G_denom,f1,f0 nop.i 999 ;; -} +} -{ .mfi +{ .mfi nop.m 999 (p6) fnma.s1 atan2f_R_numer = atan2f_T_numer,f1,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p7) fma.s1 atan2f_R_numer = atan2f_G_numer,f1,f0 nop.i 999 ;; -} +} + - -{ .mfi +{ .mfi nop.m 999 (p6) fnma.s1 atan2f_p1rnum = atan2f_T_numer,atan2f_coef_p1,f0 nop.i 999 ;; -} -{ .mfi +} +{ .mfi nop.m 999 (p7) fma.s1 atan2f_p1rnum = atan2f_G_numer,atan2f_coef_p1,f0 nop.i 999 ;; -} +} + - -{ .mfi +{ .mfi nop.m 999 fma.s1 atan2f_U4 = atan2f_Usq,atan2f_Usq,f0 nop.i 999 } -{ .mfi +{ .mfi nop.m 999 fma.s1 atan2f_poly_u109 = atan2f_Usq,atan2f_coef_p10,atan2f_coef_p9 nop.i 999 ;; -} - -{ .mfi +} + +{ 
.mfi nop.m 999 fma.s1 atan2f_poly_u87 = atan2f_Usq,atan2f_coef_p8,atan2f_coef_p7 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atan2f_poly_u65 = atan2f_Usq,atan2f_coef_p6,atan2f_coef_p5 nop.i 999 ;; } - - -{ .mfi + + +{ .mfi nop.m 999 fma.s1 atan2f_poly_u43 = atan2f_Usq,atan2f_coef_p4,atan2f_coef_p3 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fnma.s1 atan2f_Q_beta = atan2f_Q1,atan2f_R_denom,f1 nop.i 999 ;; } -{ .mfi +{ .mfi nop.m 999 fma.s1 atan2f_poly_u21 = atan2f_Usq,atan2f_coef_p2,atan2f_coef_p1 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atan2f_r = atan2f_Q1,atan2f_R_numer,f0 nop.i 999 ;; } -{ .mfi +{ .mfi nop.m 999 (p6) fma.s1 atan2f_C = atan2f_sgn_Y,atan2f_const_piby2,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 (p7) fma.s1 atan2f_C = atan2f_const_1,atan2f_const_pi,f0 nop.i 999 ;; -} +} -{ .mfi +{ .mfi nop.m 999 fma.s1 atan2f_U6 = atan2f_U4,atan2f_Usq,f0 nop.i 999 } -{ .mfi +{ .mfi nop.m 999 fma.s1 atan2f_U8 = atan2f_U4,atan2f_U4,f0 nop.i 999 ;; } -{ .mfi +{ .mfi nop.m 999 fma.s1 atan2f_poly_u10to7 = atan2f_U4,atan2f_poly_u109,atan2f_poly_u87 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atan2f_pR = atan2f_p1rnum,atan2f_Q1,f0 nop.i 999 ;; -} +} -{ .mfi +{ .mfi nop.m 999 fma.s1 atan2f_poly_u6to3 = atan2f_U4,atan2f_poly_u65,atan2f_poly_u43 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atan2f_Q2 = atan2f_Q1,atan2f_Q_beta,atan2f_Q1 nop.i 999 ;; } -{ .mfi +{ .mfi nop.m 999 fma.s1 atan2f_Q_beta2 = atan2f_Q_beta,atan2f_Q_beta,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atan2f_rsq = atan2f_r,atan2f_r,f0 nop.i 999 ;; } -{ .mfi +{ .mfi nop.m 999 fma.s1 atan2f_poly_u210 = atan2f_Usq,atan2f_poly_u21,f1 nop.i 999 ;; -} - +} + { .mfi nop.m 999 fcmp.eq.s0 p8,p0 = f8,f9 // Dummy op to set flag on denormal inputs nop.i 999 } -{ .mfi +{ .mfi nop.m 999 fma.s1 atan2f_poly_u10to3 = atan2f_U8,atan2f_poly_u10to7,atan2f_poly_u6to3 nop.i 999 ;; -} +} -{ .mfi +{ .mfi nop.m 999 fma.s1 atan2f_Q3 = atan2f_Q2,atan2f_Q_beta2,atan2f_Q2 nop.i 999 -} -{ .mfi +} +{ .mfi 
nop.m 999 fma.s1 atan2f_pRC = atan2f_rsq,atan2f_pR,atan2f_C nop.i 999 ;; -} - -{ .mfi +} + +{ .mfi nop.m 999 fma.s1 atan2f_poly_u10to0 = atan2f_U6,atan2f_poly_u10to3,atan2f_poly_u210 nop.i 999 ;; -} +} -{ .mfi +{ .mfi nop.m 999 fma.s1 atan2f_pQRC = atan2f_R_numer,atan2f_Q3,atan2f_pRC nop.i 999 ;; -} +} -{ .mfb +{ .mfb nop.m 999 fma.s.s0 f8 = atan2f_U,atan2f_poly_u10to0,atan2f_pQRC br.ret.sptk b0 ;; -} +} diff --git a/sysdeps/ia64/fpu/e_atanhl.S b/sysdeps/ia64/fpu/e_atanhl.S index cee1ba1..846a89e 100644 --- a/sysdeps/ia64/fpu/e_atanhl.S +++ b/sysdeps/ia64/fpu/e_atanhl.S @@ -1,4 +1,4 @@ -.file "atanhl.s" +.file "atanhl.s" // Copyright (c) 2001 - 2003, Intel Corporation @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT // LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL, // EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code,and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // //********************************************************************* // -// History: +// History: // 09/10/01 Initial version // 12/11/01 Corrected .restore syntax // 05/20/02 Cleaned up namespace and sf0 syntax @@ -50,7 +50,7 @@ // //********************************************************************* // -// Function: atanhl(x) computes the principle value of the inverse +// Function: atanhl(x) computes the principle value of the inverse // hyperbolic tangent of x. // //********************************************************************* @@ -71,10 +71,10 @@ // IEEE Special Conditions: // // atanhl(inf) = QNaN -// atanhl(-inf) = QNaN -// atanhl(+/-0) = +/-0 -// atanhl(1) = +inf -// atanhl(-1) = -inf +// atanhl(-inf) = QNaN +// atanhl(+/-0) = +/-0 +// atanhl(1) = +inf +// atanhl(-1) = -inf // atanhl(|x|>1) = QNaN // atanhl(SNaN) = QNaN // atanhl(QNaN) = QNaN @@ -96,8 +96,8 @@ // Case atanhl_regular: // // Here we use formula atanhl(x) = sign(x)*log1pl(2*|x|/(1-|x|))/2 and -// calculation is subdivided into two stages. The first stage is -// calculating of X = 2*|x|/(1-|x|). The second one is calculating of +// calculation is subdivided into two stages. The first stage is +// calculating of X = 2*|x|/(1-|x|). The second one is calculating of // sign(x)*log1pl(X)/2. 
To obtain required accuracy we use precise division // algorythm output of which is a pair of two extended precision values those // approximate result of division with accuracy higher than working @@ -114,7 +114,7 @@ // // y = frcpa(b) initial approximation of 1/b // q = a*y initial approximation of a/b -// +// // e = 1 - b*y // e2 = e + e^2 // e1 = e^2 @@ -131,12 +131,12 @@ // r1 = a - b*X // r1 = r1 - b_lo*X // X_lo = r1*y3 low part of a/b -// +// // 2. special log1p algorithm overview // *********************************** // // Here we use a table lookup method. The basic idea is that in -// order to compute logl(Arg) = log1pl (Arg-1) for an argument Arg in [1,2), +// order to compute logl(Arg) = log1pl (Arg-1) for an argument Arg in [1,2), // we construct a value G such that G*Arg is close to 1 and that // logl(1/G) is obtainable easily from a table of values calculated // beforehand. Thus @@ -167,7 +167,7 @@ // G := G_1 * G_2 * G_3 // r := (G * S_hi - 1) + G * S_lo // -// These G_j's have the property that the product is exactly +// These G_j's have the property that the product is exactly // representable and that |r| < 2^(-12) as a result. 
// // Step 2: Approximation @@ -201,7 +201,7 @@ data8 0x9249249249249249,0x00003FFC // C7 data8 0xCCCCCCCCCCCCCCCD,0x00003FFC // C5 data8 0xAAAAAAAAAAAAAAAA,0x00003FFD // C3 data4 0x3f000000 // 1/2 -data4 0x00000000 // pad +data4 0x00000000 // pad data4 0x00000000 data4 0x00000000 LOCAL_OBJECT_END(Constants_TaylorSeries) @@ -328,7 +328,7 @@ data4 0x3F71D488,0x3D693B9D data8 0xBE049391B6B7C239 LOCAL_OBJECT_END(Constants_G_H_h2) -// G3 and H3 - IEEE single and h3 - IEEE double +// G3 and H3 - IEEE single and h3 - IEEE double LOCAL_OBJECT_START(Constants_G_H_h3) data4 0x3F7FFC00,0x38800100 data8 0x3D355595562224CD @@ -538,78 +538,78 @@ GLOBAL_LIBM_ENTRY(atanhl) alloc r32 = ar.pfs,0,17,4,0 fnma.s1 FR_Bp = f8,f1,f1 // b = 1 - |arg| (for x>0) mov GR_ExpMask = 0x1ffff -} -{ .mfi +} +{ .mfi addl GR_ad_taylor = @ltoff(Constants_TaylorSeries),gp fma.s1 FR_Bn = f8,f1,f1 // b = 1 - |arg| (for x<0) mov GR_NearZeroBound = 0xfffa // biased exp of 1/32 -};; -{ .mfi +};; +{ .mfi getf.exp GR_ArgExp = f8 fcmp.lt.s1 p6,p7 = f8,f0 // is negative? nop.i 0 -} -{ .mfi +} +{ .mfi ld8 GR_ad_taylor = [GR_ad_taylor] fmerge.s FR_abs_x = f1,f8 nop.i 0 -};; -{ .mfi +};; +{ .mfi nop.m 0 fclass.m p8,p0 = f8,0x1C7 // is arg NaT,Q/SNaN or +/-0 ? nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_x2 = f8,f8,f0 nop.i 0 -};; -{ .mfi +};; +{ .mfi add GR_ad_z_1 = 0x0F0,GR_ad_taylor fclass.m p9,p0 = f8,0x0a // is arg -denormal ? add GR_ad_taylor_2 = 0x010,GR_ad_taylor -} -{ .mfi +} +{ .mfi add GR_ad_05 = 0x080,GR_ad_taylor nop.f 0 nop.i 0 -};; -{ .mfi +};; +{ .mfi ldfe FR_C17 = [GR_ad_taylor],32 fclass.m p10,p0 = f8,0x09 // is arg +denormal ? add GR_ad_tbl_1 = 0x040,GR_ad_z_1 // point to Constants_G_H_h1 -} -{ .mfb +} +{ .mfb add GR_ad_z_2 = 0x140,GR_ad_z_1 // point to Constants_Z_2 (p8) fma.s0 f8 = f8,f1,f0 // NaN or +/-0 (p8) br.ret.spnt b0 // exit for Nan or +/-0 -};; -{ .mfi +};; +{ .mfi ldfe FR_C15 = [GR_ad_taylor_2],32 fclass.m p15,p0 = f8,0x23 // is +/-INF ? 
add GR_ad_tbl_2 = 0x180,GR_ad_z_1 // point to Constants_G_H_h2 -} -{ .mfb +} +{ .mfb ldfe FR_C13 = [GR_ad_taylor],32 (p9) fnma.s0 f8 = f8,f8,f8 // -denormal (p9) br.ret.spnt b0 // exit for -denormal -};; -{ .mfi +};; +{ .mfi ldfe FR_C11 = [GR_ad_taylor_2],32 fcmp.eq.s0 p13,p0 = FR_abs_x,f1 // is |arg| = 1? nop.i 0 -} -{ .mfb +} +{ .mfb ldfe FR_C9 = [GR_ad_taylor],32 (p10) fma.s0 f8 = f8,f8,f8 // +denormal (p10) br.ret.spnt b0 // exit for +denormal -};; -{ .mfi +};; +{ .mfi ldfe FR_C7 = [GR_ad_taylor_2],32 (p6) frcpa.s1 FR_Yn,p11 = f1,FR_Bn // y = frcpa(b) and GR_ArgExp = GR_ArgExp,GR_ExpMask // biased exponent -} -{ .mfb +} +{ .mfb ldfe FR_C5 = [GR_ad_taylor],32 fnma.s1 FR_B = FR_abs_x,f1,f1 // b = 1 - |arg| (p15) br.cond.spnt atanhl_gt_one // |arg| > 1 @@ -639,20 +639,20 @@ GLOBAL_LIBM_ENTRY(atanhl) ldfs FR_Half = [GR_ad_05] (p7) fnma.s1 FR_B_lo = FR_Bp,f1,f1 nop.i 0 -};; +};; { .mfi nop.m 0 - (p6) fnma.s1 FR_E0 = FR_Yn,FR_Bn,f1 // e = 1-b*y + (p6) fnma.s1 FR_E0 = FR_Yn,FR_Bn,f1 // e = 1-b*y nop.i 0 -} -{ .mfb +} +{ .mfb nop.m 0 (p6) fma.s1 FR_Y0 = FR_Yn,f1,f0 (p8) br.cond.spnt atanhl_gt_one // |arg| > 1 };; { .mfi nop.m 0 - (p7) fnma.s1 FR_E0 = FR_Yp,FR_Bp,f1 + (p7) fnma.s1 FR_E0 = FR_Yp,FR_Bp,f1 nop.i 0 } { .mfi @@ -804,11 +804,11 @@ GLOBAL_LIBM_ENTRY(atanhl) { .mfi ldfe FR_log2_lo = [GR_ad_q],16 // load log2_lo nop.f 0 - sub GR_N = GR_N,GR_Bias + sub GR_N = GR_N,GR_Bias };; { .mfi ldfe FR_Q4 = [GR_ad_q],16 // load Q4 - fms.s1 FR_S_lo = FR_AA,f1,FR_Z // form S_lo = AA - Z + fms.s1 FR_S_lo = FR_AA,f1,FR_Z // form S_lo = AA - Z sub GR_minus_N = GR_Bias,GR_N // form exponent of 2^(-N) };; { .mmf @@ -820,7 +820,7 @@ GLOBAL_LIBM_ENTRY(atanhl) { .mfi ldfe FR_Q2 = [GR_ad_q],16 // load Q2 nop.f 0 - extr.u GR_Index2 = GR_X_1,6,4 // extract bits 6-9 of X_1 + extr.u GR_Index2 = GR_X_1,6,4 // extract bits 6-9 of X_1 };; { .mmi ldfe FR_Q1 = [GR_ad_q] // load Q1 @@ -862,17 +862,17 @@ GLOBAL_LIBM_ENTRY(atanhl) } { .mfi nop.m 0 - nop.f 0 + nop.f 0 nop.i 0 };; { .mfi nop.m 0 - 
nop.f 0 + nop.f 0 nop.i 0 };; { .mfi nop.m 0 - nop.f 0 + nop.f 0 nop.i 0 };; @@ -1068,7 +1068,7 @@ atanhl_near_zero: { .mfb nop.m 0 fma.s0 f8 = FR_C17,FR_x3,f8 - br.ret.sptk b0 + br.ret.sptk b0 };; atanhl_eq_one: diff --git a/sysdeps/ia64/fpu/e_coshl.S b/sysdeps/ia64/fpu/e_coshl.S index b5872d0..43da1ab 100644 --- a/sysdeps/ia64/fpu/e_coshl.S +++ b/sysdeps/ia64/fpu/e_coshl.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== -// 02/02/00 Initial version +// 02/02/00 Initial version // 04/04/00 Unwind support added // 08/15/00 Bundle added after call to __libm_error_support to properly // set [the previously overwritten] GR_Parameter_RESULT. @@ -56,12 +56,12 @@ // // Registers used //============================================================== -// general registers: +// general registers: // r14 -> r40 // predicate registers used: // p6 -> p11 // floating-point registers used: -// f9 -> f15; f32 -> f90; +// f9 -> f15; f32 -> f90; // f8 has input, then output // // Overview of operation @@ -82,7 +82,7 @@ // 1. COSH_BY_POLY 0 < |x| < 0.25 // =============== // Evaluate cosh(x) by a 12th order polynomial -// Care is take for the order of multiplication; and P2 is not exactly 1/4!, +// Care is take for the order of multiplication; and P2 is not exactly 1/4!, // P3 is not exactly 1/6!, etc. // cosh(x) = 1 + (P1*x^2 + P2*x^4 + P3*x^6 + P4*x^8 + P5*x^10 + P6*x^12) // @@ -90,18 +90,18 @@ // ============= // cosh(x) = cosh(B+R) // = cosh(B)cosh(R) + sinh(B)sinh(R) -// +// // ax = |x| = M*log2/64 + R // B = M*log2/64 -// M = 64*N + j +// M = 64*N + j // We will calculate M and get N as (M-j)/64 // The division is a shift. 
// exp(B) = exp(N*log2 + j*log2/64) // = 2^N * 2^(j*log2/64) // cosh(B) = 1/2(e^B + e^-B) -// = 1/2(2^N * 2^(j*log2/64) + 2^-N * 2^(-j*log2/64)) -// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64)) -// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64)) +// = 1/2(2^N * 2^(j*log2/64) + 2^-N * 2^(-j*log2/64)) +// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64)) +// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64)) // 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32 // Tjhi is double-extended (80-bit) and Tjlo is single(32-bit) // @@ -109,7 +109,7 @@ // R = ax - M*log2_by_64_hi - M*log2_by_64_lo // exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...) // = 1 + p_odd + p_even -// where the p_even uses the A coefficients and the p_even uses +// where the p_even uses the A coefficients and the p_even uses // the B coefficients // // So sinh(R) = 1 + p_odd + p_even -(1 -p_odd -p_even)/2 = p_odd @@ -173,7 +173,7 @@ GR_Parameter_RESULT = r39 GR_Parameter_TAG = r40 -f_ABS_X = f9 +f_ABS_X = f9 f_X2 = f10 f_X4 = f11 f_tmp = f14 @@ -228,16 +228,16 @@ f_Tmjlo = f68 f_S_hi = f69 f_SC_hi_temp = f70 -f_C_lo_temp1 = f71 -f_C_lo_temp2 = f72 -f_C_lo_temp3 = f73 -f_C_lo_temp4 = f73 +f_C_lo_temp1 = f71 +f_C_lo_temp2 = f72 +f_C_lo_temp3 = f73 +f_C_lo_temp4 = f73 f_C_lo = f74 f_C_hi = f75 -f_Y_hi = f77 -f_Y_lo_temp = f78 -f_Y_lo = f79 +f_Y_hi = f77 +f_Y_lo_temp = f78 +f_Y_lo = f79 f_NORM_X = f80 f_P1 = f81 @@ -442,7 +442,7 @@ GLOBAL_IEEE754_ENTRY(coshl) } { .mfi nop.m 0 - fnorm.s1 f_NORM_X = f8 + fnorm.s1 f_NORM_X = f8 mov r_exp_2tom57 = 0xffff-57 } ;; @@ -450,7 +450,7 @@ GLOBAL_IEEE754_ENTRY(coshl) { .mfi setf.d f_RSHF_2TO57 = r_rshf_2to57 // Form const 1.100 * 2^120 fclass.m p10,p0 = f8, 0x0b // Test for denorm - mov r_exp_mask = 0x1ffff + mov r_exp_mask = 0x1ffff } { .mlx setf.sig f_INV_LN2_2TO63 = r_sig_inv_ln2 // Form 1/ln2 * 2^63 @@ -490,7 +490,7 @@ COSH_COMMON: add r_ad5 = 0x580, r_ad1 // Point to j_lo_table 
midpoint } { .mib - ldfe f_log2by64_hi = [r_ad1],16 + ldfe f_log2by64_hi = [r_ad1],16 and r_exp_x = r_exp_mask, r_signexp_x (p7) br.ret.spnt b0 // Exit if x=0 } @@ -498,36 +498,36 @@ COSH_COMMON: // Get the A coefficients for COSH_BY_TBL { .mfi - ldfe f_A1 = [r_ad3],16 + ldfe f_A1 = [r_ad3],16 fcmp.lt.s1 p8,p9 = f8,f0 // Test for x<0 cmp.lt p7,p0 = r_exp_x, r_exp_0_25 // Test x < 0.25 } { .mfb add r_ad2o = 0x30, r_ad2e // Point to p_table odd coeffs -(p6) fma.s0 f8 = f8,f8,f0 // Result for x nan, inf +(p6) fma.s0 f8 = f8,f8,f0 // Result for x nan, inf (p6) br.ret.spnt b0 // Exit for x nan, inf } ;; // Calculate X2 = ax*ax for COSH_BY_POLY { .mfi - ldfe f_log2by64_lo = [r_ad1],16 + ldfe f_log2by64_lo = [r_ad1],16 nop.f 0 nop.i 0 } { .mfb - ldfe f_A2 = [r_ad3],16 + ldfe f_A2 = [r_ad3],16 fma.s1 f_X2 = f_NORM_X, f_NORM_X, f0 (p7) br.cond.spnt COSH_BY_POLY } ;; // Here if |x| >= 0.25 -COSH_BY_TBL: +COSH_BY_TBL: // ****************************************************** // STEP 1 (TBL and EXP) - Argument reduction // ****************************************************** -// Get the following constants. +// Get the following constants. // Inv_log2by64 // log2by64_hi // log2by64_lo @@ -581,20 +581,20 @@ COSH_BY_TBL: // Subtract RSHF constant to get rounded M as a floating point value // M_temp * 2^(63-6) - 2^63 { .mfb - ldfe f_B3 = [r_ad3],16 + ldfe f_B3 = [r_ad3],16 fms.s1 f_M = f_M_temp, f_2TOM57, f_RSHF (p6) br.cond.spnt COSH_HUGE // Branch if result will overflow } ;; { .mfi - getf.sig r_M = f_M_temp + getf.sig r_M = f_M_temp nop.f 0 cmp.ge p7,p6 = r_exp_x, r_exp_32 // Test if x >= 32 } ;; -// Calculate j. j is the signed extension of the six lsb of M. It +// Calculate j. j is the signed extension of the six lsb of M. It // has a range of -32 thru 31. 
// Calculate R @@ -637,8 +637,8 @@ COSH_BY_TBL: // N = (M-j)/64 { .mfi ldfe f_Tjhi = [r_ad_J_hi] - fnma.s1 f_R = f_M, f_log2by64_lo, f_R_temp - shr r_N = r_Mmj, 0x6 // N = (M-j)/64 + fnma.s1 f_R = f_M, f_log2by64_lo, f_R_temp + shr r_N = r_Mmj, 0x6 // N = (M-j)/64 } { .mfi shladd r_ad_mJ_hi = r_mj, 4, r_ad4 // pointer to Tmjhi @@ -713,8 +713,8 @@ COSH_BY_TBL: } ;; -// -// If TBL, +// +// If TBL, // Calculate S_hi and S_lo, and C_hi // SC_hi_temp = sneg * Tmjhi // S_hi = spos * Tjhi - SC_hi_temp @@ -724,12 +724,12 @@ COSH_BY_TBL: { .mfi nop.m 0 -(p6) fma.s1 f_SC_hi_temp = f_sneg, f_Tmjhi, f0 +(p6) fma.s1 f_SC_hi_temp = f_sneg, f_Tmjhi, f0 nop.i 0 } ;; -// If TBL, +// If TBL, // C_lo_temp3 = sneg * Tmjlo // C_lo_temp4 = spos * Tjlo + C_lo_temp3 // C_lo_temp4 = spos * Tjlo + (sneg * Tmjlo) @@ -752,7 +752,7 @@ COSH_BY_TBL: } ;; -// If EXP, +// If EXP, // Compute 2^(N-1) * Tjhi and 2^(N-1) * Tjlo { .mfi nop.m 0 @@ -811,7 +811,7 @@ COSH_BY_TBL: { .mfi nop.m 0 -(p6) fma.s1 f_C_lo_temp2 = f_sneg, f_Tmjhi, f_C_lo_temp1 +(p6) fma.s1 f_C_lo_temp2 = f_sneg, f_Tmjhi, f_C_lo_temp1 nop.i 0 } ;; @@ -836,7 +836,7 @@ COSH_BY_TBL: ;; // If TBL, -// Y_hi = C_hi +// Y_hi = C_hi // Y_lo = S_hi*p_odd + (C_hi*p_even + C_lo) { .mfi nop.m 0 @@ -883,7 +883,7 @@ COSH_BY_TBL: // Here if 0 < |x| < 0.25 -COSH_BY_POLY: +COSH_BY_POLY: { .mmf ldfe f_P6 = [r_ad2e],16 ldfe f_P5 = [r_ad2o],16 @@ -900,7 +900,7 @@ COSH_BY_POLY: { .mmi ldfe f_P2 = [r_ad2e],16 - ldfe f_P1 = [r_ad2o],16 + ldfe f_P1 = [r_ad2o],16 nop.i 0 } ;; @@ -1007,7 +1007,7 @@ COSH_DENORM: // Here if |x| >= overflow limit -COSH_HUGE: +COSH_HUGE: // for COSH_HUGE, put 24000 in exponent; take sign from input { .mmi mov r_exp_huge = 0x15dbf @@ -1018,7 +1018,7 @@ COSH_HUGE: ;; { .mfi - alloc r32 = ar.pfs,0,5,4,0 + alloc r32 = ar.pfs,0,5,4,0 fma.s1 f_signed_hi_lo = f_huge, f1, f1 nop.i 0 } @@ -1061,7 +1061,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mib stfe [GR_Parameter_X] = f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 
0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + nop.b 0 } { .mib stfe [GR_Parameter_Y] = f_pre_result // STORE Parameter 3 on stack diff --git a/sysdeps/ia64/fpu/e_exp.S b/sysdeps/ia64/fpu/e_exp.S index fcc247f..f17bc26 100644 --- a/sysdeps/ia64/fpu/e_exp.S +++ b/sysdeps/ia64/fpu/e_exp.S @@ -693,7 +693,7 @@ EXP_CERTAIN_UNDERFLOW: nop.i 0 } ;; - + { .mfb nop.m 0 fma.d.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result diff --git a/sysdeps/ia64/fpu/e_expf.S b/sysdeps/ia64/fpu/e_expf.S index 6fe0a83..75f918f 100644 --- a/sysdeps/ia64/fpu/e_expf.S +++ b/sysdeps/ia64/fpu/e_expf.S @@ -257,7 +257,7 @@ LOCAL_OBJECT_END(_expf_table) .section .text GLOBAL_IEEE754_ENTRY(expf) - + { .mlx addl rTblAddr = @ltoff(_expf_table),gp movl r64DivLn2 = 0x40571547652B82FE // 64/ln(2) @@ -612,7 +612,7 @@ EXP_CERTAIN_UNDERFLOW: nop.i 0 } ;; - + { .mfb nop.m 0 fma.s.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result diff --git a/sysdeps/ia64/fpu/e_fmodl.S b/sysdeps/ia64/fpu/e_fmodl.S index 3e87eb0..258e4ef 100644 --- a/sysdeps/ia64/fpu/e_fmodl.S +++ b/sysdeps/ia64/fpu/e_fmodl.S @@ -366,7 +366,7 @@ loop64: { .mfi nop.m 0 - // Final iteration (p8): is FR_ABS_A the correct remainder + // Final iteration (p8): is FR_ABS_A the correct remainder // (quotient was not overestimated) ? (p8) fcmp.lt.unc.s1 p6, p10 = FR_QREM, f0 nop.i 0 @@ -392,7 +392,7 @@ loop64: nop.m 0 // add b to estimated remainder (to cover the case when the quotient was // overestimated) - // also set correct sign by using + // also set correct sign by using // FR_B_SGN_A = |b|*sgn(a), FR_ROUNDCONST = sgn(a) (p6) fma.s0 f8 = FR_QREM, FR_ROUNDCONST, FR_B_SGN_A nop.b 0 diff --git a/sysdeps/ia64/fpu/e_hypot.S b/sysdeps/ia64/fpu/e_hypot.S index 36cfd1e..1df1f64 100644 --- a/sysdeps/ia64/fpu/e_hypot.S +++ b/sysdeps/ia64/fpu/e_hypot.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// //********************************************************************* // -// History: +// History: // 02/02/00 hand-optimized // 04/04/00 Unwind support added // 06/20/00 new version @@ -86,7 +86,7 @@ // x2 = x * x in double-extended // y2 = y * y in double-extended // temp = x2 + y2 in double-extended -// sqrt(temp) rounded to double +// sqrt(temp) rounded to double // //********************************************************************* @@ -114,7 +114,7 @@ GLOBAL_IEEE754_ENTRY(hypot) // Compute x*x fma.s1 f10=f8,f8,f0 // r2=bias-1 - mov r2=0xfffe + mov r2=0xfffe } {.mfi // 63/8 @@ -135,8 +135,8 @@ GLOBAL_IEEE754_ENTRY(hypot) {.mfi nop.m 0 // if possible overflow, copy f8 to f32 - // set Denormal, if necessary - // (p8) + // set Denormal, if necessary + // (p8) fma.d.s0 f32=f8,f1,f0 nop.i 0;; } @@ -235,11 +235,11 @@ GLOBAL_IEEE754_ENTRY(hypot) { .mfi nop.m 0 -// Identify Natvals, Infs, NaNs, and Zeros +// Identify Natvals, Infs, NaNs, and Zeros // and return result fclass.m.unc p7, p0 = f12, 0x1E7 nop.i 0;; -} +} {.mfb // get exponent of x^2+y^2 getf.exp r3=f12 @@ -260,7 +260,7 @@ GLOBAL_IEEE754_ENTRY(hypot) // H0=0.5*z0 (p6) fma.s1 f15=f8,f7,f0 nop.i 0;; -} +} {.mfi @@ -334,7 +334,7 @@ GLOBAL_IEEE754_ENTRY(hypot) nop.i 0 } {.mfi - // Is x^2 + y^2 well less than the overflow + // Is x^2 + y^2 well less than the overflow // threshold? (p6) cmp.lt.unc p7, p8 = r3,r2 // P=P13+d3*P47 @@ -351,8 +351,8 @@ GLOBAL_IEEE754_ENTRY(hypot) } { .mfi - nop.m 0 -(p8) fsetc.s2 0x7F,0x42 + nop.m 0 +(p8) fsetc.s2 0x7F,0x42 // Possible overflow path, must detect by // Setting widest range exponent with prevailing // rounding mode. 
@@ -374,7 +374,7 @@ GLOBAL_IEEE754_ENTRY(hypot) nop.i 0 ;; } { .mfi - nop.m 0 + nop.m 0 (p8) fcmp.lt.unc.s1 p9, p10 = f12, f11 nop.i 0 ;; } @@ -382,7 +382,7 @@ GLOBAL_IEEE754_ENTRY(hypot) nop.m 0 mov GR_Parameter_TAG = 46 // No overflow -(p9) br.ret.sptk b0;; +(p9) br.ret.sptk b0;; } GLOBAL_IEEE754_END(hypot) diff --git a/sysdeps/ia64/fpu/e_hypotf.S b/sysdeps/ia64/fpu/e_hypotf.S index d6fcbd1..f9d5c07 100644 --- a/sysdeps/ia64/fpu/e_hypotf.S +++ b/sysdeps/ia64/fpu/e_hypotf.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // //********************************************************************* // -// History: +// History: // 02/02/00 hand-optimized // 04/04/00 Unwind support added // 06/26/00 new version @@ -86,7 +86,7 @@ // x2 = x * x in double-extended // y2 = y * y in double-extended // temp = x2 + y2 in double-extended -// sqrt(temp) rounded to single precision +// sqrt(temp) rounded to single precision // //********************************************************************* @@ -113,7 +113,7 @@ GLOBAL_IEEE754_ENTRY(hypotf) // Compute x*x fma.s1 f10=f8,f8,f0 // r2=bias-1 - mov r2=0xfffe + mov r2=0xfffe } {.mfi nop.m 0 @@ -132,8 +132,8 @@ GLOBAL_IEEE754_ENTRY(hypotf) {.mfi nop.m 0 // if possible overflow, copy f8 to f14 - // set Denormal, if necessary - // (p8) + // set Denormal, if necessary + // (p8) fma.s.s0 f14=f8,f1,f0 nop.i 0;; } @@ -211,11 +211,11 @@ GLOBAL_IEEE754_ENTRY(hypotf) { .mfi nop.m 0 -// Identify Natvals, Infs, NaNs, and Zeros +// Identify Natvals, Infs, NaNs, and Zeros // and return result fclass.m.unc p7, p0 = f12, 0x1E7 nop.i 0 -} +} {.mfi nop.m 0 // z0=frsqrta(a) @@ -243,7 +243,7 @@ GLOBAL_IEEE754_ENTRY(hypotf) // H0=0.5*z0 (p6) fma.s1 f10=f8,f7,f0 nop.i 0;; -} +} {.mfi @@ -287,7 +287,7 @@ GLOBAL_IEEE754_ENTRY(hypotf) {.mfi - // Is x^2 + y^2 well less than the overflow + // Is x^2 + y^2 well less than the overflow // threshold? (p6) cmp.lt.unc p7, p8 = r3,r2 // P=P01+d2*P23 @@ -304,8 +304,8 @@ GLOBAL_IEEE754_ENTRY(hypotf) } { .mfi - nop.m 0 -(p8) fsetc.s2 0x7F,0x42 + nop.m 0 +(p8) fsetc.s2 0x7F,0x42 // Possible overflow path, must detect by // Setting widest range exponent with prevailing // rounding mode. 
@@ -327,7 +327,7 @@ GLOBAL_IEEE754_ENTRY(hypotf) nop.i 0 ;; } { .mfi - nop.m 0 + nop.m 0 (p8) fcmp.lt.unc.s1 p9, p10 = f12, f11 nop.i 0 ;; } @@ -335,7 +335,7 @@ GLOBAL_IEEE754_ENTRY(hypotf) nop.m 0 mov GR_Parameter_TAG = 47 // No overflow -(p9) br.ret.sptk b0;; +(p9) br.ret.sptk b0;; } GLOBAL_IEEE754_END(hypotf) @@ -343,7 +343,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mii add GR_Parameter_Y=-32,sp // Parameter 2 value - mov GR_Parameter_TAG = 47 + mov GR_Parameter_TAG = 47 .save ar.pfs,GR_SAVE_PFS mov GR_SAVE_PFS=ar.pfs // Save ar.pfs } @@ -382,10 +382,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov b0 = GR_SAVE_B0 // Restore return address };; { .mib - mov gp = GR_SAVE_GP // Restore gp + mov gp = GR_SAVE_GP // Restore gp mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs br.ret.sptk b0 // Return -};; +};; LOCAL_LIBM_END(__libm_error_region) diff --git a/sysdeps/ia64/fpu/e_hypotl.S b/sysdeps/ia64/fpu/e_hypotl.S index 988b86e..a1716fd 100644 --- a/sysdeps/ia64/fpu/e_hypotl.S +++ b/sysdeps/ia64/fpu/e_hypotl.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// //********************************************************************* // -// History: +// History: // 02/02/00 hand-optimized // 04/04/00 Unwind support added // 06/20/00 new version @@ -112,7 +112,7 @@ GLOBAL_IEEE754_ENTRY(hypotl) // Compute x*x fma.s1 f10=f8,f8,f0 // r2=bias-1 - mov r2=0xfffe + mov r2=0xfffe } {.mfi nop.m 0 @@ -131,8 +131,8 @@ GLOBAL_IEEE754_ENTRY(hypotl) {.mfi nop.m 0 // if possible overflow, copy f8 to f32 - // set Denormal, if necessary - // (p8) + // set Denormal, if necessary + // (p8) fma.s0 f32=f8,f1,f0 nop.i 0;; } @@ -233,11 +233,11 @@ GLOBAL_IEEE754_ENTRY(hypotl) } { .mfi nop.m 0 -// Identify Natvals, Infs, NaNs, and Zeros +// Identify Natvals, Infs, NaNs, and Zeros // and return result fclass.m.unc p7, p0 = f12, 0x1E7 nop.i 0 -} +} {.mfi // get exponent of x^2+y^2 getf.exp r3=f12 @@ -271,7 +271,7 @@ GLOBAL_IEEE754_ENTRY(hypotl) // H0=0.5*z0 (p6) fma.s1 f15=f8,f7,f0 nop.i 0;; -} +} {.mfb nop.m 0 @@ -364,7 +364,7 @@ GLOBAL_IEEE754_ENTRY(hypotl) nop.i 0 } {.mfi - // Is x^2 + y^2 well less than the overflow + // Is x^2 + y^2 well less than the overflow // threshold? (p6) cmp.lt.unc p7, p8 = r3,r2 // c=dxy+da @@ -388,8 +388,8 @@ GLOBAL_IEEE754_ENTRY(hypotl) } { .mfi - nop.m 0 -(p8) fsetc.s2 0x7F,0x42 + nop.m 0 +(p8) fsetc.s2 0x7F,0x42 // Possible overflow path, must detect by // Setting widest range exponent with prevailing // rounding mode. 
@@ -411,7 +411,7 @@ GLOBAL_IEEE754_ENTRY(hypotl) nop.i 0 ;; } { .mfi - nop.m 0 + nop.m 0 (p8) fcmp.lt.unc.s1 p9, p10 = f12, f11 nop.i 0 ;; } @@ -419,7 +419,7 @@ GLOBAL_IEEE754_ENTRY(hypotl) nop.m 0 mov GR_Parameter_TAG = 45; // No overflow -(p9) br.ret.sptk b0;; +(p9) br.ret.sptk b0;; } GLOBAL_IEEE754_END(hypotl) diff --git a/sysdeps/ia64/fpu/e_log.S b/sysdeps/ia64/fpu/e_log.S index c644c6f..3c5ebc2 100644 --- a/sysdeps/ia64/fpu/e_log.S +++ b/sysdeps/ia64/fpu/e_log.S @@ -1425,11 +1425,11 @@ log_log10_common: fnorm.s1 FR_NormX = f8 mov GR_bias = 0xffff };; - + { .mfi setf.d FR_A3 = GR_A3 // create A3 fcmp.eq.s1 p12,p0 = f1,f8 // is x equal to 1.0? - dep.z GR_xorg = GR_xorg, 44, 19 // 0x3fefe00000000000 + dep.z GR_xorg = GR_xorg, 44, 19 // 0x3fefe00000000000 // double precision memory // representation of 255/256 } @@ -1519,7 +1519,7 @@ log_core: { .mfi (p6) getf.exp GR_rexp = FR_r // Get signexp of x-1 (p7) fcvt.xf FR_N = FR_N -(p8) cmp.eq p9,p6 = r0,r0 // Also set p9 and clear p6 if log10 +(p8) cmp.eq p9,p6 = r0,r0 // Also set p9 and clear p6 if log10 // and arg near 1 };; diff --git a/sysdeps/ia64/fpu/e_log2.S b/sysdeps/ia64/fpu/e_log2.S index 660a952..0edf8ad 100644 --- a/sysdeps/ia64/fpu/e_log2.S +++ b/sysdeps/ia64/fpu/e_log2.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //================================================================= -// 09/11/00 Initial version +// 09/11/00 Initial version // 03/19/01 Added one polynomial coefficient, to improve accuracy // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align @@ -60,19 +60,19 @@ // j=0 if f<128; j=1 if f>=128 // T is a table that stores log2(1/y) (in entries 1..255) rounded to // double extended precision; f is used as an index; T[255]=0 -// +// // If f=0 and b9=0, r is set to 2^{-8}* 0.b9 b10 ... 
b52 = m-1 (fractional part of m), -// and 0 is used instead of T[0] +// and 0 is used instead of T[0] // (polynomial evaluation only, for m=1+r, 0<=r<2^{-9}) // If f=255, r is set to (m-2)/2 (T[255]=0, and only polynomial evaluation is used // for m=2(1-r'), 0<=r'<2^{-9}) // // log2(x) is approximated as // (l-j) + T[f] + (c1*r+c2*r^2+...+c7*r^7), if f>0 -// +// -// Special values +// Special values //================================================================= // log2(0)=-inf, raises Divide by Zero // log2(+inf)=inf @@ -90,7 +90,7 @@ GR_SAVE_B0 = r33 GR_SAVE_PFS = r34 -GR_SAVE_GP = r35 // This reg. can safely be used +GR_SAVE_GP = r35 // This reg. can safely be used GR_SAVE_SP = r36 GR_Parameter_X = r37 @@ -388,15 +388,15 @@ LOCAL_OBJECT_END(T_table) GLOBAL_LIBM_ENTRY(log2) { .mfi - alloc r32=ar.pfs,1,4,4,0 - // y=frcpa(x) + alloc r32=ar.pfs,1,4,4,0 + // y=frcpa(x) frcpa.s1 f6,p0=f1,f8 - // will form significand of 1.5 (to test whether the index is 128 or above) + // will form significand of 1.5 (to test whether the index is 128 or above) mov r24=0xc } {.mfi nop.m 0 - // normalize x + // normalize x fma.s1 f7=f8,f1,f0 // r2 = pointer to C_1...C_6 followed by T_table addl r2 = @ltoff(poly_coeffs), gp;; @@ -406,7 +406,7 @@ GLOBAL_LIBM_ENTRY(log2) getf.sig r25=f8 // f8 denormal ? 
fclass.m p8,p10=f8,0x9 - // will form significand of 1.5 (to test whether the index is 128 or above) + // will form significand of 1.5 (to test whether the index is 128 or above) shl r24=r24,60 } {.mfi @@ -420,7 +420,7 @@ GLOBAL_LIBM_ENTRY(log2) getf.exp r29=f8 // load start address for C_1...C_6 followed by T_table ld8 r2=[r2] - // will continue only for positive normal/denormal numbers + // will continue only for positive normal/denormal numbers fclass.nm.unc p12,p7 = f8, 0x19 ;; } @@ -465,7 +465,7 @@ GLOBAL_LIBM_ENTRY(log2) {.mmi // load C_6, C_7 ldfpd f12,f13=[r2],16 - // r27=bias-1 (if index >=128, will add exponent+1) + // r27=bias-1 (if index >=128, will add exponent+1) (p12) mov r27=0xfffe (p8) shr.u r28=r25,63-8;; } @@ -513,7 +513,7 @@ GLOBAL_LIBM_ENTRY(log2) {.mmf // load T (unless first 9 bits after leading 1 are 0) (p12) ldfe f33=[r2] - // f8=expon - bias + // f8=expon - bias setf.sig f8=r29 // set T=0 (if first 9 bits after leading 1 are 0) (p8) fma.s1 f33=f0,f0,f0;; @@ -602,7 +602,7 @@ GLOBAL_LIBM_ENTRY(log2) SPECIAL_LOG2: -{.mfi +{.mfi nop.m 0 // x=+Infinity ? 
fclass.m p7,p0=f8,0x21 @@ -627,7 +627,7 @@ SPECIAL_LOG2: (p7) br.ret.spnt b0;; } {.mfi - (p8) mov GR_Parameter_TAG = 170 + (p8) mov GR_Parameter_TAG = 170 // log2(+/-0)=-infinity, raises Divide by Zero // set f8=-0 (p8) fmerge.ns f8=f0,f8 @@ -639,12 +639,12 @@ SPECIAL_LOG2: (p8) br.cond.sptk __libm_error_region;; } {.mfb - (p6) mov GR_Parameter_TAG = 171 + (p6) mov GR_Parameter_TAG = 171 // x<0: return NaN, raise Invalid (p6) frcpa.s0 f8,p0=f0,f0 (p6) br.cond.sptk __libm_error_region;; -} - +} + {.mfb nop.m 0 @@ -662,10 +662,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) add GR_Parameter_Y=-32,sp // Parameter 2 value nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs // Save ar.pfs + mov GR_SAVE_PFS=ar.pfs // Save ar.pfs } { .mfi -.fframe 64 +.fframe 64 add sp=-64,sp // Create new stack nop.f 0 mov GR_SAVE_GP=gp // Save gp @@ -673,18 +673,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mmi stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack add GR_Parameter_X = 16,sp // Parameter 1 address -.save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 // Save b0 +.save b0, GR_SAVE_B0 + mov GR_SAVE_B0=b0 // Save b0 };; .body { .mib - stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack - add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack + add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address + nop.b 0 } { .mib stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack - add GR_Parameter_Y = -16,GR_Parameter_Y + add GR_Parameter_Y = -16,GR_Parameter_Y br.call.sptk b0=__libm_error_support# // Call error handling function };; { .mmi @@ -699,10 +699,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov b0 = GR_SAVE_B0 // Restore return address };; { .mib - mov gp = GR_SAVE_GP // Restore gp + mov gp = GR_SAVE_GP // Restore gp mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs br.ret.sptk b0 // Return -};; +};; LOCAL_LIBM_END(__libm_error_region) .type __libm_error_support#,@function 
diff --git a/sysdeps/ia64/fpu/e_log2f.S b/sysdeps/ia64/fpu/e_log2f.S index 17d710a..cb25fb0 100644 --- a/sysdeps/ia64/fpu/e_log2f.S +++ b/sysdeps/ia64/fpu/e_log2f.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== -// 09/11/00 Initial version +// 09/11/00 Initial version // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align // @@ -58,19 +58,19 @@ // j=0 if f<128; j=1 if f>=128 // T is a table that stores log2(1/y) (in entries 1..255) rounded to // double extended precision; f is used as an index; T[255]=0 -// +// // If f=0 and b9=0, r is set to 2^{-8}* 0.b9 b10 ... b52 = m-1 (fractional part of m), -// and 0 is used instead of T[0] +// and 0 is used instead of T[0] // (polynomial evaluation only, for m=1+r, 0<=r<2^{-9}) // If f=255, r is set to (m-2)/2 (T[255]=0, and only polynomial evaluation is used // for m=2(1-r'), 0<=r'<2^{-9}) // // log2f(x) is approximated as // (l-j) + T[f] + (c1*r+c2*r^2+...+c6*r^6), if f>0 -// +// -// Special values +// Special values //============================================================== // log2f(0)=-inf, raises Divide by Zero // log2f(+inf)=inf @@ -88,7 +88,7 @@ GR_SAVE_B0 = r33 GR_SAVE_PFS = r34 -GR_SAVE_GP = r35 // This reg. can safely be used +GR_SAVE_GP = r35 // This reg. can safely be used GR_SAVE_SP = r36 GR_Parameter_X = r37 @@ -255,15 +255,15 @@ LOCAL_OBJECT_END(T_table) GLOBAL_LIBM_ENTRY(log2f) { .mfi - alloc r32=ar.pfs,1,4,4,0 - // y=frcpa(x) + alloc r32=ar.pfs,1,4,4,0 + // y=frcpa(x) frcpa.s1 f6,p0=f1,f8 - // will form significand of 1.5 (to test whether the index is 128 or above) + // will form significand of 1.5 (to test whether the index is 128 or above) mov r24=0xc } {.mfi nop.m 0 - // normalize x + // normalize x fma.s1 f7=f8,f1,f0 // r2 = pointer to C_1...C_6 followed by T_table addl r2 = @ltoff(poly_coeffs), gp;; @@ -273,7 +273,7 @@ GLOBAL_LIBM_ENTRY(log2f) getf.sig r25=f8 // f8 denormal ? 
fclass.m p8,p10=f8,0x9 - // will form significand of 1.5 (to test whether the index is 128 or above) + // will form significand of 1.5 (to test whether the index is 128 or above) shl r24=r24,60 } {.mfi @@ -287,7 +287,7 @@ GLOBAL_LIBM_ENTRY(log2f) getf.exp r29=f8 // load start address for C_1...C_6 followed by T_table ld8 r2=[r2] - // will continue only for positive normal/denormal numbers + // will continue only for positive normal/denormal numbers fclass.nm.unc p12,p7 = f8, 0x19 ;; } @@ -331,7 +331,7 @@ GLOBAL_LIBM_ENTRY(log2f) // load C_3, C_4 ldfpd f10,f11=[r2],16 nop.f 0 - // r27=bias-1 (if index >=128, will add exponent+1) + // r27=bias-1 (if index >=128, will add exponent+1) (p12) mov r27=0xfffe;; } @@ -360,7 +360,7 @@ GLOBAL_LIBM_ENTRY(log2f) cmp.ltu p8,p12=r25,r26;; } {.mfi - // f8=expon - bias + // f8=expon - bias setf.sig f8=r29 nop.f 0 // get T address @@ -440,7 +440,7 @@ GLOBAL_LIBM_ENTRY(log2f) SPECIAL_log2f: -{.mfi +{.mfi nop.m 0 // x=+Infinity ? fclass.m p7,p0=f8,0x21 @@ -465,7 +465,7 @@ SPECIAL_log2f: (p7) br.ret.spnt b0;; } {.mfi - (p8) mov GR_Parameter_TAG = 172 + (p8) mov GR_Parameter_TAG = 172 // log2f(+/-0)=-infinity, raises Divide by Zero // set f8=-0 (p8) fmerge.ns f8=f0,f8 @@ -477,12 +477,12 @@ SPECIAL_log2f: (p8) br.cond.sptk __libm_error_region;; } {.mfb - (p6) mov GR_Parameter_TAG = 173 + (p6) mov GR_Parameter_TAG = 173 // x<0: return NaN, raise Invalid (p6) frcpa.s0 f8,p0=f0,f0 (p6) br.cond.sptk __libm_error_region;; -} - +} + {.mfb nop.m 0 @@ -500,10 +500,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) add GR_Parameter_Y=-32,sp // Parameter 2 value nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs // Save ar.pfs + mov GR_SAVE_PFS=ar.pfs // Save ar.pfs } { .mfi -.fframe 64 +.fframe 64 add sp=-64,sp // Create new stack nop.f 0 mov GR_SAVE_GP=gp // Save gp @@ -511,18 +511,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mmi stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack add GR_Parameter_X = 16,sp // Parameter 1 address 
-.save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 // Save b0 +.save b0, GR_SAVE_B0 + mov GR_SAVE_B0=b0 // Save b0 };; .body { .mib - stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack - add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack + add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address + nop.b 0 } { .mib stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack - add GR_Parameter_Y = -16,GR_Parameter_Y + add GR_Parameter_Y = -16,GR_Parameter_Y br.call.sptk b0=__libm_error_support# // Call error handling function };; { .mmi @@ -537,10 +537,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov b0 = GR_SAVE_B0 // Restore return address };; { .mib - mov gp = GR_SAVE_GP // Restore gp + mov gp = GR_SAVE_GP // Restore gp mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs br.ret.sptk b0 // Return -};; +};; LOCAL_LIBM_END(__libm_error_region) .type __libm_error_support#,@function diff --git a/sysdeps/ia64/fpu/e_log2l.S b/sysdeps/ia64/fpu/e_log2l.S index b3fe63f..837c55d 100644 --- a/sysdeps/ia64/fpu/e_log2l.S +++ b/sysdeps/ia64/fpu/e_log2l.S @@ -21,27 +21,27 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== -// 09/25/00 Initial version +// 09/25/00 Initial version // 11/22/00 Fixed accuracy bug (for mantissas near 1, 2) -// 12/07/00 Fixed C_1l constant, eliminated rounding errors in +// 12/07/00 Fixed C_1l constant, eliminated rounding errors in // reduced argument (x*frcpa(x)-1) // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align @@ -57,16 +57,16 @@ // Implementation // // Let x = 2^l * m, where m=1.b1 b2 ... b8 b9 ... b52 -// y=frcpa(m), r=m*y-1, f=b1 b2 .. b8 -// T_hi is a table that stores the 24 most significant bits of log2(1/y) +// y=frcpa(m), r=m*y-1, f=b1 b2 .. 
b8 +// T_hi is a table that stores the 24 most significant bits of log2(1/y) // (in entries 1..255) in single precision format // T_low is a table that stores (log2(1/y)-T_high), rounded to double -// precision +// precision // // f is used as an index; T_high[255]=T_low[255]=0 -// +// // If f=0 and b9=0, r is set to 2^{-8}* 0.b9 b10 ... b52 = m-1 (fractional part of m), -// and 0 is used instead of T_high[0], T_low[0] +// and 0 is used instead of T_high[0], T_low[0] // (polynomial evaluation only, for m=1+r, 0<=r<2^{-9}) // If f=255, r is set to (m-2)/2 (T[255]=0, and only polynomial evaluation is used // for m=2(1-r'), 0<=r'<2^{-9}) @@ -80,10 +80,10 @@ // // log2l(x) is approximated as // (l+T_high[f]+C1r) + (D+r*(c1+c2*r+c3*r^2...+c8*r^7)+(T_low[f]+C_1*E)) -// +// -// Special values +// Special values //============================================================== // log2l(0)=-inf, raises Divide by Zero // log2l(+inf)=inf @@ -101,7 +101,7 @@ GR_SAVE_B0 = r33 GR_SAVE_PFS = r34 -GR_SAVE_GP = r35 // This reg. can safely be used +GR_SAVE_GP = r35 // This reg. 
can safely be used GR_SAVE_SP = r36 GR_Parameter_X = r37 @@ -127,7 +127,7 @@ LOCAL_OBJECT_START(poly_coeffs) data8 0xb8aa3b295c17f0bc, 0x00003fff // C_1 data8 0x3fca61762a7aded9, 0xbfc71547652b82fe // C_7, C_8 -data8 0x3fd2776c50ef9bfe, 0xbfcec709dc3a03fd // C_5, C_6 +data8 0x3fd2776c50ef9bfe, 0xbfcec709dc3a03fd // C_5, C_6 data8 0x3fdec709dc3a03fd, 0xbfd71547652b82fe // C_3, C_4 //data8 0xd871319ff0342580, 0x0000bfbd // C_1l (low part of C1) data8 0x82f0025f2dc582ee, 0x0000bfbe // C_1l (low part of C1) @@ -345,9 +345,9 @@ LOCAL_OBJECT_END(T_low) GLOBAL_IEEE754_ENTRY(log2l) { .mfi - alloc r32=ar.pfs,1,4,4,0 - // normalize x - // y=frcpa(x) + alloc r32=ar.pfs,1,4,4,0 + // normalize x + // y=frcpa(x) frcpa.s1 f41,p0=f1,f8 // r26=bias-1 mov r26=0xfffe @@ -378,8 +378,8 @@ GLOBAL_IEEE754_ENTRY(log2l) getf.exp r29=f8 // load start address for C_1...C_7 followed by T_table ld8 r2=[r2] - // will continue only for positive normal/unnormal numbers - fclass.m.unc p0,p12 = f8, 0x19;; + // will continue only for positive normal/unnormal numbers + fclass.m.unc p0,p12 = f8, 0x19;; } @@ -409,7 +409,7 @@ GLOBAL_IEEE754_ENTRY(log2l) } {.mfb - add r3=16,r2 + add r3=16,r2 // r=x*y-1 fms.s1 f6=f41,f8,f1 (p12) br.cond.spnt SPECIAL_log2l @@ -468,10 +468,10 @@ GLOBAL_IEEE754_ENTRY(log2l) // add 1 to the exponent additive term, and estimate log2(1-r) (p10) add r29=1,r29 nop.f 0 - (p7) br.cond.spnt LOG2_PSEUDO_ZERO + (p7) br.cond.spnt LOG2_PSEUDO_ZERO } {.mfi - // get T_low adress + // get T_low adress shladd r3=r28,3,r3 // if first 8 bits after leading 1 are all ones, use polynomial approx. only (p10) fms.s1 f6=f7,f36,f1 @@ -514,7 +514,7 @@ GLOBAL_IEEE754_ENTRY(log2l) .pred.rel "mutex",p8,p12 {.mfi - // f8=expon - bias + // f8=expon - bias setf.sig f8=r29 // general case: 2^{16}+C1*r (p12) fma.s1 f33=f6,f14,f32 @@ -687,7 +687,7 @@ SPECIAL_log2l: mov FR_X=f8 nop.i 0 } -{.mfi +{.mfi nop.m 0 // x=+Infinity ? 
fclass.m p7,p0=f8,0x21 @@ -712,7 +712,7 @@ SPECIAL_log2l: (p7) br.ret.spnt b0;; } {.mfi - (p8) mov GR_Parameter_TAG = 168 + (p8) mov GR_Parameter_TAG = 168 // log2l(+/-0)=-infinity, raises Divide by Zero // set f8=-0 (p8) fmerge.ns f8=f0,f8 @@ -724,12 +724,12 @@ SPECIAL_log2l: (p8) br.cond.sptk __libm_error_region;; } {.mfb - (p6) mov GR_Parameter_TAG = 169 + (p6) mov GR_Parameter_TAG = 169 // x<0: return NaN, raise Invalid (p6) frcpa.s0 f8,p0=f0,f0 (p6) br.cond.sptk __libm_error_region;; -} - +} + {.mfb nop.m 0 @@ -746,7 +746,7 @@ LOG2_PSEUDO_ZERO: nop.i 0 } {.mfi - mov GR_Parameter_TAG = 168 + mov GR_Parameter_TAG = 168 // log2l(+/-0)=-infinity, raises Divide by Zero // set f8=-0 fmerge.ns f8=f0,f8 @@ -768,10 +768,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) add GR_Parameter_Y=-32,sp // Parameter 2 value nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs // Save ar.pfs + mov GR_SAVE_PFS=ar.pfs // Save ar.pfs } { .mfi -.fframe 64 +.fframe 64 add sp=-64,sp // Create new stack nop.f 0 mov GR_SAVE_GP=gp // Save gp @@ -779,18 +779,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mmi stfe [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack add GR_Parameter_X = 16,sp // Parameter 1 address -.save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 // Save b0 +.save b0, GR_SAVE_B0 + mov GR_SAVE_B0=b0 // Save b0 };; .body { .mib - stfe [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack - add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + stfe [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack + add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address + nop.b 0 } { .mib stfe [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack - add GR_Parameter_Y = -16,GR_Parameter_Y + add GR_Parameter_Y = -16,GR_Parameter_Y br.call.sptk b0=__libm_error_support# // Call error handling function };; { .mmi @@ -805,10 +805,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov b0 = GR_SAVE_B0 // Restore return address };; { .mib - mov gp = GR_SAVE_GP // 
Restore gp + mov gp = GR_SAVE_GP // Restore gp mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs br.ret.sptk b0 // Return -};; +};; LOCAL_LIBM_END(__libm_error_region) .type __libm_error_support#,@function diff --git a/sysdeps/ia64/fpu/e_logl.S b/sysdeps/ia64/fpu/e_logl.S index 3ebb20a..e12c65d 100644 --- a/sysdeps/ia64/fpu/e_logl.S +++ b/sysdeps/ia64/fpu/e_logl.S @@ -1,4 +1,4 @@ -.file "logl.s" +.file "logl.s" // Copyright (c) 2000 - 2003, Intel Corporation @@ -21,26 +21,26 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // //********************************************************************* // -// History: -// 05/21/01 Extracted logl and log10l from log1pl.s file, and optimized +// History: +// 05/21/01 Extracted logl and log10l from log1pl.s file, and optimized // all paths. // 06/20/01 Fixed error tag for x=-inf. // 05/20/02 Cleaned up namespace and sf0 syntax @@ -74,20 +74,20 @@ // IEEE Special Conditions: // // Denormal fault raised on denormal inputs -// Overflow exceptions cannot occur -// Underflow exceptions raised when appropriate for log1p +// Overflow exceptions cannot occur +// Underflow exceptions raised when appropriate for log1p // (Error Handling Routine called for underflow) // Inexact raised when appropriate by algorithm // // logl(inf) = inf -// logl(-inf) = QNaN -// logl(+/-0) = -inf +// logl(-inf) = QNaN +// logl(+/-0) = -inf // logl(SNaN) = QNaN // logl(QNaN) = QNaN // logl(EM_special Values) = QNaN // log10l(inf) = inf -// log10l(-inf) = QNaN -// log10l(+/-0) = -inf +// log10l(-inf) = QNaN +// log10l(+/-0) = -inf // log10l(SNaN) = QNaN // log10l(QNaN) = QNaN // log10l(EM_special Values) = QNaN @@ -106,11 +106,11 @@ // logl( 1 + X ) can be approximated by a simple polynomial // in W = X-1. This polynomial resembles the truncated Taylor // series W - W^/2 + W^3/3 - ... -// +// // Case log_regular: // // Here we use a table lookup method. The basic idea is that in -// order to compute logl(Arg) for an argument Arg in [1,2), we +// order to compute logl(Arg) for an argument Arg in [1,2), we // construct a value G such that G*Arg is close to 1 and that // logl(1/G) is obtainable easily from a table of values calculated // beforehand. 
Thus @@ -128,7 +128,7 @@ // // X = 2^N * S_hi exactly // -// where S_hi in [1,2) +// where S_hi in [1,2) // // Step 1: Argument Reduction // @@ -137,7 +137,7 @@ // G := G_1 * G_2 * G_3 // r := (G * S_hi - 1) // -// These G_j's have the property that the product is exactly +// These G_j's have the property that the product is exactly // representable and that |r| < 2^(-12) as a result. // // Step 2: Approximation @@ -160,7 +160,7 @@ // // Here we compute a simple polynomial. To exploit parallelism, we split // the polynomial into two portions. -// +// // W := X - 1 // Wsq := W * W // W4 := Wsq*Wsq @@ -175,7 +175,7 @@ // Step 0. Initialization // ---------------------- // -// Z := X +// Z := X // N := unbaised exponent of Z // S_hi := 2^(-N) * Z // @@ -216,7 +216,7 @@ // with 1.0000 in fixed point. // // -// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1 +// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1 // truncated to lsb = 2^(-8). Similar to A_1, // A_2 is not needed in actual implementation. It // helps explain how some of the values are defined. @@ -245,13 +245,13 @@ // Fetch G_3 := (1/A_3) truncated to 21 sig. bits. // floating pt. Fetch is done using index_3. // -// Compute G := G_1 * G_2 * G_3. +// Compute G := G_1 * G_2 * G_3. // // This is done exactly since each of G_j only has 21 sig. bits. // -// Compute +// Compute // -// r := (G*S_hi - 1) +// r := (G*S_hi - 1) // // // Step 2. 
Approximation @@ -285,7 +285,7 @@ // Finally // // Y_hi := N*log2_hi + SUM ( log1byGj_hi ) -// Y_lo := poly_hi + [ poly_lo + +// Y_lo := poly_hi + [ poly_lo + // ( SUM ( log1byGj_lo ) + N*log2_lo ) ] // @@ -294,7 +294,7 @@ RODATA // ************* DO NOT CHANGE THE ORDER OF THESE TABLES ************* -// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1 +// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1 LOCAL_OBJECT_START(Constants_P) data8 0xE3936754EFD62B15,0x00003FFB @@ -307,7 +307,7 @@ data8 0xAAAAAAAAAAAAAAAA,0x00003FFD data8 0xFFFFFFFFFFFFFFFE,0x0000BFFD LOCAL_OBJECT_END(Constants_P) -// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1 +// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1 LOCAL_OBJECT_START(Constants_Q) data8 0xB172180000000000,0x00003FFE @@ -327,7 +327,7 @@ LOCAL_OBJECT_END(Constants_1_by_LN10) // Z1 - 16 bit fixed - + LOCAL_OBJECT_START(Constants_Z_1) data4 0x00008000 data4 0x00007879 @@ -442,7 +442,7 @@ data4 0x3F71D488,0x3D693B9D data8 0xBE049391B6B7C239 LOCAL_OBJECT_END(Constants_G_H_h2) -// G3 and H3 - IEEE single and h3 - IEEE double +// G3 and H3 - IEEE single and h3 - IEEE double LOCAL_OBJECT_START(Constants_G_H_h3) data4 0x3F7FFC00,0x38800100 @@ -514,64 +514,64 @@ LOCAL_OBJECT_END(Constants_G_H_h3) // Floating Point Registers -FR_Input_X = f8 +FR_Input_X = f8 -FR_Y_hi = f34 +FR_Y_hi = f34 FR_Y_lo = f35 FR_Scale = f36 -FR_X_Prime = f37 -FR_S_hi = f38 +FR_X_Prime = f37 +FR_S_hi = f38 FR_W = f39 FR_G = f40 FR_H = f41 -FR_wsq = f42 +FR_wsq = f42 FR_w4 = f43 FR_h = f44 -FR_w6 = f45 +FR_w6 = f45 FR_G2 = f46 FR_H2 = f47 FR_poly_lo = f48 -FR_P8 = f49 +FR_P8 = f49 FR_poly_hi = f50 -FR_P7 = f51 -FR_h2 = f52 -FR_rsq = f53 +FR_P7 = f51 +FR_h2 = f52 +FR_rsq = f53 FR_P6 = f54 -FR_r = f55 - -FR_log2_hi = f56 -FR_log2_lo = f57 -FR_p87 = f58 -FR_p876 = f58 -FR_p8765 = f58 -FR_float_N = f59 -FR_Q4 = f60 - -FR_p43 = f61 -FR_p432 = f61 -FR_p4321 = f61 -FR_P4 = f62 -FR_G3 = f63 -FR_H3 = f64 -FR_h3 = f65 - -FR_Q3 = f66 -FR_P3 = f67 -FR_Q2 = f68 -FR_P2 = f69 -FR_1LN10_hi = f70 - 
-FR_Q1 = f71 -FR_P1 = f72 -FR_1LN10_lo = f73 -FR_P5 = f74 -FR_rcub = f75 - -FR_Output_X_tmp = f76 +FR_r = f55 + +FR_log2_hi = f56 +FR_log2_lo = f57 +FR_p87 = f58 +FR_p876 = f58 +FR_p8765 = f58 +FR_float_N = f59 +FR_Q4 = f60 + +FR_p43 = f61 +FR_p432 = f61 +FR_p4321 = f61 +FR_P4 = f62 +FR_G3 = f63 +FR_H3 = f64 +FR_h3 = f65 + +FR_Q3 = f66 +FR_P3 = f67 +FR_Q2 = f68 +FR_P2 = f69 +FR_1LN10_hi = f70 + +FR_Q1 = f71 +FR_P1 = f72 +FR_1LN10_lo = f73 +FR_P5 = f74 +FR_rcub = f75 + +FR_Output_X_tmp = f76 FR_X = f8 FR_Y = f0 @@ -581,22 +581,22 @@ FR_RESULT = f76 // General Purpose Registers GR_ad_p = r33 -GR_Index1 = r34 -GR_Index2 = r35 -GR_signif = r36 -GR_X_0 = r37 -GR_X_1 = r38 -GR_X_2 = r39 -GR_Z_1 = r40 -GR_Z_2 = r41 -GR_N = r42 -GR_Bias = r43 -GR_M = r44 -GR_Index3 = r45 +GR_Index1 = r34 +GR_Index2 = r35 +GR_signif = r36 +GR_X_0 = r37 +GR_X_1 = r38 +GR_X_2 = r39 +GR_Z_1 = r40 +GR_Z_2 = r41 +GR_N = r42 +GR_Bias = r43 +GR_M = r44 +GR_Index3 = r45 GR_ad_p2 = r46 -GR_exp_mask = r47 -GR_exp_2tom7 = r48 -GR_ad_ln10 = r49 +GR_exp_mask = r47 +GR_exp_2tom7 = r48 +GR_ad_ln10 = r49 GR_ad_tbl_1 = r50 GR_ad_tbl_2 = r51 GR_ad_tbl_3 = r52 @@ -650,7 +650,7 @@ GLOBAL_IEEE754_ENTRY(log10l) // Common code for logl and log10 -LOGL_BEGIN: +LOGL_BEGIN: { .mfi ld8 GR_ad_z_1 = [GR_ad_z_1] // Get pointer to Constants_Z_1 fclass.m p10, p0 = FR_Input_X, 0x0b // Test for denormal @@ -741,7 +741,7 @@ LOGL_64_COMMON: { .mmi ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo (p14) ldfe FR_1LN10_hi = [GR_ad_ln10],16 // If log10l, load 1/ln10_hi - sub GR_N = GR_N, GR_Bias + sub GR_N = GR_N, GR_Bias } ;; @@ -762,7 +762,7 @@ LOGL_64_COMMON: { .mmi getf.exp GR_M = FR_W // Get signexp of w = x - 1 ldfe FR_Q2 = [GR_ad_q],16 // Load Q2 - extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 + extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 } ;; @@ -1007,7 +1007,7 @@ LOGL_64_COMMON: { .mfi nop.m 999 -(p9) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo = poly_hi + poly_lo +(p9) fadd.s1 FR_Y_lo = 
FR_poly_hi, FR_poly_lo // Y_lo = poly_hi + poly_lo nop.i 999 } ;; @@ -1041,25 +1041,25 @@ LOGL_64_COMMON: // Here if x=+-0 -LOGL_64_zero: +LOGL_64_zero: // // If x=+-0 raise divide by zero and return -inf -// +// { .mfi (p7) mov GR_Parameter_TAG = 0 - fsub.s1 FR_Output_X_tmp = f0, f1 + fsub.s1 FR_Output_X_tmp = f0, f1 nop.i 999 } ;; { .mfb -(p14) mov GR_Parameter_TAG = 6 - frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0 +(p14) mov GR_Parameter_TAG = 6 + frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0 br.cond.sptk __libm_error_region } ;; -LOGL_64_special: +LOGL_64_special: { .mfi nop.m 999 fclass.m.unc p8, p0 = FR_Input_X, 0x1E1 // Test for natval, nan, +inf @@ -1067,21 +1067,21 @@ LOGL_64_special: } ;; -// +// // For SNaN raise invalid and return QNaN. // For QNaN raise invalid and return QNaN. // For +Inf return +Inf. -// +// { .mfb nop.m 999 -(p8) fmpy.s0 f8 = FR_Input_X, f1 +(p8) fmpy.s0 f8 = FR_Input_X, f1 (p8) br.ret.sptk b0 // Return for natval, nan, +inf } ;; -// +// // For -Inf raise invalid and return QNaN. -// +// { .mmi (p7) mov GR_Parameter_TAG = 1 nop.m 999 @@ -1091,7 +1091,7 @@ LOGL_64_special: { .mfb (p14) mov GR_Parameter_TAG = 7 - fmpy.s0 FR_Output_X_tmp = FR_Input_X, f0 + fmpy.s0 FR_Output_X_tmp = FR_Input_X, f0 br.cond.sptk __libm_error_region } ;; @@ -1112,23 +1112,23 @@ LOGL_64_denormal: } ;; -LOGL_64_unsupported: -// +LOGL_64_unsupported: +// // Return generated NaN or other value. -// +// { .mfb nop.m 999 - fmpy.s0 f8 = FR_Input_X, f0 + fmpy.s0 f8 = FR_Input_X, f0 br.ret.sptk b0 } ;; // Here if -inf < x < 0 -LOGL_64_negative: -// +LOGL_64_negative: +// // Deal with x < 0 in a special way - raise // invalid and produce QNaN indefinite. 
-// +// { .mfi (p7) mov GR_Parameter_TAG = 1 frcpa.s0 FR_Output_X_tmp, p8 = f0, f0 diff --git a/sysdeps/ia64/fpu/e_powf.S b/sysdeps/ia64/fpu/e_powf.S index 1406a94..5585f9d 100644 --- a/sysdeps/ia64/fpu/e_powf.S +++ b/sysdeps/ia64/fpu/e_powf.S @@ -1471,7 +1471,7 @@ POW_POSSIBLE_UNDER: // 0.1...11 2^-3ffe (biased, 1) // largest dn smallest normal -// Form small constant (2^-170) to correct underflow result near region of +// Form small constant (2^-170) to correct underflow result near region of // smallest denormal in round-nearest. // Put in s2 (td set, ftz set) @@ -1482,9 +1482,9 @@ POW_POSSIBLE_UNDER: mov pow_GR_rcs0_mask = 0x0c00 // Set mask for rc.s0 } { .mfi -(p12) mov pow_GR_tmp = 0x2ffff - 170 +(p12) mov pow_GR_tmp = 0x2ffff - 170 nop.f 999 -(p13) mov pow_GR_tmp = 0x0ffff - 170 +(p13) mov pow_GR_tmp = 0x0ffff - 170 } ;; diff --git a/sysdeps/ia64/fpu/e_remainder.S b/sysdeps/ia64/fpu/e_remainder.S index f655567..d3bf707 100644 --- a/sysdeps/ia64/fpu/e_remainder.S +++ b/sysdeps/ia64/fpu/e_remainder.S @@ -51,12 +51,12 @@ // // API //==================================================================== -// double remainder(double,double); +// double remainder(double,double); // // Overview of operation //==================================================================== // remainder(a,b)=a-i*b, -// where i is an integer such that, if b!=0 and a is finite, +// where i is an integer such that, if b!=0 and a is finite, // |a/b-i|<=1/2. If |a/b-i|=1/2, i is even. // // Algorithm @@ -64,16 +64,16 @@ // a). eliminate special cases // b). if |a/b|<0.25 (first quotient estimate), return a // c). use single precision divide algorithm to get quotient q -// rounded to 24 bits of precision -// d). calculate partial remainders (using both q and q-ulp); -// select one and RZ(a/b) based on the sign of |a|-|b|*q +// rounded to 24 bits of precision +// d). calculate partial remainders (using both q and q-ulp); +// select one and RZ(a/b) based on the sign of |a|-|b|*q // e). 
if the exponent difference (exponent(a)-exponent(b)) -// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b) +// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b) // and sticky bits to round to integer; exit loop and // calculate final remainder // f). if exponent(a)-exponent(b)>=24, select new value of a as -// the partial remainder calculated using RZ(a/b); -// repeat from c). +// the partial remainder calculated using RZ(a/b); +// repeat from c). // // Special cases //==================================================================== @@ -88,7 +88,7 @@ GR_SAVE_B0 = r33 GR_SAVE_PFS = r34 -GR_SAVE_GP = r35 +GR_SAVE_GP = r35 GR_SAVE_SP = r36 GR_Parameter_X = r37 @@ -128,7 +128,7 @@ GLOBAL_IEEE754_ENTRY(remainder) // Y +-NAN, +-inf, +-0? p11 { .mfi setf.exp f32=r28 - fclass.m.unc p11,p0 = f9, 0xe7 + fclass.m.unc p11,p0 = f9, 0xe7 nop.i 999 } // qnan snan inf norm unorm 0 -+ @@ -137,8 +137,8 @@ GLOBAL_IEEE754_ENTRY(remainder) // X +-NAN, +-inf, ? p9 { .mfi nop.m 999 - fclass.m.unc p9,p0 = f8, 0xe3 - nop.i 999;; + fclass.m.unc p9,p0 = f8, 0xe3 + nop.i 999;; } {.mfi @@ -153,7 +153,7 @@ GLOBAL_IEEE754_ENTRY(remainder) // y0 = 1 / b in f10 frcpa.s1 f10,p6=f13,f14 nop.i 0;; -} +} {.bbb (p9) br.cond.spnt FREM_X_NAN_INF @@ -164,10 +164,10 @@ GLOBAL_IEEE754_ENTRY(remainder) // set D flag if a (f8) is denormal fnma.s0 f6=f8,f1,f8 nop.i 0;; -} +} -remloop24: +remloop24: { .mfi nop.m 0 // Step (2) @@ -184,7 +184,7 @@ remloop24: nop.m 0 // r2=1.25*2^{-24} movl r2=0x33a00000;; -} +} {.mfi nop.m 0 @@ -217,12 +217,12 @@ remloop24: // q2 = q1 + e1 * q1 in f6 (p6) fma.s1 f6=f7,f15,f15 nop.i 0;; -} +} {.mmi // f15=1.25*2^{-24} setf.s f15=r2 - // q<1/4 ? (i.e. expon< -2) + // q<1/4 ? (i.e. 
expon< -2) (p7) cmp.gt p7,p0=r28,r29 nop.i 0;; } @@ -230,7 +230,7 @@ remloop24: {.mfb // r29= -32+bias mov r29=0xffdf - // if |a/b|<1/4, set D flag before returning + // if |a/b|<1/4, set D flag before returning (p7) fma.d.s0 f9=f9,f0,f8 nop.b 0;; } @@ -248,7 +248,7 @@ remloop24: // set f8 to current a value | sign fmerge.s f8=f8,f13 nop.i 0;; -} +} {.mfi @@ -273,7 +273,7 @@ remloop24: nop.m 0 cmp.eq p11,p14=r2,r28 nop.i 0;; -} +} .pred.rel "mutex",p11,p14 {.mfi @@ -281,7 +281,7 @@ remloop24: // if exp_q=2^23, then r=a-b*2^{23} (p11) fnma.s1 f13=f12,f14,f13 nop.i 0 -} +} {.mfi nop.m 0 // r2=a-b*q' @@ -302,7 +302,7 @@ remloop24: .pred.rel "mutex",p8,p9 {.mfi - nop.m 0 + nop.m 0 // (p8) Q=q+(last iteration ? sticky bits:0) // i.e. Q=q+q*x (x=2^{-32} or 0) (p8) fma.s1 f11=f11,f7,f11 @@ -321,7 +321,7 @@ remloop24: // (p10) new a =r (p10) mov f13=f6 (p12) br.cond.sptk remloop24;; -} +} // last iteration {.mfi @@ -341,15 +341,15 @@ remloop24: // save sign of a fmerge.s f7=f8,f8 nop.i 0 -} {.mfi +} {.mfi nop.m 0 // normalize fcvt.xf f11=f11 nop.i 0;; -} +} {.mfi nop.m 0 - // This can be removed if sign of 0 is not important + // This can be removed if sign of 0 is not important // get remainder using sf1 fnma.d.s1 f12=f9,f11,f8 nop.i 0 @@ -363,24 +363,24 @@ remloop24: {.mfi nop.m 0 // f12=0? - // This can be removed if sign of 0 is not important + // This can be removed if sign of 0 is not important fcmp.eq.unc.s1 p8,p0=f12,f0 nop.i 0;; } {.mfb nop.m 0 // if f8=0, set sign correctly - // This can be removed if sign of 0 is not important + // This can be removed if sign of 0 is not important (p8) fmerge.s f8=f7,f8 // return br.ret.sptk b0;; } -FREM_X_NAN_INF: +FREM_X_NAN_INF: // Y zero ? -{.mfi +{.mfi nop.m 0 fma.s1 f10=f9,f1,f0 nop.i 0;; @@ -394,20 +394,20 @@ FREM_X_NAN_INF: nop.m 0 nop.i 0 // if Y zero - (p11) br.cond.spnt FREM_Y_ZERO;; + (p11) br.cond.spnt FREM_Y_ZERO;; } // X infinity? 
Return QNAN indefinite { .mfi nop.m 999 - fclass.m.unc p8,p0 = f8, 0x23 + fclass.m.unc p8,p0 = f8, 0x23 nop.i 999 } // X infinity? Return QNAN indefinite { .mfi nop.m 999 - fclass.m.unc p11,p0 = f8, 0x23 - nop.i 999;; + fclass.m.unc p11,p0 = f8, 0x23 + nop.i 999;; } // Y NaN ? {.mfi @@ -420,10 +420,10 @@ FREM_X_NAN_INF: // also set Denormal flag if necessary (p8) fma.s0 f9=f9,f1,f0 nop.i 0 -} +} { .mfi nop.m 999 -(p8) frcpa.s0 f8,p7 = f8,f8 +(p8) frcpa.s0 f8,p7 = f8,f8 nop.i 999 ;; } @@ -434,48 +434,48 @@ FREM_X_NAN_INF: } { .mfi nop.m 999 -(p8) fma.d.s0 f8=f8,f1,f0 - nop.i 0 ;; +(p8) fma.d.s0 f8=f8,f1,f0 + nop.i 0 ;; } { .mfb nop.m 999 - frcpa.s0 f8,p7=f8,f9 - (p11) br.cond.spnt EXP_ERROR_RETURN;; + frcpa.s0 f8,p7=f8,f9 + (p11) br.cond.spnt EXP_ERROR_RETURN;; } { .mib nop.m 0 nop.i 0 - br.ret.spnt b0 ;; + br.ret.spnt b0 ;; } -FREM_Y_NAN_INF_ZERO: +FREM_Y_NAN_INF_ZERO: // Y INF { .mfi nop.m 999 - fclass.m.unc p7,p0 = f9, 0x23 + fclass.m.unc p7,p0 = f9, 0x23 nop.i 999 ;; } { .mfb nop.m 999 -(p7) fma.d.s0 f8=f8,f1,f0 -(p7) br.ret.spnt b0 ;; +(p7) fma.d.s0 f8=f8,f1,f0 +(p7) br.ret.spnt b0 ;; } // Y NAN? { .mfi nop.m 999 - fclass.m.unc p9,p0 = f9, 0xc3 + fclass.m.unc p9,p0 = f9, 0xc3 nop.i 999 ;; } { .mfb nop.m 999 -(p9) fma.d.s0 f8=f9,f1,f0 -(p9) br.ret.spnt b0 ;; +(p9) fma.d.s0 f8=f9,f1,f0 +(p9) br.ret.spnt b0 ;; } FREM_Y_ZERO: @@ -486,12 +486,12 @@ FREM_Y_ZERO: // X NAN? 
{ .mfi nop.m 999 - fclass.m.unc p9,p10 = f8, 0xc3 + fclass.m.unc p9,p10 = f8, 0xc3 nop.i 999 ;; } { .mfi nop.m 999 -(p10) fclass.nm p9,p10 = f8, 0xff +(p10) fclass.nm p9,p10 = f8, 0xff nop.i 999 ;; } @@ -503,29 +503,29 @@ FREM_Y_ZERO: { .mfi nop.m 999 -(p10) frcpa.s0 f11,p7 = f0,f0 - nop.i 999;; +(p10) frcpa.s0 f11,p7 = f0,f0 + nop.i 999;; } { .mfi nop.m 999 - fmerge.s f10 = f8, f8 + fmerge.s f10 = f8, f8 nop.i 999 } { .mfi nop.m 999 - fma.d.s0 f8=f11,f1,f0 + fma.d.s0 f8=f11,f1,f0 nop.i 999 } -EXP_ERROR_RETURN: +EXP_ERROR_RETURN: { .mib - mov GR_Parameter_TAG = 124 + mov GR_Parameter_TAG = 124 nop.i 999 - br.sptk __libm_error_region;; + br.sptk __libm_error_region;; } GLOBAL_IEEE754_END(remainder) @@ -538,10 +538,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) add GR_Parameter_Y=-32,sp // Parameter 2 value nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs // Save ar.pfs + mov GR_SAVE_PFS=ar.pfs // Save ar.pfs } { .mfi -.fframe 64 +.fframe 64 add sp=-64,sp // Create new stack nop.f 0 mov GR_SAVE_GP=gp // Save gp @@ -549,18 +549,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mmi stfd [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack add GR_Parameter_X = 16,sp // Parameter 1 address -.save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 // Save b0 +.save b0, GR_SAVE_B0 + mov GR_SAVE_B0=b0 // Save b0 };; .body { .mib - stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack - add GR_Parameter_RESULT = 0,GR_Parameter_Y + stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack + add GR_Parameter_RESULT = 0,GR_Parameter_Y nop.b 0 // Parameter 3 address } { .mib stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack - add GR_Parameter_Y = -16,GR_Parameter_Y + add GR_Parameter_Y = -16,GR_Parameter_Y br.call.sptk b0=__libm_error_support# // Call error handling function };; { .mmi @@ -575,10 +575,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov b0 = GR_SAVE_B0 // Restore return address };; { .mib - mov gp = GR_SAVE_GP // Restore gp + mov gp = GR_SAVE_GP // Restore 
gp mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs br.ret.sptk b0 // Return -};; +};; LOCAL_LIBM_END(__libm_error_region) diff --git a/sysdeps/ia64/fpu/e_remainderf.S b/sysdeps/ia64/fpu/e_remainderf.S index 0e9bedd..efc5a8d 100644 --- a/sysdeps/ia64/fpu/e_remainderf.S +++ b/sysdeps/ia64/fpu/e_remainderf.S @@ -40,7 +40,7 @@ // History //==================================================================== // 02/02/00 Initial version -// 03/02/00 New algorithm +// 03/02/00 New algorithm // 04/04/00 Unwind support added // 07/21/00 Fixed quotient=2^{24*m+23} bug // 08/15/00 Bundle added after call to __libm_error_support to properly @@ -51,12 +51,12 @@ // // API //==================================================================== -// float remainderf(float,float); +// float remainderf(float,float); // // Overview of operation //==================================================================== // remainder(a,b)=a-i*b, -// where i is an integer such that, if b!=0 and a is finite, +// where i is an integer such that, if b!=0 and a is finite, // |a/b-i|<=1/2. If |a/b-i|=1/2, i is even. // // Algorithm @@ -64,16 +64,16 @@ // a). eliminate special cases // b). if |a/b|<0.25 (first quotient estimate), return a // c). use single precision divide algorithm to get quotient q -// rounded to 24 bits of precision -// d). calculate partial remainders (using both q and q-ulp); -// select one and RZ(a/b) based on the sign of |a|-|b|*q +// rounded to 24 bits of precision +// d). calculate partial remainders (using both q and q-ulp); +// select one and RZ(a/b) based on the sign of |a|-|b|*q // e). if the exponent difference (exponent(a)-exponent(b)) -// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b) +// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b) // and sticky bits to round to integer; exit loop and // calculate final remainder // f). if exponent(a)-exponent(b)>=24, select new value of a as -// the partial remainder calculated using RZ(a/b); -// repeat from c). 
+// the partial remainder calculated using RZ(a/b); +// repeat from c). // // Special cases //==================================================================== @@ -89,7 +89,7 @@ GR_SAVE_B0 = r33 GR_SAVE_PFS = r34 -GR_SAVE_GP = r35 +GR_SAVE_GP = r35 GR_SAVE_SP = r36 GR_Parameter_X = r37 @@ -129,7 +129,7 @@ GLOBAL_IEEE754_ENTRY(remainderf) // Y +-NAN, +-inf, +-0? p11 { .mfi nop.m 999 - fclass.m.unc p11,p0 = f9, 0xe7 + fclass.m.unc p11,p0 = f9, 0xe7 nop.i 999 } // qnan snan inf norm unorm 0 -+ @@ -138,8 +138,8 @@ GLOBAL_IEEE754_ENTRY(remainderf) // X +-NAN, +-inf, ? p9 { .mfi nop.m 999 - fclass.m.unc p9,p0 = f8, 0xe3 - nop.i 999;; + fclass.m.unc p9,p0 = f8, 0xe3 + nop.i 999;; } {.mfi @@ -154,7 +154,7 @@ GLOBAL_IEEE754_ENTRY(remainderf) // y0 = 1 / b in f10 frcpa.s1 f10,p6=f13,f14 nop.i 0;; -} +} {.bbb (p9) br.cond.spnt FREM_X_NAN_INF (p11) br.cond.spnt FREM_Y_NAN_INF_ZERO @@ -164,10 +164,10 @@ GLOBAL_IEEE754_ENTRY(remainderf) // set D flag if a (f8) is denormal fnma.s0 f6=f8,f1,f8 nop.i 0;; -} +} .align 32 -remloop24: +remloop24: { .mfi // f12=2^{24}-2 setf.s f12=r3 @@ -175,26 +175,26 @@ remloop24: // q0 = a * y0 in f15 (p6) fma.s1 f15=f13,f10,f0 nop.i 0 -} +} { .mfi nop.m 0 // Step (3) // e0 = 1 - b * y0 in f7 (p6) fnma.s1 f7=f14,f10,f1 nop.i 0;; -} +} {.mlx nop.m 0 // r2=1.25*2^{-24} movl r2=0x33a00000;; -} +} { .mfi nop.m 0 // Step (4) // q1 = q0 + e0 * q0 in f6 (p6) fma.s1 f6=f7,f15,f15 nop.i 0 -} +} { .mfi nop.m 0 // Step (5) @@ -215,17 +215,17 @@ remloop24: // q2 = q1 + e1 * q1 in f6 (p6) fma.s1 f6=f7,f6,f6 nop.i 0 -} +} { .mfi mov r2=0x3e7 // Step (7) // e2 = e1 * e1 in f7 (p6) fma.s1 f7=f7,f7,f0 nop.i 0;; -} +} {.mmi - // q<1/4 ? (i.e. expon< -2) + // q<1/4 ? (i.e. 
expon< -2) (p7) cmp.gt.unc p7,p0=r28,r29 nop.m 0 // r2=0x3e7000000 @@ -235,7 +235,7 @@ remloop24: {.mfb // r2=0x3e7000001 add r2=1,r2 - // if |a/b|<1/4, set D flag before returning + // if |a/b|<1/4, set D flag before returning (p7) fma.s.s0 f9=f9,f0,f8 nop.b 0;; } @@ -253,7 +253,7 @@ remloop24: fmerge.s f8=f8,f13 // r2=2^{-24}+2^{-48} (double prec.) shl r2=r2,28;; -} +} { .mfi @@ -263,14 +263,14 @@ remloop24: // q3 = q2 + e2 * q2 in f6 (p6) fma.d.s1 f6=f7,f6,f6 nop.i 0;; -} +} { .mfi nop.m 0 // Step (9) // q = q3 in f11 (p6) fma.s.s1 f11=f6,f1,f0 nop.i 0;; -} +} {.mfi // f7=2^{-24} setf.d f7=r2 @@ -288,7 +288,7 @@ remloop24: // r=a-b*q fnma.s1 f6=f14,f11,f13 nop.i 0 -} +} {.mfi nop.m 0 // q'=q-q*(1.25*2^{-24}) (q'=q-ulp) @@ -307,7 +307,7 @@ remloop24: // r>0 iff q=RZ(a/b) and inexact fcmp.gt.unc.s1 p8,p0=f6,f0 nop.i 0 -} +} {.mfi nop.m 0 // r<0 iff q'=RZ(a/b) and inexact @@ -321,7 +321,7 @@ remloop24: // i.e. Q=q+q*x (x=2^{-32} or 0) (p8) fma.s1 f11=f11,f12,f11 nop.i 0 -} +} {.mfi nop.m 0 // (p9) Q=q'+(last iteration ? sticky bits:0) @@ -336,7 +336,7 @@ remloop24: // (p10) new a =r (p10) mov f13=f6 (p12) br.cond.sptk remloop24;; -} +} // last iteration {.mfi @@ -356,16 +356,16 @@ remloop24: // save sign of a fmerge.s f7=f8,f8 nop.i 0 -} -{.mfi +} +{.mfi nop.m 0 // normalize fcvt.xf f11=f11 nop.i 0;; -} +} {.mfi nop.m 0 - // This can be removed if sign of 0 is not important + // This can be removed if sign of 0 is not important // get remainder using sf1 fnma.s.s1 f12=f9,f11,f8 nop.i 0 @@ -382,24 +382,24 @@ remloop24: {.mfi nop.m 0 // f12=0? - // This can be removed if sign of 0 is not important + // This can be removed if sign of 0 is not important fcmp.eq.unc.s1 p8,p0=f12,f0 nop.i 0;; } {.mfb nop.m 0 // if f8=0, set sign correctly - // This can be removed if sign of 0 is not important + // This can be removed if sign of 0 is not important (p8) fmerge.s f8=f7,f8 // return br.ret.sptk b0;; } -FREM_X_NAN_INF: +FREM_X_NAN_INF: // Y zero ? 
-{.mfi +{.mfi nop.m 0 fma.s1 f10=f9,f1,f0 nop.i 0;; @@ -413,20 +413,20 @@ FREM_X_NAN_INF: nop.m 0 nop.i 0 // if Y zero - (p11) br.cond.spnt FREM_Y_ZERO;; + (p11) br.cond.spnt FREM_Y_ZERO;; } // X infinity? Return QNAN indefinite { .mfi nop.m 999 - fclass.m.unc p8,p0 = f8, 0x23 + fclass.m.unc p8,p0 = f8, 0x23 nop.i 999 } // X infinity? Return QNAN indefinite { .mfi nop.m 999 - fclass.m.unc p11,p0 = f8, 0x23 - nop.i 999;; + fclass.m.unc p11,p0 = f8, 0x23 + nop.i 999;; } // Y NaN ? {.mfi @@ -439,10 +439,10 @@ FREM_X_NAN_INF: // also set Denormal flag if necessary (p8) fma.s0 f9=f9,f1,f0 nop.i 0 -} +} { .mfi nop.m 999 -(p8) frcpa.s0 f8,p7 = f8,f8 +(p8) frcpa.s0 f8,p7 = f8,f8 nop.i 999 ;; } @@ -453,48 +453,48 @@ FREM_X_NAN_INF: } { .mfi nop.m 999 -(p8) fma.s.s0 f8=f8,f1,f0 - nop.i 0 ;; +(p8) fma.s.s0 f8=f8,f1,f0 + nop.i 0 ;; } { .mfb nop.m 999 - frcpa.s0 f8,p7=f8,f9 - (p11) br.cond.spnt EXP_ERROR_RETURN;; + frcpa.s0 f8,p7=f8,f9 + (p11) br.cond.spnt EXP_ERROR_RETURN;; } { .mib nop.m 0 nop.i 0 - br.ret.spnt b0 ;; + br.ret.spnt b0 ;; } -FREM_Y_NAN_INF_ZERO: +FREM_Y_NAN_INF_ZERO: // Y INF { .mfi nop.m 999 - fclass.m.unc p7,p0 = f9, 0x23 + fclass.m.unc p7,p0 = f9, 0x23 nop.i 999 ;; } { .mfb nop.m 999 -(p7) fma.s.s0 f8=f8,f1,f0 -(p7) br.ret.spnt b0 ;; +(p7) fma.s.s0 f8=f8,f1,f0 +(p7) br.ret.spnt b0 ;; } // Y NAN? { .mfi nop.m 999 - fclass.m.unc p9,p0 = f9, 0xc3 + fclass.m.unc p9,p0 = f9, 0xc3 nop.i 999 ;; } { .mfb nop.m 999 -(p9) fma.s.s0 f8=f9,f1,f0 -(p9) br.ret.spnt b0 ;; +(p9) fma.s.s0 f8=f9,f1,f0 +(p9) br.ret.spnt b0 ;; } FREM_Y_ZERO: @@ -505,12 +505,12 @@ FREM_Y_ZERO: // X NAN? 
{ .mfi nop.m 999 - fclass.m.unc p9,p10 = f8, 0xc3 + fclass.m.unc p9,p10 = f8, 0xc3 nop.i 999 ;; } { .mfi nop.m 999 -(p10) fclass.nm p9,p10 = f8, 0xff +(p10) fclass.nm p9,p10 = f8, 0xff nop.i 999 ;; } @@ -522,29 +522,29 @@ FREM_Y_ZERO: { .mfi nop.m 999 -(p10) frcpa.s0 f11,p7 = f0,f0 +(p10) frcpa.s0 f11,p7 = f0,f0 nop.i 999;; } { .mfi nop.m 999 - fmerge.s f10 = f8, f8 + fmerge.s f10 = f8, f8 nop.i 999 } { .mfi nop.m 999 - fma.s.s0 f8=f11,f1,f0 + fma.s.s0 f8=f11,f1,f0 nop.i 999 } -EXP_ERROR_RETURN: +EXP_ERROR_RETURN: { .mib - mov GR_Parameter_TAG = 125 + mov GR_Parameter_TAG = 125 nop.i 999 - br.sptk __libm_error_region;; + br.sptk __libm_error_region;; } GLOBAL_IEEE754_END(remainderf) @@ -557,10 +557,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) add GR_Parameter_Y=-32,sp // Parameter 2 value nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs // Save ar.pfs + mov GR_SAVE_PFS=ar.pfs // Save ar.pfs } { .mfi -.fframe 64 +.fframe 64 add sp=-64,sp // Create new stack nop.f 0 mov GR_SAVE_GP=gp // Save gp @@ -568,18 +568,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mmi stfs [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack add GR_Parameter_X = 16,sp // Parameter 1 address -.save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 // Save b0 +.save b0, GR_SAVE_B0 + mov GR_SAVE_B0=b0 // Save b0 };; .body { .mib - stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack - add GR_Parameter_RESULT = 0,GR_Parameter_Y + stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack + add GR_Parameter_RESULT = 0,GR_Parameter_Y nop.b 0 // Parameter 3 address } { .mib stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack - add GR_Parameter_Y = -16,GR_Parameter_Y + add GR_Parameter_Y = -16,GR_Parameter_Y br.call.sptk b0=__libm_error_support#;; // Call error handling function } { .mmi @@ -594,10 +594,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov b0 = GR_SAVE_B0 // Restore return address };; { .mib - mov gp = GR_SAVE_GP // Restore gp + mov gp = GR_SAVE_GP // Restore gp mov ar.pfs = 
GR_SAVE_PFS // Restore ar.pfs br.ret.sptk b0 // Return -};; +};; LOCAL_LIBM_END(__libm_error_region) diff --git a/sysdeps/ia64/fpu/e_remainderl.S b/sysdeps/ia64/fpu/e_remainderl.S index 8c1630e..9727199 100644 --- a/sysdeps/ia64/fpu/e_remainderl.S +++ b/sysdeps/ia64/fpu/e_remainderl.S @@ -40,7 +40,7 @@ // History //==================================================================== // 02/02/00 Initial version -// 03/02/00 New algorithm +// 03/02/00 New algorithm // 04/04/00 Unwind support added // 07/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug // 08/15/00 Bundle added after call to __libm_error_support to properly @@ -51,12 +51,12 @@ // // API //==================================================================== -// long double remainderl(long double,long double); +// long double remainderl(long double,long double); // // Overview of operation //==================================================================== // remainder(a,b)=a-i*b, -// where i is an integer such that, if b!=0 and a is finite, +// where i is an integer such that, if b!=0 and a is finite, // |a/b-i|<=1/2. If |a/b-i|=1/2, i is even. // // Algorithm @@ -64,16 +64,16 @@ // a). eliminate special cases // b). if |a/b|<0.25 (first quotient estimate), return a // c). use single precision divide algorithm to get quotient q -// rounded to 24 bits of precision -// d). calculate partial remainders (using both q and q-ulp); -// select one and RZ(a/b) based on the sign of |a|-|b|*q +// rounded to 24 bits of precision +// d). calculate partial remainders (using both q and q-ulp); +// select one and RZ(a/b) based on the sign of |a|-|b|*q // e). if the exponent difference (exponent(a)-exponent(b)) -// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b) +// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b) // and sticky bits to round to integer; exit loop and // calculate final remainder // f). 
if exponent(a)-exponent(b)>=24, select new value of a as -// the partial remainder calculated using RZ(a/b); -// repeat from c). +// the partial remainder calculated using RZ(a/b); +// repeat from c). // // Special cases //==================================================================== @@ -89,7 +89,7 @@ GR_SAVE_B0 = r33 GR_SAVE_PFS = r34 -GR_SAVE_GP = r35 +GR_SAVE_GP = r35 GR_SAVE_SP = r36 GR_Parameter_X = r37 @@ -137,7 +137,7 @@ cmp.eq p11,p10=r29,r0;; // Y +-NAN, +-inf, +-0? p11 { .mfi nop.m 999 -(p10) fclass.m p11,p10 = f9, 0xe7 +(p10) fclass.m p11,p10 = f9, 0xe7 nop.i 999 } // qnan snan inf norm unorm 0 -+ @@ -146,8 +146,8 @@ cmp.eq p11,p10=r29,r0;; // X +-NAN, +-inf, ? p9 { .mfi nop.m 999 - fclass.m.unc p9,p8 = f8, 0xe3 - nop.i 999;; + fclass.m.unc p9,p8 = f8, 0xe3 + nop.i 999;; } {.mfi @@ -162,12 +162,12 @@ cmp.eq p11,p10=r29,r0;; // y0 = 1 / b in f10 frcpa.s1 f10,p6=f13,f14 nop.i 0;; -} +} // Y +-NAN, +-inf, +-0? p11 { .mfi nop.m 999 // pseudo-NaN ? -(p10) fclass.nm p11,p0 = f9, 0xff +(p10) fclass.nm p11,p0 = f9, 0xff nop.i 999 } @@ -178,7 +178,7 @@ cmp.eq p11,p10=r29,r0;; { .mfi nop.m 999 -(p8) fclass.nm p9,p0 = f8, 0xff +(p8) fclass.nm p9,p0 = f8, 0xff nop.i 999;; } @@ -191,9 +191,9 @@ cmp.eq p11,p10=r29,r0;; // set D flag if a (f8) is denormal fnma.s0 f6=f8,f1,f8 nop.i 0;; -} +} -remloop24: +remloop24: { .mfi nop.m 0 // Step (2) @@ -210,7 +210,7 @@ remloop24: nop.m 0 // r2=1.25*2^{-24} movl r2=0x33a00000;; -} +} {.mfi nop.m 0 @@ -244,12 +244,12 @@ remloop24: // q2 = q1 + e1 * q1 in f6 (p6) fma.s1 f6=f7,f15,f15 nop.i 0;; -} +} {.mmi // f15=1.25*2^{-24} setf.s f15=r2 - // q<1/4 ? (i.e. expon< -2) + // q<1/4 ? (i.e. 
expon< -2) (p7) cmp.gt p7,p0=r28,r29 nop.i 0;; } @@ -257,7 +257,7 @@ remloop24: {.mfb // r29= -32+bias mov r29=0xffdf - // if |a/b|<1/4, set D flag before returning + // if |a/b|<1/4, set D flag before returning (p7) fma.s0 f9=f9,f0,f8 nop.b 0;; } @@ -275,7 +275,7 @@ remloop24: // set f8 to current a value | sign fmerge.s f8=f8,f13 nop.i 0;; -} +} {.mfi getf.exp r28=f6 // last step ? (q<2^{23}) @@ -298,7 +298,7 @@ remloop24: nop.m 0 cmp.eq p11,p14=r2,r28 nop.i 0;; -} +} .pred.rel "mutex",p11,p14 {.mfi @@ -306,7 +306,7 @@ remloop24: // if exp_q=2^23, then r=a-b*2^{23} (p11) fnma.s1 f13=f12,f14,f13 nop.i 0 -} +} {.mfi nop.m 0 // r2=a-b*q' @@ -327,7 +327,7 @@ remloop24: .pred.rel "mutex",p8,p9 {.mfi - nop.m 0 + nop.m 0 // (p8) Q=q+(last iteration ? sticky bits:0) // i.e. Q=q+q*x (x=2^{-32} or 0) (p8) fma.s1 f11=f11,f7,f11 @@ -346,7 +346,7 @@ remloop24: // (p10) new a =r (p10) mov f13=f6 (p12) br.cond.sptk remloop24;; -} +} // last iteration {.mfi @@ -366,15 +366,15 @@ remloop24: // save sign of a fmerge.s f7=f8,f8 nop.i 0 -} {.mfi +} {.mfi nop.m 0 // normalize fcvt.xf f11=f11 nop.i 0;; -} +} {.mfi nop.m 0 - // This can be removed if sign of 0 is not important + // This can be removed if sign of 0 is not important // get remainder using sf1 fnma.s1 f12=f9,f11,f8 nop.i 0 @@ -388,14 +388,14 @@ remloop24: {.mfi nop.m 0 // f12=0? - // This can be removed if sign of 0 is not important + // This can be removed if sign of 0 is not important fcmp.eq.unc.s1 p8,p0=f12,f0 nop.i 0;; } {.mfb nop.m 0 // if f8=0, set sign correctly - // This can be removed if sign of 0 is not important + // This can be removed if sign of 0 is not important (p8) fmerge.s f8=f7,f8 // return br.ret.sptk b0;; @@ -403,10 +403,10 @@ remloop24: -FREM_X_NAN_INF: +FREM_X_NAN_INF: // Y zero ? -{.mfi +{.mfi nop.m 0 fma.s1 f10=f9,f1,f0 nop.i 0;; @@ -420,20 +420,20 @@ FREM_X_NAN_INF: nop.m 0 nop.i 0 // if Y zero - (p11) br.cond.spnt FREM_Y_ZERO;; + (p11) br.cond.spnt FREM_Y_ZERO;; } // X infinity? 
Return QNAN indefinite { .mfi nop.m 999 - fclass.m.unc p8,p0 = f8, 0x23 + fclass.m.unc p8,p0 = f8, 0x23 nop.i 999 } // X infinity? Return QNAN indefinite { .mfi nop.m 999 - fclass.m.unc p11,p0 = f8, 0x23 - nop.i 999;; + fclass.m.unc p11,p0 = f8, 0x23 + nop.i 999;; } // Y NaN ? {.mfi @@ -446,10 +446,10 @@ FREM_X_NAN_INF: // also set Denormal flag if necessary (p8) fnma.s0 f9=f9,f1,f9 nop.i 0 -} +} { .mfi nop.m 999 -(p8) frcpa.s0 f8,p7 = f8,f8 +(p8) frcpa.s0 f8,p7 = f8,f8 nop.i 999 ;; } @@ -460,52 +460,52 @@ FREM_X_NAN_INF: } { .mfi nop.m 999 -(p8) fma.s0 f8=f8,f1,f0 - nop.i 0 ;; +(p8) fma.s0 f8=f8,f1,f0 + nop.i 0 ;; } { .mfb nop.m 999 - frcpa.s0 f8,p7=f8,f9 - (p11) br.cond.spnt EXP_ERROR_RETURN;; + frcpa.s0 f8,p7=f8,f9 + (p11) br.cond.spnt EXP_ERROR_RETURN;; } { .mib nop.m 0 nop.i 0 - br.ret.spnt b0 ;; + br.ret.spnt b0 ;; } -FREM_Y_NAN_INF_ZERO: +FREM_Y_NAN_INF_ZERO: // Y INF { .mfi nop.m 999 - fclass.m.unc p7,p0 = f9, 0x23 + fclass.m.unc p7,p0 = f9, 0x23 nop.i 999 ;; } { .mfb nop.m 999 -(p7) fma.s0 f8=f8,f1,f0 -(p7) br.ret.spnt b0 ;; +(p7) fma.s0 f8=f8,f1,f0 +(p7) br.ret.spnt b0 ;; } // Y NAN? { .mfi nop.m 999 - fclass.m.unc p9,p10 = f9, 0xc3 + fclass.m.unc p9,p10 = f9, 0xc3 nop.i 999 ;; } { .mfi nop.m 999 -(p10) fclass.nm p9,p0 = f9, 0xff +(p10) fclass.nm p9,p0 = f9, 0xff nop.i 999 ;; } { .mfb nop.m 999 -(p9) fma.s0 f8=f9,f1,f0 -(p9) br.ret.spnt b0 ;; +(p9) fma.s0 f8=f9,f1,f0 +(p9) br.ret.spnt b0 ;; } FREM_Y_ZERO: @@ -516,12 +516,12 @@ FREM_Y_ZERO: // X NAN? 
{ .mfi nop.m 999 - fclass.m.unc p9,p10 = f8, 0xc3 + fclass.m.unc p9,p10 = f8, 0xc3 nop.i 999 ;; } { .mfi nop.m 999 -(p10) fclass.nm p9,p10 = f8, 0xff +(p10) fclass.nm p9,p10 = f8, 0xff nop.i 999 ;; } @@ -532,28 +532,28 @@ FREM_Y_ZERO: } { .mfi nop.m 999 -(p10) frcpa.s0 f11,p7 = f0,f0 +(p10) frcpa.s0 f11,p7 = f0,f0 nop.i 999;; } { .mfi nop.m 999 - fmerge.s f10 = f8, f8 + fmerge.s f10 = f8, f8 nop.i 999 } { .mfi nop.m 999 - fma.s0 f8=f11,f1,f0 + fma.s0 f8=f11,f1,f0 nop.i 999;; } -EXP_ERROR_RETURN: +EXP_ERROR_RETURN: { .mib - mov GR_Parameter_TAG = 123 + mov GR_Parameter_TAG = 123 nop.i 999 - br.sptk __libm_error_region;; + br.sptk __libm_error_region;; } GLOBAL_IEEE754_END(remainderl) @@ -564,10 +564,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) add GR_Parameter_Y=-32,sp // Parameter 2 value nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs // Save ar.pfs + mov GR_SAVE_PFS=ar.pfs // Save ar.pfs } { .mfi -.fframe 64 +.fframe 64 add sp=-64,sp // Create new stack nop.f 0 mov GR_SAVE_GP=gp // Save gp @@ -575,18 +575,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mmi stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack add GR_Parameter_X = 16,sp // Parameter 1 address -.save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 // Save b0 +.save b0, GR_SAVE_B0 + mov GR_SAVE_B0=b0 // Save b0 };; .body { .mib - stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack - add GR_Parameter_RESULT = 0,GR_Parameter_Y + stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack + add GR_Parameter_RESULT = 0,GR_Parameter_Y nop.b 0 // Parameter 3 address } { .mib stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack - add GR_Parameter_Y = -16,GR_Parameter_Y + add GR_Parameter_Y = -16,GR_Parameter_Y br.call.sptk b0=__libm_error_support# // Call error handling function };; { .mmi @@ -601,10 +601,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov b0 = GR_SAVE_B0 // Restore return address };; { .mib - mov gp = GR_SAVE_GP // Restore gp + mov gp = GR_SAVE_GP // Restore gp mov ar.pfs = 
GR_SAVE_PFS // Restore ar.pfs br.ret.sptk b0 // Return -};; +};; LOCAL_LIBM_END(__libm_error_region) diff --git a/sysdeps/ia64/fpu/e_scalb.S b/sysdeps/ia64/fpu/e_scalb.S index 3d48aab..c25d8ab 100644 --- a/sysdeps/ia64/fpu/e_scalb.S +++ b/sysdeps/ia64/fpu/e_scalb.S @@ -378,7 +378,7 @@ SCALB_UNDERFLOW: SCALB_NAN_INF_ZERO: // -// Before entry, N has been converted to a fp integer in significand of +// Before entry, N has been converted to a fp integer in significand of // FR_N_float_int // // Convert N_float_int to floating point value diff --git a/sysdeps/ia64/fpu/e_scalbf.S b/sysdeps/ia64/fpu/e_scalbf.S index e965667..2dde978 100644 --- a/sysdeps/ia64/fpu/e_scalbf.S +++ b/sysdeps/ia64/fpu/e_scalbf.S @@ -378,7 +378,7 @@ SCALBF_UNDERFLOW: SCALBF_NAN_INF_ZERO: // -// Before entry, N has been converted to a fp integer in significand of +// Before entry, N has been converted to a fp integer in significand of // FR_N_float_int // // Convert N_float_int to floating point value diff --git a/sysdeps/ia64/fpu/e_scalbl.S b/sysdeps/ia64/fpu/e_scalbl.S index 9b6467f..8aa3d0c 100644 --- a/sysdeps/ia64/fpu/e_scalbl.S +++ b/sysdeps/ia64/fpu/e_scalbl.S @@ -378,7 +378,7 @@ SCALBL_UNDERFLOW: SCALBL_NAN_INF_ZERO: // -// Before entry, N has been converted to a fp integer in significand of +// Before entry, N has been converted to a fp integer in significand of // FR_N_float_int // // Convert N_float_int to floating point value diff --git a/sysdeps/ia64/fpu/e_sinhl.S b/sysdeps/ia64/fpu/e_sinhl.S index 5b4a4ad..b560999 100644 --- a/sysdeps/ia64/fpu/e_sinhl.S +++ b/sysdeps/ia64/fpu/e_sinhl.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -44,7 +44,7 @@ // 08/15/00 Bundle added after call to __libm_error_support to properly // set [the previously overwritten] GR_Parameter_RESULT. // 10/12/00 Update to set denormal operand and underflow flags -// 01/22/01 Fixed to set inexact flag for small args. Fixed incorrect +// 01/22/01 Fixed to set inexact flag for small args. 
Fixed incorrect // call to __libm_error_support for 710.476 < x < 11357.2166. // 05/02/01 Reworked to improve speed of all paths // 05/20/02 Cleaned up namespace and sf0 syntax @@ -58,12 +58,12 @@ // // Registers used //============================================================== -// general registers: +// general registers: // r14 -> r40 // predicate registers used: // p6 -> p11 // floating-point registers used: -// f9 -> f15; f32 -> f90; +// f9 -> f15; f32 -> f90; // f8 has input, then output // // Overview of operation @@ -84,7 +84,7 @@ // 1. SINH_BY_POLY 0 < |x| < 0.25 // =============== // Evaluate sinh(x) by a 13th order polynomial -// Care is take for the order of multiplication; and P_1 is not exactly 1/3!, +// Care is take for the order of multiplication; and P_1 is not exactly 1/3!, // P_2 is not exactly 1/5!, etc. // sinh(x) = sign * (series(e^x) - series(e^-x))/2 // = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11! @@ -100,18 +100,18 @@ // ============= // sinh(x) = sinh(B+R) // = sinh(B)cosh(R) + cosh(B)sinh(R) -// +// // ax = |x| = M*log2/64 + R // B = M*log2/64 -// M = 64*N + j +// M = 64*N + j // We will calculate M and get N as (M-j)/64 // The division is a shift. // exp(B) = exp(N*log2 + j*log2/64) // = 2^N * 2^(j*log2/64) // sinh(B) = 1/2(e^B -e^-B) -// = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64)) -// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64)) -// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64)) +// = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64)) +// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64)) +// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64)) // 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32 // Tjhi is double-extended (80-bit) and Tjlo is single(32-bit) // @@ -119,7 +119,7 @@ // R = ax - M*log2_by_64_hi - M*log2_by_64_lo // exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...) 
// = 1 + p_odd + p_even -// where the p_even uses the A coefficients and the p_even uses +// where the p_even uses the A coefficients and the p_even uses // the B coefficients // // So sinh(R) = 1 + p_odd + p_even -(1 -p_odd -p_even)/2 = p_odd @@ -183,7 +183,7 @@ GR_Parameter_RESULT = r39 GR_Parameter_TAG = r40 -f_ABS_X = f9 +f_ABS_X = f9 f_X2 = f10 f_X4 = f11 f_tmp = f14 @@ -238,16 +238,16 @@ f_Tmjlo = f68 f_S_hi = f69 f_SC_hi_temp = f70 -f_S_lo_temp1 = f71 -f_S_lo_temp2 = f72 -f_S_lo_temp3 = f73 -f_S_lo_temp4 = f73 +f_S_lo_temp1 = f71 +f_S_lo_temp2 = f72 +f_S_lo_temp3 = f73 +f_S_lo_temp4 = f73 f_S_lo = f74 f_C_hi = f75 -f_Y_hi = f77 -f_Y_lo_temp = f78 -f_Y_lo = f79 +f_Y_hi = f77 +f_Y_lo_temp = f78 +f_Y_lo = f79 f_NORM_X = f80 f_P1 = f81 @@ -452,7 +452,7 @@ GLOBAL_IEEE754_ENTRY(sinhl) } { .mfi nop.m 0 - fnorm.s1 f_NORM_X = f8 + fnorm.s1 f_NORM_X = f8 mov r_exp_2tom57 = 0xffff-57 } ;; @@ -460,7 +460,7 @@ GLOBAL_IEEE754_ENTRY(sinhl) { .mfi setf.d f_RSHF_2TO57 = r_rshf_2to57 // Form const 1.100 * 2^120 fclass.m p10,p0 = f8, 0x0b // Test for denorm - mov r_exp_mask = 0x1ffff + mov r_exp_mask = 0x1ffff } { .mlx setf.sig f_INV_LN2_2TO63 = r_sig_inv_ln2 // Form 1/ln2 * 2^63 @@ -500,7 +500,7 @@ SINH_COMMON: add r_ad5 = 0x580, r_ad1 // Point to j_lo_table midpoint } { .mib - ldfe f_log2by64_hi = [r_ad1],16 + ldfe f_log2by64_hi = [r_ad1],16 and r_exp_x = r_exp_mask, r_signexp_x (p7) br.ret.spnt b0 // Exit if x=0 } @@ -508,36 +508,36 @@ SINH_COMMON: // Get the A coefficients for SINH_BY_TBL { .mfi - ldfe f_A1 = [r_ad3],16 + ldfe f_A1 = [r_ad3],16 fcmp.lt.s1 p8,p9 = f8,f0 // Test for x<0 cmp.lt p7,p0 = r_exp_x, r_exp_0_25 // Test x < 0.25 } { .mfb add r_ad2o = 0x30, r_ad2e // Point to p_table odd coeffs -(p6) fma.s0 f8 = f8,f1,f0 // Result for x nan, inf +(p6) fma.s0 f8 = f8,f1,f0 // Result for x nan, inf (p6) br.ret.spnt b0 // Exit for x nan, inf } ;; // Calculate X2 = ax*ax for SINH_BY_POLY { .mfi - ldfe f_log2by64_lo = [r_ad1],16 + ldfe f_log2by64_lo = [r_ad1],16 nop.f 0 
nop.i 0 } { .mfb - ldfe f_A2 = [r_ad3],16 + ldfe f_A2 = [r_ad3],16 fma.s1 f_X2 = f_NORM_X, f_NORM_X, f0 (p7) br.cond.spnt SINH_BY_POLY } ;; // Here if |x| >= 0.25 -SINH_BY_TBL: +SINH_BY_TBL: // ****************************************************** // STEP 1 (TBL and EXP) - Argument reduction // ****************************************************** -// Get the following constants. +// Get the following constants. // Inv_log2by64 // log2by64_hi // log2by64_lo @@ -592,20 +592,20 @@ SINH_BY_TBL: // Subtract RSHF constant to get rounded M as a floating point value // M_temp * 2^(63-6) - 2^63 { .mfb - ldfe f_B3 = [r_ad3],16 + ldfe f_B3 = [r_ad3],16 fms.s1 f_M = f_M_temp, f_2TOM57, f_RSHF (p6) br.cond.spnt SINH_HUGE // Branch if result will overflow } ;; { .mfi - getf.sig r_M = f_M_temp + getf.sig r_M = f_M_temp nop.f 0 cmp.ge p7,p6 = r_exp_x, r_exp_32 // Test if x >= 32 } ;; -// Calculate j. j is the signed extension of the six lsb of M. It +// Calculate j. j is the signed extension of the six lsb of M. It // has a range of -32 thru 31. 
// Calculate R @@ -648,8 +648,8 @@ SINH_BY_TBL: // N = (M-j)/64 { .mfi ldfe f_Tjhi = [r_ad_J_hi] - fnma.s1 f_R = f_M, f_log2by64_lo, f_R_temp - shr r_N = r_Mmj, 0x6 // N = (M-j)/64 + fnma.s1 f_R = f_M, f_log2by64_lo, f_R_temp + shr r_N = r_Mmj, 0x6 // N = (M-j)/64 } { .mfi shladd r_ad_mJ_hi = r_mj, 4, r_ad4 // pointer to Tmjhi @@ -724,8 +724,8 @@ SINH_BY_TBL: } ;; -// -// If TBL, +// +// If TBL, // Calculate S_hi and S_lo, and C_hi // SC_hi_temp = sneg * Tmjhi // S_hi = spos * Tjhi - SC_hi_temp @@ -735,12 +735,12 @@ SINH_BY_TBL: { .mfi nop.m 0 -(p6) fma.s1 f_SC_hi_temp = f_sneg, f_Tmjhi, f0 +(p6) fma.s1 f_SC_hi_temp = f_sneg, f_Tmjhi, f0 nop.i 0 } ;; -// If TBL, +// If TBL, // S_lo_temp3 = sneg * Tmjlo // S_lo_temp4 = spos * Tjlo - S_lo_temp3 // S_lo_temp4 = spos * Tjlo -(sneg * Tmjlo) @@ -763,7 +763,7 @@ SINH_BY_TBL: } ;; -// If EXP, +// If EXP, // Compute sgnx * 2^(N-1) * Tjhi and sgnx * 2^(N-1) * Tjlo { .mfi nop.m 0 @@ -822,7 +822,7 @@ SINH_BY_TBL: { .mfi nop.m 0 -(p6) fnma.s1 f_S_lo_temp2 = f_sneg, f_Tmjhi, f_S_lo_temp1 +(p6) fnma.s1 f_S_lo_temp2 = f_sneg, f_Tmjhi, f_S_lo_temp1 nop.i 0 } ;; @@ -847,7 +847,7 @@ SINH_BY_TBL: ;; // If TBL, -// Y_hi = S_hi +// Y_hi = S_hi // Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo) { .mfi nop.m 0 @@ -894,7 +894,7 @@ SINH_BY_TBL: // Here if 0 < |x| < 0.25 -SINH_BY_POLY: +SINH_BY_POLY: { .mmf ldfe f_P6 = [r_ad2e],16 ldfe f_P5 = [r_ad2o],16 @@ -911,7 +911,7 @@ SINH_BY_POLY: { .mmi ldfe f_P2 = [r_ad2e],16 - ldfe f_P1 = [r_ad2o],16 + ldfe f_P1 = [r_ad2o],16 nop.i 0 } ;; @@ -1012,7 +1012,7 @@ SINH_DENORM: { .mfi nop.m 0 (p6) fma.s0 f8 = f8,f8,f8 // If x +denorm, result=x+x^2 - nop.i 0 + nop.i 0 } { .mfb nop.m 0 @@ -1023,7 +1023,7 @@ SINH_DENORM: // Here if |x| >= overflow limit -SINH_HUGE: +SINH_HUGE: // for SINH_HUGE, put 24000 in exponent; take sign from input { .mmi mov r_exp_huge = 0x15dbf @@ -1035,7 +1035,7 @@ SINH_HUGE: .pred.rel "mutex",p8,p9 { .mfi - alloc r32 = ar.pfs,0,5,4,0 + alloc r32 = ar.pfs,0,5,4,0 (p8) fnma.s1 
f_signed_hi_lo = f_huge, f1, f1 nop.i 0 } @@ -1083,7 +1083,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mib stfe [GR_Parameter_X] = f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + nop.b 0 } { .mib stfe [GR_Parameter_Y] = f_pre_result // STORE Parameter 3 on stack diff --git a/sysdeps/ia64/fpu/e_sqrt.S b/sysdeps/ia64/fpu/e_sqrt.S index 53e60ef..da4e8cc 100644 --- a/sysdeps/ia64/fpu/e_sqrt.S +++ b/sysdeps/ia64/fpu/e_sqrt.S @@ -3,9 +3,9 @@ // Copyright (c) 2000 - 2003, Intel Corporation // All rights reserved. -// +// // Contributed 2000 by the Intel Numerics Group, Intel Corporation -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// //******************************************************************** @@ -120,7 +120,7 @@ GLOBAL_IEEE754_ENTRY(sqrt) setf.s f10=r3 // Step (1) // y0 = 1/sqrt(a) in f7 - fclass.m.unc p7,p8 = f8,0x3A + fclass.m.unc p7,p8 = f8,0x3A nop.i 0;; } { .mlx nop.m 0 @@ -238,7 +238,7 @@ GLOBAL_IEEE754_ENTRY(sqrt) // g2 = g1 + d * h1 in f7 (p6) fma.d.s0 f8=f9,f6,f7 (p6) br.ret.sptk b0 ;; -} +} { .mfb nop.m 0 @@ -273,7 +273,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) // // This branch includes all those special values that are not negative, // with the result equal to frcpa(x) -// +// .prologue // We are distinguishing between over(under)flow and letting @@ -307,7 +307,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mib stfd [GR_Parameter_X] = f15 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + nop.b 0 } { .mib stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack diff --git a/sysdeps/ia64/fpu/e_sqrtf.S b/sysdeps/ia64/fpu/e_sqrtf.S index daa2045..d50ff01 100644 --- a/sysdeps/ia64/fpu/e_sqrtf.S +++ b/sysdeps/ia64/fpu/e_sqrtf.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // //********************************************************************* @@ -55,7 +55,7 @@ // //******************************************************************** // -// Accuracy: Correctly Rounded +// Accuracy: Correctly Rounded // //******************************************************************** // @@ -77,7 +77,7 @@ // All faults and exceptions should be raised correctly. 
// sqrtf(QNaN) = QNaN // sqrtf(SNaN) = QNaN -// sqrtf(+/-0) = +/-0 +// sqrtf(+/-0) = +/-0 // sqrtf(negative) = QNaN and error handling is called // //******************************************************************** @@ -91,7 +91,7 @@ GR_SAVE_B0 = r34 GR_SAVE_PFS = r33 -GR_SAVE_GP = r35 +GR_SAVE_GP = r35 GR_Parameter_X = r37 GR_Parameter_Y = r38 @@ -119,12 +119,12 @@ GLOBAL_IEEE754_ENTRY(sqrtf) setf.exp f12 = r2 // Step (1) // y0 = 1/sqrt(a) in f7 - fclass.m.unc p7,p8 = f8,0x3A + fclass.m.unc p7,p8 = f8,0x3A nop.i 0 } { .mfi nop.m 0 - // Make a copy of x just in case - mov f13 = f8 + // Make a copy of x just in case + mov f13 = f8 nop.i 0;; } { .mfi nop.m 0 @@ -209,7 +209,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mii add GR_Parameter_Y=-32,sp // Parameter 2 value - mov GR_Parameter_TAG = 50 + mov GR_Parameter_TAG = 50 .save ar.pfs,GR_SAVE_PFS mov GR_SAVE_PFS=ar.pfs // Save ar.pfs } @@ -248,10 +248,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov b0 = GR_SAVE_B0 // Restore return address };; { .mib - mov gp = GR_SAVE_GP // Restore gp + mov gp = GR_SAVE_GP // Restore gp mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs br.ret.sptk b0 // Return -};; +};; LOCAL_LIBM_END(__libm_error_region) diff --git a/sysdeps/ia64/fpu/e_sqrtl.S b/sysdeps/ia64/fpu/e_sqrtl.S index 6a5735d..6aa4021 100644 --- a/sysdeps/ia64/fpu/e_sqrtl.S +++ b/sysdeps/ia64/fpu/e_sqrtl.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // //******************************************************************** @@ -123,8 +123,8 @@ alloc r32= ar.pfs,0,5,4,0 nop.i 0;; } { .mfi nop.m 0 - // Make copy input x - mov f13=f8 + // Make copy input x + mov f13=f8 nop.i 0 } { .mfi nop.m 0 @@ -136,7 +136,7 @@ alloc r32= ar.pfs,0,5,4,0 // d0 = 1/2 - S0 * H0 in f10 (p6) fnma.s1 f10=f7,f9,f12 nop.i 0;; -} +} { .mfi nop.m 0 mov f15=f8 diff --git a/sysdeps/ia64/fpu/libm_cpu_defs.h b/sysdeps/ia64/fpu/libm_cpu_defs.h index 516128c..810c683 100644 --- a/sysdeps/ia64/fpu/libm_cpu_defs.h +++ b/sysdeps/ia64/fpu/libm_cpu_defs.h @@ -1,6 +1,6 @@ /* file: libm_cpu_defs.h */ - - + + // Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. 
// diff --git a/sysdeps/ia64/fpu/libm_error_codes.h b/sysdeps/ia64/fpu/libm_error_codes.h index 4f0945e..f196b33 100644 --- a/sysdeps/ia64/fpu/libm_error_codes.h +++ b/sysdeps/ia64/fpu/libm_error_codes.h @@ -1,7 +1,7 @@ /* file: libm_error_codes.h */ - -/* + +/* // Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // @@ -52,7 +52,7 @@ */ #if !defined(__LIBM_ERROR_CODES_H__) -#define __LIBM_ERROR_CODES_H__ +#define __LIBM_ERROR_CODES_H__ typedef enum { @@ -192,9 +192,9 @@ typedef enum tgamma_overflow, tgamma_negative, tgamma_reserve, /* 258, 259, 260 */ tgammaf_overflow, tgammaf_negative, tgammaf_reserve, /* 261, 262, 263 */ exp10l_underflow, exp10_underflow, exp10f_underflow, /* 264, 265, 266 */ - nextafterl_underflow, nextafter_underflow, + nextafterl_underflow, nextafter_underflow, nextafterf_underflow, /* 267, 268, 269 */ - nexttowardl_underflow, nexttoward_underflow, + nexttowardl_underflow, nexttoward_underflow, nexttowardf_underflow /* 270, 271, 272 */ } error_types; diff --git a/sysdeps/ia64/fpu/libm_frexp.S b/sysdeps/ia64/fpu/libm_frexp.S index c6bd676..51bb268 100644 --- a/sysdeps/ia64/fpu/libm_frexp.S +++ b/sysdeps/ia64/fpu/libm_frexp.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History @@ -74,7 +74,7 @@ // Registers used //============================================================== // -// general registers: +// general registers: // r14 exponent bias for x negative // r15 exponent bias for x positive // r16 signexp of x @@ -119,7 +119,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexp) // The normalization also sets fault flags and takes faults if necessary { .mfi mov r20 = 0x1003f - fnorm.s0 f9 = f8 + fnorm.s0 f9 = f8 nop.i 999 ;; } @@ -137,7 +137,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexp) { .mfi setf.exp f11 = r14 (p7) fcmp.lt.s0 p7,p8 = f8,f0 -(p6) cmp.eq.unc p10,p11 = r34, r0 ;; +(p6) cmp.eq.unc p10,p11 = r34, r0 ;; } // If x NAN, ZERO, INFINITY, set *y=0 and exit @@ -161,16 +161,16 @@ GLOBAL_LIBM_ENTRY(__libm_frexp) { .mfi (p9) add r15 = 64, r15 (p9) fmpy.s0 f9 = f9, f12 - cmp.eq p10,p11 = r34, r0 ;; + cmp.eq p10,p11 = r34, r0 ;; } // true exponent stored to int pointer -// the bias is treated as 0xfffe instead of +// the bias is treated as 0xfffe instead of // normal 0xffff because we want the significand // to be in the range <=0.5 sig < 1.0 // Store the value of the exponent at the pointer in r33 -// If x>0 form significand result +// If x>0 form significand result { .mfi nop.m 999 (p8) fmerge.se f8 = f10,f9 @@ -178,7 +178,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexp) } // Get signexp of normalized x -// If x<0 form significand result +// If x<0 form significand result { .mfi getf.exp r16 = f9 (p7) fmerge.se f8 = f11,f9 diff --git a/sysdeps/ia64/fpu/libm_frexpf.S b/sysdeps/ia64/fpu/libm_frexpf.S index dde2d09..bf34524 100644 --- a/sysdeps/ia64/fpu/libm_frexpf.S +++ b/sysdeps/ia64/fpu/libm_frexpf.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History @@ -74,7 +74,7 @@ // Registers used //============================================================== // -// general registers: +// general registers: // r14 exponent bias for x negative // r15 exponent bias for x positive // r16 signexp of x @@ -119,7 +119,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexpf) // The normalization also sets fault flags and takes faults if necessary { .mfi mov r20 = 0x1003f - fnorm.s0 f9 = f8 + fnorm.s0 f9 = f8 nop.i 999 ;; } @@ -137,7 +137,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexpf) { .mfi setf.exp f11 = r14 (p7) fcmp.lt.s0 p7,p8 = f8,f0 -(p6) cmp.eq.unc p10,p11 = r34, r0 ;; +(p6) cmp.eq.unc p10,p11 = r34, r0 ;; } // If x NAN, ZERO, INFINITY, set *y=0 and exit @@ -161,16 +161,16 @@ GLOBAL_LIBM_ENTRY(__libm_frexpf) { .mfi (p9) add r15 = 64, r15 (p9) fmpy.s0 f9 = f9, f12 - cmp.eq p10,p11 = r34, r0 ;; + cmp.eq p10,p11 = r34, r0 ;; } // true exponent stored to int pointer -// the bias is treated as 0xfffe instead of +// the bias is treated as 0xfffe instead of // normal 0xffff because we want the significand // to be in the range <=0.5 sig < 1.0 // Store the value of the exponent at the pointer in r33 -// If x>0 form significand result +// If x>0 form significand result { .mfi nop.m 999 (p8) fmerge.se f8 = f10,f9 @@ -178,7 +178,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexpf) } // Get signexp of normalized x -// If x<0 form significand result +// If x<0 form significand result { .mfi getf.exp r16 = f9 (p7) fmerge.se f8 = f11,f9 diff --git a/sysdeps/ia64/fpu/libm_frexpl.S b/sysdeps/ia64/fpu/libm_frexpl.S index 64f30b6..3c3bba0 100644 --- a/sysdeps/ia64/fpu/libm_frexpl.S +++ b/sysdeps/ia64/fpu/libm_frexpl.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History @@ -74,7 +74,7 @@ // Registers used //============================================================== // -// general registers: +// general registers: // r14 exponent bias for x negative // r15 exponent bias for x positive // r16 signexp of x @@ -119,7 +119,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexpl) // The normalization also sets fault flags and takes faults if necessary { .mfi mov r20 = 0x1003f - fnorm.s0 f9 = f8 + fnorm.s0 f9 = f8 nop.i 999 ;; } @@ -137,7 +137,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexpl) { .mfi setf.exp f11 = r14 (p7) fcmp.lt.s0 p7,p8 = f8,f0 -(p6) cmp.eq.unc p10,p11 = r35, r0 ;; +(p6) cmp.eq.unc p10,p11 = r35, r0 ;; } // If x NAN, ZERO, INFINITY, set *y=0 and exit @@ -161,16 +161,16 @@ GLOBAL_LIBM_ENTRY(__libm_frexpl) { .mfi (p9) add r15 = 64, r15 (p9) fmpy.s0 f9 = f9, f12 - cmp.eq p10,p11 = r35, r0 ;; + cmp.eq p10,p11 = r35, r0 ;; } // true exponent stored to int pointer -// the bias is treated as 0xfffe instead of +// the bias is treated as 0xfffe instead of // normal 0xffff because we want the significand // to be in the range <=0.5 sig < 1.0 // Store the value of the exponent at the pointer in r34 -// If x>0 form significand result +// If x>0 form significand result { .mfi nop.m 999 (p8) fmerge.se f8 = f10,f9 @@ -178,7 +178,7 @@ GLOBAL_LIBM_ENTRY(__libm_frexpl) } // Get signexp of normalized x -// If x<0 form significand result +// If x<0 form significand result { .mfi getf.exp r16 = f9 (p7) fmerge.se f8 = f11,f9 diff --git a/sysdeps/ia64/fpu/libm_scalblnf.S b/sysdeps/ia64/fpu/libm_scalblnf.S index af620d4..56de44a 100644 --- a/sysdeps/ia64/fpu/libm_scalblnf.S +++ b/sysdeps/ia64/fpu/libm_scalblnf.S @@ -48,8 +48,8 @@ // // API //============================================================== -// float __libm_scalblnf (float x, long int n, int long_int_type) -// input floating point f8 and long int n (r33) +// float __libm_scalblnf (float x, long int n, int long_int_type) +// input floating point f8 and long int n (r33) // input long_int_type 
= 0 if long int defined as 32 bits, = 1 if 64 bits // output floating point f8 // diff --git a/sysdeps/ia64/fpu/libm_tan.S b/sysdeps/ia64/fpu/libm_tan.S index 179ea9c..655a864 100644 --- a/sysdeps/ia64/fpu/libm_tan.S +++ b/sysdeps/ia64/fpu/libm_tan.S @@ -2,7 +2,7 @@ // Copyright (C) 2000, 2001, Intel Corporation // All rights reserved. -// +// // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation. // @@ -21,26 +21,26 @@ // products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://developer.intel.com/opensource. // // ********************************************************************* // -// History: -// 02/02/00 Initial Version +// History: +// 02/02/00 Initial Version // 4/04/00 Unwind support added // 12/28/00 Fixed false invalid flags // @@ -50,7 +50,7 @@ // // ********************************************************************* // -// Accuracy: Very accurate for double-precision values +// Accuracy: Very accurate for double-precision values // // ********************************************************************* // @@ -72,7 +72,7 @@ // // Denormal fault raised on denormal inputs // Overflow exceptions do not occur -// Underflow exceptions raised when appropriate for tan +// Underflow exceptions raised when appropriate for tan // (No specialized error handling for this routine) // Inexact raised when appropriate by algorithm // @@ -218,7 +218,7 @@ // tan( B + x ) = ------------------------ // 1 - tan(B)*tan(x) // -// / \ +// / \ // | tan(B) + tan(x) | // = tan(B) + | ------------------------ - tan(B) | @@ -251,7 +251,7 @@ // cot( B + x ) = ------------------------ // tan(B) + tan(x) // -// / \ +// / \ // | 1 - tan(B)*tan(x) | // = cot(B) + | ----------------------- - cot(B) | @@ -315,7 +315,7 @@ // / (1/[sin(B)*cos(B)]) * tan(x) // tan(Arg) = sgn_r * | tan(B) + -------------------------------- // \ cot(B) - tan(x) -// \ +// \ // + CORR | // / @@ -335,7 +335,7 @@ // / (1/[sin(B)*cos(B)]) * tan(x) // tan(Arg) = sgn_r * | -cot(B) + -------------------------------- // \ tan(B) + tan(x) -// \ +// \ // + CORR | // / @@ -457,7 +457,7 @@ // / (1/[sin(B)*cos(B)]) * tan(x) // sgn_r * | tan(B) + -------------------------------- + // \ cot(B) - tan(x) -// \ +// \ // CORR | // / @@ -562,7 +562,7 @@ // / (1/[sin(B)*cos(B)]) 
* tan(x) // sgn_r * | -cot(B) + -------------------------------- + // \ tan(B) + tan(x) -// \ +// \ // CORR | // / @@ -913,7 +913,7 @@ data4 0xAE8C11FD, 0x800960AD, 0x00004000, 0x00000000 data4 0x5FDBEC21, 0x8000E147, 0x00004000, 0x00000000 data4 0xA07791FA, 0x80186650, 0x00004000, 0x00000000 -Arg = f8 +Arg = f8 Result = f8 fp_tmp = f9 U_2 = f10 @@ -1021,15 +1021,15 @@ delta1 = r36 table_ptr1 = r37 table_ptr2 = r38 i_0 = r39 -i_1 = r40 -N_fix_gr = r41 -N_inc = r42 -exp_Arg = r43 -exp_r = r44 -sig_r = r45 -lookup = r46 -table_offset = r47 -Create_B = r48 +i_1 = r40 +N_fix_gr = r41 +N_inc = r42 +exp_Arg = r43 +exp_r = r44 +sig_r = r45 +lookup = r46 +table_offset = r47 +Create_B = r48 gr_tmp = r49 GR_Parameter_X = r49 @@ -1042,12 +1042,12 @@ GR_Parameter_r = r50 .proc __libm_tan -__libm_tan: +__libm_tan: { .mfi alloc r32 = ar.pfs, 0,17,2,0 (p0) fclass.m.unc p6,p0 = Arg, 0x1E7 - addl gr_tmp = -1,r0 + addl gr_tmp = -1,r0 } ;; @@ -1073,7 +1073,7 @@ alloc r32 = ar.pfs, 0,17,2,0 ;; // -// Check for NatVals, Infs , NaNs, and Zeros +// Check for NatVals, Infs , NaNs, and Zeros // Check for everything - if false, then must be pseudo-zero // or pseudo-nan. // Local table pointer @@ -1081,12 +1081,12 @@ alloc r32 = ar.pfs, 0,17,2,0 { .mbb (p0) add table_ptr2 = 96, table_ptr1 -(p6) br.cond.spnt __libm_TAN_SPECIAL +(p6) br.cond.spnt __libm_TAN_SPECIAL (p7) br.cond.spnt __libm_TAN_SPECIAL ;; } // // Point to Inv_P_0 -// Branch out to deal with unsupporteds and special values. +// Branch out to deal with unsupporteds and special values. // { .mmf @@ -1110,7 +1110,7 @@ alloc r32 = ar.pfs, 0,17,2,0 { .mmi (p0) ldfs NEGTWO_TO_24 = [table_ptr1],12 ;; // -// Do fcmp to generate Denormal exception +// Do fcmp to generate Denormal exception // - can't do FNORM (will generate Underflow when U is unmasked!) // Normalize input argument. 
// @@ -1668,12 +1668,12 @@ alloc r32 = ar.pfs, 0,17,2,0 } -TAN_LARGER_ARG: +TAN_LARGER_ARG: { .mmf (p0) addl table_ptr1 = @ltoff(TAN_BASE_CONSTANTS), gp nop.m 999 -(p0) fmpy.s1 N_0 = Arg, Inv_P_0 +(p0) fmpy.s1 N_0 = Arg, Inv_P_0 } ;; @@ -2307,7 +2307,7 @@ TAN_LARGER_ARG: } -TAN_SMALL_R: +TAN_SMALL_R: { .mii nop.m 999 @@ -2394,7 +2394,7 @@ TAN_SMALL_R: (p11) ldfe P1_8 = [table_ptr1], -16 ;; // // N even: Poly1 = P1_2 + P1_3 * rsq -// N odd: poly1 = 1.0 + S_hi * r +// N odd: poly1 = 1.0 + S_hi * r // 16 bits partial account for necessary (-1) // (p11) ldfe P1_7 = [table_ptr1], -16 @@ -2679,7 +2679,7 @@ TAN_SMALL_R: } -TAN_NORMAL_R: +TAN_NORMAL_R: { .mfi (p0) getf.sig sig_r = r @@ -2847,7 +2847,7 @@ TAN_NORMAL_R: // xsq = x * x // N even: Tx = T_hi * x // Load T_lo. -// Load C_lo - increment pointer to get SC_inv +// Load C_lo - increment pointer to get SC_inv // - cant get all the way, do an add later. // (p0) add table_ptr2 = 569, table_ptr2 ;; @@ -3216,7 +3216,7 @@ ASM_SIZE_DIRECTIVE(__libm_tan) .proc __libm_callout __libm_callout: -TAN_ARG_TOO_LARGE: +TAN_ARG_TOO_LARGE: .prologue // (1) { .mfi @@ -3258,7 +3258,7 @@ TAN_ARG_TOO_LARGE: // (4) { .mmi mov gp = GR_SAVE_GP // Restore gp -(p0) mov N_fix_gr = r8 +(p0) mov N_fix_gr = r8 nop.i 999 } ;; @@ -3304,7 +3304,7 @@ TAN_ARG_TOO_LARGE: .restore sp add sp = 64,sp // Restore stack pointer (p6) br.cond.spnt TAN_SMALL_R -(p0) br.cond.sptk TAN_NORMAL_R +(p0) br.cond.sptk TAN_NORMAL_R } ;; .endp __libm_callout @@ -3322,7 +3322,7 @@ __libm_TAN_SPECIAL: { .mfb nop.m 999 (p0) fmpy.s0 Arg = Arg, f0 -(p0) br.ret.sptk b0 +(p0) br.ret.sptk b0 } .endp __libm_TAN_SPECIAL ASM_SIZE_DIRECTIVE(__libm_TAN_SPECIAL) diff --git a/sysdeps/ia64/fpu/s_asinhl.S b/sysdeps/ia64/fpu/s_asinhl.S index d3a5507..2d4d7a2 100644 --- a/sysdeps/ia64/fpu/s_asinhl.S +++ b/sysdeps/ia64/fpu/s_asinhl.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// //********************************************************************* // -// History: +// History: // 09/04/01 Initial version // 09/13/01 Performance improved, symmetry problems fixed // 10/10/01 Performance improved, split issues removed @@ -56,31 +56,31 @@ // // Overview of operation //============================================================== -// +// // There are 6 paths: // 1. x = 0, [S,Q]Nan or +/-INF // Return asinhl(x) = x + x; -// +// // 2. x = + denormal // Return asinhl(x) = x - x^2; -// +// // 3. x = - denormal // Return asinhl(x) = x + x^2; -// +// // 4. 'Near 0': max denormal < |x| < 1/128 // Return asinhl(x) = sign(x)*(x+x^3*(c3+x^2*(c5+x^2*(c7+x^2*(c9))))); // // 5. 'Huges': |x| > 2^63 // Return asinhl(x) = sign(x)*(logl(2*x)); -// +// // 6. 'Main path': 1/128 < |x| < 2^63 // b_hi + b_lo = x + sqrt(x^2 + 1); // asinhl(x) = sign(x)*(log_special(b_hi, b_lo)); -// -// Algorithm description +// +// Algorithm description //============================================================== // -// Main path algorithm +// Main path algorithm // ( thanks to Peter Markstein for the idea of sqrt(x^2+1) computation! 
) // ************************************************************************* // @@ -89,19 +89,19 @@ // 1) p2 = (p2_hi+p2_lo) = x^2+1 obtaining // ------------------------------------ // p2_hi = x2_hi + 1, where x2_hi = x * x; -// p2_lo = x2_lo + p1_lo, where -// x2_lo = FMS(x*x-x2_hi), +// p2_lo = x2_lo + p1_lo, where +// x2_lo = FMS(x*x-x2_hi), // p1_lo = (1 - p2_hi) + x2_hi; // // 2) g = (g_hi+g_lo) = sqrt(p2) = sqrt(p2_hi+p2_lo) // ---------------------------------------------- // r = invsqrt(p2_hi) (8-bit reciprocal square root approximation); // g = p2_hi * r (first 8 bit-approximation of sqrt); -// +// // h = 0.5 * r; // e = 0.5 - g * h; // g = g * e + g (second 16 bit-approximation of sqrt); -// +// // h = h * e + h; // e = 0.5 - g * h; // g = g * e + g (third 32 bit-approximation of sqrt); @@ -109,7 +109,7 @@ // h = h * e + h; // e = 0.5 - g * h; // g_hi = g * e + g (fourth 64 bit-approximation of sqrt); -// +// // Remainder computation: // h = h * e + h; // d = (p2_hi - g_hi * g_hi) + p2_lo; @@ -119,15 +119,15 @@ // ------------------------------------------------------------------- // b_hi = (g_hi + x) + gl; // b_lo = (g_hi - b_hi) + x + gl; -// +// // Now we pass b presented as sum b_hi + b_lo to special version // of logl function which accept a pair of arguments as -// 'mutiprecision' value. -// +// 'mutiprecision' value. +// // Special log algorithm overview // ================================ // Here we use a table lookup method. The basic idea is that in -// order to compute logl(Arg) = logl (Arg-1) for an argument Arg in [1,2), +// order to compute logl(Arg) = logl (Arg-1) for an argument Arg in [1,2), // we construct a value G such that G*Arg is close to 1 and that // logl(1/G) is obtainable easily from a table of values calculated // beforehand. 
Thus @@ -157,7 +157,7 @@ // G := G_1 * G_2 * G_3 // r := (G * S_hi - 1) + G * S_lo // -// These G_j's have the property that the product is exactly +// These G_j's have the property that the product is exactly // representable and that |r| < 2^(-12) as a result. // // Step 2: Approximation @@ -166,7 +166,7 @@ // // Step 3: Reconstruction // -// Finally, +// Finally, // // logl( X ) = logl( 2^N * (S_hi + S_lo) ) // ~=~ N*logl(2) + logl(1/G) + logl(1 + r) @@ -176,25 +176,25 @@ // // Registers used //============================================================== -// Floating Point registers used: +// Floating Point registers used: // f8, input // f32 -> f101 (70 registers) -// General registers used: +// General registers used: // r32 -> r57 (26 registers) // Predicate registers used: // p6 -> p11 // p6 for '0, NaNs, Inf' path -// p7 for '+ denormals' path +// p7 for '+ denormals' path // p8 for 'near 0' path // p9 for 'huges' path -// p10 for '- denormals' path +// p10 for '- denormals' path // p11 for negative values // // Data tables //============================================================== - + RODATA .align 64 @@ -210,14 +210,14 @@ data8 0x999999999991D582, 0x00003FFB data8 0xAAAAAAAAAAAAAAA9, 0x0000BFFC LOCAL_OBJECT_END(Poly_C_near_0_35) -// Q coeffs +// Q coeffs LOCAL_OBJECT_START(Constants_Q) -data4 0x00000000,0xB1721800,0x00003FFE,0x00000000 +data4 0x00000000,0xB1721800,0x00003FFE,0x00000000 data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000 data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000 data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000 data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000 -data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000 +data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000 LOCAL_OBJECT_END(Constants_Q) // Z1 - 16 bit fixed @@ -332,7 +332,7 @@ data4 0x3F71D488,0x3D693B9D data8 0xBE049391B6B7C239 LOCAL_OBJECT_END(Constants_G_H_h2) -// G3 and H3 - IEEE single and h3 - IEEE double +// G3 and H3 - IEEE single and h3 - IEEE double 
LOCAL_OBJECT_START(Constants_G_H_h3) data4 0x3F7FFC00,0x38800100 data8 0x3D355595562224CD @@ -408,62 +408,62 @@ LOCAL_OBJECT_END(Constants_G_H_h3) FR_Arg = f8 FR_Res = f8 FR_AX = f32 -FR_XLog_Hi = f33 -FR_XLog_Lo = f34 +FR_XLog_Hi = f33 +FR_XLog_Lo = f34 // Special logl registers -FR_Y_hi = f35 +FR_Y_hi = f35 FR_Y_lo = f36 FR_Scale = f37 -FR_X_Prime = f38 -FR_S_hi = f39 +FR_X_Prime = f38 +FR_S_hi = f39 FR_W = f40 FR_G = f41 FR_H = f42 -FR_wsq = f43 +FR_wsq = f43 FR_w4 = f44 FR_h = f45 -FR_w6 = f46 +FR_w6 = f46 FR_G2 = f47 FR_H2 = f48 FR_poly_lo = f49 -FR_P8 = f50 +FR_P8 = f50 FR_poly_hi = f51 -FR_P7 = f52 -FR_h2 = f53 -FR_rsq = f54 +FR_P7 = f52 +FR_h2 = f53 +FR_rsq = f54 FR_P6 = f55 -FR_r = f56 +FR_r = f56 + +FR_log2_hi = f57 +FR_log2_lo = f58 -FR_log2_hi = f57 -FR_log2_lo = f58 - -FR_float_N = f59 -FR_Q4 = f60 +FR_float_N = f59 +FR_Q4 = f60 -FR_G3 = f61 -FR_H3 = f62 -FR_h3 = f63 +FR_G3 = f61 +FR_H3 = f62 +FR_h3 = f63 -FR_Q3 = f64 -FR_Q2 = f65 -FR_1LN10_hi = f66 +FR_Q3 = f64 +FR_Q2 = f65 +FR_1LN10_hi = f66 -FR_Q1 = f67 -FR_1LN10_lo = f68 -FR_P5 = f69 -FR_rcub = f70 +FR_Q1 = f67 +FR_1LN10_lo = f68 +FR_P5 = f69 +FR_rcub = f70 -FR_Neg_One = f71 -FR_Z = f72 -FR_AA = f73 -FR_BB = f74 -FR_S_lo = f75 -FR_2_to_minus_N = f76 +FR_Neg_One = f71 +FR_Z = f72 +FR_AA = f73 +FR_BB = f74 +FR_S_lo = f75 +FR_2_to_minus_N = f76 // Huge & Main path prolog registers @@ -512,22 +512,22 @@ GR_Poly_C_35 = r45 GR_Poly_C_79 = r46 // Special logl registers -GR_Index1 = r34 -GR_Index2 = r35 -GR_signif = r36 -GR_X_0 = r37 -GR_X_1 = r38 -GR_X_2 = r39 -GR_Z_1 = r40 -GR_Z_2 = r41 -GR_N = r42 -GR_Bias = r43 -GR_M = r44 -GR_Index3 = r45 -GR_exp_2tom80 = r45 -GR_exp_mask = r47 -GR_exp_2tom7 = r48 -GR_ad_ln10 = r49 +GR_Index1 = r34 +GR_Index2 = r35 +GR_signif = r36 +GR_X_0 = r37 +GR_X_1 = r38 +GR_X_2 = r39 +GR_Z_1 = r40 +GR_Z_2 = r41 +GR_N = r42 +GR_Bias = r43 +GR_M = r44 +GR_Index3 = r45 +GR_exp_2tom80 = r45 +GR_exp_mask = r47 +GR_exp_2tom7 = r48 +GR_ad_ln10 = r49 GR_ad_tbl_1 = r50 GR_ad_tbl_2 = r51 
GR_ad_tbl_3 = r52 @@ -593,7 +593,7 @@ GLOBAL_LIBM_ENTRY(asinhl) { .mfb cmp.le p9, p0 = GR_TwoP63, GR_ArgExp // if arg > 2^63 ('huges') (p6) fma.s0 FR_Res = FR_Arg,f1,FR_Arg // r = a + a -(p6) br.ret.spnt b0 // return +(p6) br.ret.spnt b0 // return };; // (X^2 + 1) computation { .mfi @@ -677,7 +677,7 @@ GLOBAL_LIBM_ENTRY(asinhl) { .mfi ldfe FR_Q1 = [GR_ad_q] // Load Q1 - fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g + fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g // 16 bit Newton Raphson iteration nop.i 0 } @@ -695,7 +695,7 @@ GLOBAL_LIBM_ENTRY(asinhl) { .mfi nop.m 0 - fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g + fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g // 32 bit Newton Raphson iteration nop.i 0 } @@ -713,7 +713,7 @@ GLOBAL_LIBM_ENTRY(asinhl) { .mfi nop.m 0 - fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g + fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g // 64 bit Newton Raphson iteration nop.i 0 } @@ -806,7 +806,7 @@ GLOBAL_LIBM_ENTRY(asinhl) { .mfi nop.m 0 nop.f 0 - extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 + extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 };; @@ -839,7 +839,7 @@ GLOBAL_LIBM_ENTRY(asinhl) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // Get bits 30-15 of X_1 * Z_2 };; -// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!) +// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!) // BECAUSE OF POSSIBLE 10 CLOCKS STALL! 
// So we can negate Q coefficients there for negative values @@ -1001,7 +1001,7 @@ GLOBAL_LIBM_ENTRY(asinhl) { .mfi nop.m 0 - fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo + fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo=poly_hi+poly_lo nop.i 0 } @@ -1088,7 +1088,7 @@ huges_logl: { .mmi ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo - sub GR_N = GR_N, GR_Bias + sub GR_N = GR_N, GR_Bias mov GR_exp_2tom80 = 0x0ffaf // Exponent of 2^-80 };; @@ -1107,7 +1107,7 @@ huges_logl: { .mmi nop.m 0 ldfe FR_Q2 = [GR_ad_q],16 // Load Q2 - extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 + extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 };; { .mmi diff --git a/sysdeps/ia64/fpu/s_atanf.S b/sysdeps/ia64/fpu/s_atanf.S index 4da68c7..74d7340 100644 --- a/sysdeps/ia64/fpu/s_atanf.S +++ b/sysdeps/ia64/fpu/s_atanf.S @@ -183,351 +183,351 @@ LOCAL_OBJECT_END(atanf_coeff_2_table) .section .text GLOBAL_LIBM_ENTRY(atanf) -{ .mfi +{ .mfi alloc r32 = ar.pfs,1,2,0,0 frcpa.s1 atanf_z,p0 = f1,f8 addl EXP_Addr2 = @ltoff(atanf_coeff_2_table),gp -} -{ .mfi +} +{ .mfi addl EXP_Addr1 = @ltoff(atanf_coeff_1_table),gp fma.s1 atanf_t = f8,f8,f0 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fmerge.s atanf_sgn_x = f8,f1 nop.i 999;; -} - -{ .mfi +} + +{ .mfi ld8 EXP_Addr1 = [EXP_Addr1] fmerge.s atanf_abs_x = f1,f8 nop.i 999 -} -{ .mfi +} +{ .mfi ld8 EXP_Addr2 = [EXP_Addr2] nop.f 999 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fclass.m p8,p0 = f8,0x7 // @zero nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 fcmp.eq.unc.s0 p9,p10 = f8,f1 nop.i 999;; -} - -{ .mfi +} + +{ .mfi ldfpd atanf_coeff_R4,atanf_coeff_R5 = [EXP_Addr1],16 fnma.s1 atanf_b = f8,atanf_z,f1 nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd atanf_coeff_R1,atanf_coeff_R2 = [EXP_Addr2],16 fma.s1 atanf_zsq = atanf_z,atanf_z,f0 nop.i 999;; } - -{ .mfi + +{ .mfi ldfpd atanf_coeff_R3,atanf_coeff_P1 = [EXP_Addr1],16 fma.s1 atanf_xcub = f8,atanf_t,f0 nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd atanf_coeff_Q6,atanf_coeff_Q7 = [EXP_Addr2],16 fma.s1 
atanf_tsq = atanf_t,atanf_t,f0 nop.i 999;; } - -{ .mfi + +{ .mfi ldfpd atanf_coeff_Q8,atanf_coeff_Q9 = [EXP_Addr1],16 // fcmp.le.s1 atanf_pred_LE1,atanf_pred_GT1 = atanf_abs_x,f1 fcmp.le.s1 p6,p7 = atanf_abs_x,f1 nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd atanf_coeff_Q4,atanf_coeff_Q5 = [EXP_Addr2],16 nop.f 999 nop.i 999;; } - -{ .mfi + +{ .mfi ldfpd atanf_coeff_Q2,atanf_coeff_Q3 = [EXP_Addr1],16 fclass.m p8,p0 = f8,0xe7 // @inf|@qnan|@snan|@zero nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd atanf_coeff_P5,atanf_coeff_P6 = [EXP_Addr2],16 nop.f 999 nop.i 999;; } - -{ .mfi + +{ .mfi ldfpd atanf_coeff_Q0,atanf_coeff_Q1 = [EXP_Addr1],16 nop.f 999 nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd atanf_coeff_P7,atanf_coeff_P8 = [EXP_Addr2],16 nop.f 999 nop.i 999;; } - -{ .mfi + +{ .mfi ldfpd atanf_coeff_P3,atanf_coeff_P4 = [EXP_Addr1],16 fma.s1 atanf_bsq = atanf_b,atanf_b,f0 nop.i 999 -} -{ .mfi +} +{ .mfi ldfpd atanf_coeff_P9,atanf_coeff_P10 = [EXP_Addr2] fma.s1 atanf_z4 = atanf_zsq,atanf_zsq,f0 nop.i 999;; } - -{ .mfi + +{ .mfi ldfpd atanf_coeff_P2,atanf_piby2 = [EXP_Addr1] fma.s1 atanf_x6 = atanf_t,atanf_tsq,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atanf_t4 = atanf_tsq,atanf_tsq,f0 nop.i 999;; } - -{ .mfb + +{ .mfb nop.m 999 fma.s1 atanf_x5 = atanf_t,atanf_xcub,f0 (p8) br.cond.spnt ATANF_X_INF_NAN_ZERO -} +} ;; - -{ .mfi + +{ .mfi nop.m 999 fma.s1 atanf_poly_r1 = atanf_b,atanf_coeff_R1,f1 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atanf_poly_r3 = atanf_b,atanf_coeff_R5,atanf_coeff_R4 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 atanf_poly_r2 = atanf_b,atanf_coeff_R3,atanf_coeff_R2 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atanf_z8 = atanf_z4,atanf_z4,f0 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 atanf_poly_q2 = atanf_t,atanf_coeff_Q5,atanf_coeff_Q4 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atanf_poly_q3 = atanf_t,atanf_coeff_Q7,atanf_coeff_Q6 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 atanf_z5 = atanf_z,atanf_z4,f0 nop.i 999 -} -{ .mfi +} 
+{ .mfi nop.m 999 fma.s1 atanf_poly_q1 = atanf_t,atanf_coeff_Q9,atanf_coeff_Q8 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 atanf_poly_q4 = atanf_t,atanf_coeff_Q1,atanf_coeff_Q0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atanf_poly_q5 = atanf_t,atanf_coeff_Q3,atanf_coeff_Q2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 atanf_poly_p4 = f8,atanf_coeff_P1,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atanf_poly_p5 = atanf_t,atanf_coeff_P4,atanf_coeff_P3 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 atanf_poly_r1 = atanf_z8,atanf_poly_r1,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atanf_z8_bsq = atanf_z8,atanf_bsq,f0 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 atanf_poly_q2 = atanf_tsq,atanf_poly_q3,atanf_poly_q2 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atanf_poly_r2 = atanf_bsq,atanf_poly_r3,atanf_poly_r2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 atanf_poly_p2 = atanf_t,atanf_coeff_P8,atanf_coeff_P7 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atanf_poly_q1 = atanf_poly_q1,f1,atanf_tsq nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 atanf_z13 = atanf_z5,atanf_z8,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atanf_poly_p1 = atanf_t,atanf_coeff_P10,atanf_coeff_P9 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 atanf_poly_p4 = atanf_t,atanf_poly_p4,f8 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atanf_poly_q4 = atanf_tsq,atanf_poly_q5,atanf_poly_q4 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 atanf_poly_p3 = atanf_t,atanf_coeff_P6,atanf_coeff_P5 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atanf_poly_p5 = atanf_t,atanf_poly_p5,atanf_coeff_P2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 atanf_x11 = atanf_x5,atanf_x6,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 atanf_poly_r = atanf_z8_bsq,atanf_poly_r2,atanf_poly_r1 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s0 atanf_sgnx_piby2 = atanf_sgn_x,atanf_piby2,f0 nop.i 999 -} -{ .mfi +} +{ .mfi nop.m 999 fma.s1 
atanf_poly_q2 = atanf_t4,atanf_poly_q1,atanf_poly_q2 nop.i 999;; } - -{ .mfi + +{ .mfi nop.m 999 fma.s1 atanf_poly_p1 = atanf_tsq,atanf_poly_p1,atanf_poly_p2 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 fma.s1 atanf_poly_p4 = atanf_x5,atanf_poly_p5,atanf_poly_p4 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 fma.s1 atanf_z21_poly_r = atanf_z13,atanf_poly_r,f0 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 fma.s1 atanf_poly_q = atanf_t4,atanf_poly_q2,atanf_poly_q4 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 fma.s1 atanf_poly_p1 = atanf_tsq,atanf_poly_p1,atanf_poly_p3 nop.i 999;; -} - -{ .mfi +} + +{ .mfi nop.m 999 //(atanf_pred_GT1) fnma.s atanf_answer = atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2 (p7) fnma.s.s0 atanf_answer = atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2 nop.i 999;; -} - -{ .mfb +} + +{ .mfb nop.m 999 //(atanf_pred_LE1) fma.s atanf_answer = atanf_x11,atanf_poly_p1,atanf_poly_p4 (p6) fma.s.s0 atanf_answer = atanf_x11,atanf_poly_p1,atanf_poly_p4 br.ret.sptk b0 -} +} diff --git a/sysdeps/ia64/fpu/s_atanl.S b/sysdeps/ia64/fpu/s_atanl.S index 1a23611..fea68ab 100644 --- a/sysdeps/ia64/fpu/s_atanl.S +++ b/sysdeps/ia64/fpu/s_atanl.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // @@ -82,25 +82,25 @@ // IEEE Special Conditions: // // Denormal fault raised on denormal inputs -// Underflow exceptions may occur +// Underflow exceptions may occur // Special error handling for the y=0 and x=0 case // Inexact raised when appropriate by algorithm // // atanl(SNaN) = QNaN // atanl(QNaN) = QNaN // atanl(+/-0) = +/- 0 -// atanl(+/-Inf) = +/-pi/2 +// atanl(+/-Inf) = +/-pi/2 // // atan2l(Any NaN for x or y) = QNaN -// atan2l(+/-0,x) = +/-0 for x > 0 -// atan2l(+/-0,x) = +/-pi for x < 0 -// atan2l(+/-0,+0) = +/-0 -// atan2l(+/-0,-0) = +/-pi +// atan2l(+/-0,x) = +/-0 for x > 0 +// atan2l(+/-0,x) = +/-pi for x < 0 +// atan2l(+/-0,+0) = +/-0 +// atan2l(+/-0,-0) = +/-pi // atan2l(y,+/-0) = pi/2 y > 0 // atan2l(y,+/-0) = -pi/2 y < 0 // atan2l(+/-y, Inf) = +/-0 for finite y > 0 -// atan2l(+/-Inf, x) = +/-pi/2 for finite x -// atan2l(+/-y, -Inf) = +/-pi for finite y > 0 +// atan2l(+/-Inf, x) = +/-pi/2 for finite x +// atan2l(+/-y, -Inf) = +/-pi for finite y > 0 // atan2l(+/-Inf, Inf) = +/-pi/4 // atan2l(+/-Inf, -Inf) = +/-3pi/4 // @@ -549,20 +549,20 @@ GR_SAVE_PFS = r33 GR_SAVE_B0 = r34 GR_SAVE_GP = r35 sign_X = r36 -sign_Y = r37 -swap = r38 -table_ptr1 = r39 -table_ptr2 = r40 -k = r41 -lookup = r42 -exp_ArgX = r43 -exp_ArgY = r44 -exponent_Q = r45 -significand_Q = r46 -special = r47 -sp_exp_Q = r48 -sp_exp_4sig_Q = r49 -table_base = r50 +sign_Y = r37 +swap = r38 +table_ptr1 = r39 +table_ptr2 = r40 +k = r41 +lookup = r42 +exp_ArgX = r43 +exp_ArgY = r44 +exponent_Q = r45 +significand_Q = r46 +special = r47 +sp_exp_Q = r48 +sp_exp_4sig_Q = r49 +table_base = r50 int_temp = r51 GR_Parameter_X = r49 @@ -572,7 +572,7 @@ GR_Parameter_TAG = r52 GR_temp = r52 RODATA -.align 16 +.align 16 LOCAL_OBJECT_START(Constants_atan) // double pi/2 @@ -597,7 +597,7 @@ data8 0xE36F716D2A5F89BD, 0x3FFB // Q_4 // Entries Tbl_lo (single precision) // B = 1+Index/16+1/32 Index = 0 // -data8 0x3FE9A000A935BD8E +data8 0x3FE9A000A935BD8E data4 0x23ACA08F, 0x00000000 // // Entries 
Tbl_hi (double precision) Index = 0,1,...,15 @@ -605,37 +605,37 @@ data4 0x23ACA08F, 0x00000000 // Entries Tbl_lo (single precision) // Index = 0,1,...,15 B = 2^(-1)*(1+Index/16+1/32) // -data8 0x3FDE77EB7F175A34 +data8 0x3FDE77EB7F175A34 data4 0x238729EE, 0x00000000 -data8 0x3FE0039C73C1A40B +data8 0x3FE0039C73C1A40B data4 0x249334DB, 0x00000000 -data8 0x3FE0C6145B5B43DA +data8 0x3FE0C6145B5B43DA data4 0x22CBA7D1, 0x00000000 -data8 0x3FE1835A88BE7C13 +data8 0x3FE1835A88BE7C13 data4 0x246310E7, 0x00000000 -data8 0x3FE23B71E2CC9E6A +data8 0x3FE23B71E2CC9E6A data4 0x236210E5, 0x00000000 -data8 0x3FE2EE628406CBCA +data8 0x3FE2EE628406CBCA data4 0x2462EAF5, 0x00000000 -data8 0x3FE39C391CD41719 +data8 0x3FE39C391CD41719 data4 0x24B73EF3, 0x00000000 -data8 0x3FE445065B795B55 +data8 0x3FE445065B795B55 data4 0x24C11260, 0x00000000 -data8 0x3FE4E8DE5BB6EC04 +data8 0x3FE4E8DE5BB6EC04 data4 0x242519EE, 0x00000000 -data8 0x3FE587D81F732FBA +data8 0x3FE587D81F732FBA data4 0x24D4346C, 0x00000000 -data8 0x3FE6220D115D7B8D +data8 0x3FE6220D115D7B8D data4 0x24ED487B, 0x00000000 -data8 0x3FE6B798920B3D98 +data8 0x3FE6B798920B3D98 data4 0x2495FF1E, 0x00000000 -data8 0x3FE748978FBA8E0F +data8 0x3FE748978FBA8E0F data4 0x223D9531, 0x00000000 -data8 0x3FE7D528289FA093 +data8 0x3FE7D528289FA093 data4 0x242B0411, 0x00000000 -data8 0x3FE85D69576CC2C5 +data8 0x3FE85D69576CC2C5 data4 0x2335B374, 0x00000000 -data8 0x3FE8E17AA99CC05D +data8 0x3FE8E17AA99CC05D data4 0x24C27CFB, 0x00000000 // // Entries Tbl_hi (double precision) Index = 0,1,...,15 @@ -643,7 +643,7 @@ data4 0x24C27CFB, 0x00000000 // Entries Tbl_lo (single precision) // Index = 0,1,...,15 B = 2^(-2)*(1+Index/16+1/32) // -data8 0x3FD025FA510665B5 +data8 0x3FD025FA510665B5 data4 0x24263482, 0x00000000 data8 0x3FD1151A362431C9 data4 0x242C8DC9, 0x00000000 @@ -771,19 +771,19 @@ GLOBAL_IEEE754_ENTRY(atanl) { .mfi ldfd P_hi = [table_ptr1],8 // Load double precision hi part of pi fclass.m p8,p0 = ArgY_orig, 0x1e7 // Test y natval, nan, 
inf, zero - nop.i 999 + nop.i 999 } ;; { .mfi ldfps P_lo, TWO_TO_NEG3 = [table_ptr1], 8 // Load P_lo and constant 2^-3 - nop.f 999 - nop.i 999 + nop.f 999 + nop.i 999 } { .mfi nop.m 999 fma.s1 M = f1, f1, f0 // Set M = 1.0 - nop.i 999 + nop.i 999 } ;; @@ -803,7 +803,7 @@ GLOBAL_IEEE754_ENTRY(atanl) { .mfi nop.m 999 fcmp.ge.s1 p6,p7 = Xsq, Ysq // Test for |x| >= |y| using squares - nop.i 999 + nop.i 999 } { .mfb nop.m 999 @@ -857,19 +857,19 @@ GLOBAL_IEEE754_ENTRY(atan2l) { .mfi ldfd P_hi = [table_ptr1],8 // Load double precision hi part of pi fclass.m p8,p0 = ArgY_orig, 0x1e7 // Test y natval, nan, inf, zero - nop.i 999 + nop.i 999 } ;; { .mfi ldfps P_lo, TWO_TO_NEG3 = [table_ptr1], 8 // Load P_lo and constant 2^-3 fclass.m p9,p0 = ArgX_orig, 0x1e7 // Test x natval, nan, inf, zero - nop.i 999 + nop.i 999 } { .mfi nop.m 999 fma.s1 M = f1, f1, f0 // Set M = 1.0 - nop.i 999 + nop.i 999 } ;; @@ -889,7 +889,7 @@ GLOBAL_IEEE754_ENTRY(atan2l) { .mfi nop.m 999 fcmp.ge.s1 p6,p7 = Xsq, Ysq // Test for |x| >= |y| using squares - nop.i 999 + nop.i 999 } { .mfb nop.m 999 @@ -995,7 +995,7 @@ ATANL_COMMON: } ;; -// Create a single precision representation of the signexp of Q with the +// Create a single precision representation of the signexp of Q with the // 4 most significant bits of the significand followed by a 1 and then 18 0's { .mfi nop.m 999 @@ -1071,7 +1071,7 @@ ATANL_COMMON: ;; // -// Generate sign_exp_Q b_1 b_2 b_3 b_4 1 0 0 0 ... 0 in single precision +// Generate sign_exp_Q b_1 b_2 b_3 b_4 1 0 0 0 ... 0 in single precision // representation. Note sign of Q is always 0. 
// { .mfi @@ -1185,7 +1185,7 @@ ATANL_COMMON: // C_hi_hold = 1 - C_hi * U_prime_hi (1) { .mfi nop.m 999 - fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1 + fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1 nop.i 999 } ;; @@ -1222,7 +1222,7 @@ ATANL_COMMON: // C_hi_hold = 1 - C_hi * U_prime_hi (3) { .mfi nop.m 999 - fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1 + fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1 nop.i 999 } ;; @@ -1342,7 +1342,7 @@ ATANL_COMMON: ;; -ATANL_POLY: +ATANL_POLY: // Here if 0 < V/U < 2^-3 // // *********************************************** @@ -1523,7 +1523,7 @@ ATANL_POLY: // Create small double in case need to raise underflow { .mfi - setf.d FR_temp = GR_temp + setf.d FR_temp = GR_temp fma.s1 poly = z8, poly1, poly2 // poly = poly2 + z8 * poly1 nop.i 999 } @@ -1635,9 +1635,9 @@ ATANL_POLY: } ;; -// +// // If Res_lo is denormal test if Result equals zero -// +// { .mfi nop.m 999 (p14) fclass.m.unc p14, p0 = Result, 0x07 @@ -1657,10 +1657,10 @@ ATANL_POLY: ;; -ATANL_UNSUPPORTED: +ATANL_UNSUPPORTED: { .mfb nop.m 999 - fmpy.s0 Result = ArgX,ArgY + fmpy.s0 Result = ArgX,ArgY br.ret.sptk b0 } ;; @@ -1713,7 +1713,7 @@ ATANL_X_SPECIAL: // Here if x or y inf or zero -ATANL_SPECIAL_HANDLING: +ATANL_SPECIAL_HANDLING: { .mfi nop.m 999 fclass.m p6, p7 = ArgY_orig, 0x007 // Test y zero @@ -1809,7 +1809,7 @@ ATANL_SPECIAL_HANDLING: ;; // Here if y not zero -ATANL_ArgY_Not_ZERO: +ATANL_ArgY_Not_ZERO: { .mfi nop.m 999 fclass.m p0, p10 = ArgY, 0x023 // Test y inf @@ -1841,7 +1841,7 @@ ATANL_ArgY_Not_ZERO: ;; { .mfi -(p6) add table_ptr1 = 16, table_ptr1 // Point to pi/2, if x finite +(p6) add table_ptr1 = 16, table_ptr1 // Point to pi/2, if x finite fclass.m p8, p0 = ArgX, 0x022 // Test for x=-inf nop.i 999 } @@ -1886,7 +1886,7 @@ ATANL_ArgY_Not_ZERO: ;; // Here if y not INF, and x=0 or INF -ATANL_ArgY_Not_INF: +ATANL_ArgY_Not_INF: // // Return +PI/2 when ArgY NOT Inf, ArgY > 0 and ArgX = +/-0 // Return -PI/2 when ArgY NOT Inf, ArgY < 0 and ArgX = +/-0 @@ -1953,7 +1953,7 @@ 
ATANL_ArgY_Not_INF: ;; GLOBAL_IEEE754_END(atan2l) - + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/s_cbrtl.S b/sysdeps/ia64/fpu/s_cbrtl.S index 76ef12f..00d1aed 100644 --- a/sysdeps/ia64/fpu/s_cbrtl.S +++ b/sysdeps/ia64/fpu/s_cbrtl.S @@ -56,7 +56,7 @@ // // The result is computed as // cbrt(x)= cbrt(1 - (1 - x*y)) * (1/cbrt(y)) -// where y = frcpa(x) = (-1)^sgn_y * 2^(3*k+j) * m_y, +// where y = frcpa(x) = (-1)^sgn_y * 2^(3*k+j) * m_y, // m_y in [1,2), j in {0,1,2} // // cbrt(1 - (1 - x*y)) is approximated by a degree-6 polynomial @@ -68,13 +68,13 @@ // (double-extended precision) and D (single precision) as follows: // T_hi (1 + D)= 1/cbrt(y) to about 80 bits of accuracy // -// The tables are only stored for three exponent values (i.e. +// The tables are only stored for three exponent values (i.e. // only for 2^j * m_y, where j in {0,1,2} and m_y covers the 256 // possible mantissas for an frcpa result); the index is formed // by the 8 leading mantissa bits of x, which is the same index used // by the hardware to get frcpa(x). // -// The table values are multiplied by 2^k where e is the exponent of +// The table values are multiplied by 2^k where e is the exponent of // the input number. 
This multiplication is carried out in parallel with // the polynomial evaluation: // T= 2^(k) * T_hi @@ -127,7 +127,7 @@ GR_TMP1 = r21 GR_SGNMASK = r22 GR_T_INDEX = r23 - GR_IX_T = r23 + GR_IX_T = r23 GR_IX_D = r24 GR_D_INDEX = r24 GR_TMP2 = r25 @@ -817,7 +817,7 @@ GLOBAL_LIBM_ENTRY(cbrtl) and GR_SIGN = GR_NORMEXPSGN, GR_SGNMASK // eliminate leading 1 from GR_NORMSIG = 2nd table index shl GR_INDEX2 = GR_NORMSIG, 1 - // eliminate sign from exponent + // eliminate sign from exponent andcm GR_NORMEXP = GR_NORMEXPSGN, GR_SGNMASK } ;; @@ -829,8 +829,8 @@ GLOBAL_LIBM_ENTRY(cbrtl) (p6) fnma.s1 FR_R = FR_RCP, FR_XNORM, f1 // Start computation of floor(exponent/3) by // computing (2^20+2)/3*exponent = exponent*0x55556 - // 1: exponent* = 5; - // (2^{16}-1)/3 = 0x5555: + // 1: exponent* = 5; + // (2^{16}-1)/3 = 0x5555: // will form 0x5555*exponent by using shladd's shladd GR_EXP5 = GR_NORMEXP, 2, GR_NORMEXP } diff --git a/sysdeps/ia64/fpu/s_cos.S b/sysdeps/ia64/fpu/s_cos.S index fc121fc..d278436 100644 --- a/sysdeps/ia64/fpu/s_cos.S +++ b/sysdeps/ia64/fpu/s_cos.S @@ -52,7 +52,7 @@ // 09/05/02 Work range is widened by reduction strengthen (3 parts of Pi/16) // 02/10/03 Reordered header: .section, .global, .proc, .align // 08/08/03 Improved performance -// 10/28/04 Saved sincos_r_sincos to avoid clobber by dynamic loader +// 10/28/04 Saved sincos_r_sincos to avoid clobber by dynamic loader // 03/31/05 Reformatted delimiters between data tables // API @@ -71,12 +71,12 @@ // nfloat = Round result to integer (round-to-nearest) // // r = x - nfloat * pi/2^k -// Do this as ((((x - nfloat * HIGH(pi/2^k))) - -// nfloat * LOW(pi/2^k)) - +// Do this as ((((x - nfloat * HIGH(pi/2^k))) - +// nfloat * LOW(pi/2^k)) - // nfloat * LOWEST(pi/2^k) for increased accuracy. // pi/2^k is stored as two numbers that when added make pi/2^k. 
// pi/2^k = HIGH(pi/2^k) + LOW(pi/2^k) -// HIGH and LOW parts are rounded to zero values, +// HIGH and LOW parts are rounded to zero values, // and LOWEST is rounded to nearest one. // // x = (nfloat * pi/2^k) + r @@ -508,7 +508,7 @@ _SINCOS_COMMON2: { .mfi ldfe sincos_Pi_by_16_3 = [sincos_AD_1],16 nop.f 999 - dep.z sincos_r_exp = sincos_r_signexp, 0, 17 + dep.z sincos_r_exp = sincos_r_signexp, 0, 17 };; // Polynomial coefficients (Q4, P4, Q3, P3, Q2, Q1, P2, P1) loading @@ -516,7 +516,7 @@ _SINCOS_COMMON2: // p10 is true if f8 exp is >= 0x1001a (2^27) { .mmb ldfpd sincos_P4,sincos_Q4 = [sincos_AD_1],16 - cmp.ge p10,p0 = sincos_r_exp,sincos_exp_limit + cmp.ge p10,p0 = sincos_r_exp,sincos_exp_limit (p10) br.cond.spnt _SINCOS_LARGE_ARGS // Go to "large args" routine };; @@ -542,7 +542,7 @@ _SINCOS_COMMON2: { .mfi ldfpd sincos_P1,sincos_Q1 = [sincos_AD_1],16 fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_1, sincos_NORM_f8 - nop.i 999 + nop.i 999 };; // Add 2^(k-1) (which is in sincos_r_sincos) to N @@ -551,7 +551,7 @@ _SINCOS_COMMON2: ;; // Get M (least k+1 bits of N) and sincos_GR_m = 0x1f,sincos_GR_n - nop.i 999 + nop.i 999 };; // sincos_r = sincos_r -sincos_Nfloat * sincos_Pi_by_16_2 @@ -566,14 +566,14 @@ _SINCOS_COMMON2: { .mfi add sincos_AD_2 = sincos_GR_32m, sincos_AD_1 (p8) fclass.m.unc p10,p0 = f8,0x0b - nop.i 999 + nop.i 999 };; // Load Sin and Cos table value using obtained index m (sincosf_AD_2) { .mfi ldfe sincos_Sm = [sincos_AD_2],16 - nop.f 999 - nop.i 999 + nop.f 999 + nop.i 999 };; // get rsq = r*r @@ -585,17 +585,17 @@ _SINCOS_COMMON2: { .mfi nop.m 999 fmpy.s0 fp_tmp = fp_tmp,fp_tmp // forces inexact flag - nop.i 999 + nop.i 999 };; // sincos_r_exact = sincos_r -sincos_Nfloat * sincos_Pi_by_16_3 { .mfi nop.m 999 fnma.s1 sincos_r_exact = sincos_NFLOAT, sincos_Pi_by_16_3, sincos_r - nop.i 999 + nop.i 999 };; -// Polynomials calculation +// Polynomials calculation // P_1 = P4*r^2 + P3 // Q_2 = Q4*r^2 + Q3 { .mfi @@ -606,7 +606,7 @@ _SINCOS_COMMON2: { 
.mfi nop.m 999 fma.s1 sincos_Q_temp1 = sincos_rsq, sincos_Q4, sincos_Q3 - nop.i 999 + nop.i 999 };; // get rcube = r^3 and S[m]*r^2 @@ -618,10 +618,10 @@ _SINCOS_COMMON2: { .mfi nop.m 999 fmpy.s1 sincos_rcub = sincos_r_exact, sincos_rsq - nop.i 999 + nop.i 999 };; -// Polynomials calculation +// Polynomials calculation // Q_2 = Q_1*r^2 + Q2 // P_1 = P_1*r^2 + P2 { .mfi @@ -632,10 +632,10 @@ _SINCOS_COMMON2: { .mfi nop.m 999 fma.s1 sincos_P_temp2 = sincos_rsq, sincos_P_temp1, sincos_P2 - nop.i 999 + nop.i 999 };; -// Polynomials calculation +// Polynomials calculation // Q = Q_2*r^2 + Q1 // P = P_2*r^2 + P1 { .mfi @@ -646,7 +646,7 @@ _SINCOS_COMMON2: { .mfi nop.m 999 fma.s1 sincos_P = sincos_rsq, sincos_P_temp2, sincos_P1 - nop.i 999 + nop.i 999 };; // Get final P and Q @@ -660,7 +660,7 @@ _SINCOS_COMMON2: { .mfi nop.m 999 fma.s1 sincos_P = sincos_rcub,sincos_P, sincos_r_exact - nop.i 999 + nop.i 999 };; // If sin(denormal), force underflow to be set @@ -701,7 +701,7 @@ _SINCOS_SPECIAL_ARGS: _SINCOS_UNORM: // Here if x=unorm { .mfb - getf.exp sincos_r_signexp = sincos_NORM_f8 // Get signexp of x + getf.exp sincos_r_signexp = sincos_NORM_f8 // Get signexp of x fcmp.eq.s0 p11,p0 = f8, f0 // Dummy op to set denorm flag br.cond.sptk _SINCOS_COMMON2 // Return to main path };; @@ -750,7 +750,7 @@ _SINCOS_LARGE_ARGS: { .mfi nop.m 999 fmpy.s0 sincos_save_tmp = sincos_save_tmp, sincos_save_tmp - nop.i 999 + nop.i 999 };; { .mib diff --git a/sysdeps/ia64/fpu/s_cosf.S b/sysdeps/ia64/fpu/s_cosf.S index bcdf1b0..6e1c420 100644 --- a/sysdeps/ia64/fpu/s_cosf.S +++ b/sysdeps/ia64/fpu/s_cosf.S @@ -69,7 +69,7 @@ // nfloat = Round result to integer (round-to-nearest) // // r = x - nfloat * pi/2^k -// Do this as (x - nfloat * HIGH(pi/2^k)) - nfloat * LOW(pi/2^k) +// Do this as (x - nfloat * HIGH(pi/2^k)) - nfloat * LOW(pi/2^k) // for increased accuracy. // pi/2^k is stored as two numbers that when added make pi/2^k. 
@@ -484,14 +484,14 @@ _SINCOSF_COMMON: // Polynomial coefficients (Q2, Q1, P2, P1) loading { .mmi ldfpd sincosf_P2,sincosf_Q2 = [sincosf_AD_1],16 - nop.m 999 - nop.i 999 + nop.m 999 + nop.i 999 };; // Select exponent (17 lsb) { .mmi ldfpd sincosf_P1,sincosf_Q1 = [sincosf_AD_1],16 - nop.m 999 + nop.m 999 dep.z sincosf_r_exp = sincosf_r_signexp, 0, 17 };; @@ -507,9 +507,9 @@ _SINCOSF_COMMON: // Multiply x by scaled 16/pi and add large const to shift integer part of W to // rightmost bits of significand { .mfi - nop.m 999 + nop.m 999 fma.s1 sincosf_W_2TO61_RSH = sincosf_NORM_f8, sincosf_SIG_INV_PI_BY_16_2TO61, sincosf_RSHF_2TO61 - nop.i 999 + nop.i 999 };; // sincosf_NFLOAT = Round_Int_Nearest(sincosf_W) @@ -517,14 +517,14 @@ _SINCOSF_COMMON: { .mfi nop.m 999 fms.s1 sincosf_NFLOAT = sincosf_W_2TO61_RSH,sincosf_2TOM61,sincosf_RSHF - nop.i 999 + nop.i 999 };; // get N = (int)sincosf_int_Nfloat { .mfi getf.sig sincosf_GR_n = sincosf_W_2TO61_RSH // integer N value nop.f 999 - nop.i 999 + nop.i 999 };; // Add 2^(k-1) (which is in sincosf_r_sincos=8) to N @@ -532,12 +532,12 @@ _SINCOSF_COMMON: { .mfi add sincosf_GR_n = sincosf_GR_n, sincosf_r_sincos fnma.s1 sincosf_r = sincosf_NFLOAT, sincosf_Pi_by_16_1, sincosf_NORM_f8 - nop.i 999 + nop.i 999 };; // Get M (least k+1 bits of N) { .mmi - and sincosf_GR_m = 0x1f,sincosf_GR_n // Put mask 0x1F - + and sincosf_GR_m = 0x1f,sincosf_GR_n // Put mask 0x1F - nop.m 999 // - select k+1 bits nop.i 999 };; @@ -546,7 +546,7 @@ _SINCOSF_COMMON: { .mfi shladd sincosf_AD_2 = sincosf_GR_32m, 4, sincosf_AD_1 (p8) fclass.m.unc p10,p0 = f8,0x0b // If sin denormal input - - nop.i 999 + nop.i 999 };; // Load Sin and Cos table value using obtained index m (sincosf_AD_2) @@ -572,10 +572,10 @@ _SINCOSF_COMMON: { .mfi nop.m 999 fmpy.s0 fp_tmp = fp_tmp, fp_tmp // forces inexact flag - nop.i 999 + nop.i 999 };; -// Polynomials calculation +// Polynomials calculation // Q = Q2*r^2 + Q1 // P = P2*r^2 + P1 { .mfi @@ -586,7 +586,7 @@ _SINCOSF_COMMON: { .mfi 
nop.m 999 fma.s1 sincosf_P = sincosf_rsq, sincosf_P2, sincosf_P1 - nop.i 999 + nop.i 999 };; // get rcube and S[m]*r^2 @@ -598,7 +598,7 @@ _SINCOSF_COMMON: { .mfi nop.m 999 fmpy.s1 sincosf_rcub = sincosf_r_exact, sincosf_rsq - nop.i 999 + nop.i 999 };; // Get final P and Q @@ -612,7 +612,7 @@ _SINCOSF_COMMON: { .mfi nop.m 999 fma.s1 sincosf_P = sincosf_rcub,sincosf_P,sincosf_r_exact - nop.i 999 + nop.i 999 };; // If sinf(denormal) - force underflow to be set @@ -699,8 +699,8 @@ _SINCOSF_LARGE_ARGS: } { .mfi // force inexact set nop.m 999 - fmpy.s0 sincosf_save_tmp = sincosf_save_tmp, sincosf_save_tmp - nop.i 999 + fmpy.s0 sincosf_save_tmp = sincosf_save_tmp, sincosf_save_tmp + nop.i 999 };; { .mib diff --git a/sysdeps/ia64/fpu/s_erf.S b/sysdeps/ia64/fpu/s_erf.S index 7174a19..47fdea1 100644 --- a/sysdeps/ia64/fpu/s_erf.S +++ b/sysdeps/ia64/fpu/s_erf.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -61,7 +61,7 @@ // Return erf(x) = x *Pol9(x^2) // // 3. For several subranges of 0.5 <= |x| < 5.90625 -// Return erf(x) = sign(x)*Pol19(y), +// Return erf(x) = sign(x)*Pol19(y), // where y = (|x|-b)/a, Pol19(y) = A0 + A1*y^1 + A2*y^2 + ... + A19*y^19 // // For each subrange there is particular set of coefficients. @@ -80,7 +80,7 @@ // 6. |x| = INF // Return erf(x) = sign(x) * 1.0 // -// 7. x = [S,Q]NaN +// 7. x = [S,Q]NaN // Return erf(x) = QNaN // // 8. 
x is positive denormal @@ -93,11 +93,11 @@ // // Registers used //============================================================== -// Floating Point registers used: +// Floating Point registers used: // f8, input, output // f32 -> f63 -// General registers used: +// General registers used: // r32 -> r48, r2, r3 // Predicate registers used: @@ -106,7 +106,7 @@ // p6 to filter out case when x = denormal // p7 to filter out case when x = [Q,S]NaN or +/-0, // used also to process denormals -// p8 to filter out case when 3.25 <= |x| < 4.0, +// p8 to filter out case when 3.25 <= |x| < 4.0, // used also to process denormals // p9 to filter out case when |x| = inf // p10 to filter out case when |x| < 0.5 @@ -169,7 +169,7 @@ fTSqr = f58 fTQuadr = f59 fTDeg3 = f60 fTDeg7 = f61 -fArgAbsNormSgn = f62 +fArgAbsNormSgn = f62 fTQuadrSgn = f63 // Data tables @@ -180,7 +180,7 @@ RODATA LOCAL_OBJECT_START(erf_data) // Coefficients ##0..15 -// Polynomial coefficients for the erf(x), 0.5 <= |x| < 1.0 +// Polynomial coefficients for the erf(x), 0.5 <= |x| < 1.0 data8 0xB69AC40646D1F6C1, 0x00003FD2 //A19 data8 0x90AD48C0118FA10C, 0x00003FD7 //A18 data8 0x826FBAD055EA4AB8, 0x0000BFDB //A17 @@ -197,7 +197,7 @@ data8 0xB11E30BE912617D3, 0x00003FF0 //A7 data8 0xCCF89D9351CE26E3, 0x0000BFF4 //A6 data8 0xEFF75AD1F0F22809, 0x00003FF2 //A5 data8 0xBB793EF404C09A22, 0x00003FF8 //A4 -// Polynomial coefficients for the erf(x), 1.0 <= |x| < 2.0 +// Polynomial coefficients for the erf(x), 1.0 <= |x| < 2.0 data8 0xBAE93FF4174EA59B, 0x00003FE6 //A19 data8 0x8A0FD46092F95D44, 0x0000BFEA //A18 data8 0xA37B3242B7809E12, 0x00003FEC //A17 @@ -214,7 +214,7 @@ data8 0xF84B80EFCA43895D, 0x00003FF8 //A7 data8 0x9722D22DA628A17B, 0x00003FF7 //A6 data8 0x8DB0A586F8F3381F, 0x0000BFFB //A5 data8 0x8DB0A5879F87E5BE, 0x00003FFB //A4 -// Polynomial coefficients for the erf(x), 2.0 <= |x| < 3.25 +// Polynomial coefficients for the erf(x), 2.0 <= |x| < 3.25 data8 0x9C4AF1F3A4B21AFC, 0x00003FF6 //A19 data8 
0x8D40D5D5DB741AB8, 0x0000BFF9 //A18 data8 0xDEBE7099E0A75BA4, 0x00003FFA //A17 @@ -231,7 +231,7 @@ data8 0xDD704DEFFB21B7E7, 0x0000BFFD //A7 data8 0xF0C9A6BBDE469115, 0x00003FF9 //A6 data8 0xD673A02CB5766633, 0x00003FFD //A5 data8 0x8D162CBAD8A12649, 0x0000BFFE //A4 -// Polynomial coefficients for the erf(x), 4.0 <= |x| < 6.0 +// Polynomial coefficients for the erf(x), 4.0 <= |x| < 6.0 data8 0xD4428B75C6FE8FD1, 0x0000BFFC //A19 data8 0xF76BE1935675D5C8, 0x00003FFE //A18 data8 0xFD6BB3B14AA7A8E6, 0x0000BFFF //A17 @@ -250,12 +250,12 @@ data8 0xED3003E477A53EE7, 0x00003FF6 //A5 data8 0xA4C07E9BB3FCB0F3, 0x0000BFF4 //A4 // // Coefficients ##16..19 -// Polynomial coefficients for the erf(x), 0.5 <= |x| < 1.0 +// Polynomial coefficients for the erf(x), 0.5 <= |x| < 1.0 data8 0x95FA98C337005D13, 0x0000BFF9 //A3 data8 0xE0F7E524D2808A97, 0x0000BFFB //A2 data8 0xE0F7E524D2808A98, 0x00003FFD //A1 data8 0x853F7AE0C76E915F, 0x00003FFE //A0 -// Polynomial coefficients for the erf(x), 1.0 <= |x| < 2.0 +// Polynomial coefficients for the erf(x), 1.0 <= |x| < 2.0 data8 0x8DB0A587A96ABCF0, 0x00003FFC //A3 data8 0xD488F84B7DE18DA8, 0x0000BFFD //A2 data8 0xD488F84B7DE12E9C, 0x00003FFD //A1 @@ -264,13 +264,13 @@ data8 0xD7BB3D3A08445636, 0x00003FFE //A0 data8 0xC58571D23D5C4B3A, 0x00003FFD //A3 data8 0xA94DCF467CD6AFF3, 0x0000BFFC //A2 data8 0xA94DCF467CD10A16, 0x00003FFA //A1 -data8 0xFECD70A13CAF1997, 0x00003FFE //A0 -// Polynomial coefficients for the erf(x), 4.0 <= |x| < 6.0 +data8 0xFECD70A13CAF1997, 0x00003FFE //A0 +// Polynomial coefficients for the erf(x), 4.0 <= |x| < 6.0 data8 0xB01D2B4F0D5AB8B0, 0x00003FF1 //A3 data8 0x8858A465CE594BD1, 0x0000BFEE //A2 data8 0x8858A447456DE61D, 0x00003FEA //A1 data8 0xFFFFFFBDC88BB107, 0x00003FFE //A0 -// Polynomial coefficients for the erf(x), 0.0 <= |x| < 0.5 +// Polynomial coefficients for the erf(x), 0.0 <= |x| < 0.5 data8 0xBE839EDBB36C7FCE //A9 data8 0x3EBB7745A18DD242 //A8 data8 0xBF4C02DB238F2AFC //A5 @@ -282,8 +282,8 @@ data8 
0xBF9B82CE3127F4E4 //A3 data8 0x3FBCE2F21A042B25 //A2 data8 0x906EBA8214DB688D, 0x00003FFF //A0 // 1.0 - 2^(-63) -data8 0xFFFFFFFFFFFFFFFF, 0x00003FFE -// Polynomial coefficients for the erf(x), 3.25 <= |x| < 4.0 +data8 0xFFFFFFFFFFFFFFFF, 0x00003FFE +// Polynomial coefficients for the erf(x), 3.25 <= |x| < 4.0 data8 0x95E91576C7A12250, 0x00003FE7 //A14 data8 0x8E5E0D0E1F5D3CB5, 0x0000BFEA //A13 data8 0xED761DAFAF814DE9, 0x00003FEB //A12 @@ -300,7 +300,7 @@ data8 0xC6F1E695363BCB26, 0x0000BFF1 //A2 data8 0xF4DAF4680DA54C02, 0x00003FEF //A1 data8 0xFFFFB7CFB3F2ABBE, 0x00003FFE //A0 // A = 2.0/sqrt(Pi) -data8 0x906EBA8214DB688D, 0x00003FFF +data8 0x906EBA8214DB688D, 0x00003FFF LOCAL_OBJECT_END(erf_data) @@ -319,7 +319,7 @@ GLOBAL_LIBM_ENTRY(erf) } ;; { .mfi - getf.d rArg = f8 // x in GR + getf.d rArg = f8 // x in GR fclass.m p6,p0 = f8, 0x0b // is x denormal ? shl rThreeAndQ = rThreeAndQ, 44 // bits of 3.25 } @@ -350,7 +350,7 @@ GLOBAL_LIBM_ENTRY(erf) nop.f 0 (p6) br.cond.spnt erf_denormal // branch out if x is denormal } -;; +;; { .mfi and rShiftedArgMasked = rShiftedArg, rMask // bias of x << 8 fmerge.s fArgAbs = f1, f8 // |x| @@ -361,15 +361,15 @@ GLOBAL_LIBM_ENTRY(erf) (p7) fma.d.s0 f8 = f8,f1,f8 // NaN or +/-0 (p7) br.ret.spnt b0 // exit for x = NaN or +/-0 } -;; +;; { .mfi sub rIndex = rShiftedArgMasked, rBias // index << 8 - nop.f 0 - cmp.lt p10, p0 = rShiftedArgMasked, rBias // p10 = 1 if |x| < 0.5 + nop.f 0 + cmp.lt p10, p0 = rShiftedArgMasked, rBias // p10 = 1 if |x| < 0.5 } { .mfb - // p8 = 1 if 3.25 <= |x| < 4.0 -(p8) cmp.lt p8, p11 = rShiftedAbsArg, rBiasedExpOf4 + // p8 = 1 if 3.25 <= |x| < 4.0 +(p8) cmp.lt p8, p11 = rShiftedAbsArg, rBiasedExpOf4 fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f1 (p10) br.cond.spnt erf_near_zero // branch out if |x| < 0.5 } @@ -387,10 +387,10 @@ GLOBAL_LIBM_ENTRY(erf) } ;; { .mfi - adds rCoeffAddr2 = 16, rCoeffAddr1 + adds rCoeffAddr2 = 16, rCoeffAddr1 fmerge.s fSignumX = f8, f1 // signum(x) nop.i 0 -} +} { .mfb cmp.lt p12, p0 = 
rSaturation, rShiftedAbsArg // |x| > 5.90625? nop.f 0 @@ -436,7 +436,7 @@ GLOBAL_LIBM_ENTRY(erf) ldfe fA13 = [rCoeffAddr1], 32 nop.f 0 // address of coefficients ##16..23 - add rCoeffAddr3 = rCoeffAddr3, rIndex + add rCoeffAddr3 = rCoeffAddr3, rIndex } {.mfi ldfe fA12 = [rCoeffAddr2], 32 @@ -475,7 +475,7 @@ GLOBAL_LIBM_ENTRY(erf) ldfe fA6 = [rCoeffAddr2], 32 nop.f 0 (p8) br.cond.spnt erf_3q_4 // branch out if 3.25 < |x| < 4.0 -} +} ;; {.mfi ldfe fA5 = [rCoeffAddr1], 32 @@ -565,7 +565,7 @@ GLOBAL_LIBM_ENTRY(erf) ;; { .mfi nop.m 0 - fma.s1 fA15 = fA15, fTSqr, fA13 + fma.s1 fA15 = fA15, fTSqr, fA13 nop.i 0 } { .mfi @@ -587,19 +587,19 @@ GLOBAL_LIBM_ENTRY(erf) } ;; { .mfi - nop.m 0 + nop.m 0 fma.s1 fA7 = fA7, fTSqr, fA5 nop.i 0 } ;; { .mfi - nop.m 0 + nop.m 0 fma.s1 fRes = fRes, fTQuadr, fA15 nop.i 0 } ;; { .mfi - nop.m 0 + nop.m 0 fma.s1 fA4 = fA4, fTSqr, fA2 nop.i 0 } @@ -611,7 +611,7 @@ GLOBAL_LIBM_ENTRY(erf) } ;; { .mfi - nop.m 0 + nop.m 0 fma.s1 fA4 = fA7, fTDeg3, fA4 nop.i 0 } @@ -637,7 +637,7 @@ GLOBAL_LIBM_ENTRY(erf) // Here if 3.25 < |x| < 4.0 .align 32 -erf_3q_4: +erf_3q_4: .pred.rel "mutex", p14, p15 { .mfi ldfe fA5 = [rCoeffAddr1], 32 @@ -660,7 +660,7 @@ erf_3q_4: fma.s1 fA15 = fA15, fArgAbs, fA14 nop.i 0 } -;; +;; { .mfi nop.m 0 fma.s1 fA13 = fA13, fArgAbs, fA12 @@ -671,7 +671,7 @@ erf_3q_4: fma.s1 fA11 = fA11, fArgAbs, fA10 nop.i 0 } -;; +;; { .mfi nop.m 0 fma.s1 fA9 = fA9, fArgAbs, fA8 @@ -682,7 +682,7 @@ erf_3q_4: fma.s1 fArgAbsNormSgn = fArgAbs, fSignumX, f0 nop.i 0 } -;; +;; { .mfi nop.m 0 fma.s1 fTQuadr = fTSqr, fTSqr, f0 @@ -694,24 +694,24 @@ erf_3q_4: fma.s1 fRes = fRes, fTSqr, fA17 nop.i 0 } -;; +;; { .mfi nop.m 0 fma.s1 fA15 = fA15, fTSqr, fA13 nop.i 0 } -;; +;; { .mfi nop.m 0 fma.s1 fA11 = fA11, fTSqr, fA9 nop.i 0 -} +} { .mfi nop.m 0 fma.s1 fA7 = fA7, fArgAbs, fA6 nop.i 0 } -;; +;; { .mfi nop.m 0 fma.s1 fTDeg7 = fTQuadr, fTSqr, f0 @@ -722,10 +722,10 @@ erf_3q_4: fma.s1 fRes = fRes, fTQuadr, fA15 nop.i 0 } -;; +;; { .mfi nop.m 0 - fma.s1 fA11 = 
fA11, fTSqr, fA7 + fma.s1 fA11 = fA11, fTSqr, fA7 nop.i 0 } ;; @@ -734,7 +734,7 @@ erf_3q_4: fma.s1 fRes = fRes, fTDeg7, fA11 nop.i 0 } -;; +;; { .mfi nop.m 0 // result for negative argument @@ -754,7 +754,7 @@ erf_3q_4: erf_near_zero: { .mfi adds rCoeffAddr1 = 1280, rDataPtr // address of A9 - fma.s1 fTSqr = fArgSqr, fArgSqr, f0 // x^4 + fma.s1 fTSqr = fArgSqr, fArgSqr, f0 // x^4 nop.i 0 } { .mfi @@ -887,7 +887,7 @@ erf_saturation: br.ret.sptk b0 // Exit for 5.90625 <=|x|< +inf } ;; - + // Here if x is double precision denormal .align 32 erf_denormal: diff --git a/sysdeps/ia64/fpu/s_erfc.S b/sysdeps/ia64/fpu/s_erfc.S index addfef4..c67c61a 100644 --- a/sysdeps/ia64/fpu/s_erfc.S +++ b/sysdeps/ia64/fpu/s_erfc.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -51,7 +51,7 @@ // Overview of operation //============================================================== // 1. 0 <= x <= 28.0 -// +// // erfc(x) = P14(z) * exp( -x^2 ), z = x - x(i). // // Comment: @@ -59,38 +59,38 @@ // Let x(i) = -1.0 + 2^(i/4),i=0,...19. So we have 20 unequal // argument intervals [x(i),x(i+1)] with length ratio q = 2^(1/4). // Values x(i) we have in the table erfc_xb_table. -// +// // Let x(i)<= x < x(i+1). // We can find i as exponent of number (x + 1)^4. -// +// // Let P14(z) - polynomial approximation of degree 14 for function // erfc(z+x(i)) * exp( (z+x(i))^2) and 0 <= z <= x(i+1)-x(i). // Polynomial coeffitients we have in the table erfc_p_table. // // So we can find result for erfc(x) as above. -// Algorithm description for exp function see below. 
-// +// Algorithm description for exp function see below. +// // 2. -6 <= x < 0 // // erfc(x) = 2.0 - erfc(-x) // // 3. x > 28.0 -// erfc(x) ~=~ 0.0 +// erfc(x) ~=~ 0.0 // -// 4. x < -6.0 -// erfc(x) ~=~ 2.0 +// 4. x < -6.0 +// erfc(x) ~=~ 2.0 -// Special values +// Special values //============================================================== // erfc(+0) = 1.0 // erfc(-0) = 1.0 -// erfc(+qnan) = +qnan -// erfc(-qnan) = -qnan -// erfc(+snan) = +qnan -// erfc(-snan) = -qnan +// erfc(+qnan) = +qnan +// erfc(-qnan) = -qnan +// erfc(+snan) = +qnan +// erfc(-snan) = -qnan -// erfc(-inf) = 2.0 +// erfc(-inf) = 2.0 // erfc(+inf) = +0 //============================================================== @@ -118,16 +118,16 @@ // Comment for exp for erfc: // // We use quad precision for calculate input argument -x^2 and add -// result low bits to value delta in exp. +// result low bits to value delta in exp. // Registers used //============================================================== -// Floating Point registers used: +// Floating Point registers used: // f8, input // f9 -> f15, f32 -> f93 -// General registers used: -// r32 -> r68 +// General registers used: +// r32 -> r68 // Predicate registers used: // p6 -> p15 @@ -201,15 +201,15 @@ EXP_INV_LN2_2TO63 = f7 EXP_W_2TO56_RSH = f9 EXP_RSHF_2TO56 = f10 -exp_P4 = f11 -exp_P3 = f12 -exp_P2 = f13 -exp_P1 = f14 +exp_P4 = f11 +exp_P3 = f12 +exp_P2 = f13 +exp_P1 = f14 exp_ln2_by_128_hi = f15 - -exp_ln2_by_128_lo = f32 + +exp_ln2_by_128_lo = f32 EXP_RSHF = f33 -EXP_Nfloat = f34 +EXP_Nfloat = f34 exp_r = f35 exp_f = f36 exp_rsq = f37 @@ -222,7 +222,7 @@ exp_P_lo = f43 exp_P_hi = f44 exp_P = f45 exp_S = f46 -EXP_NORM_f8 = f47 +EXP_NORM_f8 = f47 exp_S2 = f48 exp_T2 = f49 @@ -355,7 +355,7 @@ LOCAL_OBJECT_END(erfc_xb_table) LOCAL_OBJECT_START(erfc_p_table) -// Pol0 +// Pol0 data8 0x8000000000000000, 0x00003FFF //A0 = +1.00000000000000000000e+00L data8 0x906EBA8214DB688D, 0x0000BFFF //A1 = -1.12837916709551257389e+00L data8 
0xFFFFFFFFFFFFFFEB, 0x00003FFE //A2 = +9.99999999999999998841e-01L @@ -371,7 +371,7 @@ data8 0xE36112A686F5165B, 0x0000BFF6 //A11 = -3.46953111013788405745e-03L data8 0xB3DD6B2DB3307D2E, 0x00003FF5 //A12 = +1.37226041156280127011e-03L data8 0x8018A34267FED226, 0x0000BFF4 //A13 = -4.88648380816410282971e-04L data8 0xFBBA6A7AEBD3ABD9, 0x00003FF1 //A14 = +1.20033353451879025825e-04L -// Pol1 +// Pol1 data8 0xD15A1EF03BB91E71, 0x00003FFE //A0 = +8.17781385088640600540e-01L data8 0xD1A4ADDAC3337118, 0x0000BFFE //A1 = -8.18919053944410683867e-01L data8 0xA9AF9FFA2AD18CB0, 0x00003FFE //A2 = +6.62836073471060947628e-01L @@ -387,7 +387,7 @@ data8 0xBA821A59FC05FBAD, 0x0000BFF5 //A11 = -1.42294475244146555952e-03L data8 0x8D535042E11A0D89, 0x00003FF4 //A12 = +5.39113782651680545599e-04L data8 0xBE589447DB26564E, 0x0000BFF2 //A13 = -1.81528103431449706486e-04L data8 0xABC8C7EF636F5B0A, 0x00003FF0 //A14 = +4.09565689009869217620e-05L -// Pol2 +// Pol2 data8 0xA9973ABB272898B2, 0x00003FFE //A0 = +6.62463827792779356910e-01L data8 0x945F1A7993F7AADD, 0x0000BFFE //A1 = -5.79576162988785154930e-01L data8 0xD84439C6609A8A62, 0x00003FFD //A2 = +4.22395520654665085222e-01L @@ -403,7 +403,7 @@ data8 0x86FAEBB4438A20FA, 0x0000BFF4 //A11 = -5.14908443679775343409e-04L data8 0xC2503856CE48A657, 0x00003FF2 //A12 = +1.85311660448280465934e-04L data8 0xF52642F22A26965B, 0x0000BFF0 //A13 = -5.84481856856861454591e-05L data8 0xC98588E1A95FFDBD, 0x00003FEE //A14 = +1.20116245684500489648e-05L -// Pol3 +// Pol3 data8 0x887CBA2C47B1E2B5, 0x00003FFE //A0 = +5.33153186617432643784e-01L data8 0xCD81909CF194328E, 0x0000BFFD //A1 = -4.01379126699602646289e-01L data8 0x84DCA15C52122372, 0x00003FFD //A2 = +2.59495775718310530164e-01L @@ -419,7 +419,7 @@ data8 0xAAE3CAAB9D117591, 0x0000BFF2 //A11 = -1.62973223928790256249e-04L data8 0xE7704D06A3080C19, 0x00003FF0 //A12 = +5.51792801195012080688e-05L data8 0x875A5B53E510F305, 0x0000BFEF //A13 = -1.61353297293572230995e-05L data8 0xC8F10CDDB9CC9A42, 
0x00003FEC //A14 = +2.99426321046583353559e-06L -// Pol4 +// Pol4 data8 0xDAEC3C07CAB590C1, 0x00003FFD //A0 = +4.27583576155807004411e-01L data8 0x8BE271F8BE0280AC, 0x0000BFFD //A1 = -2.73212014783898564863e-01L data8 0x9E13941E19661429, 0x00003FFC //A2 = +1.54371561371908397882e-01L @@ -435,7 +435,7 @@ data8 0xBC17A73E9CA51313, 0x0000BFF0 //A11 = -4.48447217225392170834e-05L data8 0xED10FE8FC0E44CAD, 0x00003FEE //A12 = +1.41302576244352578317e-05L data8 0xFE49912328516F81, 0x0000BFEC //A13 = -3.78917710289305330220e-06L data8 0xA8F6077E25DAFD33, 0x00003FEA //A14 = +6.29428967202166402369e-07L -// Pol5 +// Pol5 data8 0xAF72220985BED710, 0x00003FFD //A0 = +3.42667640364081975844e-01L data8 0xBC1CB559042410AB, 0x0000BFFC //A1 = -1.83703263815036934677e-01L data8 0xB730BF62E0B63A3C, 0x00003FFB //A2 = +8.94484474229911741150e-02L @@ -451,7 +451,7 @@ data8 0xB3911863705825F6, 0x0000BFEE //A11 = -1.07030140392753204852e-05L data8 0xD023CF5C3F915685, 0x00003FEC //A12 = +3.10152594473606007552e-06L data8 0xCA7016FADFF584F5, 0x0000BFEA //A13 = -7.54139761055503416594e-07L data8 0xEEBB5CC0901D2BB0, 0x00003FE7 //A14 = +1.11168196441717301549e-07L -// Pol6 +// Pol6 data8 0x8CD1160326A754AF, 0x00003FFD //A0 = +2.75032699474947383325e-01L data8 0xFB22A4C657119388, 0x0000BFFB //A1 = -1.22624671271190511269e-01L data8 0xD02B2CA872A774E9, 0x00003FFA //A2 = +5.08224243596176920409e-02L @@ -467,7 +467,7 @@ data8 0x950CBA5D80D8125E, 0x0000BFEC //A11 = -2.22101388436550539151e-06L data8 0x9CE72C0409A3E800, 0x00003FEA //A12 = +5.84509280984781223375e-07L data8 0x88CCD7A000D1C213, 0x0000BFE8 //A13 = -1.27405082040077425019e-07L data8 0x8DF4EC84F093B1C0, 0x00003FE5 //A14 = +1.65259388738830506389e-08L -// Pol7 +// Pol7 data8 0xE2BF82A153B1B82E, 0x00003FFC //A0 = +2.21433678719152843912e-01L data8 0xA72A9AE0BD7F29D5, 0x0000BFFB //A1 = -8.16242313227913578068e-02L data8 0xE98939292289EDBE, 0x00003FF9 //A2 = +2.85078159732432477516e-02L @@ -483,7 +483,7 @@ data8 0xD8D0ED030032926D, 
0x0000BFE9 //A11 = -4.03851487695924456733e-07L data8 0xCCA1CA2AC3EB8973, 0x00003FE7 //A12 = +9.52891963880517988726e-08L data8 0x9E26A080F9DA39DE, 0x0000BFE5 //A13 = -1.84111863600343741644e-08L data8 0x8F3DC58F64A92C62, 0x00003FE2 //A14 = +2.08443519336792003049e-09L -// Pol8 +// Pol8 data8 0xB74C13E914E9666F, 0x00003FFC //A0 = +1.79001151181389950418e-01L data8 0xDEB57268A58B763B, 0x0000BFFA //A1 = -5.43722600071728705200e-02L data8 0x821FF0D4C605A4CD, 0x00003FF9 //A2 = +1.58843711598712515609e-02L @@ -499,7 +499,7 @@ data8 0x8BFE95FCD7B92763, 0x0000BFE7 //A11 = -6.51900079707465044843e-08L data8 0xE9F15C8E7F58CF90, 0x00003FE4 //A12 = +1.36172642554216769522e-08L data8 0x9E90F22B11FAF8B5, 0x0000BFE2 //A13 = -2.30744183054978535129e-09L data8 0xF8CF74F1A138FBBA, 0x00003FDE //A14 = +2.26291720693360003233e-10L -// Pol9 +// Pol9 data8 0x94D45274A831ED57, 0x00003FFC //A0 = +1.45341194505862183128e-01L data8 0x94D4518B699A4A68, 0x0000BFFA //A1 = -3.63352952323113355459e-02L data8 0x90C3B59FF403A916, 0x00003FF8 //A2 = +8.83572327421709216515e-03L @@ -515,7 +515,7 @@ data8 0xA34CD3DFAC12AA45, 0x0000BFE4 //A11 = -9.50531730989412282035e-09L data8 0xEEBB49645DE0E34C, 0x00003FE1 //A12 = +1.73700091999434388879e-09L data8 0x8C86D8677DEACFBA, 0x0000BFDF //A13 = -2.55616650187281815453e-10L data8 0xBDB223D0FE2A7D6B, 0x00003FDB //A14 = +2.15659223402509415592e-11L -// Pol10 +// Pol10 data8 0xF2C1812715E4050A, 0x00003FFB //A0 = +1.18533143048567888157e-01L data8 0xC7DA2C565ADAEE57, 0x0000BFF9 //A1 = -2.43960252726894623056e-02L data8 0xA15CEFFD632F697D, 0x00003FF7 //A2 = +4.92440908672041077933e-03L @@ -531,7 +531,7 @@ data8 0xAF86504D78D35E89, 0x0000BFE1 //A11 = -1.27711000692808421573e-09L data8 0xDE1CE78ADB6DDF04, 0x00003FDE //A12 = +2.02010513073041015283e-10L data8 0xE124FFAA267301A5, 0x0000BFDB //A13 = -2.55959692063871343080e-11L data8 0x81F1BEBEFBE168D2, 0x00003FD8 //A14 = +1.84661980716000872722e-12L -// Pol11 +// Pol11 data8 0xC6CE5D7D18203EAA, 0x00003FFB //A0 = 
+9.70732978630764996752e-02L data8 0x86E8A30A76923C88, 0x0000BFF9 //A1 = -1.64683517829920230086e-02L data8 0xB4A1CBB7576B4183, 0x00003FF6 //A2 = +2.75622581042760461528e-03L @@ -547,7 +547,7 @@ data8 0xB16A6CC5A3AE6E01, 0x0000BFDE //A11 = -1.61358659378896671620e-10L data8 0xC0970F2551C52F96, 0x00003FDB //A12 = +2.18949565869759698947e-11L data8 0xA6E029ABB3BB500C, 0x0000BFD8 //A13 = -2.37144541649446501026e-12L data8 0xA3E43F3857D1B6A5, 0x00003FD4 //A14 = +1.45564973108152568130e-13L -// Pol12 +// Pol12 data8 0xA36E35FC807B3E64, 0x00003FFB //A0 = +7.98000543291529334886e-02L data8 0xB725A29237C8F94F, 0x0000BFF8 //A1 = -1.11784064873715046550e-02L data8 0xCB51EF23EAD5F327, 0x00003FF5 //A2 = +1.55120891755237931425e-03L @@ -563,7 +563,7 @@ data8 0xABD305A38349EAEB, 0x0000BFDB //A11 = -1.95341618552982314342e-11L data8 0x9EDB00104DB66DD9, 0x00003FD8 //A12 = +2.25747200093121867690e-12L data8 0xE9F80AF513F2B8AB, 0x0000BFD4 //A13 = -2.07806143133802417637e-13L data8 0xC2B840C3859AB166, 0x00003FD0 //A14 = +1.08091168358477817812e-14L -// Pol13 +// Pol13 data8 0x86CD0BF01914407A, 0x00003FFB //A0 = +6.58207829138836028568e-02L data8 0xF9F4A17FA70807C3, 0x0000BFF7 //A1 = -7.62803922344113067603e-03L data8 0xE63BF84EDE20EDAA, 0x00003FF4 //A2 = +8.78273993036530088653e-04L @@ -579,7 +579,7 @@ data8 0xA1FB98FA19E62A4F, 0x0000BFD8 //A11 = -2.30191407969654156362e-12L data8 0xFDB2E0599016AD1E, 0x00003FD4 //A12 = +2.25329742249079975388e-13L data8 0x9E179A99CDD4BF4B, 0x0000BFD1 //A13 = -1.75517603530017718494e-14L data8 0xDE4DE992A707C7BC, 0x00003FCC //A14 = +7.71273133169032472595e-16L -// Pol14 +// Pol14 data8 0xDF0639E60CF6E96C, 0x00003FFA //A0 = +5.44492971101228988138e-02L data8 0xAB6737B6065BD1C2, 0x0000BFF7 //A1 = -5.23081035867078490333e-03L data8 0x8322CC0765FD9C27, 0x00003FF4 //A2 = +5.00243857322493802503e-04L @@ -595,7 +595,7 @@ data8 0x967A0ECC142382D9, 0x0000BFD5 //A11 = -2.67300472044743953909e-13L data8 0xC6D8869855133985, 0x00003FD1 //A12 = 
+2.20763189681614758000e-14L data8 0xD10AC0B228ABCECC, 0x0000BFCD //A13 = -1.45052027893524847250e-15L data8 0xF7C6DEB4522487A3, 0x00003FC8 //A14 = +5.37280367113168366711e-17L -// Pol15 +// Pol15 data8 0xB8F57DECFAC3B255, 0x00003FFA //A0 = +4.51559943173131409760e-02L data8 0xEC1B8A6C822C036F, 0x0000BFF6 //A1 = -3.60271577347565115947e-03L data8 0x963A6DD66951B72E, 0x00003FF3 //A2 = +2.86537625289770759336e-04L @@ -611,7 +611,7 @@ data8 0x8AF8F1E3FED32CEC, 0x0000BFD2 //A11 = -3.08580807479307213059e-14L data8 0x9A88033A08842BEA, 0x00003FCE //A12 = +2.14455258045503137285e-15L data8 0x88BCF775B7B3A939, 0x0000BFCA //A13 = -1.18601440246395438386e-16L data8 0x88687B63A5B7135E, 0x00003FC5 //A14 = +3.69734984736162880476e-18L -// Pol16 +// Pol16 data8 0x99B8A501204BF3E7, 0x00003FFA //A0 = +3.75296063885057657456e-02L data8 0xA33FA20D2867C79C, 0x0000BFF6 //A1 = -2.49097544033960143953e-03L data8 0xACFD14CA6AA55829, 0x00003FF2 //A2 = +1.64974783411741182991e-04L @@ -627,7 +627,7 @@ data8 0x805C040421E7A098, 0x0000BFCF //A11 = -3.56269003968981157635e-15L data8 0xEFCCD20DE93A138E, 0x00003FCA //A12 = +2.07993414310230172191e-16L data8 0xB259764466732080, 0x0000BFC6 //A13 = -9.66834364652262630640e-18L data8 0x9597C1DB6AF830E4, 0x00003FC1 //A14 = +2.53420063550355940811e-19L -// Pol17 +// Pol17 data8 0xFFFCBD66BAA4368C, 0x00003FF9 //A0 = +3.12484454387527380657e-02L data8 0xE28174723762D197, 0x0000BFF5 //A1 = -1.72810121976742793952e-03L data8 0xC81D832836019EC4, 0x00003FF1 //A2 = +9.54224026432644399736e-05L @@ -643,7 +643,7 @@ data8 0xEE034E350C65D2D9, 0x0000BFCB //A11 = -4.12886586201102092942e-16L data8 0xBA94473E52495304, 0x00003FC7 //A12 = +2.02289587087169937807e-17L data8 0xE913D34CBB853CEE, 0x0000BFC2 //A13 = -7.89697093687557412061e-19L data8 0xA44576A85E8CAB59, 0x00003FBD //A14 = +1.73929048516879172258e-20L -// Pol18 +// Pol18 data8 0xD579A3FE4622DED2, 0x00003FF9 //A0 = +2.60589793198885278242e-02L data8 0x9D97EB84E7CD89C8, 0x0000BFF5 //A1 = 
-1.20234251012583627659e-03L data8 0xE86EFDC2CCA5C47B, 0x00003FF0 //A2 = +5.54164790116744315389e-05L @@ -659,7 +659,7 @@ data8 0xDDF6F1B79F50E3C4, 0x0000BFC8 //A11 = -4.81309059042573202592e-17L data8 0x91F283C0351A9ACA, 0x00003FC4 //A12 = +1.97795505638619048412e-18L data8 0x990BC4FAFA9C7542, 0x0000BFBF //A13 = -6.48174913943425248713e-20L data8 0xB536865B89676892, 0x00003FB9 //A14 = +1.19916696090758913485e-21L -// Pol19 +// Pol19 data8 0xB241CEB1B7C953F1, 0x00003FF9 //A0 = +2.17598950382519671244e-02L data8 0xDBD6FBA9B11B85E1, 0x0000BFF4 //A1 = -8.38622198373701898430e-04L data8 0x877605B1AD082441, 0x00003FF0 //A2 = +3.22964249573360786077e-05L @@ -698,7 +698,7 @@ GLOBAL_LIBM_ENTRY(erfc) mov exp_GR_rshf_2to56 = 0x4768 // begin 1.1 2^(63+56) } { .mlx - mov exp_TB1_size = 0x100 + mov exp_TB1_size = 0x100 movl exp_GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc //signif. of 1/ln2 };; @@ -710,7 +710,7 @@ GLOBAL_LIBM_ENTRY(erfc) { .mfi mov exp_GR_exp_2tom56 = 0xffff-56 fnma.s1 EXP_NORM_f8 = f8, f8, f0 // high bits for -x^2 - nop.i 0 + nop.i 0 };; @@ -718,7 +718,7 @@ GLOBAL_LIBM_ENTRY(erfc) { .mfi setf.sig EXP_INV_LN2_2TO63 = exp_GR_sig_inv_ln2 // form 1/ln2 * 2^63 (p6) fma.s1 FR_AbsArg = f1, f0, f8 // |x|, if x >= 0 - mov GR_POS_ARG_ASYMP = 0x403C + mov GR_POS_ARG_ASYMP = 0x403C } { .mfi mov GR_NEG_ARG_ASYMP = 0x4018 @@ -729,11 +729,11 @@ GLOBAL_LIBM_ENTRY(erfc) { .mfi setf.exp EXP_2TOM56 = exp_GR_exp_2tom56 // 2^-56 for scaling Nfloat fclass.m p10,p0 = f8, 0x21 // p10: x = +inf - mov exp_GR_17ones = 0x1FFFF + mov exp_GR_17ones = 0x1FFFF } -{ .mlx +{ .mlx setf.d EXP_RSHF_2TO56 = exp_GR_rshf_2to56 // const 1.10*2^(63+56) - movl GR_ERFC_XB_TB = 0x1A0 + movl GR_ERFC_XB_TB = 0x1A0 };; @@ -744,9 +744,9 @@ GLOBAL_LIBM_ENTRY(erfc) shl exp_GR_rshf = exp_GR_rshf, 48 //end 1.1 2^63 for right shift } { .mfi - nop.m 0 + nop.m 0 (p7) fma.s1 FR_Tmp = FR_Tmp1, FR_Tmp1, f0 // (|x|+1)^2, x<0 - mov GR_0x1 = 0x1 + mov GR_0x1 = 0x1 };; { .mfi @@ -763,7 +763,7 @@ GLOBAL_LIBM_ENTRY(erfc) { .mfi 
nop.m 0 fclass.m p11,p0 = f8, 0xc3 // p11: x = nan - nop.i 0 + nop.i 0 } { .mfi setf.d EXP_RSHF = exp_GR_rshf //Form right shift const 1.100 * 2^63 @@ -772,8 +772,8 @@ GLOBAL_LIBM_ENTRY(erfc) };; { .mfi - setf.d FR_EpsNorm = GR_EpsNorm - nop.f 0 + setf.d FR_EpsNorm = GR_EpsNorm + nop.f 0 (p6) shl GR_ARG_ASYMP = GR_POS_ARG_ASYMP, 48//p6:ARG_ASYMP= 28.0,x>=0 } { .mfi @@ -789,18 +789,18 @@ GLOBAL_LIBM_ENTRY(erfc) };; { .mfi - sub GR_mBIAS = r0, GR_BIAS + sub GR_mBIAS = r0, GR_BIAS fma.s1 FR_Tmp = FR_Tmp, FR_Tmp, f0 // (|x|+1)^4 nop.i 0 } { .mfi ldfe exp_ln2_by_128_lo = [EXP_AD_TB1], 16 nop.f 0 - nop.i 0 + nop.i 0 };; { .mfi - getf.d GR_AbsArg = FR_AbsArg + getf.d GR_AbsArg = FR_AbsArg nop.f 0 add GR_ERFC_XB_TB = GR_ERFC_XB_TB, EXP_AD_TB1//pointer to XB_TBL } @@ -815,7 +815,7 @@ GLOBAL_LIBM_ENTRY(erfc) fma.s1 EXP_W_2TO56_RSH = EXP_NORM_f8,EXP_INV_LN2_2TO63,EXP_RSHF_2TO56 shladd GR_ShftPi_bias = GR_ShftPi_bias, 4, r0 // BIAS * 240 } -{ .mfb +{ .mfb nop.m 0 (p10) fma.d.s0 f8 = f0, f1, f0 // p10: y = 0 for x = +inf (p10) br.ret.spnt b0 // p10: quick exit for x = +inf @@ -825,7 +825,7 @@ GLOBAL_LIBM_ENTRY(erfc) .pred.rel "mutex",p6,p7 { .mfi (p6) cmp.gt.unc p15,p0 = GR_AbsArg,GR_ARG_ASYMP //p15: x > 28.0,p6: x >= 0 - nop.f 0 + nop.f 0 (p7) cmp.gt.unc p14,p0 = GR_AbsArg, GR_ARG_ASYMP //p14: x < - 6.0,p7: x < 0 } { .mfb @@ -833,9 +833,9 @@ GLOBAL_LIBM_ENTRY(erfc) (p11) fma.d.s0 f8 = f8, f1, f0 //p11: y = x for x = nan (p11) br.ret.spnt b0 //p11: quick exit for x = nan };; - -{ .mfi - add EXP_AD_P = exp_TB2_size, EXP_AD_TB2 + +{ .mfi + add EXP_AD_P = exp_TB2_size, EXP_AD_TB2 fms.s1 f8_sq_lo = f1, f1, f8_sq_lo // 1 - low bits for -x^2 nop.i 0 };; @@ -844,14 +844,14 @@ GLOBAL_LIBM_ENTRY(erfc) ldfpd exp_P4, exp_P3 = [EXP_AD_P], 16 fmerge.s FR_X = f8,f8 shladd GR_ShftXBi_bias = GR_mBIAS, 4, r0 -} +} { .mfb nop.m 0 (p14) fnma.d.s0 FR_RESULT = FR_EpsNorm,FR_EpsNorm,FR_2 //p14:y ~=~ 2,x< -6.0 (p14) br.ret.spnt b0 //p14: quick exit for x < -6.0 };; -//p15: y ~=~ 0.0(result with 
underflow error), x > ARG_ASYMP = 28, +//p15: y ~=~ 0.0(result with underflow error), x > ARG_ASYMP = 28, { .mfi ldfpd exp_P2, exp_P1 = [EXP_AD_P] fma.d.s0 FR_Tmpf = f1, f1, FR_EpsNorm // flag i @@ -859,14 +859,14 @@ GLOBAL_LIBM_ENTRY(erfc) } { .mfb (p15) mov GR_Parameter_TAG = 208 -(p15) fma.d.s0 FR_RESULT = FR_EpsNorm,FR_EpsNorm,f0 +(p15) fma.d.s0 FR_RESULT = FR_EpsNorm,FR_EpsNorm,f0 (p15) br.cond.spnt __libm_error_region };; //p8: x < 27.0, result without ungerflow error { .mfi getf.exp GR_IndxPlusBias = FR_Tmp // exp + bias for (|x|+1)^4 - fcmp.lt.s1 p8,p0 = FR_NormX,FR_UnfBound + fcmp.lt.s1 p8,p0 = FR_NormX,FR_UnfBound nop.i 0 } { .mfi @@ -878,11 +878,11 @@ GLOBAL_LIBM_ENTRY(erfc) { .mmi shladd GR_ShftXBi = GR_IndxPlusBias, 4, GR_ShftXBi_bias shladd GR_ShftPi = GR_IndxPlusBias, 4, GR_ShftPi_bias - shl GR_ShftPi_8 = GR_IndxPlusBias, 8 + shl GR_ShftPi_8 = GR_IndxPlusBias, 8 };; { .mmi - getf.sig exp_GR_N = EXP_W_2TO56_RSH + getf.sig exp_GR_N = EXP_W_2TO56_RSH add GR_ERFC_XB_TB = GR_ERFC_XB_TB, GR_ShftXBi// pointer to XB[i] sub GR_ShftPi = GR_ShftPi_8, GR_ShftPi // (256-16)*i };; @@ -890,13 +890,13 @@ GLOBAL_LIBM_ENTRY(erfc) { .mmi ldfe FR_Xb = [GR_ERFC_XB_TB] add GR_ShftA12 = 0xC0, GR_ShftPi // pointer shift for A12 - add GR_ShftA13 = 0xD0, GR_ShftPi // pointer shift for A13 + add GR_ShftA13 = 0xD0, GR_ShftPi // pointer shift for A13 };; { .mfi add GR_P_A13 = GR_ERFC_P_TB, GR_ShftA13 // pointer to A13 nop.f 0 - and exp_GR_index_1 = 0x0f, exp_GR_N + and exp_GR_index_1 = 0x0f, exp_GR_N } { .mfi add GR_P_A12 = GR_ERFC_P_TB, GR_ShftA12 // pointer to A12 @@ -905,52 +905,52 @@ GLOBAL_LIBM_ENTRY(erfc) };; { .mfi - ldfe FR_A12 = [GR_P_A12], -64 + ldfe FR_A12 = [GR_P_A12], -64 nop.f 0 - and exp_GR_index_2_16 = 0x70, exp_GR_N + and exp_GR_index_2_16 = 0x70, exp_GR_N } { .mfi - ldfe FR_A13 = [GR_P_A13], -64 + ldfe FR_A13 = [GR_P_A13], -64 nop.f 0 - shladd EXP_AD_T1 = exp_GR_index_1, 4, EXP_AD_TB1 -};; + shladd EXP_AD_T1 = exp_GR_index_1, 4, EXP_AD_TB1 +};; -{ .mmi +{ .mmi 
ldfe FR_A8 = [GR_P_A12], 32 - ldfe FR_A9 = [GR_P_A13], 32 + ldfe FR_A9 = [GR_P_A13], 32 add EXP_AD_T2 = EXP_AD_TB2, exp_GR_index_2_16 };; { .mmi ldfe FR_A10 = [GR_P_A12], -96 ldfe FR_A11 = [GR_P_A13], -96 - nop.i 0 + nop.i 0 };; { .mmi ldfe FR_A4 = [GR_P_A12], 32 - ldfe FR_A5 = [GR_P_A13], 32 + ldfe FR_A5 = [GR_P_A13], 32 shr r2 = exp_GR_N, 0x7 };; -{ .mfi - ldfe FR_A6 = [GR_P_A12], -64 +{ .mfi + ldfe FR_A6 = [GR_P_A12], -64 fma.s1 exp_rP4pP3 = exp_r, exp_P4, exp_P3 nop.i 0 } -{ .mfi - ldfe FR_A7 = [GR_P_A13], -64 +{ .mfi + ldfe FR_A7 = [GR_P_A13], -64 fma.s1 exp_rsq = exp_r, exp_r, f0 nop.i 0 };; { .mmi ldfe FR_A2 = [GR_P_A12], -32 - ldfe FR_A3 = [GR_P_A13], -32 + ldfe FR_A3 = [GR_P_A13], -32 addl exp_GR_biased_M = 0xffff, r2 };; -{ .mmi +{ .mmi ldfe FR_A0 = [GR_P_A12], 224 ldfe FR_A1 = [GR_P_A13] nop.i 0 @@ -975,12 +975,12 @@ GLOBAL_LIBM_ENTRY(erfc) };; { .mfi - nop.m 0 + nop.m 0 fma.s1 exp_rcube = exp_r, exp_rsq, f0 nop.i 0 -} -{ .mfi - nop.m 0 +} +{ .mfi + nop.m 0 fma.s1 exp_P_lo = exp_r, exp_rP4pP3, exp_P2 nop.i 0 };; @@ -988,41 +988,41 @@ GLOBAL_LIBM_ENTRY(erfc) { .mfi nop.m 0 fnma.s1 exp_f = EXP_Nfloat, exp_ln2_by_128_lo, f8_sq_lo - nop.i 0 -};; + nop.i 0 +};; { .mfi nop.m 0 fma.s1 FR_P14_0_1 = FR_LocArg, FR_LocArg, f0 // xloc ^2 - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_P14_0_2 = FR_A13, FR_LocArg, FR_A12 - nop.i 0 + nop.i 0 };; { .mfi nop.m 0 - fma.s1 FR_P14_1_1 = FR_A9, FR_LocArg, FR_A8 + fma.s1 FR_P14_1_1 = FR_A9, FR_LocArg, FR_A8 nop.i 0 } { .mfi nop.m 0 - fma.s1 FR_P14_1_2 = FR_A11, FR_LocArg, FR_A10 + fma.s1 FR_P14_1_2 = FR_A11, FR_LocArg, FR_A10 nop.i 0 };; { .mfi nop.m 0 fma.s1 FR_P14_2_1 = FR_A5, FR_LocArg, FR_A4 - nop.i 0 + nop.i 0 } { .mfi nop.m 0 fma.s1 FR_P14_2_2 = FR_A7, FR_LocArg, FR_A6 nop.i 0 -};; +};; { .mfi nop.m 0 @@ -1057,7 +1057,7 @@ GLOBAL_LIBM_ENTRY(erfc) nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 exp_S1 = EXP_2M, exp_T1, f0 nop.i 0 };; @@ -1080,9 +1080,9 @@ GLOBAL_LIBM_ENTRY(erfc) };; { .mfi - nop.m 0 - fma.s1 
exp_S = exp_S1, exp_S2, f0 - nop.i 0 + nop.m 0 + fma.s1 exp_S = exp_S1, exp_S2, f0 + nop.i 0 } { .mfi nop.m 0 @@ -1105,34 +1105,34 @@ GLOBAL_LIBM_ENTRY(erfc) { .mfi nop.m 0 fma.s1 FR_Exp = exp_S, exp_P, exp_S // exp(-x^2) - nop.i 0 + nop.i 0 } { .mfi nop.m 0 - fma.s1 FR_Pol = FR_P14_13_2, FR_P14_12_1, FR_P14_13_1 + fma.s1 FR_Pol = FR_P14_13_2, FR_P14_12_1, FR_P14_13_1 nop.i 0 };; { .mfi nop.m 0 fma.d.s0 FR_Tmpf = f8, f1, f0 // flag d - nop.i 0 + nop.i 0 };; //p6: result for 0 < x < = 28.0, //p7: result for -6.0 <= x < 0, //p8: exit for - 6.0 <= x < UnfBound ~=~ 26.54.. - + .pred.rel "mutex",p6,p7 { .mfi nop.m 0 -(p6) fma.d.s0 f8 = FR_Exp, FR_Pol, f0 - nop.i 0 +(p6) fma.d.s0 f8 = FR_Exp, FR_Pol, f0 + nop.i 0 } { .mfb mov GR_Parameter_TAG = 208 -(p7) fnma.d.s0 f8 = FR_Exp, FR_Pol, FR_2 -(p8) br.ret.sptk b0 +(p7) fnma.d.s0 f8 = FR_Exp, FR_Pol, FR_2 +(p8) br.ret.sptk b0 };; GLOBAL_LIBM_END(erfc) @@ -1152,7 +1152,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov GR_SAVE_PFS=ar.pfs // Save ar.pfs } { .mfi -.fframe 64 +.fframe 64 add sp=-64,sp // Create new stack nop.f 0 mov GR_SAVE_GP=gp // Save gp @@ -1160,18 +1160,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mmi stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack add GR_Parameter_X = 16,sp // Parameter 1 address -.save b0, GR_SAVE_B0 +.save b0, GR_SAVE_B0 mov GR_SAVE_B0=b0 // Save b0 };; .body { .mib stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + nop.b 0 } { .mib stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack - add GR_Parameter_Y = -16,GR_Parameter_Y + add GR_Parameter_Y = -16,GR_Parameter_Y br.call.sptk b0=__libm_error_support# // Call error handling function };; { .mmi @@ -1189,7 +1189,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov gp = GR_SAVE_GP // Restore gp mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs br.ret.sptk b0 // Return -};; +};; LOCAL_LIBM_END(__libm_error_region) .type 
__libm_error_support#,@function diff --git a/sysdeps/ia64/fpu/s_erfcf.S b/sysdeps/ia64/fpu/s_erfcf.S index 2e3eeab..ce6be8d 100644 --- a/sysdeps/ia64/fpu/s_erfcf.S +++ b/sysdeps/ia64/fpu/s_erfcf.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History @@ -51,23 +51,23 @@ // Overview of operation //============================================================== // 1. 0 <= x <= 10.06 -// +// // erfcf(x) = P15(x) * exp( -x^2 ) // // Comment: // // Let x(0)=0, x(i) = 2^(i), i=1,...3, x(4)= 10.06 -// +// // Let x(i)<= x < x(i+1). // We can find i as exponent of argument x (let i = 0 for 0<= x < 2 ) -// +// // Let P15(x) - polynomial approximation of degree 15 for function // erfcf(x) * exp( x^2) and x(i) <= x <= x(i+1), i = 0,1,2,3 // Polynomial coeffitients we have in the table erfc_p_table. // // So we can find result for erfcf(x) as above. // Algorithm description for exp function see below. -// +// // 2. -4.4 <= x < 0 // // erfcf(x) = 2.0 - erfcf(-x) @@ -77,20 +77,20 @@ // erfcf(x) ~=~ 0.0 // // 4. x < -4.4 -// +// // erfcf(x) ~=~ 2.0 -// Special values +// Special values //============================================================== // erfcf(+0) = 1.0 // erfcf(-0) = 1.0 -// erfcf(+qnan) = +qnan -// erfcf(-qnan) = -qnan -// erfcf(+snan) = +qnan -// erfcf(-snan) = -qnan +// erfcf(+qnan) = +qnan +// erfcf(-qnan) = -qnan +// erfcf(+snan) = +qnan +// erfcf(-snan) = -qnan -// erfcf(-inf) = 2.0 +// erfcf(-inf) = 2.0 // erfcf(+inf) = +0 //============================================================== @@ -123,12 +123,12 @@ // // Registers used //============================================================== -// Floating Point registers used: +// Floating Point registers used: // f8, input // f6,f7,f9 -> f11, f32 -> f92 -// General registers used: -// r14 -> r22,r32 -> r50 +// General registers used: +// r14 -> r22,r32 -> r50 // Predicate registers used: // p6 -> p15 @@ -195,10 +195,10 @@ EXP_INV_LN2_2TO63 = f7 EXP_W_2TO56_RSH = f9 exp_ln2_by_128_hi = f11 -EXP_RSHF_2TO56 = f32 -exp_ln2_by_128_lo = f33 +EXP_RSHF_2TO56 = f32 +exp_ln2_by_128_lo = f33 EXP_RSHF = f34 -EXP_Nfloat = f35 +EXP_Nfloat = f35 exp_r = f36 exp_rsq = f37 EXP_2M = f38 @@ -206,7 +206,7 @@ exp_S1 = f39 exp_T1 = f40 exp_P = f41 exp_S = f42 
-EXP_NORM_f8 = f43 +EXP_NORM_f8 = f43 exp_S2 = f44 exp_T2 = f45 @@ -281,19 +281,19 @@ RODATA // double-extended 1/ln(2) // 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88 -// 3fff b8aa 3b29 5c17 f0bc +// 3fff b8aa 3b29 5c17 f0bc // For speed the significand will be loaded directly with a movl and setf.sig // and the exponent will be bias+63 instead of bias+0. Thus subsequent // computations need to scale appropriately. -// The constant 128/ln(2) is needed for the computation of w. This is also +// The constant 128/ln(2) is needed for the computation of w. This is also // obtained by scaling the computations. // -// Two shifting constants are loaded directly with movl and setf.d. -// 1. EXP_RSHF_2TO56 = 1.1000..00 * 2^(63-7) +// Two shifting constants are loaded directly with movl and setf.d. +// 1. EXP_RSHF_2TO56 = 1.1000..00 * 2^(63-7) // This constant is added to x*1/ln2 to shift the integer part of // x*128/ln2 into the rightmost bits of the significand. // The result of this fma is EXP_W_2TO56_RSH. -// 2. EXP_RSHF = 1.1000..00 * 2^(63) +// 2. EXP_RSHF = 1.1000..00 * 2^(63) // This constant is subtracted from EXP_W_2TO56_RSH * 2^(-56) to give // the integer part of w, n, as a floating-point number. // The result of this fms is EXP_Nfloat. 
@@ -345,7 +345,7 @@ LOCAL_OBJECT_END(exp_table_2) LOCAL_OBJECT_START(erfc_p_table) -// Pol_0 +// Pol_0 data8 0xBEA3260C63CB0446 //A15 = -5.70673541831883454676e-07 data8 0x3EE63D6178077654 //A14 = +1.06047480138940182343e-05 data8 0xBF18646BC5FC70A7 //A13 = -9.30491237309283694347e-05 @@ -362,7 +362,7 @@ data8 0xBFE81270C361852B //A3 = -7.52251035312075583309e-01 data8 0x3FEFFFFFC67295FC //A2 = +9.99999892800303301771e-01 data8 0xBFF20DD74F8CD2BF //A1 = -1.12837916445020868099e+00 data8 0x3FEFFFFFFFFE7C1D //A0 = +9.99999999988975570714e-01 -// Pol_1 +// Pol_1 data8 0xBDE8EC4BDD953B56 //A15 = -1.81338928934942767144e-10 data8 0x3E43607F269E2A1C //A14 = +9.02309090272196442358e-09 data8 0xBE8C4D9E69C10E02 //A13 = -2.10875261143659275328e-07 @@ -379,7 +379,7 @@ data8 0xBFE547BFE39AE2EA //A3 = -6.65008492032112467310e-01 data8 0x3FEE7C91BDF13578 //A2 = +9.52706213932898128515e-01 data8 0xBFF1CB5B61F8C589 //A1 = -1.11214769621105541214e+00 data8 0x3FEFEA56BC81FD37 //A0 = +9.97355812243688815239e-01 -// Pol_2 +// Pol_2 data8 0xBD302724A12F46E0 //A15 = -5.73866382814058809406e-14 data8 0x3D98889B75D3102E //A14 = +5.57829983681360947356e-12 data8 0xBDF16EA15074A1E9 //A13 = -2.53671153922423457844e-10 @@ -396,7 +396,7 @@ data8 0xBFD224DE9F62C258 //A3 = -2.83500342989133623476e-01 data8 0x3FE28A95CB8C6D3E //A2 = +5.79417131000276437708e-01 data8 0xBFEC21205D358672 //A1 = -8.79043752717008257224e-01 data8 0x3FEDAE44D5EDFE5B //A0 = +9.27523057776805771830e-01 -// Pol_3 +// Pol_3 data8 0xBCA3BCA734AC82F1 //A15 = -1.36952437983096410260e-16 data8 0x3D16740DC3990612 //A14 = +1.99425676175410093285e-14 data8 0xBD77F4353812C46A //A13 = -1.36162367755616790260e-12 @@ -419,8 +419,8 @@ LOCAL_OBJECT_END(erfc_p_table) .section .text GLOBAL_LIBM_ENTRY(erfcf) -// Form index i for table erfc_p_table as exponent of x -// We use i + bias in real calculations +// Form index i for table erfc_p_table as exponent of x +// We use i + bias in real calculations { .mlx getf.exp GR_IndxPlusBias = f8 
// (sign + exp + bias) of x movl exp_GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc //signif.of 1/ln2 @@ -445,14 +445,14 @@ GLOBAL_LIBM_ENTRY(erfcf) ;; // Form two constants we need -// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128 +// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128 // 1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand // p9: x = 0,+inf,-inf,nan,unnorm. // p10: x!= 0,+inf,-inf,nan,unnorm. { .mfi setf.sig EXP_INV_LN2_2TO63 = exp_GR_sig_inv_ln2 // Form 1/ln2*2^63 - fclass.m p9,p10 = f8,0xef + fclass.m p9,p10 = f8,0xef shl GR_ShftPi_bias = GR_BIAS, 7 } { .mfi @@ -484,7 +484,7 @@ GLOBAL_LIBM_ENTRY(erfcf) } ;; -// Form shift GR_ShftPi from the beginning of erfc_p_table +// Form shift GR_ShftPi from the beginning of erfc_p_table // to the polynomial with number i { .mfi ldfps FR_UnfBound, FR_EpsNorm = [EXP_AD_TB1],8 @@ -494,11 +494,11 @@ GLOBAL_LIBM_ENTRY(erfcf) { .mfi setf.d EXP_RSHF = exp_GR_rshf // Form right shift 1.100 * 2^63 (p7) fms.s1 FR_AbsArg = f1, f0, f8 // |x| if x < 0 - mov exp_TB1_size = 0x100 + mov exp_TB1_size = 0x100 } ;; -// Form pointer GR_P_POINT_3 to the beginning of erfc_p_table +// Form pointer GR_P_POINT_3 to the beginning of erfc_p_table { .mfi setf.d FR_05 = GR_05 nop.f 0 @@ -517,7 +517,7 @@ GLOBAL_LIBM_ENTRY(erfcf) add GR_P_POINT_2 = GR_P_POINT_3, GR_ShftPi } { .mfi - ldfe exp_ln2_by_128_hi = [EXP_AD_TB1],16 + ldfe exp_ln2_by_128_hi = [EXP_AD_TB1],16 fma.s1 FR_NormX = f8,f1,f0 add GR_P_POINT_3 = GR_P_POINT_3, GR_ShftPi } @@ -526,19 +526,19 @@ GLOBAL_LIBM_ENTRY(erfcf) // Load coefficients for polynomial P15(x) { .mfi ldfpd FR_A15, FR_A14 = [GR_P_POINT_1], 16 - nop.f 0 + nop.f 0 add GR_P_POINT_3 = 0x30, GR_P_POINT_3 } { .mfi ldfe exp_ln2_by_128_lo = [EXP_AD_TB1], 16 - nop.f 0 - add GR_P_POINT_2 = 0x20, GR_P_POINT_2 + nop.f 0 + add GR_P_POINT_2 = 0x20, GR_P_POINT_2 } ;; // Now EXP_AD_TB1 points to the beginning of table 1 { .mlx - ldfpd FR_A13, FR_A12 = [GR_P_POINT_1] + ldfpd FR_A13, FR_A12 = [GR_P_POINT_1] movl GR_1_by_6 = 
0x3FC5555555555555 } { .mfi @@ -564,10 +564,10 @@ GLOBAL_LIBM_ENTRY(erfcf) // By adding 1.10...0*2^63 we shift and get round_int(W) in significand. // We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing. { .mfi - ldfpd FR_A7, FR_A6 = [GR_P_POINT_3] + ldfpd FR_A7, FR_A6 = [GR_P_POINT_3] fma.s1 EXP_W_2TO56_RSH = EXP_NORM_f8,EXP_INV_LN2_2TO63,EXP_RSHF_2TO56 add EXP_AD_TB2 = exp_TB1_size, EXP_AD_TB1 - + } { .mfi ldfpd FR_A5, FR_A4 = [GR_P_POINT_4], 16 @@ -581,7 +581,7 @@ GLOBAL_LIBM_ENTRY(erfcf) fmerge.s FR_X = f8,f8 nop.i 0 } -{ .mfi +{ .mfi ldfpd FR_A1, FR_A0 = [GR_P_POINT_1] nop.f 0 nop.i 0 @@ -601,7 +601,7 @@ GLOBAL_LIBM_ENTRY(erfcf) nop.m 0 (p6) fcmp.gt.unc.s1 p15,p0 = FR_AbsArg, FR_POS_ARG_ASYMP //p6: x > 0 nop.i 0 -} +} ;; { .mfi @@ -616,7 +616,7 @@ GLOBAL_LIBM_ENTRY(erfcf) } ;; -// Nfloat = round_int(W) +// Nfloat = round_int(W) // The signficand of EXP_W_2TO56_RSH contains the rounded integer part of W, // as a twos complement number in the lower bits (that is, it may be negative). // That twos complement number (called N) is put into exp_GR_N. 
@@ -629,12 +629,12 @@ GLOBAL_LIBM_ENTRY(erfcf) nop.m 0 fms.s1 EXP_Nfloat = EXP_W_2TO56_RSH, EXP_2TOM56, EXP_RSHF nop.i 0 -} +} { .mfb (p15) mov GR_Parameter_TAG = 209 (p15) fma.s.s0 FR_RESULT = FR_EpsNorm,FR_EpsNorm,f0 //Result.for x>10.06 (p15) br.cond.spnt __libm_error_region -} +} ;; // Now we can calculate polynomial P15(x) @@ -652,19 +652,19 @@ GLOBAL_LIBM_ENTRY(erfcf) { .mfi nop.m 0 - fma.s1 FR_P15_1_2 = FR_A13, FR_AbsArg, FR_A12 - nop.i 0 + fma.s1 FR_P15_1_2 = FR_A13, FR_AbsArg, FR_A12 + nop.i 0 } ;; { .mfi - getf.sig exp_GR_N = EXP_W_2TO56_RSH - fma.s1 FR_P15_2_1 = FR_A9, FR_AbsArg, FR_A8 - nop.i 0 + getf.sig exp_GR_N = EXP_W_2TO56_RSH + fma.s1 FR_P15_2_1 = FR_A9, FR_AbsArg, FR_A8 + nop.i 0 } { .mfi nop.m 0 - fma.s1 FR_P15_2_2 = FR_A11, FR_AbsArg, FR_A10 + fma.s1 FR_P15_2_2 = FR_A11, FR_AbsArg, FR_A10 nop.i 0 } ;; @@ -672,7 +672,7 @@ GLOBAL_LIBM_ENTRY(erfcf) { .mfi nop.m 0 fma.s1 FR_P15_3_1 = FR_A5, FR_AbsArg, FR_A4 - nop.i 0 + nop.i 0 } { .mfi nop.m 0 @@ -691,7 +691,7 @@ GLOBAL_LIBM_ENTRY(erfcf) and exp_GR_index_1 = 0x0f, exp_GR_N fma.s1 FR_P15_4_1 = FR_A1, FR_AbsArg, FR_A0 shr r2 = exp_GR_N, 0x7 - + } { .mfi and exp_GR_index_2_16 = 0x70, exp_GR_N @@ -700,12 +700,12 @@ GLOBAL_LIBM_ENTRY(erfcf) } ;; -// EXP_AD_T1 has address of T1 -// EXP_AD_T2 has address if T2 +// EXP_AD_T1 has address of T1 +// EXP_AD_T2 has address if T2 { .mfi add EXP_AD_T2 = EXP_AD_TB2, exp_GR_index_2_16 - nop.f 0 + nop.f 0 shladd EXP_AD_T1 = exp_GR_index_1, 4, EXP_AD_TB1 } { .mfi @@ -716,15 +716,15 @@ GLOBAL_LIBM_ENTRY(erfcf) ;; // Create Scale = 2^M -// r = x - Nfloat * ln2_by_128_hi - +// r = x - Nfloat * ln2_by_128_hi + { .mfi setf.exp EXP_2M = exp_GR_biased_M fma.s1 FR_P15_7_1 = FR_P15_0_1, FR_P15_1_1, FR_P15_1_2 nop.i 0 } { .mfi - ldfe exp_T2 = [EXP_AD_T2] + ldfe exp_T2 = [EXP_AD_T2] nop.f 0 nop.i 0 } @@ -739,45 +739,45 @@ GLOBAL_LIBM_ENTRY(erfcf) } { .mfi nop.m 0 - fma.s1 FR_P15_8_1 = FR_P15_1_1, FR_P15_2_2, FR_P15_2_1 + fma.s1 FR_P15_8_1 = FR_P15_1_1, FR_P15_2_2, FR_P15_2_1 
nop.i 0 } ;; { .mfi nop.m 0 - fma.s1 FR_P15_9_1 = FR_P15_1_1, FR_P15_4_2, FR_P15_4_1 + fma.s1 FR_P15_9_1 = FR_P15_1_1, FR_P15_4_2, FR_P15_4_1 nop.i 0 } { .mfi nop.m 0 - fma.s1 FR_P15_9_2 = FR_P15_1_1, FR_P15_3_2, FR_P15_3_1 + fma.s1 FR_P15_9_2 = FR_P15_1_1, FR_P15_3_2, FR_P15_3_1 nop.i 0 } ;; { .mfi nop.m 0 - fma.s1 exp_P = FR_1_by_6, exp_r, FR_05 + fma.s1 exp_P = FR_1_by_6, exp_r, FR_05 nop.i 0 } { .mfi nop.m 0 - fma.s1 exp_rsq = exp_r, exp_r, f0 + fma.s1 exp_rsq = exp_r, exp_r, f0 nop.i 0 } ;; { .mfi nop.m 0 - fma.s1 FR_P15_13_1 = FR_P15_7_2, FR_P15_7_1, FR_P15_8_1 + fma.s1 FR_P15_13_1 = FR_P15_7_2, FR_P15_7_1, FR_P15_8_1 nop.i 0 } ;; { .mfi nop.m 0 - fma.s1 FR_P15_14_1 = FR_P15_7_2, FR_P15_9_2, FR_P15_9_1 + fma.s1 FR_P15_14_1 = FR_P15_7_2, FR_P15_9_2, FR_P15_9_1 nop.i 0 } { .mfi @@ -794,7 +794,7 @@ GLOBAL_LIBM_ENTRY(erfcf) } { .mfi nop.m 0 - fma.s1 exp_S1 = EXP_2M, exp_T2, f0 + fma.s1 exp_S1 = EXP_2M, exp_T2, f0 nop.i 0 } ;; @@ -816,30 +816,30 @@ GLOBAL_LIBM_ENTRY(erfcf) { .mfi nop.m 0 fma.s1 FR_Exp = exp_S, exp_P, exp_S // exp(-x^2) - nop.i 0 + nop.i 0 } -;; +;; { .mfi nop.m 0 fma.s.s0 FR_Tmpf = f8, f1, f0 // Flag d - nop.i 0 + nop.i 0 } ;; -//p6: result for 0 < x < = POS_ARG_ASYMP +//p6: result for 0 < x < = POS_ARG_ASYMP //p7: result for - NEG_ARG_ASYMP <= x < 0 //p8: exit for - NEG_ARG_ASYMP <= x <= UnfBound, x!=0 .pred.rel "mutex",p6,p7 { .mfi nop.m 0 -(p6) fma.s.s0 f8 = FR_Exp, FR_Pol, f0 - nop.i 0 +(p6) fma.s.s0 f8 = FR_Exp, FR_Pol, f0 + nop.i 0 } { .mfb mov GR_Parameter_TAG = 209 (p7) fnma.s.s0 f8 = FR_Exp, FR_Pol, FR_2 -(p8) br.ret.sptk b0 +(p8) br.ret.sptk b0 } ;; @@ -847,7 +847,7 @@ GLOBAL_LIBM_ENTRY(erfcf) { .mfb nop.m 0 nop.f 0 -(p10) br.cond.spnt __libm_error_region +(p10) br.cond.spnt __libm_error_region } ;; @@ -921,9 +921,9 @@ GLOBAL_LIBM_END(erfcf) // Call via (p10) br.cond.spnt __libm_error_region -// for UnfBound < x < = POS_ARG_ASYMP +// for UnfBound < x < = POS_ARG_ASYMP // and -// +// // call via (p15) br.cond.spnt __libm_error_region // 
for x > POS_ARG_ASYMP @@ -936,7 +936,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov GR_SAVE_PFS=ar.pfs // Save ar.pfs } { .mfi -.fframe 64 +.fframe 64 add sp=-64,sp // Create new stack nop.f 0 mov GR_SAVE_GP=gp // Save gp @@ -944,18 +944,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mmi stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack add GR_Parameter_X = 16,sp // Parameter 1 address -.save b0, GR_SAVE_B0 +.save b0, GR_SAVE_B0 mov GR_SAVE_B0=b0 // Save b0 };; .body { .mib stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + nop.b 0 } { .mib stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack - add GR_Parameter_Y = -16,GR_Parameter_Y + add GR_Parameter_Y = -16,GR_Parameter_Y br.call.sptk b0=__libm_error_support# // Call error handling function };; { .mmi @@ -973,7 +973,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov gp = GR_SAVE_GP // Restore gp mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs br.ret.sptk b0 // Return -};; +};; LOCAL_LIBM_END(__libm_error_region) .type __libm_error_support#,@function diff --git a/sysdeps/ia64/fpu/s_erfcl.S b/sysdeps/ia64/fpu/s_erfcl.S index 31ffc2d..b9f7004 100644 --- a/sysdeps/ia64/fpu/s_erfcl.S +++ b/sysdeps/ia64/fpu/s_erfcl.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -53,7 +53,7 @@ // Implementation and Algorithm Notes: //============================================================== // 1. 0 <= x <= 107.0 -// +// // erfcl(x) ~=~ P15(z) * expl( -x^2 )/(dx + x), z = x - xc(i). // // Comment: @@ -62,28 +62,28 @@ // argument intervals [x(i),x(i+1)] with length ratio q = 2^(1/4). // Values xc(i) we have in the table erfc_xc_table,xc(i)=x(i)for i = 0 // and xc(i)= 0.5*( x(i)+x(i+1) ) for i>0. -// +// // Let x(i)<= x < x(i+1). // We can find i as exponent of number (x + 1)^4. -// +// // Let P15(z)= a0+ a1*z +..+a15*z^15 - polynomial approximation of degree 15 -// for function erfcl(z+xc(i)) * expl( (z+xc(i))^2)* (dx+z+xc(i)) and +// for function erfcl(z+xc(i)) * expl( (z+xc(i))^2)* (dx+z+xc(i)) and // -0.5*[x(i+1)-x(i)] <= z <= 0.5*[x(i+1)-x(i)]. 
// // Let Q(z)= (P(z)- S)/S, S = a0, rounded to 16 bits. // Polynomial coeffitients for Q(z) we have in the table erfc_Q_table as // long double values // -// We use multi precision to calculate input argument -x^2 for expl and -// for u = 1/(dx + x). +// We use multi precision to calculate input argument -x^2 for expl and +// for u = 1/(dx + x). // // Algorithm description for expl function see below. In accordance with // denotation of this algorithm we have for expl: // -// expl(X) ~=~ 2^K*T_1*(1+W_1)*T_2*(1+W_2)*(1+ poly(r)), X = -x^2. +// expl(X) ~=~ 2^K*T_1*(1+W_1)*T_2*(1+W_2)*(1+ poly(r)), X = -x^2. // // Final calculations for erfcl: -// +// // erfcl(x) ~=~ // // 2^K*T_1*(1+W_1)*T_2*(1+W_2)*(1+ poly(r))*(1-dy)*S*(1+Q(z))*u*(1+du), @@ -95,35 +95,35 @@ // 1) M = 2^K*T_1*T_2*S without rounding error, // 2) W = W_1 + (W_2 + W_1*W_2), where 1+W ~=~ (1+W_1)(1+W_2), // 3) H = W - dy, where 1+H ~=~ (1+W )(1-dy), -// 4) R = poly(r)*H + poly(r), +// 4) R = poly(r)*H + poly(r), // 5) R = H + R , where 1+R ~=~ (1+H )(1+poly(r)), // 6) G = Q(z)*R + Q(z), // 7) R1 = R + du, where 1+R1 ~=~ (1+R)(1+du), // 8) G1 = R1 + G, where 1+G1 ~=~ (1+R1)(1+Q(z)), // 9) V = G1*M*u, -// 10) erfcl(x) ~=~ M*u + V -// +// 10) erfcl(x) ~=~ M*u + V +// // 2. -6.5 <= x < 0 // // erfcl(x) = 2.0 - erfl(-x) // // 3. x > 107.0 -// erfcl(x) ~=~ 0.0 +// erfcl(x) ~=~ 0.0 // -// 4. x < -6.5 -// erfcl(x) ~=~ 2.0 +// 4. 
x < -6.5 +// erfcl(x) ~=~ 2.0 -// Special values +// Special values //============================================================== // erfcl(+0) = 1.0 // erfcl(-0) = 1.0 -// erfcl(+qnan) = +qnan -// erfcl(-qnan) = -qnan -// erfcl(+snan) = +qnan -// erfcl(-snan) = -qnan +// erfcl(+qnan) = +qnan +// erfcl(-qnan) = -qnan +// erfcl(+snan) = +qnan +// erfcl(-snan) = -qnan -// erfcl(-inf) = 2.0 +// erfcl(-inf) = 2.0 // erfcl(+inf) = +0 //============================================================== @@ -139,35 +139,35 @@ // // On input, X is in register format // -// On output, +// On output, // // scale*(Y_hi + Y_lo) approximates exp(X) // // The accuracy is sufficient for a highly accurate 64 sig. -// bit implementation. Safe is set if there is no danger of -// overflow/underflow when the result is composed from scale, -// Y_hi and Y_lo. Thus, we can have a fast return if Safe is set. -// Otherwise, one must prepare to handle the possible exception -// appropriately. Note that SAFE not set (false) does not mean +// bit implementation. Safe is set if there is no danger of +// overflow/underflow when the result is composed from scale, +// Y_hi and Y_lo. Thus, we can have a fast return if Safe is set. +// Otherwise, one must prepare to handle the possible exception +// appropriately. Note that SAFE not set (false) does not mean // that overflow/underflow will occur; only the setting of SAFE // guarantees the opposite. // -// **** High Level Overview **** +// **** High Level Overview **** // // The method consists of three cases. -// +// // If |X| < Tiny use case exp_tiny; // else if |X| < 2^(-6) use case exp_small; // else use case exp_regular; // // Case exp_tiny: // -// 1 + X can be used to approximate exp(X) +// 1 + X can be used to approximate exp(X) // X + X^2/2 can be used to approximate exp(X) - 1 // // Case exp_small: // -// Here, exp(X) and exp(X) - 1 can all be +// Here, exp(X) and exp(X) - 1 can all be // approximated by a relatively simple polynomial. 
// // This polynomial resembles the truncated Taylor series @@ -204,9 +204,9 @@ // r := (X - N*L_hi) - N*L_lo // // We pick L_hi such that N*L_hi is representable in 64 sig. bits -// and thus the FMA X - N*L_hi is error free. So r is the -// 1 rounding error from an exact reduction with respect to -// +// and thus the FMA X - N*L_hi is error free. So r is the +// 1 rounding error from an exact reduction with respect to +// // L_hi + L_lo. // // In particular, L_hi has 30 significant bit and can be stored @@ -216,10 +216,10 @@ // Step 2: Approximation // // exp(r) - 1 is approximated by a short polynomial of the form -// +// // r + A_1 r^2 + A_2 r^3 + A_3 r^4 . // -// Step 3: Composition from Table Values +// Step 3: Composition from Table Values // // The value 2^( N / 2^12 ) can be composed from a couple of tables // of precalculated values. First, express N as three integers @@ -232,8 +232,8 @@ // lsb's, M_1 is the next 6, and K is simply N shifted right // arithmetically (sign extended) by 12 bits. // -// Now, 2^( N / 2^12 ) is simply -// +// Now, 2^( N / 2^12 ) is simply +// // 2^K * 2^( M_1 / 2^6 ) * 2^( M_2 / 2^12 ) // // Clearly, 2^K needs no tabulation. The other two values are less @@ -244,14 +244,14 @@ // Define two mathematical values, delta_1 and delta_2, implicitly // such that // -// T_1 = exp( [M_1 log(2)/2^6] - delta_1 ) +// T_1 = exp( [M_1 log(2)/2^6] - delta_1 ) // T_2 = exp( [M_2 log(2)/2^12] - delta_2 ) // // are representable as 24 significant bits. To illustrate the idea, -// we show how we define delta_1: +// we show how we define delta_1: // // T_1 := round_to_24_bits( exp( M_1 log(2)/2^6 ) ) -// delta_1 = (M_1 log(2)/2^6) - log( T_1 ) +// delta_1 = (M_1 log(2)/2^6) - log( T_1 ) // // The last equality means mathematical equality. 
We then tabulate // @@ -264,7 +264,7 @@ // T and W via // // T := T_1 * T_2 ...exactly -// W := W_1 + (1 + W_1)*W_2 +// W := W_1 + (1 + W_1)*W_2 // // W approximates exp( delta ) - 1 where delta = delta_1 + delta_2. // The mathematical product of T and (W+1) is an accurate representation @@ -272,17 +272,17 @@ // // Step 4. Reconstruction // -// Finally, we can reconstruct exp(X), exp(X) - 1. +// Finally, we can reconstruct exp(X), exp(X) - 1. // Because // -// X = K * log(2) + (M_1*log(2)/2^6 - delta_1) +// X = K * log(2) + (M_1*log(2)/2^6 - delta_1) // + (M_2*log(2)/2^12 - delta_2) // + delta_1 + delta_2 + r ...accurately // We have // // exp(X) ~=~ 2^K * ( T + T*[exp(delta_1+delta_2+r) - 1] ) // ~=~ 2^K * ( T + T*[exp(delta + r) - 1] ) -// ~=~ 2^K * ( T + T*[(exp(delta)-1) +// ~=~ 2^K * ( T + T*[(exp(delta)-1) // + exp(delta)*(exp(r)-1)] ) // ~=~ 2^K * ( T + T*( W + (1+W)*poly(r) ) ) // ~=~ 2^K * ( Y_hi + Y_lo ) @@ -294,7 +294,7 @@ // exp(X)-1 ~=~ 2^K * ( Y_hi + Y_lo ) - 1 // ~=~ 2^K * ( Y_hi + Y_lo - 2^(-K) ) // -// and we combine Y_hi + Y_lo - 2^(-N) into the form of two +// and we combine Y_hi + Y_lo - 2^(-N) into the form of two // numbers Y_hi + Y_lo carefully. // // **** Algorithm Details **** @@ -305,8 +305,8 @@ // // Case exp_tiny: // -// The important points are to ensure an accurate result under -// different rounding directions and a correct setting of the SAFE +// The important points are to ensure an accurate result under +// different rounding directions and a correct setting of the SAFE // flag. // // If expm1 is 1, then @@ -325,11 +325,11 @@ // Here we compute a simple polynomial. To exploit parallelism, we split // the polynomial into several portions. // -// Let r = X +// Let r = X // // If exp ...i.e. 
exp( argument ) // -// rsq := r * r; +// rsq := r * r; // r4 := rsq*rsq // poly_lo := P_3 + r*(P_4 + r*(P_5 + r*P_6)) // poly_hi := r + rsq*(P_1 + r*P_2) @@ -381,12 +381,12 @@ // Registers used //============================================================== -// Floating Point registers used: +// Floating Point registers used: // f8, input // f9 -> f14, f36 -> f126 -// General registers used: -// r32 -> r71 +// General registers used: +// r32 -> r71 // Predicate registers used: // p6 -> p15 @@ -590,8 +590,8 @@ data8 0xFFFFFFFFFFFFFFFF,0x00003FFD // C1 LOCAL_OBJECT_END(Constants_exp_64_C) LOCAL_OBJECT_START(Constants_exp_64_T1) -data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29 -data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5 +data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29 +data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5 data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA @@ -609,21 +609,21 @@ data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C LOCAL_OBJECT_END(Constants_exp_64_T1) LOCAL_OBJECT_START(Constants_exp_64_T2) -data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4 -data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7 -data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E -data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349 -data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987 -data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA -data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610 -data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A -data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8 -data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA -data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50 -data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA -data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07 -data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269 -data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE +data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4 +data4 
0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7 +data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E +data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349 +data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987 +data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA +data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610 +data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A +data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8 +data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA +data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50 +data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA +data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07 +data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269 +data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37 LOCAL_OBJECT_END(Constants_exp_64_T2) @@ -765,7 +765,7 @@ LOCAL_OBJECT_END(erfc_s_table) LOCAL_OBJECT_START(erfc_Q_table) // Q(z)= (P(z)- S)/S // -// Pol0 +// Pol0 data8 0x98325D50F9DC3499, 0x0000BFAA //A0 = +3.07358861423101280650e-26L data8 0xED35081A2494DDD9, 0x00003FF8 //A1 = +1.44779757616302832466e-02L data8 0x9443549BCD0F94CE, 0x0000BFFD //A2 = -2.89576190966300084405e-01L @@ -782,7 +782,7 @@ data8 0xA8DEC641AACEB600, 0x0000BFF6 //A12 = -2.57675495383156581601e-03L data8 0x87F0E77BA914FBEB, 0x00003FF5 //A13 = +1.03714776726541296794e-03L data8 0xC306C2894C5CEF2D, 0x0000BFF3 //A14 = -3.71983348634136412407e-04L data8 0xBDAB416A989D0697, 0x00003FF1 //A15 = +9.04412111877987292294e-05L -// Pol1 +// Pol1 data8 0x82808893DA2DD83F, 0x00003FEE //A0 = +7.77853035974467145290e-06L data8 0xAE9CD9DCADC86113, 0x0000BFFB //A1 = -8.52601070853077921197e-02L data8 0x9D429743E312AD9F, 0x0000BFFB //A2 = -7.67871682732076080494e-02L @@ -799,7 +799,7 @@ data8 0x9078BC61927671C6, 0x0000BFF4 //A12 = -5.51115510818844954547e-04L data8 0xDF67AC6287A63B03, 0x00003FF2 //A13 = +2.13055585989529858265e-04L data8 0xA719CFEE67FCE1CE, 0x0000BFF1 //A14 = -7.96798844477905965933e-05L data8 0xEF926367BABBB029, 0x00003FEF //A15 = 
+2.85591875675765038065e-05L -// Pol2 +// Pol2 data8 0x82B5E5A93B059C50, 0x00003FEF //A0 = +1.55819100856330860049e-05L data8 0xDC856BC2542B1938, 0x0000BFFB //A1 = -1.07676355235999875911e-01L data8 0xDF225EF5694F14AE, 0x0000BFF8 //A2 = -1.36190345125628043277e-02L @@ -816,7 +816,7 @@ data8 0xB05949F947FA7AEF, 0x0000BFF2 //A12 = -1.68179306983868501372e-04L data8 0x82901D055A0D5CB6, 0x00003FF1 //A13 = +6.22572626227726684168e-05L data8 0xBB957698542D6FD0, 0x0000BFEF //A14 = -2.23617364009159182821e-05L data8 0x810740E1DF572394, 0x00003FEE //A15 = +7.69068800065192940487e-06L -// Pol3 +// Pol3 data8 0x9526D1C87655AFA8, 0x00003FEC //A0 = +2.22253260814242012255e-06L data8 0xA47E21EBFE73F72F, 0x0000BFF8 //A1 = -1.00398379581527733314e-02L data8 0xDE65685FCDF7A913, 0x0000BFFA //A2 = -5.42959286802879105148e-02L @@ -833,7 +833,7 @@ data8 0x8179C36354571747, 0x0000BFF1 //A12 = -6.17387951061077132522e-05L data8 0xB40F241C01C907E9, 0x00003FEF //A13 = +2.14647227210702861416e-05L data8 0xF436D84AD7D4D316, 0x0000BFED //A14 = -7.27815144835213913238e-06L data8 0x9EB432503FB0B7BC, 0x00003FEC //A15 = +2.36487228755136968792e-06L -// Pol4 +// Pol4 data8 0xE0BA539E4AFC4741, 0x00003FED //A0 = +6.69741148991838024429e-06L data8 0x8583BF71139452CF, 0x0000BFFA //A1 = -3.25963476363756051657e-02L data8 0x8384FEF6D08AD6CE, 0x0000BFF9 //A2 = -1.60546283500634200479e-02L @@ -850,7 +850,7 @@ data8 0xE2DCC5750FD769BA, 0x0000BFEE //A12 = -1.35220520471857266339e-05L data8 0x9459160B1E6F1F8D, 0x00003FED //A13 = +4.42111470121432700283e-06L data8 0xBE0A05701BD0DD42, 0x0000BFEB //A14 = -1.41590196994052764542e-06L data8 0xE905D729105081BF, 0x00003FE9 //A15 = +4.34038814785401120999e-07L -// Pol5 +// Pol5 data8 0xA33649C3AB459832, 0x00003FEE //A0 = +9.72819704141525206634e-06L data8 0x9E4EA2F44C9A24BD, 0x0000BFFA //A1 = -3.86492123987296806210e-02L data8 0xE80C0B1280F357BF, 0x0000BFF2 //A2 = -2.21297306012713370124e-04L @@ -867,7 +867,7 @@ data8 0xA878D338E6E6A079, 0x0000BFEC //A12 = 
-2.51042802626063073967e-06L data8 0xCD2C2F079D2FCB36, 0x00003FEA //A13 = +7.64327468786076941271e-07L data8 0xF5EF4A4B2EA426F2, 0x0000BFE8 //A14 = -2.29044563492386125272e-07L data8 0x8CE52181393820FC, 0x00003FE7 //A15 = +6.56093668622712763489e-08L -// Pol6 +// Pol6 data8 0xB2015D7F1864B7CF, 0x00003FEC //A0 = +2.65248615880090351276e-06L data8 0x954EA7A861B4462A, 0x0000BFFA //A1 = -3.64519642954351295215e-02L data8 0x9E46F2A4D9157E69, 0x00003FF7 //A2 = +4.83023498390681965101e-03L @@ -884,7 +884,7 @@ data8 0xD34EA4D283EC33FA, 0x0000BFE9 //A12 = -3.93590335713880681528e-07L data8 0xED209EBD68E1145F, 0x00003FE7 //A13 = +1.10421060667544991323e-07L data8 0x83A126E22A17568D, 0x0000BFE6 //A14 = -3.06473811074239684132e-08L data8 0x8B778496EDE9F415, 0x00003FE4 //A15 = +8.11804009754249175736e-09L -// Pol7 +// Pol7 data8 0x8E152F522501B7B9, 0x00003FEE //A0 = +8.46879203970927626532e-06L data8 0xFD22F92EE21F491E, 0x0000BFF9 //A1 = -3.09004656656418947425e-02L data8 0xAF0C41847D89EC14, 0x00003FF7 //A2 = +5.34203719233189217519e-03L @@ -901,7 +901,7 @@ data8 0xE03A81F2C976D11A, 0x0000BFE6 //A12 = -5.22072765405802337371e-08L data8 0xE56A19A67DD66100, 0x00003FE4 //A13 = +1.33536787408751203998e-08L data8 0xE964D255CB31DFFA, 0x0000BFE2 //A14 = -3.39632729387679010008e-09L data8 0xE22E62E932B704D4, 0x00003FE0 //A15 = +8.22842400379225526299e-10L -// Pol8 +// Pol8 data8 0xB8B835882D46A6C8, 0x00003FEF //A0 = +2.20202883282415435401e-05L data8 0xC9D1F63F89B74E90, 0x0000BFF9 //A1 = -2.46362504515706189782e-02L data8 0x8E376748B1274F30, 0x00003FF7 //A2 = +4.34010070001387441657e-03L @@ -918,7 +918,7 @@ data8 0xCA87133235F1F495, 0x0000BFE3 //A12 = -5.89433000014933371980e-09L data8 0xBB15B0021581C8B6, 0x00003FE1 //A13 = +1.36122047057936849125e-09L data8 0xAC9D6585D4AF505E, 0x0000BFDF //A14 = -3.13984547328132268695e-10L data8 0x975A1439C3795183, 0x00003FDD //A15 = +6.88268624429648826457e-11L -// Pol9 +// Pol9 data8 0x99A7676284CDC9FE, 0x00003FEF //A0 = 
+1.83169747921764176475e-05L data8 0x9AD0AE249A02896C, 0x0000BFF9 //A1 = -1.88983346204739151909e-02L data8 0xCB89B4AEC19898BE, 0x00003FF6 //A2 = +3.10574208447745576452e-03L @@ -935,7 +935,7 @@ data8 0x9CC6C178AD3F96AD, 0x0000BFE0 //A12 = -5.70349182959704086428e-10L data8 0x81D0E2AA27DEB74A, 0x00003FDE //A13 = +1.18066926578104076645e-10L data8 0xD75FB9049190BEFD, 0x0000BFDB //A14 = -2.44851795398843967972e-11L data8 0xA9384A51D48C8703, 0x00003FD9 //A15 = +4.80951837368635202609e-12L -// Pol10 +// Pol10 data8 0xD2B3482EE449C535, 0x00003FEE //A0 = +1.25587177382575655080e-05L data8 0xE7939B2D0607DFCF, 0x0000BFF8 //A1 = -1.41343131436717436429e-02L data8 0x8810EB4AC5F0F1CE, 0x00003FF6 //A2 = +2.07620377002350121270e-03L @@ -952,7 +952,7 @@ data8 0xD02658BF31411F4C, 0x0000BFDC //A12 = -4.73277831746128372261e-11L data8 0x9A4A95EE59127779, 0x00003FDA //A13 = +8.77044784978207256260e-12L data8 0xE518330AF013C2F6, 0x0000BFD7 //A14 = -1.62781453276882333209e-12L data8 0xA036A9DF71BD108A, 0x00003FD5 //A15 = +2.84596398987114375607e-13L -// Pol11 +// Pol11 data8 0x9191CFBF001F3BB3, 0x00003FEE //A0 = +8.67662287973472452343e-06L data8 0xAA47E0CF01AE9730, 0x0000BFF8 //A1 = -1.03931136509584404513e-02L data8 0xAEABE7F17B01D18F, 0x00003FF5 //A2 = +1.33263784731775399430e-03L @@ -969,7 +969,7 @@ data8 0xE7977BC1342D19BF, 0x0000BFD8 //A12 = -3.29111645807102123274e-12L data8 0x9BC3A7D6396C6756, 0x00003FD6 //A13 = +5.53385887288503961220e-13L data8 0xD0110D5683740B8C, 0x0000BFD3 //A14 = -9.24001363293241428519e-14L data8 0x81786D7856A5CC92, 0x00003FD1 //A15 = +1.43741041714595023996e-14L -// Pol12 +// Pol12 data8 0xB85654F6033B3372, 0x00003FEF //A0 = +2.19747106911869287049e-05L data8 0xF78B40078736B406, 0x0000BFF7 //A1 = -7.55444170413862312647e-03L data8 0xDA8FDE84D88E5D5D, 0x00003FF4 //A2 = +8.33747822263358628569e-04L @@ -986,7 +986,7 @@ data8 0xC0F093DEC2B019A1, 0x0000BFD4 //A12 = -1.71364927865227509533e-13L data8 0xFC1441C4CD105981, 0x00003FD1 //A13 = 
+2.79864052545369490865e-14L data8 0x9CC959853267F026, 0x0000BFCF //A14 = -4.35170017302700609509e-15L data8 0xB06BA14016154F1E, 0x00003FCC //A15 = +6.12081320471295704631e-16L -// Pol13 +// Pol13 data8 0xA59E74BF544F2422, 0x00003FEF //A0 = +1.97433196215210145261e-05L data8 0xB2814F4EDAE15330, 0x0000BFF7 //A1 = -5.44754383528015875700e-03L data8 0x867C249D378F0A23, 0x00003FF4 //A2 = +5.13019308804593120161e-04L @@ -1003,7 +1003,7 @@ data8 0xA90EC851E91C3319, 0x0000BFCE //A12 = -2.34614750044359490986e-15L data8 0xEC9CAF64237B5060, 0x00003FCC //A13 = +8.20912960028437475035e-16L data8 0xA9156668FCF01479, 0x0000BFCA //A14 = -1.46656639874123613261e-16L data8 0xBAEF58D8118DD5D4, 0x00003FC7 //A15 = +2.02675278255254907493e-17L -// Pol14 +// Pol14 data8 0xC698952E9CEAA800, 0x00003FEF //A0 = +2.36744912073515619263e-05L data8 0x800395F8C7B4FA00, 0x0000BFF7 //A1 = -3.90667746392883642897e-03L data8 0xA3B2467B6B391831, 0x00003FF3 //A2 = +3.12226081793919541155e-04L @@ -1020,7 +1020,7 @@ data8 0x95352E5597EACC23, 0x00003FCD //A12 = +1.03533666540077850452e-15L data8 0xCCEBE3043B689428, 0x0000BFC8 //A13 = -4.44352525147076912166e-17L data8 0xA779DAB4BE1F80BB, 0x0000BFBC //A14 = -8.86610526981738255206e-21L data8 0xB171271F3517282C, 0x00003FC1 //A15 = +3.00598445879282370850e-19L -// Pol15 +// Pol15 data8 0xB7AC727D1C3FEB05, 0x00003FEE //A0 = +1.09478009914822049780e-05L data8 0xB6E6274485C10B0A, 0x0000BFF6 //A1 = -2.79081782038927199588e-03L data8 0xC5CAE2122D009506, 0x00003FF2 //A2 = +1.88629638738336219173e-04L @@ -1037,7 +1037,7 @@ data8 0xEF402CCB4D29FAF8, 0x00003FCA //A12 = +2.07516888659313950588e-16L data8 0xD6B789E01141231B, 0x0000BFC6 //A13 = -1.16398290506765191078e-17L data8 0xB5EEE343E9CFE3EC, 0x00003FC2 //A14 = +6.16413506924643419723e-19L data8 0x859B41A39D600346, 0x0000BFBE //A15 = -2.82922705825870414438e-20L -// Pol16 +// Pol16 data8 0x85708B69FD184E11, 0x00003FED //A0 = +3.97681079176353356199e-06L data8 0x824D92BC60A1F70A, 0x0000BFF6 //A1 = 
-1.98826630037499070532e-03L data8 0xEDCF7D3576BB5258, 0x00003FF1 //A2 = +1.13396885054265675352e-04L @@ -1054,7 +1054,7 @@ data8 0x863A04008E12867C, 0x00003FC8 //A12 = +2.91057593756148904838e-17L data8 0xDF62F9F44F5C7170, 0x0000BFC3 //A13 = -1.51372666097522872780e-18L data8 0xBA4E118E88CFDD31, 0x00003FBF //A14 = +7.89032177282079635722e-20L data8 0x942AD897FC4D2F2A, 0x0000BFBB //A15 = -3.92195756076319409245e-21L -// Pol17 +// Pol17 data8 0xCB8514540566C717, 0x00003FEF //A0 = +2.42614557068144130848e-05L data8 0xB94F08D6816E0CD4, 0x0000BFF5 //A1 = -1.41379340061829929314e-03L data8 0x8E7C342C2DABB51B, 0x00003FF1 //A2 = +6.79422240687700109911e-05L @@ -1071,7 +1071,7 @@ data8 0x8435328C80940126, 0x00003FC5 //A12 = +3.58349966898667910204e-18L data8 0xC0D22F655BA5EF39, 0x0000BFC0 //A13 = -1.63325770165403860181e-19L data8 0x8F14B9EBD5A9AB25, 0x00003FBC //A14 = +7.57464305512080733773e-21L data8 0xCD4804BBF6DC1B6F, 0x0000BFB7 //A15 = -3.39609459750208886298e-22L -// Pol18 +// Pol18 data8 0xE251DFE45AB0C22E, 0x00003FEE //A0 = +1.34897126299700418200e-05L data8 0x83943CC7D59D4215, 0x0000BFF5 //A1 = -1.00386850310061655307e-03L data8 0xAA57896951134BCA, 0x00003FF0 //A2 = +4.06126834109940757047e-05L @@ -1088,7 +1088,7 @@ data8 0xF41FBBA6B343960F, 0x00003FC1 //A12 = +4.13562069721140021224e-19L data8 0x98F194AEE31D188D, 0x0000BFBD //A13 = -1.61935414722333263347e-20L data8 0xC42F5029BB622157, 0x00003FB8 //A14 = +6.49121108201931196678e-22L data8 0xF43BD08079E50E0F, 0x0000BFB3 //A15 = -2.52531675510242468317e-23L -// Pol19 +// Pol19 data8 0x82557B149A04D08E, 0x00003FEF //A0 = +1.55370127331027842820e-05L data8 0xBAAB433307CE614B, 0x0000BFF4 //A1 = -7.12085701486669872724e-04L data8 0xCB52D9DBAC16FE82, 0x00003FEF //A2 = +2.42380662859334411743e-05L @@ -1105,7 +1105,7 @@ data8 0xD959E428A62B1B6C, 0x00003FBE //A12 = +4.60258936838597812582e-20L data8 0xE7D49EC23F1A16A0, 0x0000BFB9 //A13 = -1.53412587409583783059e-21L data8 0xFDE429BC9947B2BE, 0x00003FB4 //A14 = 
+5.25034823750902928092e-23L data8 0x872137A062C042EF, 0x0000BFB0 //A15 = -1.74651114923000080365e-24L -// Pol20 +// Pol20 data8 0x8B9B185C6A2659AC, 0x00003FEF //A0 = +1.66423130594825442963e-05L data8 0x84503AD52588A1E8, 0x0000BFF4 //A1 = -5.04735556466270303549e-04L data8 0xF26C7C2B566388E1, 0x00003FEE //A2 = +1.44495826764677427386e-05L @@ -1122,7 +1122,7 @@ data8 0xBD5A7FAA548CC749, 0x00003FBB //A12 = +5.01214569023722089225e-21L data8 0xAB15D69425373A67, 0x0000BFB6 //A13 = -1.41518447770061562822e-22L data8 0x9EF95456F75B4DF4, 0x00003FB1 //A14 = +4.10938011540250142351e-24L data8 0x8FADCC45E81433E7, 0x0000BFAC //A15 = -1.16062889679749879834e-25L -// Pol21 +// Pol21 data8 0xB47A917B0F7B50AE, 0x00003FEF //A0 = +2.15147474240529518138e-05L data8 0xBB77DC3BA0C937B3, 0x0000BFF3 //A1 = -3.57567223048598672970e-04L data8 0x90694DFF4EBF7370, 0x00003FEE //A2 = +8.60758700336677694536e-06L @@ -1139,7 +1139,7 @@ data8 0xA2C1C5E19CC79E6F, 0x00003FB8 //A12 = +5.38517493921589837361e-22L data8 0xF86F9772306F56C1, 0x0000BFB2 //A13 = -1.28438352359240135735e-23L data8 0xC32F6FEEDE86528E, 0x00003FAD //A14 = +3.15338862172962186458e-25L data8 0x9534ED189744D7D4, 0x0000BFA8 //A15 = -7.53301543611470014315e-27L -// Pol22 +// Pol22 data8 0xCBA0A2DB94A2C494, 0x00003FEF //A0 = +2.42742878212752702946e-05L data8 0x84C089154A49E0E8, 0x0000BFF3 //A1 = -2.53204520651046300034e-04L data8 0xABF5665BD0D8B0CD, 0x00003FED //A2 = +5.12476542947092361490e-06L @@ -1156,7 +1156,7 @@ data8 0x8AB627E09CF45997, 0x00003FB5 //A12 = +5.73697507862703019314e-23L data8 0xB28C15C117CC604F, 0x0000BFAF //A13 = -1.15383428132352407085e-24L data8 0xECB8428626DA072C, 0x00003FA9 //A14 = +2.39025879246942839796e-26L data8 0x98B731BCFA2CE2B2, 0x0000BFA4 //A15 = -4.81885474332093262902e-28L -// Pol23 +// Pol23 data8 0xC6D013811314D31B, 0x00003FED //A0 = +5.92508308918577687876e-06L data8 0xBBF3057B8DBACBCF, 0x0000BFF2 //A1 = -1.79242422493281965934e-04L data8 0xCCADECA501162313, 0x00003FEC //A2 = 
+3.04996061562356504918e-06L @@ -1173,7 +1173,7 @@ data8 0xEB27A61B1D5C7697, 0x00003FB1 //A12 = +6.07861113430709162243e-24L data8 0xFEF9ED74D4F4C9B0, 0x0000BFAB //A13 = -1.02984099170876754831e-25L data8 0x8E6F410068C12043, 0x00003FA6 //A14 = +1.79777721804459361762e-27L data8 0x9AE2F6705481630E, 0x0000BFA0 //A15 = -3.05459905177379058768e-29L -// Pol24 +// Pol24 data8 0xD2D858D5B01C9434, 0x00003FEE //A0 = +1.25673476165670766128e-05L data8 0x8505330F8B4FDE49, 0x0000BFF2 //A1 = -1.26858053564784963985e-04L data8 0xF39171C8B1D418C2, 0x00003FEB //A2 = +1.81472407620770441249e-06L @@ -1190,7 +1190,7 @@ data8 0xC6A4DCACC554911E, 0x00003FAE //A12 = +6.41853356148678957077e-25L data8 0xB550CEA09DA96F44, 0x0000BFA8 //A13 = -9.15410112414783078242e-27L data8 0xAA9149317996F32F, 0x00003FA2 //A14 = +1.34554050666508391264e-28L data8 0x9C3008EFE3F52F19, 0x0000BF9C //A15 = -1.92516125328592532359e-30L -// Pol25 +// Pol25 data8 0xA68E78218806283F, 0x00003FEF //A0 = +1.98550844852103406280e-05L data8 0xBC41423996DC8A37, 0x0000BFF1 //A1 = -8.97669395268764751516e-05L data8 0x90E55AE31A2F8271, 0x00003FEB //A2 = +1.07955871580069359702e-06L @@ -1207,7 +1207,7 @@ data8 0xA76F391095A9563A, 0x00003FAB //A12 = +6.76262416498584003290e-26L data8 0x8098FA125C18D8DB, 0x0000BFA5 //A13 = -8.11564737276592661642e-28L data8 0xCB9E4D5C08923227, 0x00003F9E //A14 = +1.00391606269366059664e-29L data8 0x9CEC3BF7A0BE2CAF, 0x0000BF98 //A15 = -1.20888920108938909316e-31L -// Pol26 +// Pol26 data8 0xC17AB25E269272F7, 0x00003FEE //A0 = +1.15322640047234590651e-05L data8 0x85310509E633FEF2, 0x0000BFF1 //A1 = -6.35106483144690768696e-05L data8 0xAC5E4C4DCB2D940C, 0x00003FEA //A2 = +6.42122148740412561597e-07L @@ -1224,7 +1224,7 @@ data8 0x8CEEACB790B5F374, 0x00003FA8 //A12 = +7.11526399101774993883e-27L data8 0xB61C8A29D98F24C0, 0x0000BFA1 //A13 = -7.18303147470398859453e-29L data8 0xF296F69FE45BDA7D, 0x00003F9A //A14 = +7.47537230021540031251e-31L data8 0x9D4B25BF6FB7234B, 0x0000BF94 //A15 = 
-7.57340869663212138051e-33L -// Pol27 +// Pol27 data8 0xC7772CC326D6FBB8, 0x00003FEE //A0 = +1.18890718679826004395e-05L data8 0xE0F9D5410565D55D, 0x0000BFF0 //A1 = -5.36384368533203585378e-05L data8 0x85C0BE825680E148, 0x00003FEA //A2 = +4.98268406609692971520e-07L @@ -1246,7 +1246,7 @@ LOCAL_OBJECT_END(erfc_Q_table) .section .text GLOBAL_LIBM_ENTRY(erfcl) - + { .mfi alloc r32 = ar.pfs, 0, 36, 4, 0 fma.s1 FR_Tmp = f1, f1, f8 // |x|+1, if x >= 0 @@ -1278,7 +1278,7 @@ GLOBAL_LIBM_ENTRY(erfcl) { .mfi nop.m 0 fnma.s1 FR_norm_x = f8, f8, f0 //high bits for -x^2 - nop.i 0 + nop.i 0 } ;; @@ -1286,7 +1286,7 @@ GLOBAL_LIBM_ENTRY(erfcl) { .mfi setf.sig FR_INV_LN2_2TO63 = GR_sig_inv_ln2 // form 1/ln2 * 2^63 (p6) fma.s1 FR_AbsArg = f1, f0, f8 // |x|, if x >= 0 - nop.i 0 + nop.i 0 } { .mfi setf.d FR_RSHF_2TO51 = GR_rshf_2to51 //const 1.10 * 2^(63+51) @@ -1300,9 +1300,9 @@ GLOBAL_LIBM_ENTRY(erfcl) fclass.m p10,p0 = f8, 0x21 // p10: x = +inf mov GR_exp_bias = 0x0FFFF // Set exponent bias } -{ .mlx +{ .mlx setf.d FR_RSHF = GR_rshf // Right shift const 1.1000 * 2^63 - movl GR_ERFC_XC_TB = 0x650 + movl GR_ERFC_XC_TB = 0x650 } ;; @@ -1315,11 +1315,11 @@ GLOBAL_LIBM_ENTRY(erfcl) { .mfi ldfpd FR_POS_ARG_ASYMP,FR_NEG_ARG_ASYMP = [GR_ad_Arg], 16 (p7) fma.s1 FR_Tmp = FR_Tmp1, FR_Tmp1, f0 // (|x|+1)^2, x<0 - mov GR_0x1 = 0x1 + mov GR_0x1 = 0x1 } ;; -//p8: y = 1.0, x = 0.0,quick exit +//p8: y = 1.0, x = 0.0,quick exit { .mfi ldfpd FR_dx,FR_dx1 = [GR_ad_Arg], 16 fclass.m p9,p0 = f8, 0x22 // p9: x = -inf @@ -1327,16 +1327,16 @@ GLOBAL_LIBM_ENTRY(erfcl) } { .mfb - nop.m 0 -(p8) fma.s0 f8 = f1, f1, f0 -(p8) br.ret.spnt b0 + nop.m 0 +(p8) fma.s0 f8 = f1, f1, f0 +(p8) br.ret.spnt b0 } ;; { .mfi - ldfe FR_UnfBound = [GR_ad_Arg], 16 + ldfe FR_UnfBound = [GR_ad_Arg], 16 fclass.m p11,p0 = f8, 0xc3 // p11: x = nan - mov GR_BIAS = 0x0FFFF + mov GR_BIAS = 0x0FFFF } { .mfi nop.m 0 @@ -1359,12 +1359,12 @@ GLOBAL_LIBM_ENTRY(erfcl) { .mfi add GR_ad_C = 0x20, GR_ad_Arg // Point to C table - nop.f 0 + nop.f 
0 add GR_ad_T1 = 0x50, GR_ad_Arg // Point to T1 table } { .mfi add GR_ad_T2 = 0x150, GR_ad_Arg // Point to T2 table - nop.f 0 + nop.f 0 add GR_ERFC_XC_TB = GR_ERFC_XC_TB, GR_ad_Arg //poin.to XB_TBL } ;; @@ -1383,14 +1383,14 @@ GLOBAL_LIBM_ENTRY(erfcl) // p9: y = 2.0, x = -inf, quick exit { .mfi - sub GR_mBIAS = r0, GR_BIAS + sub GR_mBIAS = r0, GR_BIAS fma.s1 FR_2 = f1, f1, f1 - nop.i 0 + nop.i 0 } { .mfb ldfe FR_L_lo = [GR_ad_Arg],16 // Get L_lo -(p9) fma.s0 f8 = f1, f1, f1 -(p9) br.ret.spnt b0 +(p9) fma.s0 f8 = f1, f1, f1 +(p9) br.ret.spnt b0 } ;; @@ -1400,10 +1400,10 @@ GLOBAL_LIBM_ENTRY(erfcl) fma.s1 FR_N_signif = FR_norm_x, FR_INV_LN2_2TO63, FR_RSHF_2TO51 and GR_exp_x = GR_signexp_x, GR_exp_mask } -{ .mfb +{ .mfb adds GR_ERFC_S_TB = 0x1C0, GR_ERFC_XC_TB // pointer to S_TBL -(p10) fma.s0 f8 = f0, f1, f0 -(p10) br.ret.spnt b0 +(p10) fma.s0 f8 = f0, f1, f0 +(p10) br.ret.spnt b0 } ;; @@ -1411,13 +1411,13 @@ GLOBAL_LIBM_ENTRY(erfcl) // p11: y = x, x = nan, quick exit { .mfi ldfe FR_C3 = [GR_ad_C],16 // Get C3 for normal path - fcmp.lt.s1 p12,p0 = FR_AbsArg, FR_ch_dx + fcmp.lt.s1 p12,p0 = FR_AbsArg, FR_ch_dx shl GR_ShftPi_bias = GR_BIAS, 8 // BIAS * 256 } { .mfb sub GR_exp_x = GR_exp_x, GR_exp_bias // Get exponent (p11) fma.s0 f8 = f8, f1, f0 -(p11) br.ret.spnt b0 +(p11) br.ret.spnt b0 } ;; @@ -1433,7 +1433,7 @@ GLOBAL_LIBM_ENTRY(erfcl) { .mfi ldfe FR_C1 = [GR_ad_C],16 // Get C1 for main path (p6) fcmp.gt.unc.s1 p15,p0 = FR_AbsArg, FR_POS_ARG_ASYMP // p6: x >= 0 - nop.i 0 + nop.i 0 } { .mfb nop.m 0 @@ -1448,7 +1448,7 @@ GLOBAL_LIBM_ENTRY(erfcl) (p7) fcmp.gt.unc.s1 p14,p0 = FR_AbsArg,FR_NEG_ARG_ASYMP // p7: x < 0 shladd GR_ShftXBi_bias = GR_mBIAS, 4, r0 } -;; +;; { .mfi nop.m 0 @@ -1457,7 +1457,7 @@ GLOBAL_LIBM_ENTRY(erfcl) } { .mfi nop.m 0 - fms.s1 FR_float_N = FR_N_signif, FR_2TOM51, FR_RSHF + fms.s1 FR_float_N = FR_N_signif, FR_2TOM51, FR_RSHF nop.i 0 } ;; @@ -1466,23 +1466,23 @@ GLOBAL_LIBM_ENTRY(erfcl) // p14: y ~=~ 2, x < -6.5,quick exit { .mfi getf.exp 
GR_IndxPlusBias = FR_Tmp // exp + bias for (|x|+1)^4 - fcmp.lt.s1 p8,p0 = FR_NormX,FR_UnfBound + fcmp.lt.s1 p8,p0 = FR_NormX,FR_UnfBound nop.i 0 } { .mfb nop.m 0 (p14) fnma.s0 FR_RESULT = FR_EpsNorm,FR_EpsNorm,FR_2 -(p14) br.ret.spnt b0 +(p14) br.ret.spnt b0 } ;; // p15: y ~=~ 0.0 (result with underflow error), x > POS_ARG_ASYMP = 107.0, -// call __libm_error_region +// call __libm_error_region { .mfb (p15) mov GR_Parameter_TAG = 207 -(p15) fma.s0 FR_RESULT = FR_EpsNorm,FR_EpsNorm,f0 -(p15) br.cond.spnt __libm_error_region +(p15) fma.s0 FR_RESULT = FR_EpsNorm,FR_EpsNorm,f0 +(p15) br.cond.spnt __libm_error_region } ;; @@ -1490,7 +1490,7 @@ GLOBAL_LIBM_ENTRY(erfcl) getf.sig GR_N_fix = FR_N_signif // Get N from significand nop.f 0 shl GR_ShftPi = GR_IndxPlusBias, 8 - + } { .mfi shladd GR_ShftXBi = GR_IndxPlusBias, 4, GR_ShftXBi_bias @@ -1511,7 +1511,7 @@ GLOBAL_LIBM_ENTRY(erfcl) fma.s1 FR_Xpdx_hi = FR_AbsArg, f1, FR_dx // x + dx add GR_ShftA14 = 0xE0, GR_ShftPi // pointer shift for A14 - + } { .mfi ldfe FR_S = [GR_ERFC_S_TB] @@ -1533,14 +1533,14 @@ GLOBAL_LIBM_ENTRY(erfcl) } ;; -{ .mfi +{ .mfi ldfe FR_A14 = [GR_P_POINT_1], -32 - nop.f 0 + nop.f 0 extr.u GR_M2 = GR_N_fix, 0, 6 // Extract index M_2 } -{ .mfi +{ .mfi ldfe FR_A15 = [GR_P_POINT_2], -32 - nop.f 0 + nop.f 0 shladd GR_ad_W1 = GR_M1,3,GR_ad_W1 // Point to W1 } ;; @@ -1555,7 +1555,7 @@ GLOBAL_LIBM_ENTRY(erfcl) nop.f 0 shladd GR_ad_T1 = GR_M1,2,GR_ad_T1 // Point to T1 } -;; +;; { .mfi ldfe FR_A8 = [GR_P_POINT_1], 32 @@ -1568,38 +1568,38 @@ GLOBAL_LIBM_ENTRY(erfcl) shladd GR_ad_W2 = GR_M2,3,GR_ad_W2 // Point to W2 } ;; - + { .mfi - ldfe FR_A10 = [GR_P_POINT_1], -96 + ldfe FR_A10 = [GR_P_POINT_1], -96 nop.f 0 shladd GR_ad_T2 = GR_M2,2,GR_ad_T2 // Point to T2 } { .mfi - ldfe FR_A11 = [GR_P_POINT_2], -96 + ldfe FR_A11 = [GR_P_POINT_2], -96 fnma.s1 FR_r = FR_L_lo, FR_float_N, FR_r //r = -L_lo*float_N + r nop.i 0 } -;; +;; -{ .mfi +{ .mfi ldfe FR_A4 = [GR_P_POINT_1], 32 (p10) fms.s1 FR_Tmp = FR_dx,f1, FR_Xpdx_hi //for 
lo of x+dx, x<=dx nop.i 0 } -{ .mfi +{ .mfi ldfe FR_A5 = [GR_P_POINT_2], 32 (p9) fms.s1 FR_Tmp = FR_AbsArg, f1, FR_Xpdx_hi //for lo of x+dx, x>dx nop.i 0 } ;; -{ .mfi - ldfe FR_A6 = [GR_P_POINT_1], -64 +{ .mfi + ldfe FR_A6 = [GR_P_POINT_1], -64 frcpa.s1 FR_U,p11 = f1, FR_Xpdx_hi // hi of 1 /(x + dx) nop.i 0 } -{ .mfi - ldfe FR_A7 = [GR_P_POINT_2], -64 +{ .mfi + ldfe FR_A7 = [GR_P_POINT_2], -64 nop.f 0 nop.i 0 } @@ -1607,17 +1607,17 @@ GLOBAL_LIBM_ENTRY(erfcl) { .mfi ldfe FR_A2 = [GR_P_POINT_1], -32 - nop.f 0 - nop.i 0 + nop.f 0 + nop.i 0 } { .mfi ldfe FR_A3 = [GR_P_POINT_2], -32 - nop.f 0 - nop.i 0 + nop.f 0 + nop.i 0 } ;; -{ .mfi +{ .mfi ldfe FR_A0 = [GR_P_POINT_1], 224 nop.f 0 nop.i 0 @@ -1631,37 +1631,37 @@ GLOBAL_LIBM_ENTRY(erfcl) { .mfi ldfd FR_W1 = [GR_ad_W1],0 // Get W1 - nop.f 0 - nop.i 0 + nop.f 0 + nop.i 0 } { .mfi ldfd FR_W2 = [GR_ad_W2],0 // Get W2 fma.s1 FR_poly = FR_r, FR_C3, FR_C2 // poly = r * A3 + A2 - nop.i 0 + nop.i 0 } ;; { .mfi ldfs FR_T1 = [GR_ad_T1],0 // Get T1 (p10) fma.s1 FR_Xpdx_lo = FR_AbsArg,f1, FR_Tmp//lo of x + dx , x <= dx - nop.i 0 + nop.i 0 } { .mfi ldfs FR_T2 = [GR_ad_T2],0 // Get T2 (p9) fma.s1 FR_Xpdx_lo = FR_dx,f1, FR_Tmp // lo of x + dx, x > dx - nop.i 0 + nop.i 0 } ;; { .mfi nop.m 0 fnma.s1 FR_Tmp1 = FR_Xpdx_hi, FR_U, FR_2 // N-R, iter. 
N1 - nop.i 0 + nop.i 0 } { .mfi nop.m 0 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r - nop.i 0 + nop.i 0 } ;; @@ -1671,32 +1671,32 @@ GLOBAL_LIBM_ENTRY(erfcl) nop.i 0 } { .mfi - nop.m 0 - fma.s1 FR_P15_0_1 = FR_A15, FR_LocArg, FR_A14 - nop.i 0 + nop.m 0 + fma.s1 FR_P15_0_1 = FR_A15, FR_LocArg, FR_A14 + nop.i 0 } ;; { .mfi nop.m 0 - fma.s1 FR_P15_1_2 = FR_A13, FR_LocArg, FR_A12 - nop.i 0 + fma.s1 FR_P15_1_2 = FR_A13, FR_LocArg, FR_A12 + nop.i 0 } { .mfi nop.m 0 fma.s1 FR_poly = FR_r, FR_poly, FR_C1 // poly = r * poly + A1 - nop.i 0 + nop.i 0 } ;; { .mfi nop.m 0 - fma.s1 FR_P15_2_1 = FR_A9, FR_LocArg, FR_A8 - nop.i 0 + fma.s1 FR_P15_2_1 = FR_A9, FR_LocArg, FR_A8 + nop.i 0 } { .mfi nop.m 0 - fma.s1 FR_P15_2_2 = FR_A11, FR_LocArg, FR_A10 + fma.s1 FR_P15_2_2 = FR_A11, FR_LocArg, FR_A10 nop.i 0 } ;; @@ -1711,7 +1711,7 @@ GLOBAL_LIBM_ENTRY(erfcl) { .mfi nop.m 0 fma.s1 FR_P15_3_1 = FR_A5, FR_LocArg, FR_A4 - nop.i 0 + nop.i 0 } { .mfi nop.m 0 @@ -1730,8 +1730,8 @@ GLOBAL_LIBM_ENTRY(erfcl) fma.s1 FR_W = FR_W1, FR_W2, FR_W2 // W = W1 * W2 + W2 nop.i 0 } -;; - +;; + { .mfi nop.m 0 fmpy.s1 FR_T = FR_T1, FR_T2 // T = T1 * T2 @@ -1791,7 +1791,7 @@ GLOBAL_LIBM_ENTRY(erfcl) { .mfi nop.m 0 fma.s1 FR_T_scale = FR_T, FR_scale, f0 // T_scale = T * scale - nop.i 0 + nop.i 0 } ;; @@ -1805,7 +1805,7 @@ GLOBAL_LIBM_ENTRY(erfcl) { .mfi nop.m 0 fma.s1 FR_U = FR_U, FR_Tmp, f0 // N-R, iter. N2 - nop.i 0 + nop.i 0 } ;; @@ -1823,15 +1823,15 @@ GLOBAL_LIBM_ENTRY(erfcl) { .mfi nop.m 0 - fma.s1 FR_M = FR_T_scale, FR_S, f0 - nop.i 0 + fma.s1 FR_M = FR_T_scale, FR_S, f0 + nop.i 0 } ;; { .mfi nop.m 0 fnma.s1 FR_Tmp = FR_Xpdx_hi, FR_U, FR_2 // N-R, iter. N3 - nop.i 0 + nop.i 0 } ;; @@ -1845,20 +1845,20 @@ GLOBAL_LIBM_ENTRY(erfcl) { .mfi nop.m 0 fms.s1 FR_H = FR_W, f1, FR_xsq_lo // H = W - xsq_lo - nop.i 0 + nop.i 0 } ;; { .mfi nop.m 0 fma.s1 FR_U = FR_U, FR_Tmp, f0 // N-R, iter. 
N3 - nop.i 0 + nop.i 0 } ;; { .mfi nop.m 0 - fma.s1 FR_Q = FR_A1, FR_LocArg, FR_Q + fma.s1 FR_Q = FR_A1, FR_LocArg, FR_Q nop.i 0 } ;; @@ -1866,12 +1866,12 @@ GLOBAL_LIBM_ENTRY(erfcl) { .mfi nop.m 0 fnma.s1 FR_Tmp = FR_Xpdx_hi, FR_U, f1 // for du - nop.i 0 + nop.i 0 } { .mfi nop.m 0 - fma.s1 FR_R = FR_H, FR_poly, FR_poly - nop.i 0 + fma.s1 FR_R = FR_H, FR_poly, FR_poly + nop.i 0 } ;; @@ -1879,28 +1879,28 @@ GLOBAL_LIBM_ENTRY(erfcl) nop.m 0 fma.s1 FR_res_pos_x_hi = FR_M, FR_U, f0 // M *U nop.i 0 - + } -;; +;; { .mfi nop.m 0 fma.s1 FR_R = FR_R, f1, FR_H // R = H + P(r) + H*P(r) - nop.i 0 + nop.i 0 } ;; { .mfi nop.m 0 fma.s0 FR_Tmpf = f8, f1, f0 // flag d - nop.i 0 + nop.i 0 } ;; { .mfi nop.m 0 - fnma.s1 FR_dU = FR_Xpdx_lo, FR_U, FR_Tmp - nop.i 0 + fnma.s1 FR_dU = FR_Xpdx_lo, FR_U, FR_Tmp + nop.i 0 } ;; @@ -1908,23 +1908,23 @@ GLOBAL_LIBM_ENTRY(erfcl) // for -6.5 <= x < 0 { .mfi nop.m 0 - fms.s1 FR_res_pos_x_lo = FR_M, FR_U, FR_res_pos_x_hi - nop.i 0 - + fms.s1 FR_res_pos_x_lo = FR_M, FR_U, FR_res_pos_x_hi + nop.i 0 + } { .mfi nop.m 0 (p7) fnma.s1 FR_Tmp1 = FR_res_pos_x_hi, f1, FR_2 //p7: x < 0 - nop.i 0 - + nop.i 0 + } ;; { .mfi nop.m 0 - fma.s1 FR_G = FR_R, FR_Q, FR_Q + fma.s1 FR_G = FR_R, FR_Q, FR_Q nop.i 0 - + } ;; @@ -1932,31 +1932,31 @@ GLOBAL_LIBM_ENTRY(erfcl) nop.m 0 fma.s1 FR_Tmp = FR_R, f1, FR_dU // R + du nop.i 0 - + } ;; { .mfi nop.m 0 (p7) fnma.s1 FR_Tmp2 = FR_Tmp1, f1, FR_2 //p7: x < 0 - nop.i 0 - + nop.i 0 + } ;; { .mfi nop.m 0 - fma.s1 FR_G = FR_G, f1, FR_Tmp + fma.s1 FR_G = FR_G, f1, FR_Tmp nop.i 0 - + } ;; { .mfi nop.m 0 (p7) fnma.s1 FR_Tmp2 = FR_res_pos_x_hi, f1, FR_Tmp2 //p7: x < 0 - nop.i 0 - + nop.i 0 + } ;; @@ -1964,42 +1964,42 @@ GLOBAL_LIBM_ENTRY(erfcl) nop.m 0 fma.s1 FR_V = FR_G, FR_res_pos_x_hi, f0 // V = G * M *U nop.i 0 - + } ;; { .mfi nop.m 0 (p7) fma.s1 FR_res_pos_x_lo = FR_res_pos_x_lo, f1, FR_V //p7: x < 0 - nop.i 0 - + nop.i 0 + } ;; { .mfi nop.m 0 (p7) fnma.s1 FR_Tmp2 = FR_res_pos_x_lo, f1, FR_Tmp2 //p7: x < 0 - nop.i 0 - + nop.i 0 + 
} ;; -//p6: result for 0 < x < = POS_ARG_ASYMP +//p6: result for 0 < x < = POS_ARG_ASYMP //p7: result for - NEG_ARG_ASYMP <= x < 0 //p8: exit for - NEG_ARG_ASYMP <= x < UnfBound -ERFC_RESULT: +ERFC_RESULT: .pred.rel "mutex",p6,p7 { .mfi nop.m 0 (p6) fma.s0 f8 = FR_M, FR_U, FR_V // p6: x >= 0 - nop.i 0 + nop.i 0 } { .mfb mov GR_Parameter_TAG = 207 (p7) fma.s0 f8 = FR_Tmp2, f1, FR_Tmp1 // p7: x < 0 -(p8) br.ret.sptk b0 +(p8) br.ret.sptk b0 };; GLOBAL_LIBM_END(erfcl) @@ -2019,7 +2019,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov GR_SAVE_PFS=ar.pfs // Save ar.pfs } { .mfi -.fframe 64 +.fframe 64 add sp=-64,sp // Create new stack nop.f 0 mov GR_SAVE_GP=gp // Save gp @@ -2027,18 +2027,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mmi stfe [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack add GR_Parameter_X = 16,sp // Parameter 1 address -.save b0, GR_SAVE_B0 +.save b0, GR_SAVE_B0 mov GR_SAVE_B0=b0 // Save b0 };; .body { .mib stfe [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + nop.b 0 } { .mib stfe [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack - add GR_Parameter_Y = -16,GR_Parameter_Y + add GR_Parameter_Y = -16,GR_Parameter_Y br.call.sptk b0=__libm_error_support# // Call error handling function };; { .mmi @@ -2056,7 +2056,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov gp = GR_SAVE_GP // Restore gp mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs br.ret.sptk b0 // Return -};; +};; LOCAL_LIBM_END(__libm_error_region) .type __libm_error_support#,@function diff --git a/sysdeps/ia64/fpu/s_erff.S b/sysdeps/ia64/fpu/s_erff.S index ed0aaac..a5cde66 100644 --- a/sysdeps/ia64/fpu/s_erff.S +++ b/sysdeps/ia64/fpu/s_erff.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -82,7 +82,7 @@ // 5. |x| = INF // Return erff(x) = sign(x) * 1.0 // -// 6. x = [S,Q]NaN +// 6. x = [S,Q]NaN // Return erff(x) = QNaN // // 7. 
x is positive denormal @@ -95,11 +95,11 @@ // // Registers used //============================================================== -// Floating Point registers used: +// Floating Point registers used: // f8, input // f32 -> f59 -// General registers used: +// General registers used: // r32 -> r45, r2, r3 // Predicate registers used: @@ -180,7 +180,7 @@ data8 0xBF468D71CF4F0918 // C3 data8 0x40312115B0932F24 // D0 data8 0xC0160D6CD0991EA3 // D1 data8 0xBFE04A567A6DBE4A // D2 -data8 0xBF4207BC640D1509 // B0 +data8 0xBF4207BC640D1509 // B0 // Polynomial coefficients for the erf(x), 0.25 <= |x| < 0.5 data8 0x3F90849356383F58 // C0 data8 0x3F830BD5BA240F09 // C1 @@ -217,7 +217,7 @@ data8 0xC08A5C9D5FE8B9F6 // D0 data8 0x406EFF5F088CEC4B // D1 data8 0xC03A5743DF38FDE0 // D2 data8 0xBEE397A9FA5686A2 // B0 -// Polynomial coefficients for the erf(x), -0.125 < x < 0.125 +// Polynomial coefficients for the erf(x), -0.125 < x < 0.125 data8 0x3FF20DD7504270CB // C0 data8 0xBFD8127465AFE719 // C1 data8 0x3FBCE2D77791DD77 // C2 @@ -269,10 +269,10 @@ GLOBAL_LIBM_ENTRY(erff) ;; { .mfi - getf.s rArg = f8 // x in GR + getf.s rArg = f8 // x in GR fclass.m p7,p0 = f8, 0x0b // is x denormal ? // sign bit and 2 most bits in significand - shl rMask = rMask, 20 + shl rMask = rMask, 20 } { .mfi ld8 rDataPtr = [rDataPtr] @@ -296,7 +296,7 @@ GLOBAL_LIBM_ENTRY(erff) { .mfi andcm rOffset2 = rArg, rMask fclass.m p6,p0 = f8, 0xc7 // is x [S,Q]NaN or +/-0 ? - shl rBound = rBound, 20 // 0.125f in GR + shl rBound = rBound, 20 // 0.125f in GR } { .mfb andcm rAbsArg = rArg, rSignBit // |x| in GR @@ -311,7 +311,7 @@ GLOBAL_LIBM_ENTRY(erff) shr rOffset2 = rOffset2, 21 } { .mfi - cmp.lt p10, p8 = rAbsArg, rBound // |x| < 0.125? + cmp.lt p10, p8 = rAbsArg, rBound // |x| < 0.125? nop.f 0 adds rCoeffAddr3 = 16, rDataPtr } @@ -332,8 +332,8 @@ GLOBAL_LIBM_ENTRY(erff) { .mfi shladd rCoeffAddr1 = rBias, 4, rDataPtr fma.s1 fArg3Sgn = fArgSqr, f8, f0 // sign(x)*|x|^3 - // is |x| < 4.0? 
- cmp.lt p11, p12 = rAbsArg, rSaturation + // is |x| < 4.0? + cmp.lt p11, p12 = rAbsArg, rSaturation } { .mfi shladd rCoeffAddr3 = rBias, 4, rCoeffAddr3 @@ -345,7 +345,7 @@ GLOBAL_LIBM_ENTRY(erff) { .mfi (p11) ldfpd fC0, fC1 = [rCoeffAddr1] (p9) fmerge.s f8 = f8,f1 // +/- inf -(p12) adds rDataPtr = 512, rDataPtr +(p12) adds rDataPtr = 512, rDataPtr } { .mfb (p11) ldfpd fC2, fC3 = [rCoeffAddr3], 16 @@ -434,7 +434,7 @@ GLOBAL_LIBM_ENTRY(erff) { .mfi nop.m 0 - fma.s1 fPolATmp = fA3, fAbsArg, fA2 // A3*|x| + A2 + fma.s1 fPolATmp = fA3, fAbsArg, fA2 // A3*|x| + A2 nop.i 0 } { .mfi @@ -446,7 +446,7 @@ GLOBAL_LIBM_ENTRY(erff) { .mfi nop.m 0 // C3*|x|^3 + C2*x^2 + C1*|x| + C0 - fma.s1 fPolC = fPolC, fArgSqr, fPolCTmp + fma.s1 fPolC = fPolC, fArgSqr, fPolCTmp nop.i 0 } ;; @@ -454,31 +454,31 @@ GLOBAL_LIBM_ENTRY(erff) { .mfi nop.m 0 // PolD = sign(x)*(|x|^7 + D2*x^6 + D1*|x|^5 + D0*x^4) - fma.d.s1 fPolD = fPolD, fArg4Sgn, fPolDTmp + fma.d.s1 fPolD = fPolD, fArg4Sgn, fPolDTmp nop.i 0 } ;; { .mfi nop.m 0 - // PolA = A3|x|^3 + A2*x^2 + A1*|x| + A0 - fma.d.s1 fPolA = fPolATmp, fArgSqr, fPolA + // PolA = A3|x|^3 + A2*x^2 + A1*|x| + A0 + fma.d.s1 fPolA = fPolATmp, fArgSqr, fPolA nop.i 0 } -;; +;; { .mfi nop.m 0 - // PolC = B0*x^4 + C3*|x|^3 + C2*|x|^2 + C1*|x| + C0 - fma.d.s1 fPolC = fPolC, f1, fB0 + // PolC = B0*x^4 + C3*|x|^3 + C2*|x|^2 + C1*|x| + C0 + fma.d.s1 fPolC = fPolC, f1, fB0 nop.i 0 } -;; +;; { .mfi nop.m 0 (p14) fma.s.s0 f8 = fPolC, fPolD, fPolA // for positive x - nop.i 0 + nop.i 0 } { .mfb nop.m 0 @@ -522,7 +522,7 @@ erff_saturation: br.ret.sptk b0 // Exit for 4.0 <=|x|< +inf } ;; - + // Here if x is single precision denormal erff_denormal: { .mfi diff --git a/sysdeps/ia64/fpu/s_erfl.S b/sysdeps/ia64/fpu/s_erfl.S index 10da22c..ebb1bed 100644 --- a/sysdeps/ia64/fpu/s_erfl.S +++ b/sysdeps/ia64/fpu/s_erfl.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -68,7 +68,7 @@ // // 3. Main path: 1/8 <= |x| < 6.53 // For several ranges of 1/8 <= |x| < 6.53 -// Return erfl(x) = sign(x)*((A0H+A0L) + y*(A1H+A1L) + y^2*(A2H+A2L) + +// Return erfl(x) = sign(x)*((A0H+A0L) + y*(A1H+A1L) + y^2*(A2H+A2L) + // + y^3*A3 + y^4*A4 + ... 
+ y^25*A25 ) // where y = (|x|/a) - b // @@ -83,7 +83,7 @@ // 4.0 <= |x| < 6.53 a = 4.0, b = 1.5 // ( [3.25;4.0] subrange separated for monotonicity issues resolve ) // -// 4. Saturation path: 6.53 <= |x| < +INF +// 4. Saturation path: 6.53 <= |x| < +INF // Return erfl(x) = sign(x)*(1.0 - tiny_value) // (tiny_value ~ 1e-1233) // @@ -109,10 +109,10 @@ // Multiprecision have to be performed only for first few // polynomial iterations (up to 3-rd x degree) // Here we use the same parallelisation way as above: -// Split whole polynomial to first, "multiprecision" part, and second, +// Split whole polynomial to first, "multiprecision" part, and second, // so called "tail", native precision part. // -// 1) Multiprecision part: +// 1) Multiprecision part: // [v1=(A0H+A0L)+y*(A1H+A1L)] + [v2=y^2*((A2H+A2L)+y*A3)] // v1 and v2 terms calculated in parallel // @@ -120,23 +120,23 @@ // v3 = x^4 * ( A4 + x*A5 + ... + x^21*A25 ) // v3 is splitted to 2 even parts (10 coefficient in each one). // These 2 parts are also factorized using binary tree technique. -// +// // So Multiprecision and Tail parts cost is almost the same // and we have both results ready before final summation. // -// 4. Saturation path: 6.53 <= |x| < +INF +// 4. Saturation path: 6.53 <= |x| < +INF // // We use formula sign(x)*(1.0 - tiny_value) instead of simple sign(x)*1.0 // just to meet IEEE requirements for different rounding modes in this case. 
// // Registers used //============================================================== -// Floating Point registers used: +// Floating Point registers used: // f8 - input & output // f32 -> f90 -// General registers used: -// r2, r3, r32 -> r52 +// General registers used: +// r2, r3, r32 -> r52 // Predicate registers used: // p0, p6 -> p11, p14, p15 @@ -201,9 +201,9 @@ fA16 = f51 fA17 = f52 fA18 = f53 fA19 = f54 -fA20 = f55 -fA21 = f56 -fA22 = f57 +fA20 = f55 +fA21 = f56 +fA22 = f57 fA23 = f58 fA24 = f59 fA25 = f60 @@ -232,10 +232,10 @@ fRes3H = f79 fRes3L = f80 fRes4 = f81 -fTT = f82 +fTT = f82 fTH = f83 fTL = f84 -fTT2 = f85 +fTT2 = f85 fTH2 = f86 fTL2 = f87 @@ -252,7 +252,7 @@ RODATA LOCAL_OBJECT_START(erfl_data) ////////// Main tables /////////// _0p125_to_0p25_data: // exp = 2^-3 -// Polynomial coefficients for the erf(x), 1/8 <= |x| < 1/4 +// Polynomial coefficients for the erf(x), 1/8 <= |x| < 1/4 data8 0xACD9ED470F0BB048, 0x0000BFF4 //A3 = -6.5937529303909561891162915809e-04 data8 0xBF6A254428DDB452 //A2H = -3.1915980570631852578089571182e-03 data8 0xBC131B3BE3AC5079 //A2L = -2.5893976889070198978842231134e-19 @@ -275,7 +275,7 @@ data8 0x92E992C58B7C3847, 0x0000BFC6 //A14 = -7.9641369349930600223371163611e-18 LOCAL_OBJECT_END(erfl_data) LOCAL_OBJECT_START(_0p25_to_0p5_data) -// Polynomial coefficients for the erf(x), 1/4 <= |x| < 1/2 +// Polynomial coefficients for the erf(x), 1/4 <= |x| < 1/2 data8 0xF083628E8F7CE71D, 0x0000BFF6 //A3 = -3.6699405305266733332335619531e-03 data8 0xBF978749A434FE4E //A2H = -2.2977018973732214746075186440e-02 data8 0xBC30B3FAFBC21107 //A2L = -9.0547407100537663337591537643e-19 @@ -298,7 +298,7 @@ data8 0x9CC8FFFBDDCF9853, 0x0000BFD4 //A14 = -1.3925319209173383944263942226e-13 LOCAL_OBJECT_END(_0p25_to_0p5_data) LOCAL_OBJECT_START(_0p5_to_1_data) -// Polynomial coefficients for the erf(x), 1/2 <= |x| < 1 +// Polynomial coefficients for the erf(x), 1/2 <= |x| < 1 data8 0xDB742C8FB372DBE0, 0x00003FF6 //A3 = 
3.3485993187250381721535255963e-03 data8 0xBFBEDC5644353C26 //A2H = -1.2054957547410136142751468924e-01 data8 0xBC6D7215B023455F //A2L = -1.2770012232203569059818773287e-17 @@ -321,7 +321,7 @@ data8 0xB989FDB3795165C7, 0x00003FE1 //A14 = 1.3499740992928183247608593000e-09 LOCAL_OBJECT_END(_0p5_to_1_data) LOCAL_OBJECT_START(_1_to_2_data) -// Polynomial coefficients for the erf(x), 1 <= |x| < 2.0 +// Polynomial coefficients for the erf(x), 1 <= |x| < 2.0 data8 0x8E15015F5B55BEAC, 0x00003FFC //A3 = 1.3875200409423426678618977531e-01 data8 0xBFC6D5A95D0A1B7E //A2H = -1.7839543383544403942764233761e-01 data8 0xBC7499F704C80E02 //A2L = -1.7868888188464394090788198634e-17 @@ -344,7 +344,7 @@ data8 0xEC6E63BB4507B585, 0x0000BFEE //A14 = -1.4092398243085031882423746824e-05 LOCAL_OBJECT_END(_1_to_2_data) LOCAL_OBJECT_START(_2_to_3p25_data) -// Polynomial coefficients for the erf(x), 2 <= |x| < 3.25 +// Polynomial coefficients for the erf(x), 2 <= |x| < 3.25 data8 0xCEDBA58E8EE6F055, 0x00003FF7 //A3 = 6.3128050215859026984338771121e-03 data8 0xBF5B60D5E974CBBD //A2H = -1.6710366233609740427984435840e-03 data8 0xBC0E11E2AEC18AF6 //A2L = -2.0376133202996259839305825162e-19 @@ -367,7 +367,7 @@ data8 0xF2F3D8D21E8762E0, 0x0000BFF7 //A14 = -7.4143227286535936033409745884e-03 LOCAL_OBJECT_END(_2_to_3p25_data) LOCAL_OBJECT_START(_4_to_6p53_data) -// Polynomial coefficients for the erf(x), 4 <= |x| < 6.53 +// Polynomial coefficients for the erf(x), 4 <= |x| < 6.53 data8 0xDF3151BE8652827E, 0x00003FD5 //A3 = 3.9646979666953349095427642209e-13 data8 0xBD1C4A9787DF888B //A2H = -2.5127788450714750484839908889e-14 data8 0xB99B35483E4603FD //A2L = -3.3536613901268985626466020210e-31 @@ -390,7 +390,7 @@ data8 0x965DA4A80008B7BC, 0x0000BFEE //A14 = -8.9624997201558650125662820562e-06 LOCAL_OBJECT_END(_4_to_6p53_data) LOCAL_OBJECT_START(_3p25_to_4_data) -// Polynomial coefficients for the erf(x), 3.25 <= |x| < 4 +// Polynomial coefficients for the erf(x), 3.25 <= |x| < 4 data8 
0xB01D29846286CE08, 0x00003FEE //A3 = 1.0497207328743021499800978059e-05 data8 0xBEC10B1488AEB234 //A2H = -2.0317175474986489113480084279e-06 data8 0xBB7F19701B8B74F9 //A2L = -4.1159669348226960337518214996e-22 @@ -415,7 +415,7 @@ LOCAL_OBJECT_END(_3p25_to_4_data) //////// "Tail" tables ////////// LOCAL_OBJECT_START(_0p125_to_0p25_data_tail) -// Polynomial coefficients for the erf(x), 1/8 <= |x| < 1/4 +// Polynomial coefficients for the erf(x), 1/8 <= |x| < 1/4 data8 0x93086CBD21ED3962, 0x00003FCA //A13 = 1.2753071968462837024755878679e-16 data8 0x83CB5045A6D4B419, 0x00003FCF //A12 = 3.6580237062957773626379648530e-15 data8 0x8FCDB723209690EB, 0x0000BFD3 //A11 = -6.3861616307180801527566117146e-14 @@ -429,7 +429,7 @@ data8 0xCC43247F4410C54A, 0x00003FEF //A4 = 2.4349960762505993017186935493e-05 LOCAL_OBJECT_END(_0p125_to_0p25_data_tail) LOCAL_OBJECT_START(_0p25_to_0p5_data_tail) -// Polynomial coefficients for the erf(x), 1/4 <= |x| < 1/2 +// Polynomial coefficients for the erf(x), 1/4 <= |x| < 1/2 data8 0x8CEAC59AF361B78A, 0x0000BFD6 //A13 = -5.0063802958258679384986669123e-13 data8 0x9BC67404F348C0CE, 0x00003FDB //A12 = 1.7709590771868743572061278273e-11 data8 0xF4B5D0348AFAAC7A, 0x00003FDB //A11 = 2.7820329729584630464848160970e-11 @@ -443,7 +443,7 @@ data8 0xAA94D5E68033B764, 0x00003FF4 //A4 = 6.5071635765452563856926608000e-04 LOCAL_OBJECT_END(_0p25_to_0p5_data_tail) LOCAL_OBJECT_START(_0p5_to_1_data_tail) -// Polynomial coefficients for the erf(x), 1/2 <= |x| < 1 +// Polynomial coefficients for the erf(x), 1/2 <= |x| < 1 data8 0x9ED99EDF111CB785, 0x0000BFE4 //A13 = -9.2462916180079278241704711522e-09 data8 0xDEAF7539AE2FB062, 0x0000BFE5 //A12 = -2.5923990465973151101298441139e-08 data8 0xA392D5E5CC9DB1A7, 0x00003FE9 //A11 = 3.0467952847327075747032372101e-07 @@ -457,7 +457,7 @@ data8 0x9A4DAF550A2CC29A, 0x00003FF8 //A4 = 9.4179355839141698591817907680e-03 LOCAL_OBJECT_END(_0p5_to_1_data_tail) LOCAL_OBJECT_START(_1_to_2_data_tail) -// Polynomial coefficients 
for the erf(x), 1 <= |x| < 2.0 +// Polynomial coefficients for the erf(x), 1 <= |x| < 2.0 data8 0x969EAC5C7B46CAB9, 0x00003FEF //A13 = 1.7955281439310148162059582795e-05 data8 0xA2ED832912E9FCD9, 0x00003FF1 //A12 = 7.7690020847111408916570845775e-05 data8 0x85677C39C48E43E7, 0x0000BFF3 //A11 = -2.5444839340796031538582511806e-04 @@ -471,7 +471,7 @@ data8 0xB6AD4AE850DBF526, 0x0000BFFA //A4 = -4.4598858458861014323191919669e-02 LOCAL_OBJECT_END(_1_to_2_data_tail) LOCAL_OBJECT_START(_2_to_3p25_data_tail) -// Polynomial coefficients for the erf(x), 2 <= |x| < 3.25 +// Polynomial coefficients for the erf(x), 2 <= |x| < 3.25 data8 0x847C24DAC7C7558B, 0x00003FF5 //A13 = 1.0107798565424606512130100541e-03 data8 0xCB6340EAF02C3DF8, 0x00003FF8 //A12 = 1.2413800617425931997420375435e-02 data8 0xB5163D252DBBC107, 0x0000BFF9 //A11 = -2.2105330871844825370020459523e-02 @@ -485,7 +485,7 @@ data8 0x88E42D8F47FAB60E, 0x0000BFF9 //A4 = -1.6710366233609742619461063050e-02 LOCAL_OBJECT_END(_2_to_3p25_data_tail) LOCAL_OBJECT_START(_4_to_6p53_data_tail) -// Polynomial coefficients for the erf(x), 4 <= |x| < 6.53 +// Polynomial coefficients for the erf(x), 4 <= |x| < 6.53 data8 0xD8235ABF08B8A6D1, 0x00003FEE //A13 = 1.2882834877224764938429832586e-05 data8 0xAEDF44F9C77844C2, 0x0000BFEC //A12 = -2.6057980393716019511497492890e-06 data8 0xCCD5490956A4FCFD, 0x00003FEA //A11 = 7.6306293047300300284923464089e-07 @@ -499,7 +499,7 @@ data8 0xA29C398F83F8A0D1, 0x0000BFD9 //A4 = -4.6216613698438694005327544047e-12 LOCAL_OBJECT_END(_4_to_6p53_data_tail) LOCAL_OBJECT_START(_3p25_to_4_data_tail) -// Polynomial coefficients for the erf(x), 3.25 <= |x| < 4 +// Polynomial coefficients for the erf(x), 3.25 <= |x| < 4 data8 0x95BE1BEAD738160F, 0x00003FF2 //A13 = 1.4280568455209843005829620687e-04 data8 0x8108C8FFAC0F0B21, 0x0000BFF4 //A12 = -4.9222685622046459346377033307e-04 data8 0xD72A7FAEE7832BBE, 0x00003FF4 //A11 = 8.2079319302109644436194651098e-04 @@ -514,7 +514,7 @@ 
LOCAL_OBJECT_END(_3p25_to_4_data_tail) LOCAL_OBJECT_START(_0_to_1o8_data) -// Polynomial coefficients for the erf(x), 0.0 <= |x| < 0.125 +// Polynomial coefficients for the erf(x), 0.0 <= |x| < 0.125 data8 0x3FF20DD750429B6D, 0x3C71AE3A8DDFFEDE //A1H, A1L data8 0xF8B0DACE42525CC2, 0x0000BFEE //A15 data8 0xFCD02E1BF0EC2C37, 0x00003FF1 //A13 @@ -536,7 +536,7 @@ LOCAL_OBJECT_END(_denorm_data) GLOBAL_LIBM_ENTRY(erfl) { .mfi - alloc r32 = ar.pfs, 0, 21, 0, 0 + alloc r32 = ar.pfs, 0, 21, 0, 0 fmerge.se fArgAbsNorm = f1, f8 // normalized x (1.0 <= x < 2.0) addl rSignBit = 0x20000, r0 // Set sign bit for exponent } @@ -547,26 +547,26 @@ GLOBAL_LIBM_ENTRY(erfl) { .mfi getf.exp rArgExp = f8 // Get arg exponent - fclass.m p6,p0 = f8, 0xEF // Filter 0, denormals and specials + fclass.m p6,p0 = f8, 0xEF // Filter 0, denormals and specials // 0xEF = @qnan|@snan|@pos|@neg|@zero|@unorm|@inf - addl rBias = 0xfffc, r0 // Value to subtract from exp + addl rBias = 0xfffc, r0 // Value to subtract from exp // to get actual interval number } { .mfi ld8 rDataPtr = [rDataPtr] // Get real common data pointer fma.s1 fArgSqr = f8, f8, f0 // x^2 (for [0;1/8] path) - addl r2to4 = 0x10000, r0 // unbiased exponent + addl r2to4 = 0x10000, r0 // unbiased exponent // for [2;4] binary interval };; { .mfi - getf.sig rArgSig = f8 // Get arg significand + getf.sig rArgSig = f8 // Get arg significand fcmp.lt.s1 p15, p14 = f8, f0 // Is arg negative/positive? addl rSaturation = 0xd0e, r0 // First 12 bits of // saturation value signif. } { .mfi - setf.d f1p5 = r1p5 // 1.5 construction + setf.d f1p5 = r1p5 // 1.5 construction fma.s1 f2p0 = f1,f1,f1 // 2.0 construction addl r3p25Sign = 0xd00, r0 // First 12 bits of // 3.25 value signif. @@ -586,7 +586,7 @@ GLOBAL_LIBM_ENTRY(erfl) { .mfi sub rInterval = rArgExp, rBias // Get actual interval number nop.f 0 - shr.u rArgSig = rArgSig, 52 // Leave only 12 bits of sign. + shr.u rArgSig = rArgSig, 52 // Leave only 12 bits of sign. 
} { .mfi adds rShiftedDataPtr = 0x10, rDataPtr // Second ptr to data @@ -595,10 +595,10 @@ GLOBAL_LIBM_ENTRY(erfl) };; { .mfi -(p8) cmp.le p8, p10 = r3p25Sign, rArgSig // If sign. is greater +(p8) cmp.le p8, p10 = r3p25Sign, rArgSig // If sign. is greater // than 1.25? (means arg is in [3.25;4] interval) nop.f 0 - shl rOffset = rInterval, 8 // Make offset from + shl rOffset = rInterval, 8 // Make offset from // interval number } { .mfi @@ -609,30 +609,30 @@ GLOBAL_LIBM_ENTRY(erfl) };; { .mfi -(p8) adds rOffset = 0x200, rOffset // Add additional offset +(p8) adds rOffset = 0x200, rOffset // Add additional offset // if arg is in [3.25;4] (another data set) fma.s1 fArgCube = fArgSqr, f8, f0 // x^3 (for [0;1/8] path) shl rTailOffset = rInterval, 7 // Make offset to "tail" data // from interval number } { .mib - setf.exp fTiny = rTiny // Construct "tiny" value + setf.exp fTiny = rTiny // Construct "tiny" value // for saturation path cmp.ltu p11, p0 = 0x5, rInterval // if arg > 8 -(p9) br.cond.spnt _0_to_1o8 +(p9) br.cond.spnt _0_to_1o8 };; { .mfi - add rAddr1 = rDataPtr, rOffset // Get address for - // interval data + add rAddr1 = rDataPtr, rOffset // Get address for + // interval data nop.f 0 shl rTailAddOffset = rInterval, 5 // Offset to interval - // "tail" data + // "tail" data } { .mib add rAddr2 = rShiftedDataPtr, rOffset // Get second - // address for interval data -(p7) cmp.leu p11, p0 = rSaturation, rArgSig // if arg is + // address for interval data +(p7) cmp.leu p11, p0 = rSaturation, rArgSig // if arg is // in [6.53;8] interval (p11) br.cond.spnt _saturation // Branch to Saturation path };; @@ -660,14 +660,14 @@ GLOBAL_LIBM_ENTRY(erfl) .pred.rel "mutex",p8,p10 { .mfi ldfe fA18 = [rAddr1], 16 // Load A18 -(p8) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f2p0 // Add 2.0 +(p8) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f2p0 // Add 2.0 // to normalized arg (for [3.24;4] interval) adds rTailAddr2 = 0x10, rTailAddr1 // First tail // data address } { .mfi - ldfe fA25 = 
[rAddr2], 16 // Load A25 -(p10) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f1p5 // Add 1.5 + ldfe fA25 = [rAddr2], 16 // Load A25 +(p10) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f1p5 // Add 1.5 // to normalized arg nop.i 0 };; @@ -775,9 +775,9 @@ GLOBAL_LIBM_ENTRY(erfl) fma.s1 fA23 = fA24, fArgAbsNorm, fA23 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 - fma.s1 fA21 = fA22, fArgAbsNorm, fA21 // Polynomial tail + fma.s1 fA21 = fA22, fArgAbsNorm, fA21 // Polynomial tail nop.i 0 };; @@ -793,7 +793,7 @@ GLOBAL_LIBM_ENTRY(erfl) fma.s1 fRes3L = fRes3L, f1, fTH // (A3*x+A2)*x^2 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA19 = fA20, fArgAbsNorm, fA19 // Polynomial tail nop.i 0 @@ -804,7 +804,7 @@ GLOBAL_LIBM_ENTRY(erfl) fma.s1 fRes1H = fTH2, f1, fA0H // A1*x+A0 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fms.s1 fTL2 = fA1H, fArgAbsNorm, fTH2 // A1*x+A0 nop.i 0 @@ -815,7 +815,7 @@ GLOBAL_LIBM_ENTRY(erfl) fma.s1 fA8 = fA9, fArgAbsNorm, fA8 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA10 = fA11, fArgAbsNorm, fA10 // Polynomial tail nop.i 0 @@ -835,7 +835,7 @@ GLOBAL_LIBM_ENTRY(erfl) fms.s1 fArgAbsNorm11 = fArgAbsNorm4, fArgAbsNorm4, f0 // x^8 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA4 = fA5, fArgAbsNorm, fA4 // Polynomial tail nop.i 0 @@ -846,7 +846,7 @@ GLOBAL_LIBM_ENTRY(erfl) fma.s1 fRes3L = fRes3L, f1, fA2L // (A3*x+A2)*x^2 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA6 = fA7, fArgAbsNorm, fA6 // Polynomial tail nop.i 0 @@ -857,7 +857,7 @@ GLOBAL_LIBM_ENTRY(erfl) fma.s1 fTL2 = fTL2, f1, fTT2 // A1*x+A0 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fms.s1 fRes1L = fA0H, f1, fRes1H // A1*x+A0 nop.i 0 @@ -868,7 +868,7 @@ GLOBAL_LIBM_ENTRY(erfl) fma.s1 fA23 = fA25, fArgAbsNorm2, fA23 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA12 = fA14, fArgAbsNorm2, fA12 // Polynomial tail nop.i 0 @@ -879,7 +879,7 @@ GLOBAL_LIBM_ENTRY(erfl) fma.s1 fA19 = fA21, fArgAbsNorm2, fA19 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA8 = fA10, fArgAbsNorm2, fA8 // Polynomial 
tail nop.i 0 @@ -890,7 +890,7 @@ GLOBAL_LIBM_ENTRY(erfl) fma.s1 fA15 = fA17, fArgAbsNorm2, fA15 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fms.s1 fArgAbsNorm11 = fArgAbsNorm11, fArgAbsNorm3, f0 // x^11 nop.i 0 @@ -901,7 +901,7 @@ GLOBAL_LIBM_ENTRY(erfl) fma.s1 fTT = fRes3L, fArgAbsNorm2, f0 // (A3*x+A2)*x^2 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA4 = fA6, fArgAbsNorm2, fA4 // Polynomial tail nop.i 0 @@ -918,7 +918,7 @@ GLOBAL_LIBM_ENTRY(erfl) fma.s1 fA19 = fA23, fArgAbsNorm4, fA19 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA8 = fA12, fArgAbsNorm4, fA8 // Polynomial tail nop.i 0 @@ -961,7 +961,7 @@ GLOBAL_LIBM_ENTRY(erfl) { .mfi nop.m 0 - fma.s1 fRes4 = fA15, fArgAbsNorm11, fA4 // Result of + fma.s1 fRes4 = fA15, fArgAbsNorm11, fA4 // Result of // polynomial tail nop.i 0 };; @@ -971,7 +971,7 @@ GLOBAL_LIBM_ENTRY(erfl) fms.s1 fRes2L = fRes3H, fArgAbsNorm2, fRes2H // (A3*x+A2)*x^2 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fResH = fRes2H, f1, fRes1H // High result nop.i 0 @@ -983,12 +983,12 @@ GLOBAL_LIBM_ENTRY(erfl) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 fRes2L = fRes2L, f1, fTT // (A3*x+A2)*x^2 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fms.s1 fResL = fRes1H, f1, fResH // Low result nop.i 0 @@ -999,13 +999,13 @@ GLOBAL_LIBM_ENTRY(erfl) fma.s1 fRes1L = fRes1L, f1, fRes2L // Low result nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fResL = fResL, f1, fRes2H // Low result nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 (p15) fneg fResH = fResH // Invert high result if arg is neg. 
nop.i 0 @@ -1018,12 +1018,12 @@ GLOBAL_LIBM_ENTRY(erfl) };; .pred.rel "mutex",p14,p15 -{ .mfi +{ .mfi nop.m 0 (p14) fma.s0 f8 = fResH, f1, fResL // Add high and low results nop.i 0 } -{ .mfb +{ .mfb nop.m 0 (p15) fms.s0 f8 = fResH, f1, fResL // Add high and low results br.ret.sptk b0 // Main path return @@ -1033,12 +1033,12 @@ GLOBAL_LIBM_ENTRY(erfl) _saturation: .pred.rel "mutex",p14,p15 -{ .mfi +{ .mfi nop.m 0 (p14) fms.s0 f8 = f1, f1, fTiny // Saturation result r = 1-tiny nop.i 0 };; -{ .mfb +{ .mfb nop.m 0 (p15) fnma.s0 f8 = f1, f1, fTiny // Saturation result r = tiny-1 br.ret.sptk b0 // Saturation path return @@ -1048,69 +1048,69 @@ _saturation: // 0, denormals and special IEEE numbers path ///////////////////////////////// erfl_spec: -{ .mfi +{ .mfi addl rDataPtr = 0xBE0, rDataPtr // Ptr to denormals coeffs fclass.m p6,p0 = f8, 0x23 // To filter infinities - // 0x23 = @pos|@neg|@inf + // 0x23 = @pos|@neg|@inf nop.i 0 };; -{ .mfi +{ .mfi ldfpd fA1H, fA1L = [rDataPtr] // Load denormals coeffs A1H, A1L fclass.m p7,p0 = f8, 0xC7 // To filter NaNs & Zeros // 0xC7 = @pos|@neg|@zero|@qnan|@snan nop.i 0 };; -{ .mfb +{ .mfb nop.m 0 -(p6) fmerge.s f8 = f8, f1 // +/-1 for INF args +(p6) fmerge.s f8 = f8, f1 // +/-1 for INF args (p6) br.ret.spnt b0 // exit for x = INF };; -{ .mfb +{ .mfb nop.m 0 -(p7) fma.s0 f8 = f8, f1, f8 // +/-0 for 0 args +(p7) fma.s0 f8 = f8, f1, f8 // +/-0 for 0 args // and NaNs for NaNs (p7) br.ret.spnt b0 // exit for x = NaN or +/-0 };; -{ .mfi +{ .mfi nop.m 0 fnorm.s0 f8 = f8 // Normalize arg nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fms.s1 fRes1H = f8, fA1H, f0 // HighRes nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fms.s1 fRes1L = f8, fA1L, f0 // LowRes nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fms.s1 fRes1Hd = f8, fA1H, fRes1H // HighRes delta nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 fRes = fRes1L, f1, fRes1Hd // LowRes+HighRes delta nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 fRes = f8, f8, fRes // r=x^2+r nop.i 0 };; -{ .mfb +{ .mfb nop.m 0 fma.s0 f8 = fRes, f1, 
fRes1H // res = r+ResHigh br.ret.sptk b0 // 0, denormals, specials return @@ -1120,120 +1120,120 @@ erfl_spec: // 0 < |x| < 1/8 path ///////////////////////////////////////////////////////// _0_to_1o8: -{ .mmi +{ .mmi adds rAddr1 = 0xB60, rDataPtr // Ptr 1 to coeffs adds rAddr2 = 0xB80, rDataPtr // Ptr 2 to coeffs nop.i 0 };; -{ .mmi +{ .mmi ldfpd fA1H, fA1L = [rAddr1], 16 // Load A1High, A1Low ldfe fA13 = [rAddr2], 16 // Load A13 nop.i 0 };; -{ .mmi +{ .mmi ldfe fA15 = [rAddr1], 48 // Load A15 ldfe fA11 = [rAddr2], 32 // Load A11 nop.i 0 };; -{ .mmi +{ .mmi ldfe fA9 = [rAddr1], 32 // Load A9 ldfe fA7 = [rAddr2], 32 // Load A7 nop.i 0 };; -{ .mmi +{ .mmi ldfe fA5 = [rAddr1] // Load A5 ldfe fA3 = [rAddr2] // Load A3 nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fms.s1 fRes1H = f8, fA1H, f0 // x*(A1H+A1L) nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fms.s1 fRes1L = f8, fA1L, f0 // x*(A1H+A1L) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 fA11 = fA13, fArgSqr, fA11 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 - fma.s1 fArgFour = fArgSqr, fArgSqr, f0 // a^4 + fma.s1 fArgFour = fArgSqr, fArgSqr, f0 // a^4 nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 fA3 = fA5, fArgSqr, fA3 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA7 = fA9, fArgSqr, fA7 // Polynomial tail nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fms.s1 fRes1Hd = f8, fA1H, fRes1H // x*(A1H+A1L) delta nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 fA11 = fA15, fArgFour, fA11 // Polynomial tail nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 fA3 = fA7, fArgFour, fA3 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fArgEight = fArgFour, fArgFour, f0 // a^8 nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 f8 = fRes1L, f1, fRes1Hd // x*(A1H+A1L) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 fRes = fA11, fArgEight, fA3 //Polynomial tail result nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 f8 = fRes, fArgCube, f8 // (Polynomial tail)*x^3 nop.i 0 };; -{ .mfb +{ .mfb nop.m 0 - fma.s0 f8 = f8, f1, fRes1H // (Polynomial tail)*x^3 + + fma.s0 f8 = 
f8, f1, fRes1H // (Polynomial tail)*x^3 + // + x*(A1H+A1L) br.ret.sptk b0 // [0;1/8] interval return };; - + GLOBAL_LIBM_END(erfl) diff --git a/sysdeps/ia64/fpu/s_expm1.S b/sysdeps/ia64/fpu/s_expm1.S index 09a22bb..f0b911e 100644 --- a/sysdeps/ia64/fpu/s_expm1.S +++ b/sysdeps/ia64/fpu/s_expm1.S @@ -70,7 +70,7 @@ // 5. x >= 709.7827 // Result overflows. Set I, O, and call error support // -// 6. 2^-2 <= x < 709.7827 or -48.0 <= x < -2^-2 +// 6. 2^-2 <= x < 709.7827 or -48.0 <= x < -2^-2 // This is the main path. The algorithm is described below: // Take the input x. w is "how many log2/128 in x?" diff --git a/sysdeps/ia64/fpu/s_expm1f.S b/sysdeps/ia64/fpu/s_expm1f.S index 8996977..55264a9 100644 --- a/sysdeps/ia64/fpu/s_expm1f.S +++ b/sysdeps/ia64/fpu/s_expm1f.S @@ -68,10 +68,10 @@ // Here we know result is essentially -1 + eps, where eps only affects // rounded result. Set I. // -// 5. x >= 88.7228 +// 5. x >= 88.7228 // Result overflows. Set I, O, and call error support // -// 6. 2^-2 <= x < 88.7228 or -24.0 <= x < -2^-2 +// 6. 2^-2 <= x < 88.7228 or -24.0 <= x < -2^-2 // This is the main path. The algorithm is described below: // Take the input x. w is "how many log2/128 in x?" @@ -491,7 +491,7 @@ EXPM1_COMMON: { .mfb nop.m 0 -(p7) fma.s.s0 f8 = fA8765432, fXsq, fNormX // Small path, +(p7) fma.s.s0 f8 = fA8765432, fXsq, fNormX // Small path, // result=xsq*A8765432+x (p7) br.ret.spnt b0 // Exit if 2^-40 <= |x| < 2^-2 } diff --git a/sysdeps/ia64/fpu/s_expm1l.S b/sysdeps/ia64/fpu/s_expm1l.S index a3a6e40..63bf39a 100644 --- a/sysdeps/ia64/fpu/s_expm1l.S +++ b/sysdeps/ia64/fpu/s_expm1l.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History @@ -51,36 +51,36 @@ // 04/17/03 Eliminated misplaced and unused data label // 12/15/03 Eliminated call to error support on expm1l underflow // -//********************************************************************* +//********************************************************************* // // Function: Combined expl(x) and expm1l(x), where -// x +// x // expl(x) = e , for double-extended precision x values // x // expm1l(x) = e - 1 for double-extended precision x values // -//********************************************************************* +//********************************************************************* // // Resources Used: // -// Floating-Point Registers: f8 (Input and Return Value) -// f9-f15,f32-f77 +// Floating-Point Registers: f8 (Input and Return Value) +// f9-f15,f32-f77 // -// General Purpose Registers: +// General Purpose Registers: // r14-r38 // r35-r38 (Used to pass arguments to error handling routine) -// +// // Predicate Registers: p6-p15 // -//********************************************************************* +//********************************************************************* // // IEEE Special Conditions: // -// Denormal fault raised on denormal inputs +// Denormal fault raised on denormal inputs // Overflow exceptions raised when appropriate for exp and expm1 // Underflow exceptions raised when appropriate for exp and expm1 // (Error Handling Routine called for overflow and Underflow) -// Inexact raised when appropriate by algorithm +// Inexact raised when appropriate by algorithm // // exp(inf) = inf // exp(-inf) = +0 @@ -89,13 +89,13 @@ // exp(0) = 1 // exp(EM_special Values) = QNaN // exp(inf) = inf -// expm1(-inf) = -1 +// expm1(-inf) = -1 // expm1(SNaN) = QNaN // expm1(QNaN) = QNaN // expm1(0) = 0 // expm1(EM_special Values) = QNaN -// -//********************************************************************* +// +//********************************************************************* // // Implementation and 
Algorithm Notes: // @@ -109,36 +109,36 @@ // p6 for exp, // p7 for expm1, // -// On output, +// On output, // // scale*(Y_hi + Y_lo) approximates exp(X) if exp // scale*(Y_hi + Y_lo) approximates exp(X)-1 if expm1 // // The accuracy is sufficient for a highly accurate 64 sig. -// bit implementation. Safe is set if there is no danger of -// overflow/underflow when the result is composed from scale, -// Y_hi and Y_lo. Thus, we can have a fast return if Safe is set. -// Otherwise, one must prepare to handle the possible exception -// appropriately. Note that SAFE not set (false) does not mean +// bit implementation. Safe is set if there is no danger of +// overflow/underflow when the result is composed from scale, +// Y_hi and Y_lo. Thus, we can have a fast return if Safe is set. +// Otherwise, one must prepare to handle the possible exception +// appropriately. Note that SAFE not set (false) does not mean // that overflow/underflow will occur; only the setting of SAFE // guarantees the opposite. // -// **** High Level Overview **** +// **** High Level Overview **** // // The method consists of three cases. -// +// // If |X| < Tiny use case exp_tiny; // else if |X| < 2^(-m) use case exp_small; m=12 for exp, m=7 for expm1 // else use case exp_regular; // // Case exp_tiny: // -// 1 + X can be used to approximate exp(X) +// 1 + X can be used to approximate exp(X) // X + X^2/2 can be used to approximate exp(X) - 1 // // Case exp_small: // -// Here, exp(X) and exp(X) - 1 can all be +// Here, exp(X) and exp(X) - 1 can all be // approximated by a relatively simple polynomial. // // This polynomial resembles the truncated Taylor series @@ -175,9 +175,9 @@ // r := (X - N*L_hi) - N*L_lo // // We pick L_hi such that N*L_hi is representable in 64 sig. bits -// and thus the FMA X - N*L_hi is error free. So r is the -// 1 rounding error from an exact reduction with respect to -// +// and thus the FMA X - N*L_hi is error free. 
So r is the +// 1 rounding error from an exact reduction with respect to +// // L_hi + L_lo. // // In particular, L_hi has 30 significant bit and can be stored @@ -187,10 +187,10 @@ // Step 2: Approximation // // exp(r) - 1 is approximated by a short polynomial of the form -// +// // r + A_1 r^2 + A_2 r^3 + A_3 r^4 . // -// Step 3: Composition from Table Values +// Step 3: Composition from Table Values // // The value 2^( N / 2^12 ) can be composed from a couple of tables // of precalculated values. First, express N as three integers @@ -203,8 +203,8 @@ // lsb's, M_1 is the next 6, and K is simply N shifted right // arithmetically (sign extended) by 12 bits. // -// Now, 2^( N / 2^12 ) is simply -// +// Now, 2^( N / 2^12 ) is simply +// // 2^K * 2^( M_1 / 2^6 ) * 2^( M_2 / 2^12 ) // // Clearly, 2^K needs no tabulation. The other two values are less @@ -215,14 +215,14 @@ // Define two mathematical values, delta_1 and delta_2, implicitly // such that // -// T_1 = exp( [M_1 log(2)/2^6] - delta_1 ) +// T_1 = exp( [M_1 log(2)/2^6] - delta_1 ) // T_2 = exp( [M_2 log(2)/2^12] - delta_2 ) // // are representable as 24 significant bits. To illustrate the idea, -// we show how we define delta_1: +// we show how we define delta_1: // // T_1 := round_to_24_bits( exp( M_1 log(2)/2^6 ) ) -// delta_1 = (M_1 log(2)/2^6) - log( T_1 ) +// delta_1 = (M_1 log(2)/2^6) - log( T_1 ) // // The last equality means mathematical equality. We then tabulate // @@ -235,7 +235,7 @@ // T and W via // // T := T_1 * T_2 ...exactly -// W := W_1 + (1 + W_1)*W_2 +// W := W_1 + (1 + W_1)*W_2 // // W approximates exp( delta ) - 1 where delta = delta_1 + delta_2. // The mathematical product of T and (W+1) is an accurate representation @@ -243,17 +243,17 @@ // // Step 4. Reconstruction // -// Finally, we can reconstruct exp(X), exp(X) - 1. +// Finally, we can reconstruct exp(X), exp(X) - 1. 
// Because // -// X = K * log(2) + (M_1*log(2)/2^6 - delta_1) +// X = K * log(2) + (M_1*log(2)/2^6 - delta_1) // + (M_2*log(2)/2^12 - delta_2) // + delta_1 + delta_2 + r ...accurately // We have // // exp(X) ~=~ 2^K * ( T + T*[exp(delta_1+delta_2+r) - 1] ) // ~=~ 2^K * ( T + T*[exp(delta + r) - 1] ) -// ~=~ 2^K * ( T + T*[(exp(delta)-1) +// ~=~ 2^K * ( T + T*[(exp(delta)-1) // + exp(delta)*(exp(r)-1)] ) // ~=~ 2^K * ( T + T*( W + (1+W)*poly(r) ) ) // ~=~ 2^K * ( Y_hi + Y_lo ) @@ -265,7 +265,7 @@ // exp(X)-1 ~=~ 2^K * ( Y_hi + Y_lo ) - 1 // ~=~ 2^K * ( Y_hi + Y_lo - 2^(-K) ) // -// and we combine Y_hi + Y_lo - 2^(-N) into the form of two +// and we combine Y_hi + Y_lo - 2^(-N) into the form of two // numbers Y_hi + Y_lo carefully. // // **** Algorithm Details **** @@ -276,8 +276,8 @@ // // Case exp_tiny: // -// The important points are to ensure an accurate result under -// different rounding directions and a correct setting of the SAFE +// The important points are to ensure an accurate result under +// different rounding directions and a correct setting of the SAFE // flag. // // If expm1 is 1, then @@ -296,11 +296,11 @@ // Here we compute a simple polynomial. To exploit parallelism, we split // the polynomial into several portions. // -// Let r = X +// Let r = X // // If exp ...i.e. exp( argument ) // -// rsq := r * r; +// rsq := r * r; // r4 := rsq*rsq // poly_lo := P_3 + r*(P_4 + r*(P_5 + r*P_6)) // poly_hi := r + rsq*(P_1 + r*P_2) @@ -390,7 +390,7 @@ GR_SAVE_GP = r34 GR_Parameter_X = r35 GR_Parameter_Y = r36 GR_Parameter_RESULT = r37 -GR_Parameter_TAG = r38 +GR_Parameter_TAG = r38 // Floating Point Registers // @@ -480,25 +480,25 @@ FR_RESULT = f15 // double-extended 1/ln(2) // 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88 -// 3fff b8aa 3b29 5c17 f0bc +// 3fff b8aa 3b29 5c17 f0bc // For speed the significand will be loaded directly with a movl and setf.sig // and the exponent will be bias+63 instead of bias+0. 
Thus subsequent // computations need to scale appropriately. -// The constant 2^12/ln(2) is needed for the computation of N. This is also +// The constant 2^12/ln(2) is needed for the computation of N. This is also // obtained by scaling the computations. // -// Two shifting constants are loaded directly with movl and setf.d. -// 1. RSHF_2TO51 = 1.1000..00 * 2^(63-12) +// Two shifting constants are loaded directly with movl and setf.d. +// 1. RSHF_2TO51 = 1.1000..00 * 2^(63-12) // This constant is added to x*1/ln2 to shift the integer part of // x*2^12/ln2 into the rightmost bits of the significand. // The result of this fma is N_signif. -// 2. RSHF = 1.1000..00 * 2^(63) +// 2. RSHF = 1.1000..00 * 2^(63) // This constant is subtracted from N_signif * 2^(-51) to give // the integer part of N, N_fix, as a floating-point number. // The result of this fms is float_N. RODATA -.align 64 +.align 64 LOCAL_OBJECT_START(Constants_exp_64_Arg) //data8 0xB8AA3B295C17F0BC,0x0000400B // Inv_L = 2^12/log(2) data8 0xB17217F400000000,0x00003FF2 // L_hi = hi part log(2)/2^12 @@ -538,8 +538,8 @@ data8 0x8000000000000000, 0x00003FFE // Q1 LOCAL_OBJECT_END(Constants_exp_64_Q) LOCAL_OBJECT_START(Constants_exp_64_T1) -data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29 -data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5 +data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29 +data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5 data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA @@ -557,21 +557,21 @@ data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C LOCAL_OBJECT_END(Constants_exp_64_T1) LOCAL_OBJECT_START(Constants_exp_64_T2) -data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4 -data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7 -data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E -data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349 -data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987 -data4 
0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA -data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610 -data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A -data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8 -data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA -data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50 -data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA -data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07 -data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269 -data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE +data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4 +data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7 +data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E +data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349 +data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987 +data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA +data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610 +data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A +data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8 +data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA +data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50 +data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA +data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07 +data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269 +data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37 LOCAL_OBJECT_END(Constants_exp_64_T2) @@ -652,14 +652,14 @@ GLOBAL_IEEE754_ENTRY(expm1l) // // Set p7 true for expm1, p6 false -// +// { .mlx getf.exp GR_signexp_x = f8 // Get sign and exponent of x, redo if unorm movl GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2 } { .mlx - addl GR_ad_Arg = @ltoff(Constants_exp_64_Arg#),gp + addl GR_ad_Arg = @ltoff(Constants_exp_64_Arg#),gp movl GR_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51) } ;; @@ -667,12 +667,12 @@ GLOBAL_IEEE754_ENTRY(expm1l) { .mfi ld8 GR_ad_Arg = [GR_ad_Arg] // Point to Arg table fclass.m p8, p0 = f8, 0x1E7 // Test x for natval, nan, inf, zero - cmp.eq p7, p6 = r0, r0 + cmp.eq p7, p6 = r0, r0 
} { .mfb mov GR_exp_half = 0x0FFFE // Exponent of 0.5, for very small path fnorm.s1 FR_norm_x = f8 // Normalize x - br.cond.sptk exp_continue + br.cond.sptk exp_continue } ;; @@ -682,13 +682,13 @@ GLOBAL_IEEE754_END(expm1l) GLOBAL_IEEE754_ENTRY(expl) // // Set p7 false for exp, p6 true -// +// { .mlx getf.exp GR_signexp_x = f8 // Get sign and exponent of x, redo if unorm movl GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2 } { .mlx - addl GR_ad_Arg = @ltoff(Constants_exp_64_Arg#),gp + addl GR_ad_Arg = @ltoff(Constants_exp_64_Arg#),gp movl GR_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51) } ;; @@ -705,9 +705,9 @@ GLOBAL_IEEE754_ENTRY(expl) } ;; -exp_continue: +exp_continue: // Form two constants we need -// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128 +// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128 // 1.1000..000 * 2^(63+63-12) to right shift int(N) into the significand { .mfi @@ -832,7 +832,7 @@ exp_continue: // Now we are on the main path for |x| >= 2^-m, m=12 for exp, m=7 for expm1 // -// float_N = round_int(N_signif) +// float_N = round_int(N_signif) // The signficand of N_signif contains the rounded integer part of X * 2^12/ln2, // as a twos complement number in the lower bits (that is, it may be negative). // That twos complement number (called N) is put into GR_N. 
@@ -934,7 +934,7 @@ exp_continue: ;; { .mfi -(p7) cmp.lt.unc p8, p9 = 10, GR_K // If expm1, set p8 if K > 10 +(p7) cmp.lt.unc p8, p9 = 10, GR_K // If expm1, set p8 if K > 10 fma.s1 FR_poly = FR_r, FR_poly, FR_A1 // poly = r * poly + A1 nop.i 999 } @@ -1033,8 +1033,8 @@ exp_continue: } ;; - -EXP_SMALL: + +EXP_SMALL: // Here if 2^-60 < |x| < 2^-m, m=12 for exp, m=7 for expm1 { .mfi (p7) ldfe FR_Q3 = [GR_ad_Q],16 // Get Q3 for small path, if expm1 @@ -1146,7 +1146,7 @@ EXP_SMALL: ;; -EXP_VERY_SMALL: +EXP_VERY_SMALL: // // Here if 0 < |x| < 2^-60 // If exp, result = 1.0 + x @@ -1224,8 +1224,8 @@ EXP_CERTAIN_UNDERFLOW_ZERO: (p7) br.ret.sptk b0 // If expm1, exit } ;; - - + + EXP_OVERFLOW: // Here if x >= min_oflow_x { .mmi @@ -1309,7 +1309,7 @@ EXP_POSSIBLE_UNDERFLOW: ;; -EXP_64_SPECIAL: +EXP_64_SPECIAL: // Here if x natval, nan, inf, zero // If x natval, +inf, or if expm1 and x zero, just return x. // The other cases must be tested for, and results set. @@ -1367,7 +1367,7 @@ EXP_64_SPECIAL: ;; -EXP_64_UNSUPPORTED: +EXP_64_UNSUPPORTED: // Here if x unsupported type { .mfb nop.m 999 diff --git a/sysdeps/ia64/fpu/s_fabs.S b/sysdeps/ia64/fpu/s_fabs.S index 3434389..8bbdf9b 100644 --- a/sysdeps/ia64/fpu/s_fabs.S +++ b/sysdeps/ia64/fpu/s_fabs.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== -// 02/02/00 Initial version +// 02/02/00 Initial version // 02/07/02 Added __libm_fabs entry point to test in case compiler inlines // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align @@ -50,7 +50,7 @@ // // Overview of operation //============================================================== -// returns absolute value of x +// returns absolute value of x // floating-point registers used: 1 // f8, input @@ -69,14 +69,14 @@ GLOBAL_IEEE754_ENTRY(fabs) { .mfi nop.m 999 - fcmp.eq.unc.s0 p6,p7 = f8,f1 + fcmp.eq.unc.s0 p6,p7 = f8,f1 nop.i 999 ;; } { .mfb nop.m 999 - fmerge.s f8 = f0,f8 - br.ret.sptk b0 ;; + fmerge.s f8 = f0,f8 + br.ret.sptk b0 ;; } GLOBAL_IEEE754_END(fabs) diff --git a/sysdeps/ia64/fpu/s_fabsf.S b/sysdeps/ia64/fpu/s_fabsf.S index 71bb6da..546865f 100644 --- a/sysdeps/ia64/fpu/s_fabsf.S +++ b/sysdeps/ia64/fpu/s_fabsf.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== -// 02/02/00 Initial version +// 02/02/00 Initial version // 02/07/02 Added __libm_fabsf entry point to test in case compiler inlines // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align @@ -50,7 +50,7 @@ // // Overview of operation //============================================================== -// returns absolute value of x +// returns absolute value of x // floating-point registers used: 1 // f8, input @@ -69,14 +69,14 @@ GLOBAL_IEEE754_ENTRY(fabsf) { .mfi nop.m 999 - fcmp.eq.unc.s0 p6,p7 = f8,f1 + fcmp.eq.unc.s0 p6,p7 = f8,f1 nop.i 999 ;; } { .mfb nop.m 999 - fmerge.s f8 = f0,f8 - br.ret.sptk b0 ;; + fmerge.s f8 = f0,f8 + br.ret.sptk b0 ;; } GLOBAL_IEEE754_END(fabsf) diff --git a/sysdeps/ia64/fpu/s_fabsl.S b/sysdeps/ia64/fpu/s_fabsl.S index a048949..3794d19 100644 --- a/sysdeps/ia64/fpu/s_fabsl.S +++ b/sysdeps/ia64/fpu/s_fabsl.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== -// 02/02/00 Initial version +// 02/02/00 Initial version // 02/07/02 Added __libm_fabsl entry point to test in case compiler inlines // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align @@ -50,7 +50,7 @@ // // Overview of operation //============================================================== -// returns absolute value of x +// returns absolute value of x // floating-point registers used: 1 // f8, input @@ -69,14 +69,14 @@ GLOBAL_IEEE754_ENTRY(fabsl) { .mfi nop.m 999 - fcmp.eq.unc.s0 p6,p7 = f8,f1 + fcmp.eq.unc.s0 p6,p7 = f8,f1 nop.i 999 ;; } { .mfb nop.m 999 - fmerge.s f8 = f0,f8 - br.ret.sptk b0 ;; + fmerge.s f8 = f0,f8 + br.ret.sptk b0 ;; } GLOBAL_IEEE754_END(fabsl) diff --git a/sysdeps/ia64/fpu/s_finite.S b/sysdeps/ia64/fpu/s_finite.S index 78d3db5..664f815 100644 --- a/sysdeps/ia64/fpu/s_finite.S +++ b/sysdeps/ia64/fpu/s_finite.S @@ -30,7 +30,7 @@ ENTRY (__finite) (p6) mov ret0 = 0 (p7) mov ret0 = 1 br.ret.sptk.many rp -} +} END (__finite) strong_alias (__finite, __finitef) diff --git a/sysdeps/ia64/fpu/s_fma.S b/sysdeps/ia64/fpu/s_fma.S index 7798790..556f82a 100644 --- a/sysdeps/ia64/fpu/s_fma.S +++ b/sysdeps/ia64/fpu/s_fma.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== -// 06/07/01 Initial version +// 06/07/01 Initial version // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align // @@ -64,7 +64,7 @@ GLOBAL_LIBM_ENTRY(fma) { .mfb nop.m 999 fma.d.s0 f8 = f8, f9, f10 // Result = x * y + z - br.ret.sptk b0 + br.ret.sptk b0 } ;; diff --git a/sysdeps/ia64/fpu/s_fmaf.S b/sysdeps/ia64/fpu/s_fmaf.S index db112b2..1324675 100644 --- a/sysdeps/ia64/fpu/s_fmaf.S +++ b/sysdeps/ia64/fpu/s_fmaf.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== -// 06/07/01 Initial version +// 06/07/01 Initial version // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align // @@ -64,7 +64,7 @@ GLOBAL_LIBM_ENTRY(fmaf) { .mfb nop.m 999 fma.s.s0 f8 = f8, f9, f10 // Result = x * y + z - br.ret.sptk b0 + br.ret.sptk b0 } ;; diff --git a/sysdeps/ia64/fpu/s_fmal.S b/sysdeps/ia64/fpu/s_fmal.S index 2bdef0b..979fa9f 100644 --- a/sysdeps/ia64/fpu/s_fmal.S +++ b/sysdeps/ia64/fpu/s_fmal.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== -// 06/07/01 Initial version +// 06/07/01 Initial version // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align // @@ -64,7 +64,7 @@ GLOBAL_LIBM_ENTRY(fmal) { .mfb nop.m 999 fma.s0 f8 = f8, f9, f10 // Result = x * y + z - br.ret.sptk b0 + br.ret.sptk b0 } ;; diff --git a/sysdeps/ia64/fpu/s_fmax.S b/sysdeps/ia64/fpu/s_fmax.S index 6fd38df..bf08974 100644 --- a/sysdeps/ia64/fpu/s_fmax.S +++ b/sysdeps/ia64/fpu/s_fmax.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== -// 05/31/01 Initial version +// 05/31/01 Initial version // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align // @@ -50,7 +50,7 @@ // Overview of operation //============================================================== // returns the algebraic maximum of 2 input values -// +// // Special cases: // fmax(x, nan) returns x if x is numeric // Must special case this one // fmax(nan, y) returns y if y is numeric @@ -59,7 +59,7 @@ // fmax(-0,+0) returns +0 // fmax(-0,-0) returns -0 // fmax(+0,-0) returns +0 // Must special case this one -// +// // SNaN causes invalid to be set // floating-point registers used: 2 @@ -107,7 +107,7 @@ GLOBAL_LIBM_ENTRY(fmax) { .mfb nop.m 999 (p10) fmerge.s f8 = f9, f9 // If x nan, return y, else do nothing (returns x) - br.ret.sptk b0 + br.ret.sptk b0 } ;; diff --git a/sysdeps/ia64/fpu/s_fmaxf.S b/sysdeps/ia64/fpu/s_fmaxf.S index cac283c..30a78c7 100644 --- a/sysdeps/ia64/fpu/s_fmaxf.S +++ b/sysdeps/ia64/fpu/s_fmaxf.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== -// 05/31/01 Initial version +// 05/31/01 Initial version // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align // @@ -50,7 +50,7 @@ // Overview of operation //============================================================== // returns the algebraic maximum of 2 input values -// +// // Special cases: // fmaxf(x, nan) returns x if x is numeric // Must special case this one // fmaxf(nan, y) returns y if y is numeric @@ -59,7 +59,7 @@ // fmaxf(-0,+0) returns +0 // fmaxf(-0,-0) returns -0 // fmaxf(+0,-0) returns +0 // Must special case this one -// +// // SNaN causes invalid to be set // floating-point registers used: 2 @@ -107,7 +107,7 @@ GLOBAL_LIBM_ENTRY(fmaxf) { .mfb nop.m 999 (p10) fmerge.s f8 = f9, f9 // If x nan, return y, else do nothing (returns x) - br.ret.sptk b0 + br.ret.sptk b0 } ;; diff --git a/sysdeps/ia64/fpu/s_fmaxl.S b/sysdeps/ia64/fpu/s_fmaxl.S index fb8861d..a575cf0 100644 --- a/sysdeps/ia64/fpu/s_fmaxl.S +++ b/sysdeps/ia64/fpu/s_fmaxl.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== -// 05/31/01 Initial version +// 05/31/01 Initial version // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align // @@ -50,7 +50,7 @@ // Overview of operation //============================================================== // returns the algebraic maximum of 2 input values -// +// // Special cases: // fmaxl(x, nan) returns x if x is numeric // Must special case this one // fmaxl(nan, y) returns y if y is numeric @@ -59,7 +59,7 @@ // fmaxl(-0,+0) returns +0 // fmaxl(-0,-0) returns -0 // fmaxl(+0,-0) returns +0 // Must special case this one -// +// // SNaN causes invalid to be set // floating-point registers used: 2 @@ -107,7 +107,7 @@ GLOBAL_LIBM_ENTRY(fmaxl) { .mfb nop.m 999 (p10) fmerge.s f8 = f9, f9 // If x nan, return y, else do nothing (returns x) - br.ret.sptk b0 + br.ret.sptk b0 } ;; diff --git a/sysdeps/ia64/fpu/s_fpclassify.S b/sysdeps/ia64/fpu/s_fpclassify.S index a9df24a..eeab32d 100644 --- a/sysdeps/ia64/fpu/s_fpclassify.S +++ b/sysdeps/ia64/fpu/s_fpclassify.S @@ -32,7 +32,7 @@ ENTRY (__fpclassify) (p7) fclass.m p7, p8 = farg0, @inf (p6) br.ret.sptk.many rp ;; -} +} { .mfb (p7) mov ret0 = 1 diff --git a/sysdeps/ia64/fpu/s_frexp.c b/sysdeps/ia64/fpu/s_frexp.c index 7d90213..95421ea 100644 --- a/sysdeps/ia64/fpu/s_frexp.c +++ b/sysdeps/ia64/fpu/s_frexp.c @@ -23,12 +23,12 @@ // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS @@ -53,12 +53,12 @@ double __libm_frexp(double, int*, int); double frexp(double x, int *y) { -#ifdef SIZE_INT_64 +#ifdef SIZE_INT_64 return( __libm_frexp(x, y, 1) ); #else -#ifdef SIZE_INT_32 +#ifdef SIZE_INT_32 return( __libm_frexp(x, y, 0) ); #endif diff --git a/sysdeps/ia64/fpu/s_frexpf.c b/sysdeps/ia64/fpu/s_frexpf.c index 920f09d..03290b2 100644 --- a/sysdeps/ia64/fpu/s_frexpf.c +++ b/sysdeps/ia64/fpu/s_frexpf.c @@ -23,12 +23,12 @@ // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS @@ -53,12 +53,12 @@ float __libm_frexpf(float, int*, int); float frexpf(float x, int *y) { -#ifdef SIZE_INT_64 +#ifdef SIZE_INT_64 return( __libm_frexpf(x, y, 1) ); #else -#ifdef SIZE_INT_32 +#ifdef SIZE_INT_32 return( __libm_frexpf(x, y, 0) ); #endif diff --git a/sysdeps/ia64/fpu/s_frexpl.c b/sysdeps/ia64/fpu/s_frexpl.c index 968cc32..f999e20 100644 --- a/sysdeps/ia64/fpu/s_frexpl.c +++ b/sysdeps/ia64/fpu/s_frexpl.c @@ -23,12 +23,12 @@ // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS @@ -53,12 +53,12 @@ long double __libm_frexpl(long double, int*, int); long double frexpl(long double x, int *y) { -#ifdef SIZE_INT_64 +#ifdef SIZE_INT_64 return( __libm_frexpl(x, y, 1) ); #else -#ifdef SIZE_INT_32 +#ifdef SIZE_INT_32 return( __libm_frexpl(x, y, 0) ); #endif diff --git a/sysdeps/ia64/fpu/s_ldexp.c b/sysdeps/ia64/fpu/s_ldexp.c index a0bc14c..6cf0ffc 100644 --- a/sysdeps/ia64/fpu/s_ldexp.c +++ b/sysdeps/ia64/fpu/s_ldexp.c @@ -23,12 +23,12 @@ // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS @@ -48,11 +48,11 @@ double __libm_ldexp(double, int, int); double ldexp(double x, int n) { -#ifdef SIZE_INT_64 - return __libm_ldexp(x,n,1); +#ifdef SIZE_INT_64 + return __libm_ldexp(x,n,1); #else -#ifdef SIZE_INT_32 +#ifdef SIZE_INT_32 return __libm_ldexp(x,n,0); #endif diff --git a/sysdeps/ia64/fpu/s_ldexpf.c b/sysdeps/ia64/fpu/s_ldexpf.c index ad083fa..64e335b 100644 --- a/sysdeps/ia64/fpu/s_ldexpf.c +++ b/sysdeps/ia64/fpu/s_ldexpf.c @@ -23,12 +23,12 @@ // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS @@ -48,11 +48,11 @@ float __libm_ldexpf(float, int, int); float ldexpf(float x, int n) { -#ifdef SIZE_INT_64 - return __libm_ldexpf(x,n,1); +#ifdef SIZE_INT_64 + return __libm_ldexpf(x,n,1); #else -#ifdef SIZE_INT_32 +#ifdef SIZE_INT_32 return __libm_ldexpf(x,n,0); #endif diff --git a/sysdeps/ia64/fpu/s_ldexpl.c b/sysdeps/ia64/fpu/s_ldexpl.c index 61dfd21..aa6f2d7 100644 --- a/sysdeps/ia64/fpu/s_ldexpl.c +++ b/sysdeps/ia64/fpu/s_ldexpl.c @@ -23,12 +23,12 @@ // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS @@ -48,11 +48,11 @@ long double __libm_ldexpl(long double, int, int); long double ldexpl(long double x, int n) { -#ifdef SIZE_INT_64 - return __libm_ldexpl(x,n,1); +#ifdef SIZE_INT_64 + return __libm_ldexpl(x,n,1); #else -#ifdef SIZE_INT_32 +#ifdef SIZE_INT_32 return __libm_ldexpl(x,n,0); #endif diff --git a/sysdeps/ia64/fpu/s_log1pl.S b/sysdeps/ia64/fpu/s_log1pl.S index 9654265..f60ce12 100644 --- a/sysdeps/ia64/fpu/s_log1pl.S +++ b/sysdeps/ia64/fpu/s_log1pl.S @@ -1,4 +1,4 @@ -.file "log1pl.s" +.file "log1pl.s" // Copyright (c) 2000 - 2003, Intel Corporation @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// //********************************************************************* // -// History: +// History: // 02/02/00 Initial version // 04/04/00 Unwind support added // 08/15/00 Bundle added after call to __libm_error_support to properly @@ -74,14 +74,14 @@ // IEEE Special Conditions: // // Denormal fault raised on denormal inputs -// Overflow exceptions cannot occur -// Underflow exceptions raised when appropriate for log1p +// Overflow exceptions cannot occur +// Underflow exceptions raised when appropriate for log1p // Inexact raised when appropriate by algorithm // // log1pl(inf) = inf -// log1pl(-inf) = QNaN -// log1pl(+/-0) = +/-0 -// log1pl(-1) = -inf +// log1pl(-inf) = QNaN +// log1pl(+/-0) = +/-0 +// log1pl(-1) = -inf // log1pl(SNaN) = QNaN // log1pl(QNaN) = QNaN // log1pl(EM_special Values) = QNaN @@ -105,11 +105,11 @@ // log1pl( X ) = log( X+1 ) can be approximated by a simple polynomial // in W = X. This polynomial resembles the truncated Taylor // series W - W^/2 + W^3/3 - ... -// +// // Case log_regular: // // Here we use a table lookup method. The basic idea is that in -// order to compute logl(Arg) = log1pl (Arg-1) for an argument Arg in [1,2), +// order to compute logl(Arg) = log1pl (Arg-1) for an argument Arg in [1,2), // we construct a value G such that G*Arg is close to 1 and that // logl(1/G) is obtainable easily from a table of values calculated // beforehand. Thus @@ -137,7 +137,7 @@ // G := G_1 * G_2 * G_3 // r := (G * S_hi - 1) + G * S_lo // -// These G_j's have the property that the product is exactly +// These G_j's have the property that the product is exactly // representable and that |r| < 2^(-12) as a result. // // Step 2: Approximation @@ -160,8 +160,8 @@ // // Although log1pl(X) is basically X, we would like to preserve the inexactness // nature as well as consistent behavior under different rounding modes. 
-// We can do this by computing the result as -// +// We can do this by computing the result as +// // log1pl(X) = X - X*X // // @@ -169,7 +169,7 @@ // // Here we compute a simple polynomial. To exploit parallelism, we split // the polynomial into two portions. -// +// // W := X // Wsq := W * W // W4 := Wsq*Wsq @@ -226,7 +226,7 @@ // with 1.0000 in fixed point. // // -// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1 +// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1 // truncated to lsb = 2^(-8). Similar to A_1, // A_2 is not needed in actual implementation. It // helps explain how some of the values are defined. @@ -255,11 +255,11 @@ // Fetch G_3 := (1/A_3) truncated to 21 sig. bits. // floating pt. Fetch is done using index_3. // -// Compute G := G_1 * G_2 * G_3. +// Compute G := G_1 * G_2 * G_3. // // This is done exactly since each of G_j only has 21 sig. bits. // -// Compute +// Compute // // r := (G*S_hi - 1) + G*S_lo using 2 FMA operations. // @@ -298,7 +298,7 @@ // Finally // // Y_hi := N*log2_hi + SUM ( log1byGj_hi ) -// Y_lo := poly_hi + [ poly_lo + +// Y_lo := poly_hi + [ poly_lo + // ( SUM ( log1byGj_lo ) + N*log2_lo ) ] // @@ -307,7 +307,7 @@ RODATA // ************* DO NOT CHANGE THE ORDER OF THESE TABLES ************* -// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1 +// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1 LOCAL_OBJECT_START(Constants_P) //data4 0xEFD62B15,0xE3936754,0x00003FFB,0x00000000 @@ -328,15 +328,15 @@ data8 0xAAAAAAAAAAAAAAAA,0x00003FFD data8 0xFFFFFFFFFFFFFFFE,0x0000BFFD LOCAL_OBJECT_END(Constants_P) -// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1 +// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1 LOCAL_OBJECT_START(Constants_Q) -//data4 0x00000000,0xB1721800,0x00003FFE,0x00000000 +//data4 0x00000000,0xB1721800,0x00003FFE,0x00000000 //data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000 //data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000 //data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000 //data4 
0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000 -//data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000 +//data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000 data8 0xB172180000000000,0x00003FFE data8 0x82E308654361C4C6,0x0000BFE2 data8 0xCCCCCAF2328833CB,0x00003FFC @@ -356,7 +356,7 @@ LOCAL_OBJECT_END(Constants_1_by_LN10) // Z1 - 16 bit fixed - + LOCAL_OBJECT_START(Constants_Z_1) data4 0x00008000 data4 0x00007879 @@ -471,7 +471,7 @@ data4 0x3F71D488,0x3D693B9D data8 0xBE049391B6B7C239 LOCAL_OBJECT_END(Constants_G_H_h2) -// G3 and H3 - IEEE single and h3 - IEEE double +// G3 and H3 - IEEE single and h3 - IEEE double LOCAL_OBJECT_START(Constants_G_H_h3) data4 0x3F7FFC00,0x38800100 @@ -543,70 +543,70 @@ LOCAL_OBJECT_END(Constants_G_H_h3) // Floating Point Registers -FR_Input_X = f8 +FR_Input_X = f8 -FR_Y_hi = f34 +FR_Y_hi = f34 FR_Y_lo = f35 FR_Scale = f36 -FR_X_Prime = f37 -FR_S_hi = f38 +FR_X_Prime = f37 +FR_S_hi = f38 FR_W = f39 FR_G = f40 FR_H = f41 -FR_wsq = f42 +FR_wsq = f42 FR_w4 = f43 FR_h = f44 -FR_w6 = f45 +FR_w6 = f45 FR_G2 = f46 FR_H2 = f47 FR_poly_lo = f48 -FR_P8 = f49 +FR_P8 = f49 FR_poly_hi = f50 -FR_P7 = f51 -FR_h2 = f52 -FR_rsq = f53 +FR_P7 = f51 +FR_h2 = f52 +FR_rsq = f53 FR_P6 = f54 -FR_r = f55 - -FR_log2_hi = f56 -FR_log2_lo = f57 -FR_p87 = f58 -FR_p876 = f58 -FR_p8765 = f58 -FR_float_N = f59 -FR_Q4 = f60 - -FR_p43 = f61 -FR_p432 = f61 -FR_p4321 = f61 -FR_P4 = f62 -FR_G3 = f63 -FR_H3 = f64 -FR_h3 = f65 - -FR_Q3 = f66 -FR_P3 = f67 -FR_Q2 = f68 -FR_P2 = f69 -FR_1LN10_hi = f70 - -FR_Q1 = f71 -FR_P1 = f72 -FR_1LN10_lo = f73 -FR_P5 = f74 -FR_rcub = f75 - -FR_Output_X_tmp = f76 -FR_Neg_One = f77 -FR_Z = f78 -FR_AA = f79 -FR_BB = f80 -FR_S_lo = f81 -FR_2_to_minus_N = f82 +FR_r = f55 + +FR_log2_hi = f56 +FR_log2_lo = f57 +FR_p87 = f58 +FR_p876 = f58 +FR_p8765 = f58 +FR_float_N = f59 +FR_Q4 = f60 + +FR_p43 = f61 +FR_p432 = f61 +FR_p4321 = f61 +FR_P4 = f62 +FR_G3 = f63 +FR_H3 = f64 +FR_h3 = f65 + +FR_Q3 = f66 +FR_P3 = f67 +FR_Q2 = f68 +FR_P2 = f69 +FR_1LN10_hi = f70 
+ +FR_Q1 = f71 +FR_P1 = f72 +FR_1LN10_lo = f73 +FR_P5 = f74 +FR_rcub = f75 + +FR_Output_X_tmp = f76 +FR_Neg_One = f77 +FR_Z = f78 +FR_AA = f79 +FR_BB = f80 +FR_S_lo = f81 +FR_2_to_minus_N = f82 FR_X = f8 FR_Y = f0 @@ -616,24 +616,24 @@ FR_RESULT = f76 // General Purpose Registers GR_ad_p = r33 -GR_Index1 = r34 -GR_Index2 = r35 -GR_signif = r36 -GR_X_0 = r37 -GR_X_1 = r38 -GR_X_2 = r39 +GR_Index1 = r34 +GR_Index2 = r35 +GR_signif = r36 +GR_X_0 = r37 +GR_X_1 = r38 +GR_X_2 = r39 GR_minus_N = r39 -GR_Z_1 = r40 -GR_Z_2 = r41 -GR_N = r42 -GR_Bias = r43 -GR_M = r44 -GR_Index3 = r45 -GR_exp_2tom80 = r45 +GR_Z_1 = r40 +GR_Z_2 = r41 +GR_N = r42 +GR_Bias = r43 +GR_M = r44 +GR_Index3 = r45 +GR_exp_2tom80 = r45 GR_ad_p2 = r46 -GR_exp_mask = r47 -GR_exp_2tom7 = r48 -GR_ad_ln10 = r49 +GR_exp_mask = r47 +GR_exp_2tom7 = r48 +GR_ad_ln10 = r49 GR_ad_tbl_1 = r50 GR_ad_tbl_2 = r51 GR_ad_tbl_3 = r52 @@ -769,14 +769,14 @@ GLOBAL_IEEE754_ENTRY(log1pl) // { .mmi ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo - sub GR_N = GR_N, GR_Bias + sub GR_N = GR_N, GR_Bias mov GR_exp_2tom80 = 0x0ffaf // Exponent of 2^-80 } ;; { .mfi ldfe FR_Q4 = [GR_ad_q],16 // Load Q4 - fms.s1 FR_S_lo = FR_AA, f1, FR_Z // Form S_lo = AA - Z + fms.s1 FR_S_lo = FR_AA, f1, FR_Z // Form S_lo = AA - Z sub GR_minus_N = GR_Bias, GR_N // Form exponent of 2^(-N) } ;; @@ -791,7 +791,7 @@ GLOBAL_IEEE754_ENTRY(log1pl) { .mmi getf.exp GR_M = FR_W // Get signexp of w = x ldfe FR_Q2 = [GR_ad_q],16 // Load Q2 - extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 + extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 } ;; @@ -1055,7 +1055,7 @@ GLOBAL_IEEE754_ENTRY(log1pl) { .mfi nop.m 999 -(p9) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo = poly_hi + poly_lo +(p9) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo = poly_hi + poly_lo nop.i 999 } ;; @@ -1070,25 +1070,25 @@ GLOBAL_IEEE754_ENTRY(log1pl) // Here if x=-1 -LOG1P_EQ_Minus_1: +LOG1P_EQ_Minus_1: // // If x=-1 raise divide by zero and return -inf -// +// { .mfi 
mov GR_Parameter_TAG = 138 - fsub.s1 FR_Output_X_tmp = f0, f1 + fsub.s1 FR_Output_X_tmp = f0, f1 nop.i 999 } ;; { .mfb nop.m 999 - frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0 + frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0 br.cond.sptk __libm_error_region } ;; -LOG1P_special: +LOG1P_special: { .mfi nop.m 999 fclass.m.unc p8, p0 = FR_Input_X, 0x1E1 // Test for natval, nan, +inf @@ -1096,46 +1096,46 @@ LOG1P_special: } ;; -// +// // For SNaN raise invalid and return QNaN. // For QNaN raise invalid and return QNaN. // For +Inf return +Inf. -// +// { .mfb nop.m 999 -(p8) fmpy.s0 f8 = FR_Input_X, f1 +(p8) fmpy.s0 f8 = FR_Input_X, f1 (p8) br.ret.sptk b0 // Return for natval, nan, +inf } ;; -// +// // For -Inf raise invalid and return QNaN. -// +// { .mfb mov GR_Parameter_TAG = 139 - fmpy.s0 FR_Output_X_tmp = FR_Input_X, f0 + fmpy.s0 FR_Output_X_tmp = FR_Input_X, f0 br.cond.sptk __libm_error_region } ;; -LOG1P_unsupported: -// +LOG1P_unsupported: +// // Return generated NaN or other value. -// +// { .mfb nop.m 999 - fmpy.s0 f8 = FR_Input_X, f0 + fmpy.s0 f8 = FR_Input_X, f0 br.ret.sptk b0 } ;; // Here if -inf < x < -1 -LOG1P_LT_Minus_1: -// +LOG1P_LT_Minus_1: +// // Deal with x < -1 in a special way - raise // invalid and produce QNaN indefinite. -// +// { .mfb mov GR_Parameter_TAG = 139 frcpa.s0 FR_Output_X_tmp, p8 = f0, f0 diff --git a/sysdeps/ia64/fpu/s_modf.S b/sysdeps/ia64/fpu/s_modf.S index 2008bbf..4634acf 100644 --- a/sysdeps/ia64/fpu/s_modf.S +++ b/sysdeps/ia64/fpu/s_modf.S @@ -21,27 +21,27 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== // 02/02/00 Initial version // 04/04/00 Improved speed, corrected result for NaN input -// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for +// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for // qnans nor for inputs larger than 2^63. // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align @@ -78,17 +78,17 @@ // CALCULATION: NOT HUGE, NOT SMALL // To get the integer part -// Take the floating-point input and truncate +// Take the floating-point input and truncate // then convert this integer to fp Call it MODF_INTEGER_PART // Subtract MODF_INTEGER_PART from MODF_NORM_F8 to get fraction part -// Then put fraction part in f8 +// Then put fraction part in f8 // put integer part MODF_INTEGER_PART into *iptr // Registers used //============================================================== -// predicate registers used: +// predicate registers used: // p6 - p13 // 0xFFFF 0x10033 @@ -99,21 +99,21 @@ // p13 --------------------------------------------------->| // -// floating-point registers used: +// floating-point registers used: MODF_NORM_F8 = f9 MODF_FRACTION_PART = f10 MODF_INTEGER_PART = f11 MODF_INT_INTEGER_PART = f12 -// general registers used +// general registers used modf_signexp = r14 modf_GR_no_frac = r15 modf_GR_FFFF = r16 -modf_17_ones = r17 +modf_17_ones = r17 modf_exp = r18 // r33 = iptr - + .section .text GLOBAL_LIBM_ENTRY(modf) @@ -122,7 +122,7 @@ GLOBAL_LIBM_ENTRY(modf) // Assume input is normalized and get signexp // Normalize input just in case -// Form exponent bias +// Form exponent bias { .mfi getf.exp modf_signexp = f8 fnorm.s0 MODF_NORM_F8 = f8 @@ -150,9 +150,9 @@ GLOBAL_LIBM_ENTRY(modf) // Is x unnorm? 
// qnan snan inf norm unorm 0 -+ // 0 0 0 0 1 0 11 = 0x0b UNORM -// Set p13 to indicate calculation path, else p6 if nan or inf +// Set p13 to indicate calculation path, else p6 if nan or inf { .mfi - and modf_exp = modf_17_ones, modf_signexp + and modf_exp = modf_17_ones, modf_signexp fclass.m.unc p8,p0 = f8, 0x0b nop.i 999 ;; } diff --git a/sysdeps/ia64/fpu/s_modff.S b/sysdeps/ia64/fpu/s_modff.S index edc1120..36e02be 100644 --- a/sysdeps/ia64/fpu/s_modff.S +++ b/sysdeps/ia64/fpu/s_modff.S @@ -21,27 +21,27 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== // 02/02/00 Initial version // 04/04/00 Improved speed, corrected result for NaN input -// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for +// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for // qnans nor for inputs larger than 2^63. // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align @@ -78,17 +78,17 @@ // CALCULATION: NOT HUGE, NOT SMALL // To get the integer part -// Take the floating-point input and truncate +// Take the floating-point input and truncate // then convert this integer to fp Call it MODF_INTEGER_PART // Subtract MODF_INTEGER_PART from MODF_NORM_F8 to get fraction part -// Then put fraction part in f8 +// Then put fraction part in f8 // put integer part MODF_INTEGER_PART into *iptr // Registers used //============================================================== -// predicate registers used: +// predicate registers used: // p6 - p13 // 0xFFFF 0x10016 @@ -99,21 +99,21 @@ // p13 --------------------------------------------------->| // -// floating-point registers used: +// floating-point registers used: MODF_NORM_F8 = f9 MODF_FRACTION_PART = f10 MODF_INTEGER_PART = f11 MODF_INT_INTEGER_PART = f12 -// general registers used +// general registers used modf_signexp = r14 modf_GR_no_frac = r15 modf_GR_FFFF = r16 -modf_17_ones = r17 +modf_17_ones = r17 modf_exp = r18 // r33 = iptr - + .section .text GLOBAL_LIBM_ENTRY(modff) @@ -122,7 +122,7 @@ GLOBAL_LIBM_ENTRY(modff) // Assume input is normalized and get signexp // Normalize input just in case -// Form exponent bias +// Form exponent bias { 
.mfi getf.exp modf_signexp = f8 fnorm.s0 MODF_NORM_F8 = f8 @@ -150,9 +150,9 @@ GLOBAL_LIBM_ENTRY(modff) // Is x unnorm? // qnan snan inf norm unorm 0 -+ // 0 0 0 0 1 0 11 = 0x0b UNORM -// Set p13 to indicate calculation path, else p6 if nan or inf +// Set p13 to indicate calculation path, else p6 if nan or inf { .mfi - and modf_exp = modf_17_ones, modf_signexp + and modf_exp = modf_17_ones, modf_signexp fclass.m.unc p8,p0 = f8, 0x0b nop.i 999 ;; } diff --git a/sysdeps/ia64/fpu/s_modfl.S b/sysdeps/ia64/fpu/s_modfl.S index eaf410c..e95d520 100644 --- a/sysdeps/ia64/fpu/s_modfl.S +++ b/sysdeps/ia64/fpu/s_modfl.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -42,7 +42,7 @@ // 02/02/00 Initial version // 04/04/00 Improved speed, corrected result for NaN input // 05/30/00 Fixed bug for exponent 0x1003e -// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for +// 12/22/00 Fixed so inexact flag is never set, and invalid is not set for // qnans nor for inputs larger than 2^63. // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align @@ -73,17 +73,17 @@ // CALCULATION: NOT HUGE, NOT SMALL // To get the integer part -// Take the floating-point input and truncate +// Take the floating-point input and truncate // then convert this integer to fp Call it MODF_INTEGER_PART // Subtract MODF_INTEGER_PART from MODF_NORM_F8 to get fraction part -// Then put fraction part in f8 +// Then put fraction part in f8 // put integer part MODF_INTEGER_PART into *iptr // Registers used //============================================================== -// predicate registers used: +// predicate registers used: // p6 - p13 // 0xFFFF 0x1003e @@ -94,21 +94,21 @@ // p13 --------------------------------------------------->| // -// floating-point registers used: +// floating-point registers used: MODF_NORM_F8 = f9 MODF_FRACTION_PART = f10 MODF_INTEGER_PART = f11 MODF_INT_INTEGER_PART = f12 -// general registers used +// general registers used modf_signexp = r14 modf_GR_no_frac = r15 modf_GR_FFFF = r16 -modf_17_ones = r17 +modf_17_ones = r17 modf_exp = r18 // r34 = iptr - + .section .text GLOBAL_LIBM_ENTRY(modfl) @@ -117,7 +117,7 @@ 
GLOBAL_LIBM_ENTRY(modfl) // Assume input is normalized and get signexp // Normalize input just in case -// Form exponent bias +// Form exponent bias { .mfi getf.exp modf_signexp = f8 fnorm.s0 MODF_NORM_F8 = f8 @@ -145,9 +145,9 @@ GLOBAL_LIBM_ENTRY(modfl) // Is x unnorm? // qnan snan inf norm unorm 0 -+ // 0 0 0 0 1 0 11 = 0x0b UNORM -// Set p13 to indicate calculation path, else p6 if nan or inf +// Set p13 to indicate calculation path, else p6 if nan or inf { .mfi - and modf_exp = modf_17_ones, modf_signexp + and modf_exp = modf_17_ones, modf_signexp fclass.m.unc p8,p0 = f8, 0x0b nop.i 999 ;; } diff --git a/sysdeps/ia64/fpu/s_nextafter.S b/sysdeps/ia64/fpu/s_nextafter.S index 6635a31..ec8afce 100644 --- a/sysdeps/ia64/fpu/s_nextafter.S +++ b/sysdeps/ia64/fpu/s_nextafter.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== -// 02/02/00 Initial version +// 02/02/00 Initial version // 03/03/00 Modified to conform to C9X, and improve speed of main path // 03/14/00 Fixed case where x is a power of 2, and x > y, improved speed // 04/04/00 Unwind support added @@ -104,8 +104,8 @@ FR_tmp = f39 // // Overview of operation //============================================================== -// nextafter determines the next representable value -// after x in the direction of y. +// nextafter determines the next representable value +// after x in the direction of y. 
.section .text @@ -116,7 +116,7 @@ GLOBAL_LIBM_ENTRY(nextafter) // Form smallest denormal significand = ulp size { .mfi getf.exp GR_exp = f8 - fcmp.lt.s1 p10,p11 = f8, f9 + fcmp.lt.s1 p10,p11 = f8, f9 addl GR_sden_sig = 0x800, r0 } // Form largest normal significand 0xfffffffffffff800 @@ -131,7 +131,7 @@ GLOBAL_LIBM_ENTRY(nextafter) // Form largest normal exponent { .mfi getf.sig GR_sig = f8 - fcmp.eq.s0 p6,p0 = f8, f9 + fcmp.eq.s0 p6,p0 = f8, f9 addl GR_max_pexp = 0x103fe, r0 } // Move largest normal significand to fp reg for special cases @@ -148,7 +148,7 @@ GLOBAL_LIBM_ENTRY(nextafter) // It decreases (p13 set) if x < y and x < 0, or if x > y and x >= 0 { .mfi setf.sig FR_sden_sig = GR_sden_sig - fclass.m p8,p0 = f8, 0xc3 + fclass.m p8,p0 = f8, 0xc3 (p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask } { .mfi @@ -176,7 +176,7 @@ GLOBAL_LIBM_ENTRY(nextafter) // Form new exponent in case result exponent needs incrementing or decrementing { .mfi setf.exp FR_new_exp = GR_exp - fclass.m p9,p0 = f9, 0xc3 + fclass.m p9,p0 = f9, 0xc3 (p12) add GR_exp1 = 1, GR_exp } { .mib @@ -194,7 +194,7 @@ GLOBAL_LIBM_ENTRY(nextafter) } { .mfb nop.m 999 -(p8) fma.s0 f8 = f8,f1,f9 +(p8) fma.s0 f8 = f8,f1,f9 (p8) br.ret.spnt b0 ;; // Exit if x=nan } @@ -202,12 +202,12 @@ GLOBAL_LIBM_ENTRY(nextafter) // Is x=inf? 
{ .mfi setf.exp FR_exp1 = GR_exp1 - fclass.m p6,p0 = f8, 0x23 + fclass.m p6,p0 = f8, 0x23 addl GR_exp_mask = 0x1ffff, r0 } { .mfb setf.sig FR_snorm_sig = GR_snorm_sig -(p9) fma.s0 f8 = f8,f1,f9 +(p9) fma.s0 f8 = f8,f1,f9 (p9) br.ret.spnt b0 ;; // Exit if y=nan } @@ -216,14 +216,14 @@ GLOBAL_LIBM_ENTRY(nextafter) { .mfb setf.sig FR_lden_sig = GR_lden_sig mov FR_save_f8 = f8 -(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 +(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 } // Mask off the sign to get x_exp { .mfb and GR_x_exp = GR_exp_mask, GR_exp nop.f 999 -(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf +(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf } // Check 6 special cases when significand rolls over: @@ -237,7 +237,7 @@ GLOBAL_LIBM_ENTRY(nextafter) // Set p9, result is sig=max_den_sig, exp same, signal underflow and inexact // 5 sig size decr, x_sig=min_den_sig, x_exp = min_exp // Set p10, result is zero, sign of x, signal underflow and inexact -// 6 sig size decr, x_sig=min_sig, x_exp < min_exp +// 6 sig size decr, x_sig=min_sig, x_exp < min_exp // Set p14, result is zero, sign of x, signal underflow and inexact // // Form exponent of smallest double denormal (if normalized register format) @@ -343,7 +343,7 @@ NEXT_UNDERFLOW_TO_ZERO: br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXT_INF: +NEXT_INF: // Here if f8 is +- infinity // INF // if f8 is +inf, no matter what y is return largest double @@ -357,16 +357,16 @@ NEXT_INF: { .mfb nop.m 999 - fmerge.s f8 = f8,FR_lnorm - br.ret.sptk b0 ;; + fmerge.s f8 = f8,FR_lnorm + br.ret.sptk b0 ;; } -NEXT_ZERO: +NEXT_ZERO: // Here if f8 is +- zero // ZERO -// if f8 is zero and y is +, return + smallest double denormal -// if f8 is zero and y is -, return - smallest double denormal +// if f8 is zero and y is +, return + smallest double denormal +// if f8 is zero and y is -, return - smallest double denormal { .mfi nop.m 999 @@ -384,7 +384,7 @@ NEXT_ZERO: // Add correct sign from direction arg { .mfi nop.m 999 - fmerge.s f8 = 
f9,FR_sden + fmerge.s f8 = f9,FR_sden nop.i 999 ;; } @@ -395,7 +395,7 @@ NEXT_ZERO: br.cond.sptk NEXT_UNDERFLOW ;; } -NEXT_UNDERFLOW: +NEXT_UNDERFLOW: // Here if result is a denorm, or input is finite and result is zero // Call error support to report possible range error { .mib @@ -405,7 +405,7 @@ NEXT_UNDERFLOW: } ;; -NEXT_OVERFLOW: +NEXT_OVERFLOW: // Here if input is finite, but result will be infinite // Use frcpa to generate infinity of correct sign // Call error support to report possible range error @@ -464,7 +464,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mib stfd [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + nop.b 0 } { .mib stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack diff --git a/sysdeps/ia64/fpu/s_nextafterf.S b/sysdeps/ia64/fpu/s_nextafterf.S index 0c269ec..6470091 100644 --- a/sysdeps/ia64/fpu/s_nextafterf.S +++ b/sysdeps/ia64/fpu/s_nextafterf.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== -// 02/02/00 Initial version +// 02/02/00 Initial version // 03/03/00 Modified to conform to C9X, and improve speed of main path // 03/14/00 Fixed case where x is a power of 2, and x > y, improved speed // 04/04/00 Unwind support added @@ -104,8 +104,8 @@ FR_tmp = f39 // // Overview of operation //============================================================== -// nextafterf determines the next representable value -// after x in the direction of y. +// nextafterf determines the next representable value +// after x in the direction of y. 
.section .text @@ -135,14 +135,14 @@ GLOBAL_LIBM_ENTRY(nextafterf) // Extract significand from x // Form largest normal significand { .mlx - nop.m 0 + nop.m 0 movl GR_lnorm_sig = 0xffffff0000000000 ;; } // Move largest normal significand to fp reg for special cases { .mfi setf.sig FR_lnorm_sig = GR_lnorm_sig - nop.f 0 + nop.f 0 addl GR_sign_mask = 0x20000, r0 ;; } @@ -153,7 +153,7 @@ GLOBAL_LIBM_ENTRY(nextafterf) // It decreases (p13 set) if x < y and x < 0, or if x > y and x >= 0 { .mfi setf.sig FR_sden_sig = GR_sden_sig - fclass.m p8,p0 = f8, 0xc3 + fclass.m p8,p0 = f8, 0xc3 (p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask } { .mfi @@ -182,7 +182,7 @@ GLOBAL_LIBM_ENTRY(nextafterf) // Form new exponent in case result exponent needs incrementing or decrementing { .mfi setf.exp FR_new_exp = GR_exp - fclass.m p9,p0 = f9, 0xc3 + fclass.m p9,p0 = f9, 0xc3 (p12) add GR_exp1 = 1, GR_exp } { .mib @@ -200,7 +200,7 @@ GLOBAL_LIBM_ENTRY(nextafterf) } { .mfb nop.m 999 -(p8) fma.s0 f8 = f8,f1,f9 +(p8) fma.s0 f8 = f8,f1,f9 (p8) br.ret.spnt b0 ;; // Exit if x=nan } @@ -208,12 +208,12 @@ GLOBAL_LIBM_ENTRY(nextafterf) // Is x=inf? 
{ .mfi setf.exp FR_exp1 = GR_exp1 - fclass.m p6,p0 = f8, 0x23 + fclass.m p6,p0 = f8, 0x23 addl GR_exp_mask = 0x1ffff, r0 } { .mfb setf.sig FR_snorm_sig = GR_snorm_sig -(p9) fma.s0 f8 = f8,f1,f9 +(p9) fma.s0 f8 = f8,f1,f9 (p9) br.ret.spnt b0 ;; // Exit if y=nan } @@ -222,14 +222,14 @@ GLOBAL_LIBM_ENTRY(nextafterf) { .mfb setf.sig FR_lden_sig = GR_lden_sig mov FR_save_f8 = f8 -(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 +(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 } // Mask off the sign to get x_exp { .mfb and GR_x_exp = GR_exp_mask, GR_exp nop.f 999 -(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf +(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf } // Check 6 special cases when significand rolls over: @@ -243,7 +243,7 @@ GLOBAL_LIBM_ENTRY(nextafterf) // Set p9, result is sig=max_den_sig, exp same, signal underflow and inexact // 5 sig size decr, x_sig=min_den_sig, x_exp = min_exp // Set p10, result is zero, sign of x, signal underflow and inexact -// 6 sig size decr, x_sig=min_sig, x_exp < min_exp +// 6 sig size decr, x_sig=min_sig, x_exp < min_exp // Set p14, result is zero, sign of x, signal underflow and inexact // // Form exponent of smallest float denormal (if normalized register format) @@ -349,7 +349,7 @@ NEXT_UNDERFLOW_TO_ZERO: br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXT_INF: +NEXT_INF: // Here if f8 is +- infinity // INF // if f8 is +inf, no matter what y is return largest float @@ -363,16 +363,16 @@ NEXT_INF: { .mfb nop.m 999 - fmerge.s f8 = f8,FR_lnorm - br.ret.sptk b0 ;; + fmerge.s f8 = f8,FR_lnorm + br.ret.sptk b0 ;; } -NEXT_ZERO: +NEXT_ZERO: // Here if f8 is +- zero // ZERO -// if f8 is zero and y is +, return + smallest float denormal -// if f8 is zero and y is -, return - smallest float denormal +// if f8 is zero and y is +, return + smallest float denormal +// if f8 is zero and y is -, return - smallest float denormal { .mfi nop.m 999 @@ -390,7 +390,7 @@ NEXT_ZERO: // Add correct sign from direction arg { .mfi nop.m 999 - fmerge.s f8 = f9,FR_sden + 
fmerge.s f8 = f9,FR_sden nop.i 999 ;; } @@ -401,7 +401,7 @@ NEXT_ZERO: br.cond.sptk NEXT_UNDERFLOW ;; } -NEXT_UNDERFLOW: +NEXT_UNDERFLOW: // Here if result is a denorm, or input is finite and result is zero // Call error support to report possible range error { .mib @@ -411,7 +411,7 @@ NEXT_UNDERFLOW: } ;; -NEXT_OVERFLOW: +NEXT_OVERFLOW: // Here if input is finite, but result will be infinite // Use frcpa to generate infinity of correct sign // Call error support to report possible range error @@ -470,7 +470,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mib stfs [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + nop.b 0 } { .mib stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack diff --git a/sysdeps/ia64/fpu/s_nextafterl.S b/sysdeps/ia64/fpu/s_nextafterl.S index 20c927b..5ba5030 100644 --- a/sysdeps/ia64/fpu/s_nextafterl.S +++ b/sysdeps/ia64/fpu/s_nextafterl.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== -// 02/02/00 Initial version +// 02/02/00 Initial version // 03/03/00 Modified to conform to C9X, and improve speed of main path // 03/14/00 Fixed case where x is a power of 2, and x > y, improved speed // 04/04/00 Unwind support added @@ -48,7 +48,7 @@ // set [the previously overwritten] GR_Parameter_RESULT. // 09/09/00 Updated fcmp so that qnans do not raise invalid. 
// 12/15/00 Fixed case of smallest long double normal to largest denormal, -// now adhere to C99 for two zero args, and fixed flag settings +// now adhere to C99 for two zero args, and fixed flag settings // for several cases // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align @@ -105,8 +105,8 @@ FR_tmp = f39 // // Overview of operation //============================================================== -// nextafterl determines the next representable value -// after x in the direction of y. +// nextafterl determines the next representable value +// after x in the direction of y. .section .text @@ -117,7 +117,7 @@ GLOBAL_LIBM_ENTRY(nextafterl) // Form smallest denormal significand = ulp size { .mfi getf.exp GR_exp = f8 - fcmp.lt.s1 p10,p11 = f8, f9 + fcmp.lt.s1 p10,p11 = f8, f9 addl GR_sden_sig = 0x1, r0 } // Form largest normal significand 0xffffffffffffffff @@ -150,7 +150,7 @@ GLOBAL_LIBM_ENTRY(nextafterl) // It decreases (p13 set) if x < y and x < 0, or if x > y and x >= 0 { .mfi setf.sig FR_sden_sig = GR_sden_sig - fclass.m p8,p0 = f8, 0xc3 + fclass.m p8,p0 = f8, 0xc3 (p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask } // Move smallest normal exp to fp regs @@ -180,7 +180,7 @@ GLOBAL_LIBM_ENTRY(nextafterl) // Form new exponent in case result exponent needs incrementing or decrementing { .mfi setf.exp FR_new_exp = GR_exp - fclass.m p9,p0 = f9, 0xc3 + fclass.m p9,p0 = f9, 0xc3 (p12) add GR_exp1 = 1, GR_exp } { .mib @@ -198,7 +198,7 @@ GLOBAL_LIBM_ENTRY(nextafterl) } { .mfb setf.exp FR_den_exp = GR_min_pexp -(p8) fma.s0 f8 = f8,f1,f9 +(p8) fma.s0 f8 = f8,f1,f9 (p8) br.ret.spnt b0 ;; // Exit if x=nan } @@ -206,12 +206,12 @@ GLOBAL_LIBM_ENTRY(nextafterl) // Is x=inf? 
{ .mfi setf.exp FR_exp1 = GR_exp1 - fclass.m p6,p0 = f8, 0x23 + fclass.m p6,p0 = f8, 0x23 addl GR_exp_mask = 0x1ffff, r0 } { .mfb setf.sig FR_snorm_sig = GR_snorm_sig -(p9) fma.s0 f8 = f8,f1,f9 +(p9) fma.s0 f8 = f8,f1,f9 (p9) br.ret.spnt b0 ;; // Exit if y=nan } @@ -220,14 +220,14 @@ GLOBAL_LIBM_ENTRY(nextafterl) { .mfb setf.sig FR_lden_sig = GR_lden_sig mov FR_save_f8 = f8 -(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 +(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 } // Mask off the sign to get x_exp { .mfb and GR_x_exp = GR_exp_mask, GR_exp nop.f 999 -(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf +(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf } // Check 5 special cases when significand rolls over: @@ -252,7 +252,7 @@ GLOBAL_LIBM_ENTRY(nextafterl) { .mmi (p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp (p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 -(p9) cmp.le.unc p9,p8 = GR_x_exp, GR_min_pexp +(p9) cmp.le.unc p9,p8 = GR_x_exp, GR_min_pexp ;; } @@ -347,7 +347,7 @@ NEXT_UNDERFLOW_TO_ZERO: br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXT_INF: +NEXT_INF: // Here if f8 is +- infinity // INF // if f8 is +inf, no matter what y is return largest long double @@ -362,16 +362,16 @@ NEXT_INF: { .mfb nop.m 999 - fmerge.s f8 = f8,FR_lnorm - br.ret.sptk b0 ;; + fmerge.s f8 = f8,FR_lnorm + br.ret.sptk b0 ;; } -NEXT_ZERO: +NEXT_ZERO: // Here if f8 is +- zero // ZERO -// if f8 is zero and y is +, return + smallest long double denormal -// if f8 is zero and y is -, return - smallest long double denormal +// if f8 is zero and y is +, return + smallest long double denormal +// if f8 is zero and y is -, return - smallest long double denormal { .mfi nop.m 999 @@ -389,7 +389,7 @@ NEXT_ZERO: // Add correct sign from direction arg { .mfi nop.m 999 - fmerge.s f8 = f9,FR_sden + fmerge.s f8 = f9,FR_sden nop.i 999 ;; } @@ -400,7 +400,7 @@ NEXT_ZERO: br.cond.sptk NEXT_UNDERFLOW ;; } -NEXT_UNDERFLOW: +NEXT_UNDERFLOW: // Here if result is a denorm, or input is finite and result is zero // Call error support 
to report possible range error { .mib @@ -410,7 +410,7 @@ NEXT_UNDERFLOW: } ;; -NEXT_OVERFLOW: +NEXT_OVERFLOW: // Here if input is finite, but result will be infinite // Use frcpa to generate infinity of correct sign // Call error support to report possible range error @@ -469,7 +469,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mib stfe [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + nop.b 0 } { .mib stfe [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack diff --git a/sysdeps/ia64/fpu/s_nexttoward.S b/sysdeps/ia64/fpu/s_nexttoward.S index 741fea0..a0e6975 100644 --- a/sysdeps/ia64/fpu/s_nexttoward.S +++ b/sysdeps/ia64/fpu/s_nexttoward.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== -// 08/15/01 Initial version +// 08/15/01 Initial version // 08/23/01 Corrected error tag number // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align @@ -96,8 +96,8 @@ FR_tmp = f39 // // Overview of operation //============================================================== -// nexttoward determines the next representable value -// after x in the direction of y. +// nexttoward determines the next representable value +// after x in the direction of y. 
.section .text @@ -108,7 +108,7 @@ GLOBAL_LIBM_ENTRY(nexttoward) // Form smallest denormal significand = ulp size { .mfi getf.exp GR_exp = f8 - fcmp.lt.s1 p10,p11 = f8, f9 + fcmp.lt.s1 p10,p11 = f8, f9 addl GR_sden_sig = 0x800, r0 } // Form largest normal significand 0xfffffffffffff800 @@ -123,7 +123,7 @@ GLOBAL_LIBM_ENTRY(nexttoward) // Form largest normal exponent { .mfi getf.sig GR_sig = f8 - fcmp.eq.s0 p6,p0 = f8, f9 + fcmp.eq.s0 p6,p0 = f8, f9 addl GR_max_pexp = 0x103fe, r0 } // Move largest normal significand to fp reg for special cases @@ -140,7 +140,7 @@ GLOBAL_LIBM_ENTRY(nexttoward) // It decreases (p13 set) if x < y and x < 0, or if x > y and x >= 0 { .mfi setf.sig FR_sden_sig = GR_sden_sig - fclass.m p8,p0 = f8, 0xc3 + fclass.m p8,p0 = f8, 0xc3 (p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask } { .mfi @@ -168,7 +168,7 @@ GLOBAL_LIBM_ENTRY(nexttoward) // Form new exponent in case result exponent needs incrementing or decrementing { .mfi setf.exp FR_new_exp = GR_exp - fclass.m p9,p0 = f9, 0xc3 + fclass.m p9,p0 = f9, 0xc3 (p12) add GR_exp1 = 1, GR_exp } { .mib @@ -186,7 +186,7 @@ GLOBAL_LIBM_ENTRY(nexttoward) } { .mfb nop.m 999 -(p8) fma.s0 f8 = f8,f1,f9 +(p8) fma.s0 f8 = f8,f1,f9 (p8) br.ret.spnt b0 ;; // Exit if x=nan } @@ -194,12 +194,12 @@ GLOBAL_LIBM_ENTRY(nexttoward) // Is x=inf? 
{ .mfi setf.exp FR_exp1 = GR_exp1 - fclass.m p6,p0 = f8, 0x23 + fclass.m p6,p0 = f8, 0x23 addl GR_exp_mask = 0x1ffff, r0 } { .mfb setf.sig FR_snorm_sig = GR_snorm_sig -(p9) fma.s0 f8 = f8,f1,f9 +(p9) fma.s0 f8 = f8,f1,f9 (p9) br.ret.spnt b0 ;; // Exit if y=nan } @@ -208,14 +208,14 @@ GLOBAL_LIBM_ENTRY(nexttoward) { .mfb setf.sig FR_lden_sig = GR_lden_sig mov FR_save_f8 = f8 -(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 +(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 } // Mask off the sign to get x_exp { .mfb and GR_x_exp = GR_exp_mask, GR_exp nop.f 999 -(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf +(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf } // Check 6 special cases when significand rolls over: @@ -229,7 +229,7 @@ GLOBAL_LIBM_ENTRY(nexttoward) // Set p9, result is sig=max_den_sig, exp same, signal underflow and inexact // 5 sig size decr, x_sig=min_den_sig, x_exp = min_exp // Set p10, result is zero, sign of x, signal underflow and inexact -// 6 sig size decr, x_sig=min_sig, x_exp < min_exp +// 6 sig size decr, x_sig=min_sig, x_exp < min_exp // Set p14, result is zero, sign of x, signal underflow and inexact // // Form exponent of smallest double denormal (if normalized register format) @@ -335,7 +335,7 @@ NEXT_UNDERFLOW_TO_ZERO: br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXT_INF: +NEXT_INF: // Here if f8 is +- infinity // INF // if f8 is +inf, no matter what y is return largest double @@ -349,16 +349,16 @@ NEXT_INF: { .mfb nop.m 999 - fmerge.s f8 = f8,FR_lnorm - br.ret.sptk b0 ;; + fmerge.s f8 = f8,FR_lnorm + br.ret.sptk b0 ;; } -NEXT_ZERO: +NEXT_ZERO: // Here if f8 is +- zero // ZERO -// if f8 is zero and y is +, return + smallest double denormal -// if f8 is zero and y is -, return - smallest double denormal +// if f8 is zero and y is +, return + smallest double denormal +// if f8 is zero and y is -, return - smallest double denormal { .mfi nop.m 999 @@ -376,7 +376,7 @@ NEXT_ZERO: // Add correct sign from direction arg { .mfi nop.m 999 - fmerge.s f8 = 
f9,FR_sden + fmerge.s f8 = f9,FR_sden nop.i 999 ;; } @@ -387,7 +387,7 @@ NEXT_ZERO: br.cond.sptk NEXT_UNDERFLOW ;; } -NEXT_UNDERFLOW: +NEXT_UNDERFLOW: // Here if result is a denorm, or input is finite and result is zero // Call error support to report possible range error { .mib @@ -397,7 +397,7 @@ NEXT_UNDERFLOW: } ;; -NEXT_OVERFLOW: +NEXT_OVERFLOW: // Here if input is finite, but result will be infinite // Use frcpa to generate infinity of correct sign // Call error support to report possible range error @@ -456,7 +456,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mib stfd [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + nop.b 0 } { .mib stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack diff --git a/sysdeps/ia64/fpu/s_nexttowardf.S b/sysdeps/ia64/fpu/s_nexttowardf.S index b8b9762..d224951 100644 --- a/sysdeps/ia64/fpu/s_nexttowardf.S +++ b/sysdeps/ia64/fpu/s_nexttowardf.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== -// 08/15/01 Initial version +// 08/15/01 Initial version // 08/23/01 Corrected error tag number // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align @@ -96,8 +96,8 @@ FR_tmp = f39 // // Overview of operation //============================================================== -// nexttowardf determines the next representable value -// after x in the direction of y. +// nexttowardf determines the next representable value +// after x in the direction of y. 
.section .text @@ -127,14 +127,14 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // Extract significand from x // Form largest normal significand { .mlx - nop.m 0 + nop.m 0 movl GR_lnorm_sig = 0xffffff0000000000 ;; } // Move largest normal significand to fp reg for special cases { .mfi setf.sig FR_lnorm_sig = GR_lnorm_sig - nop.f 0 + nop.f 0 addl GR_sign_mask = 0x20000, r0 ;; } @@ -145,7 +145,7 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // It decreases (p13 set) if xy and x>=0 { .mfi setf.sig FR_sden_sig = GR_sden_sig - fclass.m p8,p0 = f8, 0xc3 + fclass.m p8,p0 = f8, 0xc3 (p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask } { .mfi @@ -174,7 +174,7 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // Form new exponent in case result exponent needs incrementing or decrementing { .mfi setf.exp FR_new_exp = GR_exp - fclass.m p9,p0 = f9, 0xc3 + fclass.m p9,p0 = f9, 0xc3 (p12) add GR_exp1 = 1, GR_exp } { .mib @@ -192,7 +192,7 @@ GLOBAL_LIBM_ENTRY(nexttowardf) } { .mfb nop.m 999 -(p8) fma.s0 f8 = f8,f1,f9 +(p8) fma.s0 f8 = f8,f1,f9 (p8) br.ret.spnt b0 ;; // Exit if x=nan } @@ -200,12 +200,12 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // Is x=inf? 
{ .mfi setf.exp FR_exp1 = GR_exp1 - fclass.m p6,p0 = f8, 0x23 + fclass.m p6,p0 = f8, 0x23 addl GR_exp_mask = 0x1ffff, r0 } { .mfb setf.sig FR_snorm_sig = GR_snorm_sig -(p9) fma.s0 f8 = f8,f1,f9 +(p9) fma.s0 f8 = f8,f1,f9 (p9) br.ret.spnt b0 ;; // Exit if y=nan } @@ -214,14 +214,14 @@ GLOBAL_LIBM_ENTRY(nexttowardf) { .mfb setf.sig FR_lden_sig = GR_lden_sig mov FR_save_f8 = f8 -(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 +(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 } // Mask off the sign to get x_exp { .mfb and GR_x_exp = GR_exp_mask, GR_exp nop.f 999 -(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf +(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf } // Check 6 special cases when significand rolls over: @@ -235,7 +235,7 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // Set p9, result is sig=max_den_sig, exp same, signal underflow and inexact // 5 sig size decr, x_sig=min_den_sig, x_exp = min_exp // Set p10, result is zero, sign of x, signal underflow and inexact -// 6 sig size decr, x_sig=min_sig, x_exp < min_exp +// 6 sig size decr, x_sig=min_sig, x_exp < min_exp // Set p14, result is zero, sign of x, signal underflow and inexact // // Form exponent of smallest float denormal (if normalized register format) @@ -341,7 +341,7 @@ NEXT_UNDERFLOW_TO_ZERO: br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXT_INF: +NEXT_INF: // Here if f8 is +- infinity // INF // if f8 is +inf, no matter what y is return largest float @@ -355,16 +355,16 @@ NEXT_INF: { .mfb nop.m 999 - fmerge.s f8 = f8,FR_lnorm - br.ret.sptk b0 ;; + fmerge.s f8 = f8,FR_lnorm + br.ret.sptk b0 ;; } -NEXT_ZERO: +NEXT_ZERO: // Here if f8 is +- zero // ZERO -// if f8 is zero and y is +, return + smallest float denormal -// if f8 is zero and y is -, return - smallest float denormal +// if f8 is zero and y is +, return + smallest float denormal +// if f8 is zero and y is -, return - smallest float denormal { .mfi nop.m 999 @@ -382,7 +382,7 @@ NEXT_ZERO: // Add correct sign from direction arg { .mfi nop.m 999 - fmerge.s f8 = f9,FR_sden 
+ fmerge.s f8 = f9,FR_sden nop.i 999 ;; } @@ -393,7 +393,7 @@ NEXT_ZERO: br.cond.sptk NEXT_UNDERFLOW ;; } -NEXT_UNDERFLOW: +NEXT_UNDERFLOW: // Here if result is a denorm, or input is finite and result is zero // Call error support to report possible range error { .mib @@ -403,7 +403,7 @@ NEXT_UNDERFLOW: } ;; -NEXT_OVERFLOW: +NEXT_OVERFLOW: // Here if input is finite, but result will be infinite // Use frcpa to generate infinity of correct sign // Call error support to report possible range error @@ -462,7 +462,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mib stfs [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + nop.b 0 } { .mib stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack diff --git a/sysdeps/ia64/fpu/s_nexttowardl.S b/sysdeps/ia64/fpu/s_nexttowardl.S index fa2db12..7d38e3b 100644 --- a/sysdeps/ia64/fpu/s_nexttowardl.S +++ b/sysdeps/ia64/fpu/s_nexttowardl.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== -// 08/15/01 Initial version +// 08/15/01 Initial version // 08/23/01 Corrected error tag number // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align @@ -96,8 +96,8 @@ FR_tmp = f39 // // Overview of operation //============================================================== -// nexttowardl determines the next representable value -// after x in the direction of y. +// nexttowardl determines the next representable value +// after x in the direction of y. 
.section .text @@ -108,7 +108,7 @@ GLOBAL_LIBM_ENTRY(nexttowardl) // Form smallest denormal significand = ulp size { .mfi getf.exp GR_exp = f8 - fcmp.lt.s1 p10,p11 = f8, f9 + fcmp.lt.s1 p10,p11 = f8, f9 addl GR_sden_sig = 0x1, r0 } // Form largest normal significand 0xffffffffffffffff @@ -141,7 +141,7 @@ GLOBAL_LIBM_ENTRY(nexttowardl) // It decreases (p13 set) if xy and x>=0 { .mfi setf.sig FR_sden_sig = GR_sden_sig - fclass.m p8,p0 = f8, 0xc3 + fclass.m p8,p0 = f8, 0xc3 (p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask } // Move smallest normal exp to fp regs @@ -171,7 +171,7 @@ GLOBAL_LIBM_ENTRY(nexttowardl) // Form new exponent in case result exponent needs incrementing or decrementing { .mfi setf.exp FR_new_exp = GR_exp - fclass.m p9,p0 = f9, 0xc3 + fclass.m p9,p0 = f9, 0xc3 (p12) add GR_exp1 = 1, GR_exp } { .mib @@ -189,7 +189,7 @@ GLOBAL_LIBM_ENTRY(nexttowardl) } { .mfb setf.exp FR_den_exp = GR_min_pexp -(p8) fma.s0 f8 = f8,f1,f9 +(p8) fma.s0 f8 = f8,f1,f9 (p8) br.ret.spnt b0 ;; // Exit if x=nan } @@ -197,12 +197,12 @@ GLOBAL_LIBM_ENTRY(nexttowardl) // Is x=inf? 
{ .mfi setf.exp FR_exp1 = GR_exp1 - fclass.m p6,p0 = f8, 0x23 + fclass.m p6,p0 = f8, 0x23 addl GR_exp_mask = 0x1ffff, r0 } { .mfb setf.sig FR_snorm_sig = GR_snorm_sig -(p9) fma.s0 f8 = f8,f1,f9 +(p9) fma.s0 f8 = f8,f1,f9 (p9) br.ret.spnt b0 ;; // Exit if y=nan } @@ -211,14 +211,14 @@ GLOBAL_LIBM_ENTRY(nexttowardl) { .mfb setf.sig FR_lden_sig = GR_lden_sig mov FR_save_f8 = f8 -(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 +(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 } // Mask off the sign to get x_exp { .mfb and GR_x_exp = GR_exp_mask, GR_exp nop.f 999 -(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf +(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf } // Check 5 special cases when significand rolls over: @@ -243,7 +243,7 @@ GLOBAL_LIBM_ENTRY(nexttowardl) { .mmi (p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp (p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 -(p9) cmp.le.unc p9,p8 = GR_x_exp, GR_min_pexp +(p9) cmp.le.unc p9,p8 = GR_x_exp, GR_min_pexp ;; } @@ -338,7 +338,7 @@ NEXT_UNDERFLOW_TO_ZERO: br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXT_INF: +NEXT_INF: // Here if f8 is +- infinity // INF // if f8 is +inf, no matter what y is return largest long double @@ -353,16 +353,16 @@ NEXT_INF: { .mfb nop.m 999 - fmerge.s f8 = f8,FR_lnorm - br.ret.sptk b0 ;; + fmerge.s f8 = f8,FR_lnorm + br.ret.sptk b0 ;; } -NEXT_ZERO: +NEXT_ZERO: // Here if f8 is +- zero // ZERO -// if f8 is zero and y is +, return + smallest long double denormal -// if f8 is zero and y is -, return - smallest long double denormal +// if f8 is zero and y is +, return + smallest long double denormal +// if f8 is zero and y is -, return - smallest long double denormal { .mfi nop.m 999 @@ -380,7 +380,7 @@ NEXT_ZERO: // Add correct sign from direction arg { .mfi nop.m 999 - fmerge.s f8 = f9,FR_sden + fmerge.s f8 = f9,FR_sden nop.i 999 ;; } @@ -391,7 +391,7 @@ NEXT_ZERO: br.cond.sptk NEXT_UNDERFLOW ;; } -NEXT_UNDERFLOW: +NEXT_UNDERFLOW: // Here if result is a denorm, or input is finite and result is zero // Call error 
support to report possible range error { .mib @@ -401,7 +401,7 @@ NEXT_UNDERFLOW: } ;; -NEXT_OVERFLOW: +NEXT_OVERFLOW: // Here if input is finite, but result will be infinite // Use frcpa to generate infinity of correct sign // Call error support to report possible range error @@ -460,7 +460,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mib stfe [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + nop.b 0 } { .mib stfe [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack diff --git a/sysdeps/ia64/fpu/s_round.S b/sysdeps/ia64/fpu/s_round.S index ed5ffae..8211875 100644 --- a/sysdeps/ia64/fpu/s_round.S +++ b/sysdeps/ia64/fpu/s_round.S @@ -159,7 +159,7 @@ ROUND_COMMON: { .mmi cmp.lt p6,p0 = rExp, rExpHalf // Is |x| < 0.5? cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^52? - cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5? + cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5? } ;; diff --git a/sysdeps/ia64/fpu/s_roundf.S b/sysdeps/ia64/fpu/s_roundf.S index 7cec860..c0351b4 100644 --- a/sysdeps/ia64/fpu/s_roundf.S +++ b/sysdeps/ia64/fpu/s_roundf.S @@ -159,7 +159,7 @@ ROUND_COMMON: { .mmi cmp.lt p6,p0 = rExp, rExpHalf // Is |x| < 0.5? cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^23? - cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5? + cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5? } ;; diff --git a/sysdeps/ia64/fpu/s_roundl.S b/sysdeps/ia64/fpu/s_roundl.S index da6cbfe..355eaf5 100644 --- a/sysdeps/ia64/fpu/s_roundl.S +++ b/sysdeps/ia64/fpu/s_roundl.S @@ -159,7 +159,7 @@ ROUND_COMMON: { .mmi cmp.lt p6,p0 = rExp, rExpHalf // Is |x| < 0.5? cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^63? - cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5? + cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5? 
} ;; diff --git a/sysdeps/ia64/fpu/s_scalblnf.c b/sysdeps/ia64/fpu/s_scalblnf.c index 2fa51ba..ed92bfd 100644 --- a/sysdeps/ia64/fpu/s_scalblnf.c +++ b/sysdeps/ia64/fpu/s_scalblnf.c @@ -23,12 +23,12 @@ // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS @@ -48,11 +48,11 @@ float __libm_scalblnf(float, long int, int); float scalblnf(float x, long int n) { -#ifdef SIZE_LONG_INT_64 - return __libm_scalblnf(x,n,1); +#ifdef SIZE_LONG_INT_64 + return __libm_scalblnf(x,n,1); #else -#ifdef SIZE_LONG_INT_32 +#ifdef SIZE_LONG_INT_32 return __libm_scalblnf(x,n,0); #endif diff --git a/sysdeps/ia64/fpu/s_scalbn.c b/sysdeps/ia64/fpu/s_scalbn.c index 1f57141..25cfad3 100644 --- a/sysdeps/ia64/fpu/s_scalbn.c +++ b/sysdeps/ia64/fpu/s_scalbn.c @@ -23,12 +23,12 @@ // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS @@ -48,11 +48,11 @@ double __libm_scalbn(double, int, int); double scalbn(double x, int n) { -#ifdef SIZE_INT_64 - return __libm_scalbn(x,n,1); +#ifdef SIZE_INT_64 + return __libm_scalbn(x,n,1); #else -#ifdef SIZE_INT_32 +#ifdef SIZE_INT_32 return __libm_scalbn(x,n,0); #endif diff --git a/sysdeps/ia64/fpu/s_scalbnf.c b/sysdeps/ia64/fpu/s_scalbnf.c index 97c06da..deab018 100644 --- a/sysdeps/ia64/fpu/s_scalbnf.c +++ b/sysdeps/ia64/fpu/s_scalbnf.c @@ -23,12 +23,12 @@ // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS @@ -48,11 +48,11 @@ float __libm_scalbnf(float, int, int); float scalbnf(float x, int n) { -#ifdef SIZE_INT_64 - return __libm_scalbnf(x,n,1); +#ifdef SIZE_INT_64 + return __libm_scalbnf(x,n,1); #else -#ifdef SIZE_INT_32 +#ifdef SIZE_INT_32 return __libm_scalbnf(x,n,0); #endif diff --git a/sysdeps/ia64/fpu/s_scalbnl.c b/sysdeps/ia64/fpu/s_scalbnl.c index d7a81df..cfd078b 100644 --- a/sysdeps/ia64/fpu/s_scalbnl.c +++ b/sysdeps/ia64/fpu/s_scalbnl.c @@ -23,12 +23,12 @@ // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS @@ -48,11 +48,11 @@ long double __libm_scalbnl(long double, int, int); long double scalbnl(long double x, int n) { -#ifdef SIZE_INT_64 - return __libm_scalbnl(x,n,1); +#ifdef SIZE_INT_64 + return __libm_scalbnl(x,n,1); #else -#ifdef SIZE_INT_32 +#ifdef SIZE_INT_32 return __libm_scalbnl(x,n,0); #endif diff --git a/sysdeps/ia64/fpu/s_signbit.S b/sysdeps/ia64/fpu/s_signbit.S index 5703080..a6ed929 100644 --- a/sysdeps/ia64/fpu/s_signbit.S +++ b/sysdeps/ia64/fpu/s_signbit.S @@ -30,7 +30,7 @@ ENTRY (__signbit) (p6) mov ret0 = 1 (p7) mov ret0 = 0 br.ret.sptk.many rp -} +} END (__signbit) strong_alias (__signbit, __signbitf) diff --git a/sysdeps/ia64/fpu/s_significand.S b/sysdeps/ia64/fpu/s_significand.S index 720e043..c3f921f 100644 --- a/sysdeps/ia64/fpu/s_significand.S +++ b/sysdeps/ia64/fpu/s_significand.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -54,11 +54,11 @@ // If x = sig * 2**n with 1 <= sig < 2 // significand returns sig // -// predicate registers used: +// predicate registers used: // p6, p7 // -// floating-point registers used: -// f8, f9, f10 +// floating-point registers used: +// f8, f9, f10 .section .text GLOBAL_LIBM_ENTRY(significand) @@ -69,12 +69,12 @@ GLOBAL_LIBM_ENTRY(significand) // f10 gets f8(sign) with f1(exp,significand) { .mfi nop.m 999 - fmerge.s f10 = f8,f1 + fmerge.s f10 = f8,f1 nop.i 999 } { .mfi nop.m 999 - fnorm.s0 f9 = f8 + fnorm.s0 f9 = f8 nop.i 999 ;; } @@ -91,7 +91,7 @@ GLOBAL_LIBM_ENTRY(significand) // return sign(f8) exp(f8) significand(f8), normalized. 
{ .mfi nop.m 999 - fclass.m.unc p0,p6 = f8, 0xe7 + fclass.m.unc p0,p6 = f8, 0xe7 nop.i 999 ;; } @@ -109,7 +109,7 @@ GLOBAL_LIBM_ENTRY(significand) { .mfb nop.m 999 - fnorm.d.s0 f8 = f8 + fnorm.d.s0 f8 = f8 br.ret.sptk b0 ;; } diff --git a/sysdeps/ia64/fpu/s_significandf.S b/sysdeps/ia64/fpu/s_significandf.S index 5c8299b..5ae4c74 100644 --- a/sysdeps/ia64/fpu/s_significandf.S +++ b/sysdeps/ia64/fpu/s_significandf.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -53,11 +53,11 @@ // If x = sig * 2**n with 1 <= sig < 2 // significandf returns sig // -// predicate registers used: +// predicate registers used: // p6, p7 // -// floating-point registers used: -// f8, f9, f10 +// floating-point registers used: +// f8, f9, f10 .section .text GLOBAL_LIBM_ENTRY(significandf) @@ -68,12 +68,12 @@ GLOBAL_LIBM_ENTRY(significandf) // f10 gets f8(sign) with f1(exp,significand) { .mfi nop.m 999 - fmerge.s f10 = f8,f1 + fmerge.s f10 = f8,f1 nop.i 999 } { .mfi nop.m 999 - fnorm.s0 f9 = f8 + fnorm.s0 f9 = f8 nop.i 999 ;; } @@ -90,7 +90,7 @@ GLOBAL_LIBM_ENTRY(significandf) // return sign(f8) exp(f8) significand(f8), normalized. { .mfi nop.m 999 - fclass.m.unc p0,p6 = f8, 0xe7 + fclass.m.unc p0,p6 = f8, 0xe7 nop.i 999 ;; } diff --git a/sysdeps/ia64/fpu/s_significandl.S b/sysdeps/ia64/fpu/s_significandl.S index f62df43..e30c83c 100644 --- a/sysdeps/ia64/fpu/s_significandl.S +++ b/sysdeps/ia64/fpu/s_significandl.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History @@ -54,11 +54,11 @@ // If x = sig * 2**n with 1 <= sig < 2 // significandl returns sig // -// predicate registers used: +// predicate registers used: // p6, p7 // -// floating-point registers used: -// f8, f9, f10 +// floating-point registers used: +// f8, f9, f10 .section .text GLOBAL_LIBM_ENTRY(significandl) @@ -69,12 +69,12 @@ GLOBAL_LIBM_ENTRY(significandl) // f10 gets f8(sign) with f1(exp,significand) { .mfi nop.m 999 - fmerge.s f10 = f8,f1 + fmerge.s f10 = f8,f1 nop.i 999 } { .mfi nop.m 999 - fnorm.s0 f9 = f8 + fnorm.s0 f9 = f8 nop.i 999 ;; } @@ -84,14 +84,14 @@ GLOBAL_LIBM_ENTRY(significandl) fclass.m.unc p7,p0 = f8, 0x0b nop.i 999 ;; } - + // p6 = TRUE ==> x is not (nan,inf,0) // return sign(f8) exp(f1) significand(f8) // else x is (nan,inf,0) // return sign(f8) exp(f8) significand(f8), normalized. { .mfi nop.m 999 - fclass.m.unc p0,p6 = f8, 0xe7 + fclass.m.unc p0,p6 = f8, 0xe7 nop.i 999 ;; } @@ -131,7 +131,7 @@ SIGNIFICAND_DENORM: // This will be the final result unless x double-extended denormal { .mfi nop.m 999 - fnorm.s0 f8 = f8 + fnorm.s0 f8 = f8 nop.i 999 ;; } @@ -146,7 +146,7 @@ SIGNIFICAND_DENORM: // Final normalization if x double-extended denorm { .mfb nop.m 999 -(p7) fnorm.s0 f8 = f8 +(p7) fnorm.s0 f8 = f8 br.ret.sptk b0 ;; } diff --git a/sysdeps/ia64/fpu/s_tan.S b/sysdeps/ia64/fpu/s_tan.S index a2f80c8..0a13d60 100644 --- a/sysdeps/ia64/fpu/s_tan.S +++ b/sysdeps/ia64/fpu/s_tan.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History diff --git a/sysdeps/ia64/fpu/s_tanf.S b/sysdeps/ia64/fpu/s_tanf.S index 193d756..f14cdff 100644 --- a/sysdeps/ia64/fpu/s_tanf.S +++ b/sysdeps/ia64/fpu/s_tanf.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History diff --git a/sysdeps/ia64/fpu/s_tanh.S b/sysdeps/ia64/fpu/s_tanh.S index 9adbc9c..0a0b807 100644 --- a/sysdeps/ia64/fpu/s_tanh.S +++ b/sysdeps/ia64/fpu/s_tanh.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -71,10 +71,10 @@ // // 3. Main path: 0.25 <= |x| < 19.0625 // For several ranges of 0.25 <= |x| < 19.0625 -// Return tanh(x) = sign(x)*(A0 + y*A1 + y^2*A2 + +// Return tanh(x) = sign(x)*(A0 + y*A1 + y^2*A2 + // + y^3*A3 + ... + y^19*A19) // where y = (|x|/a) - b -// +// // For each range there is particular set of coefficients. // Below is the list of ranges: // 1/4 <= |x| < 1/2 a = 0.25, b = 1.0 @@ -87,28 +87,28 @@ // 8.0 <= |x| < 13.0 a = 8.0, b = 1.0 // 13.0 <= |x| < 16.0 a = 8.0, b = 2.0 // 16.0 <= |x| < 19.0625 a = 16.0, b = 1.0 -// ( [3.25;4.0], [6.5;8.0], [13.0;16.0] subranges separated +// ( [3.25;4.0], [6.5;8.0], [13.0;16.0] subranges separated // for monotonicity issues resolve ) // -// 4. Saturation path: 19.0625 <= |x| < +INF +// 4. 
Saturation path: 19.0625 <= |x| < +INF // Return tanh(x) = sign(x)*(1.0 - tiny_value) // (tiny_value ~ 2^(-63)) // // Registers used //============================================================================== -// Floating Point registers used: +// Floating Point registers used: // f8 = input, output // f32 -> f64 // -// General registers used: +// General registers used: // r32 -> r51, r2, r3 // // Predicate registers used: // p6, p8, p10, p11, p12, p14, p15 // p6 arg is zero, denormal or special IEEE -// p8 to filter out case when signd(x) > 1.625 +// p8 to filter out case when signd(x) > 1.625 // p10 to filter out case when |x| < 0.25 -// p11 to filter out case when signd(x) <= 1.625 +// p11 to filter out case when signd(x) <= 1.625 // p12 to filter out case when |x| >= 19.0625 // p14 set to 1 for positive x // p15 set to 1 for negative x @@ -169,7 +169,7 @@ fTSqr = f58 fTQuadr = f59 fTDeg3 = f60 fTDeg7 = f61 -fArgAbsNormSgn = f62 +fArgAbsNormSgn = f62 fTQuadrSgn = f63 fTwo = f64 @@ -184,7 +184,7 @@ LOCAL_OBJECT_START(tanh_data) // Main path coefficients: // Coefficients ##0..15 ("main" coefficient tables) -// Polynomial coefficients for the tanh(x), 0.25 <= |x| < 0.5 +// Polynomial coefficients for the tanh(x), 0.25 <= |x| < 0.5 data8 0xE9D218BC9A3FB55A, 0x00003FC7 //A19 data8 0xC8C0D38687F36EBA, 0x00003FCE //A18 data8 0xA2663E519FAC8A43, 0x0000BFD2 //A17 @@ -202,7 +202,7 @@ data8 0x8E1C15876AA589AD, 0x0000BFEF //A6 data8 0x942226246A8C2A86, 0x00003FF1 //A5 data8 0x8F06D9FF7DB47261, 0x00003FF4 //A4 // -// Polynomial coefficients for the tanh(x), 0.5 <= |x| < 1.0 +// Polynomial coefficients for the tanh(x), 0.5 <= |x| < 1.0 data8 0xC4A7B8FB672A8520, 0x00003FDC //A19 data8 0xA20724B847E13499, 0x0000BFE0 //A18 data8 0xE17DB53F02E4D340, 0x00003FE2 //A17 @@ -220,7 +220,7 @@ data8 0xCE63E8FA6B96480B, 0x0000BFF4 //A6 data8 0xDF017BE0D4FE45D8, 0x0000BFF4 //A5 data8 0xA8A0C6E2226DF3CD, 0x00003FF8 //A4 // -// Polynomial coefficients for the tanh(x), 1.0 <= |x| < 2.0 
+// Polynomial coefficients for the tanh(x), 1.0 <= |x| < 2.0 data8 0x8E89D2EBFDAA160B, 0x00003FE9 //A19 data8 0xDD9226310A272046, 0x0000BFEC //A18 data8 0xA038042D28B0D665, 0x00003FEF //A17 @@ -238,7 +238,7 @@ data8 0x9B02FE0DAF42C08F, 0x00003FF9 //A6 data8 0xBDACE06F531D9491, 0x0000BFFA //A5 data8 0xE3048AD1DB2F648C, 0x00003FF9 //A4 // -// Polynomial coefficients for the tanh(x), 2.0 <= |x| < 3.25 +// Polynomial coefficients for the tanh(x), 2.0 <= |x| < 3.25 data8 0x856EC3B0330A385A, 0x00003FEB //A19 data8 0xC641D69DAE2D429C, 0x0000BFF2 //A18 data8 0xC683EB0BE1343FFF, 0x00003FF5 //A17 @@ -256,7 +256,7 @@ data8 0xA6CAAD4A3E31A7D5, 0x0000BFF8 //A6 data8 0x9CABD76D1D5C3878, 0x00003FFC //A5 data8 0x92906D077941CAA9, 0x0000BFFD //A4 // -// Polynomial coefficients for the tanh(x), 4.0 <= |x| < 6.5 +// Polynomial coefficients for the tanh(x), 4.0 <= |x| < 6.5 data8 0x9232D19F71709AC9, 0x0000BFF5 //A19 data8 0x819E31323F5DD3F8, 0x00003FF8 //A18 data8 0xDA8E1CDB8D23DC29, 0x0000BFF9 //A17 @@ -274,7 +274,7 @@ data8 0xF4CA0B968AF2DDE2, 0x0000BFFC //A6 data8 0xB99874B482BD17EE, 0x00003FFC //A5 data8 0xE93FB2F99431DC1D, 0x0000BFFB //A4 // -// Polynomial coefficients for the tanh(x), 8.0 <= |x| < 13.0 +// Polynomial coefficients for the tanh(x), 8.0 <= |x| < 13.0 data8 0xAAA9EB7EADA85CEC, 0x00003FF5 //A19 data8 0x980C80EE05A6BE78, 0x0000BFF8 //A18 data8 0x818DA9F5396390A5, 0x00003FFA //A17 @@ -292,7 +292,7 @@ data8 0xABD91DCE40D5EECB, 0x0000BFF7 //A6 data8 0x80E375C1B847B72F, 0x00003FF6 //A5 data8 0xA11C7DD978CF700A, 0x0000BFF4 //A4 // -// Polynomial coefficients for the tanh(x), 16.0 <= |x| < 19.0625 +// Polynomial coefficients for the tanh(x), 16.0 <= |x| < 19.0625 data8 0xE29D17C510F86F6B, 0x00003FF3 //A19 data8 0x88FE52EB39A3A98C, 0x0000BFF5 //A18 data8 0xA406547E50360693, 0x00003FF5 //A17 @@ -311,7 +311,7 @@ data8 0xF358B2C46F10CBAF, 0x00003FE3 //A5 data8 0x98176FD06229A385, 0x0000BFE1 //A4 // // Binary subranges -// Polynomial coefficients for the tanh(x), 3.25 <= |x| < 
4.0 +// Polynomial coefficients for the tanh(x), 3.25 <= |x| < 4.0 data8 0xEF2EE841288F6706, 0x00003FE9 //A19 data8 0xE65D5B74B85F82A6, 0x00003FEB //A18 data8 0xE495FC21E42A79FF, 0x00003FEA //A17 @@ -329,7 +329,7 @@ data8 0xF4CA0661307243C7, 0x0000BFF6 //A6 data8 0xB998746D57061F74, 0x00003FF7 //A5 data8 0xE93FB2F482327C19, 0x0000BFF7 //A4 // -// Polynomial coefficients for the tanh(x), 6.5 <= |x| < 8.0 +// Polynomial coefficients for the tanh(x), 6.5 <= |x| < 8.0 data8 0xEB189B71ADC40BE2, 0x00003FEA //A19 data8 0xA60B46F9FF6DC2DF, 0x00003FEA //A18 data8 0xBB061CDD9F368B9D, 0x00003FEC //A17 @@ -347,7 +347,7 @@ data8 0xABD9E63CA575B950, 0x0000BFF1 //A6 data8 0x80E38B18E8D0F460, 0x00003FF1 //A5 data8 0xA11C80E20AAFDD3C, 0x0000BFF0 //A4 // -// Polynomial coefficients for the tanh(x), 13.0 <= |x| < 16.0 +// Polynomial coefficients for the tanh(x), 13.0 <= |x| < 16.0 data8 0xBECD0AF7E22E5594, 0x00003FE9 //A19 data8 0xE2834E2D68C1128C, 0x00003FEA //A18 data8 0x97B117611B317379, 0x00003FEB //A17 @@ -366,19 +366,19 @@ data8 0xF358D8A7FC012D5D, 0x00003FDE //A5 data8 0x98176E2309B7C73A, 0x0000BFDD //A4 // // Coefficients ##16..19 ("tail" coefficient tables) -// Polynomial coefficients for the tanh(x), 0.25 <= |x| < 0.5 +// Polynomial coefficients for the tanh(x), 0.25 <= |x| < 0.5 data8 0x838F209ABB9BA7B3, 0x0000BFF7 //A3 data8 0xEBC0AC78DA4FC500, 0x0000BFF8 //A2 data8 0xF0A4D02960B60E69, 0x00003FFC //A1 data8 0xFACBF534D0E42F8A, 0x00003FFC //A0 // -// Polynomial coefficients for the tanh(x), 0.5 <= |x| < 1.0 +// Polynomial coefficients for the tanh(x), 0.5 <= |x| < 1.0 data8 0xC0ECBDC0A0D133A6, 0x0000BFF8 //A3 data8 0xBA13A076BF8E812F, 0x0000BFFB //A2 data8 0xC954A37D1A1CA070, 0x00003FFD //A1 data8 0xEC9A9EBAB4579B29, 0x00003FFD //A0 // -// Polynomial coefficients for the tanh(x), 1.0 <= |x| < 2.0 +// Polynomial coefficients for the tanh(x), 1.0 <= |x| < 2.0 data8 0xD42E9175A6EA1397, 0x00003FFB //A3 data8 0xA3C361378A55CF56, 0x0000BFFD //A2 data8 0xD706E07CC8622983, 
0x00003FFD //A1 @@ -427,7 +427,7 @@ data8 0xE42327B9B0D7202F, 0x0000BFD8 //A2 data8 0xE42327BB13076BD6, 0x00003FD5 //A1 data8 0xFFFFFFFFFFF8DEE7, 0x00003FFE //A0 // -// Polynomial coefficients for the tanh(x), 0.0 <= |x| < 0.25 +// Polynomial coefficients for the tanh(x), 0.0 <= |x| < 0.25 // ('tanh_near_zero' path) data8 0xBF2BA5D26E479D0C //A9 data8 0x3F4336D96F81EE26 //A8 @@ -441,7 +441,7 @@ data8 0x3FC1111111111108 //A2 // // 1.0 - 2^(-63) // ('tanh_saturation' path) -data8 0xFFFFFFFFFFFFFFFF, 0x00003FFE +data8 0xFFFFFFFFFFFFFFFF, 0x00003FFE LOCAL_OBJECT_END(tanh_data) // CAUTION: The order of table coefficients shouldn't be changed! @@ -461,8 +461,8 @@ GLOBAL_LIBM_ENTRY(tanh) };; { .mfi - getf.d rArg = f8 // x in GR - fclass.m p6,p0 = f8, 0xEF // Filter 0, denormals and specials + getf.d rArg = f8 // x in GR + fclass.m p6,p0 = f8, 0xEF // Filter 0, denormals and specials // 0xEF = @qnan|@snan|@pos|@neg|@zero|@unorm|@inf shl rArgSgnd = rArgSgnd, 52 // mask for exponent } @@ -493,11 +493,11 @@ GLOBAL_LIBM_ENTRY(tanh) nop.f 0 (p6) br.cond.spnt _tanh_spec // Branch to zero, denorm & specs };; - + { .mfi and rShiftedArgMasked = rShiftedArg, rMask // bias of x << 8 fmerge.s fArgAbs = f1, f8 // |x| - shr rShiftedAbsArg = rAbsArg, 44 // Select only necessary + shr rShiftedAbsArg = rAbsArg, 44 // Select only necessary // bits of absolute arg } { .mfi @@ -509,28 +509,28 @@ GLOBAL_LIBM_ENTRY(tanh) { .mfi sub rIndex = rShiftedArgMasked, rBias // index << 8 - nop.f 0 + nop.f 0 cmp.lt p10, p0 = rShiftedArgMasked, rBias // p10=1 if |x|<0.25 } { .mfb (p8) cmp.gt p8, p11 = rAbsArg, rTwo // If arg is greater than 2.0? 
// (then we should use binary subranges) - nop.f 0 + nop.f 0 (p10) br.cond.spnt tanh_near_zero // branch out if |x| < 0.25 };; .pred.rel "mutex",p8,p11 { .mfi -(p8) add rIndex = 0x400, rIndex // Make pointer to binary +(p8) add rIndex = 0x400, rIndex // Make pointer to binary // subranges (p11) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f1 // |x|/b - 1.0 addl rSaturation = 0x40331, r0 // shifted bits of 19.0625 } { .mfi - nop.m 0 + nop.m 0 (p8) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, fTwo // |x|/b - 2.0 // this is only for binary subranges [3.25;4], [6.5;8], [13.0;16] - nop.i 0 + nop.i 0 } ;; @@ -544,7 +544,7 @@ GLOBAL_LIBM_ENTRY(tanh) adds rCoeffAddr2 = 16, rCoeffAddr1 // Shifted pointer to coeffs fmerge.s fSignumX = f8, f1 // signum(x) nop.i 0 -} +} { .mfb cmp.le p12, p0 = rSaturation, rShiftedAbsArg // |x|>=19.0625? nop.f 0 @@ -595,15 +595,15 @@ GLOBAL_LIBM_ENTRY(tanh) {.mfi ldfe fA12 = [rCoeffAddr2], 32 // Load A12 nop.f 0 - cmp.lt p15, p14 = rArg, r0 // Arg positive (p14) + cmp.lt p15, p14 = rArg, r0 // Arg positive (p14) // or negative (p15)? 
};; {.mfi ldfe fA11 = [rCoeffAddr1], 32 // Load A11 nop.f 0 - add rCoeffAddr4 = rCoeffAddr3, rCoeffAddr4 // shifted "tail" - // coeffs to load + add rCoeffAddr4 = rCoeffAddr3, rCoeffAddr4 // shifted "tail" + // coeffs to load } {.mfi ldfe fA10 = [rCoeffAddr2], 32 // Load A10 @@ -721,7 +721,7 @@ GLOBAL_LIBM_ENTRY(tanh) { .mfi nop.m 0 - fma.s1 fA15 = fA15, fTSqr, fA13 // Polynomial + fma.s1 fA15 = fA15, fTSqr, fA13 // Polynomial nop.i 0 } { .mfi @@ -743,19 +743,19 @@ GLOBAL_LIBM_ENTRY(tanh) };; { .mfi - nop.m 0 + nop.m 0 fma.s1 fA7 = fA7, fTSqr, fA5 // Polynomial nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 fRes = fRes, fTQuadr, fA15 // Polynomial nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 fA4 = fA4, fTSqr, fA2 // Polynomial nop.i 0 };; @@ -767,7 +767,7 @@ GLOBAL_LIBM_ENTRY(tanh) };; { .mfi - nop.m 0 + nop.m 0 fma.s1 fA4 = fA7, fTDeg3, fA4 // Polynomial nop.i 0 };; @@ -797,7 +797,7 @@ GLOBAL_LIBM_ENTRY(tanh) tanh_near_zero: { .mfi adds rCoeffAddr1 = 0xC80, rDataPtr // address of A9 - fma.s0 fTSqr = fArgSqr, fArgSqr, f0 // x^4 + fma.s0 fTSqr = fArgSqr, fArgSqr, f0 // x^4 nop.i 0 } { .mfi @@ -931,51 +931,51 @@ tanh_saturation: - + // 0, denormals and special IEEE numbers path ///////////////////////////////// _tanh_spec: -{ .mfi - cmp.lt p15, p14 = rArg, r0 // Is arg negative (p15) +{ .mfi + cmp.lt p15, p14 = rArg, r0 // Is arg negative (p15) // or positive p14) fclass.m p6,p0 = f8, 0x23 // To filter infinities - // 0x23 = @pos|@neg|@inf + // 0x23 = @pos|@neg|@inf nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fclass.m p7,p0 = f8, 0xC7 // To filter NaNs & Zeros // 0xC7 = @pos|@neg|@zero|@qnan|@snan nop.i 0 };; -{ .mfb +{ .mfb nop.m 0 -(p6) fmerge.s f8 = f8, f1 // +/-1 for INF args +(p6) fmerge.s f8 = f8, f1 // +/-1 for INF args (p6) br.ret.spnt b0 // exit for x = INF };; -{ .mfb +{ .mfb nop.m 0 -(p7) fma.d.s0 f8 = f8, f1, f8 // +/-0 for 0 args +(p7) fma.d.s0 f8 = f8, f1, f8 // +/-0 for 0 args // and NaNs for NaNs (p7) br.ret.spnt b0 // exit for x = NaN or +/-0 };; -{ .mfi +{ 
.mfi nop.m 0 fnorm.s0 f8 = f8 // Normalize arg nop.i 0 };; .pred.rel "mutex",p14,p15 -{ .mfi +{ .mfi nop.m 0 (p14) fnma.d.s0 f8 = f8, f8, f8 // res = r-r^2 nop.i 0 } -{ .mfb +{ .mfb nop.m 0 (p15) fma.d.s0 f8 = f8, f8, f8 // res = r+r^2 br.ret.sptk b0 // 0, denormals, specials return diff --git a/sysdeps/ia64/fpu/s_tanhf.S b/sysdeps/ia64/fpu/s_tanhf.S index e4e91cf..4749477 100644 --- a/sysdeps/ia64/fpu/s_tanhf.S +++ b/sysdeps/ia64/fpu/s_tanhf.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -85,7 +85,7 @@ // 6. |x| = INF // Return tanhf(x) = sign(x) * 1.0 // -// 7. x = [S,Q]NaN +// 7. x = [S,Q]NaN // Return tanhf(x) = QNaN // // 8. x is positive denormal @@ -96,11 +96,11 @@ // // Registers used //============================================================== -// Floating Point registers used: +// Floating Point registers used: // f8, input // f32 -> f59 -// General registers used: +// General registers used: // r32 -> r46, r2, r3 // Predicate registers used: @@ -220,7 +220,7 @@ data8 0xC0BE48CFADE2431E // D0 data8 0x4090E74249760FDD // D1 data8 0xC04B6F537FCF2F1E // D2 data8 0x3E0DCD879C91ADEA // B0 -// Polynomial coefficients for the tanh(x), -0.3125 < x < 0.3125 +// Polynomial coefficients for the tanh(x), -0.3125 < x < 0.3125 data8 0xBFD555551E8245B7 // A0 data8 0x3FC110E63F52E689 // A1 data8 0xBFAB8CD6A5B7BAFA // A2 @@ -250,7 +250,7 @@ data8 0xBFB1DEA49A831CBC // A0 data8 0x3FFA729FC7085674 // A1 data8 0xBFF2F44D923A8FA4 // A2 data8 0x3FE092FC5712227E // A3 -// Polynomial coefficients for the tanh(x), 8.0 <= |x| <= 9.125 +// Polynomial coefficients for the tanh(x), 8.0 <= |x| <= 9.125 data8 0x3FEFFF5769EE3041 // A0 data8 0x3EFBBF148D850891 // A1 data8 0xBEC86BCEF0F5C2FE // A2 @@ -275,10 +275,10 @@ GLOBAL_LIBM_ENTRY(tanhf) ;; { .mfi - getf.s rArg = f8 // x in GR + getf.s rArg = f8 // x in GR fclass.m p7,p0 = f8, 0x0b // is x denormal ? // sign bit and 2 most bits in significand - shl rMask = rMask, 20 + shl rMask = rMask, 20 } { .mfi ld8 rDataPtr = [rDataPtr] @@ -317,7 +317,7 @@ GLOBAL_LIBM_ENTRY(tanhf) shr rOffset2 = rOffset2, 21 } { .mfi - cmp.lt p10, p8 = rAbsArg, rBound // |x| < 0.3125? + cmp.lt p10, p8 = rAbsArg, rBound // |x| < 0.3125? 
nop.f 0 adds rCoeffAddr3 = 16, rDataPtr } @@ -338,8 +338,8 @@ GLOBAL_LIBM_ENTRY(tanhf) { .mfi shladd rCoeffAddr1 = rBias, 4, rDataPtr fma.s1 fArg3Sgn = fArgSqr, f8, f0 // sign(x)*|x|^3 - // is |x| < 9.125? - cmp.lt p11, p12 = rAbsArg, rSaturation + // is |x| < 9.125? + cmp.lt p11, p12 = rAbsArg, rSaturation } { .mfi shladd rCoeffAddr3 = rBias, 4, rCoeffAddr3 @@ -351,7 +351,7 @@ GLOBAL_LIBM_ENTRY(tanhf) { .mfi (p11) ldfpd fC0, fC1 = [rCoeffAddr1] (p9) fmerge.s f8 = f8,f1 // +/- inf -(p12) adds rDataPtr = 544, rDataPtr +(p12) adds rDataPtr = 544, rDataPtr } { .mfb (p11) ldfpd fC2, fC3 = [rCoeffAddr3], 16 @@ -404,7 +404,7 @@ GLOBAL_LIBM_ENTRY(tanhf) { .mfb nop.m 0 fma.s1 fArg6Sgn = fArg3, fArg3Sgn, f0 // sign(x)*|x|^6 -(p13) br.cond.spnt tanhf_close_to_saturation +(p13) br.cond.spnt tanhf_close_to_saturation } ;; @@ -440,7 +440,7 @@ GLOBAL_LIBM_ENTRY(tanhf) { .mfi nop.m 0 - fma.s1 fPolATmp = fA3, fAbsArg, fA2 // A3*|x| + A2 + fma.s1 fPolATmp = fA3, fAbsArg, fA2 // A3*|x| + A2 nop.i 0 } { .mfi @@ -452,7 +452,7 @@ GLOBAL_LIBM_ENTRY(tanhf) { .mfi nop.m 0 // C3*|x|^3 + C2*x^2 + C1*|x| + C0 - fma.s1 fPolC = fPolC, fArgSqr, fPolCTmp + fma.s1 fPolC = fPolC, fArgSqr, fPolCTmp nop.i 0 } ;; @@ -460,31 +460,31 @@ GLOBAL_LIBM_ENTRY(tanhf) { .mfi nop.m 0 // PolD = sign(x)*(|x|^7 + D2*x^6 + D1*|x|^5 + D0*x^4) - fma.d.s1 fPolD = fPolD, fArg4Sgn, fPolDTmp + fma.d.s1 fPolD = fPolD, fArg4Sgn, fPolDTmp nop.i 0 } ;; { .mfi nop.m 0 - // PolA = A3|x|^3 + A2*x^2 + A1*|x| + A0 - fma.d.s1 fPolA = fPolATmp, fArgSqr, fPolA + // PolA = A3|x|^3 + A2*x^2 + A1*|x| + A0 + fma.d.s1 fPolA = fPolATmp, fArgSqr, fPolA nop.i 0 } -;; +;; { .mfi nop.m 0 - // PolC = B0*x^4 + C3*|x|^3 + C2*|x|^2 + C1*|x| + C0 - fma.d.s1 fPolC = fPolC, f1, fB0 + // PolC = B0*x^4 + C3*|x|^3 + C2*|x|^2 + C1*|x| + C0 + fma.d.s1 fPolC = fPolC, f1, fB0 nop.i 0 } -;; +;; { .mfi nop.m 0 (p14) fma.s.s0 f8 = fPolC, fPolD, fPolA // for positive x - nop.i 0 + nop.i 0 } { .mfb nop.m 0 @@ -528,7 +528,7 @@ tanhf_saturation: br.ret.sptk b0 
// Exit for 9.125 <=|x|< +inf } ;; - + // Here if 8.0 <= |x| < 9.125 tanhf_close_to_saturation: { .mfi @@ -540,7 +540,7 @@ tanhf_close_to_saturation: nop.m 0 fma.s1 fPolA = fA3, fAbsArg, fA2 // A3*|x| + A2 nop.i 0 -} +} ;; .pred.rel "mutex", p14, p15 @@ -548,7 +548,7 @@ tanhf_close_to_saturation: nop.m 0 // for positive x (p14) fma.s.s0 f8 = fPolA, fArgSqr, fPolATmp - nop.i 0 + nop.i 0 } { .mfb nop.m 0 diff --git a/sysdeps/ia64/fpu/s_tanhl.S b/sysdeps/ia64/fpu/s_tanhl.S index 3435f43..8da6afb 100644 --- a/sysdeps/ia64/fpu/s_tanhl.S +++ b/sysdeps/ia64/fpu/s_tanhl.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -68,7 +68,7 @@ // // 3. Main path: 1/8 <= |x| < 22.8 // For several ranges of 1/8 <= |x| < 22.8 -// Return tanhl(x) = sign(x)*((A0H+A0L) + y*(A1H+A1L) + y^2*(A2H+A2L) + +// Return tanhl(x) = sign(x)*((A0H+A0L) + y*(A1H+A1L) + y^2*(A2H+A2L) + // + y^3*A3 + y^4*A4 + ... + y^25*A25 ) // where y = (|x|/a) - b // @@ -85,10 +85,10 @@ // 8.0 <= |x| < 13.0 a = 8.0, b = 1.5 // 13.0 <= |x| < 16.0 a = 8.0, b = 2.0 // 16.0 <= |x| < 22.8 a = 16.0, b = 1.5 -// ( [3.25;4.0], [6.5;8.0], [13.9;16.0] subranges separated +// ( [3.25;4.0], [6.5;8.0], [13.9;16.0] subranges separated // for monotonicity issues resolve ) // -// 4. Saturation path: 22.8 <= |x| < +INF +// 4. Saturation path: 22.8 <= |x| < +INF // Return tanhl(x) = sign(x)*(1.0 - tiny_value) // (tiny_value ~ 1e-1233) // @@ -112,10 +112,10 @@ // Multiprecision have to be performed only for first few // polynomial iterations (up to 3-rd x degree) // Here we use the same parallelisation way as above: -// Split whole polynomial to first, "multiprecision" part, and second, +// Split whole polynomial to first, "multiprecision" part, and second, // so called "tail", native precision part. // -// 1) Multiprecision part: +// 1) Multiprecision part: // [v1=(A0H+A0L)+y*(A1H+A1L)] + [v2=y^2*((A2H+A2L)+y*A3)] // v1 and v2 terms calculated in parallel // @@ -123,7 +123,7 @@ // v3 = x^4 * ( A4 + x*A5 + ... + x^21*A25 ) // v3 is splitted to 2 even parts (10 coefficient in each one). // These 2 parts are also factorized using binary tree technique. 
-// +// // So Multiprecision and Tail parts cost is almost the same // and we have both results ready before final summation. // @@ -132,29 +132,29 @@ // not at the last operation but much more earlier and at // several places. // -// 4. Saturation path: 22.8 <= |x| < +INF +// 4. Saturation path: 22.8 <= |x| < +INF // // We use formula sign(x)*(1.0 - tiny_value) instead of simple sign(x)*1.0 // just to meet IEEE requirements for different rounding modes in this case. // // Registers used //============================================================== -// Floating Point registers used: +// Floating Point registers used: // f8 - input & output // f32 -> f92 -// General registers used: -// r2, r3, r32 -> r52 +// General registers used: +// r2, r3, r32 -> r52 // Predicate registers used: // p0, p6 -> p11, p14, p15 // p6 - arg is zero, denormal or special IEEE // p7 - arg is in [16;32] binary interval -// p8 - arg is in one of subranges +// p8 - arg is in one of subranges // [3.25;4.0], [6.5;8.0], [13.9;16.0] // p9 - arg < 1/8 -// p10 - arg is NOT in one of subranges +// p10 - arg is NOT in one of subranges // [3.25;4.0], [6.5;8.0], [13.9;16.0] // p11 - arg in saturation domain // p14 - arg is positive @@ -211,9 +211,9 @@ fA16 = f51 fA17 = f52 fA18 = f53 fA19 = f54 -fA20 = f55 -fA21 = f56 -fA22 = f57 +fA20 = f55 +fA21 = f56 +fA22 = f57 fA23 = f58 fA24 = f59 fA25 = f60 @@ -242,10 +242,10 @@ fRes3H = f79 fRes3L = f80 fRes4 = f81 -fTT = f82 +fTT = f82 fTH = f83 fTL = f84 -fTT2 = f85 +fTT2 = f85 fTH2 = f86 fTL2 = f87 @@ -264,7 +264,7 @@ LOCAL_OBJECT_START(tanhl_data) ////////// Main tables /////////// _0p125_to_0p25_data: // exp = 2^-3 -// Polynomial coefficients for the tanh(x), 1/8 <= |x| < 1/4 +// Polynomial coefficients for the tanh(x), 1/8 <= |x| < 1/4 data8 0x93D27D6AE7E835F8, 0x0000BFF4 //A3 = -5.6389704216278164626050408239e-04 data8 0xBF66E8668A78A8BC //A2H = -2.7963640930198357253955165902e-03 data8 0xBBD5384EFD0E7A54 //A2L = -1.7974001252014762983581666453e-20 
@@ -287,7 +287,7 @@ data8 0x83C8DDF213711381, 0x0000BFCC //A14 = -4.5721980583985311263109531319e-16 LOCAL_OBJECT_END(tanhl_data) LOCAL_OBJECT_START(_0p25_to_0p5_data) -// Polynomial coefficients for the tanh(x), 1/4 <= |x| < 1/2 +// Polynomial coefficients for the tanh(x), 1/4 <= |x| < 1/2 data8 0xB6E27B747C47C8AD, 0x0000BFF6 //A3 = -2.7905990032063258105302045572e-03 data8 0xBF93FD54E226F8F7 //A2H = -1.9521070769536099515084615064e-02 data8 0xBC491BC884F6F18A //A2L = -2.7222721075104525371410300625e-18 @@ -310,7 +310,7 @@ data8 0x905F6F124AF956B1, 0x00003FD8 //A14 = 2.0516607231389483452611375485e-12 LOCAL_OBJECT_END(_0p25_to_0p5_data) LOCAL_OBJECT_START(_0p5_to_1_data) -// Polynomial coefficients for the tanh(x), 1/2 <= |x| < 1 +// Polynomial coefficients for the tanh(x), 1/2 <= |x| < 1 data8 0xAB402BE491EE72A7, 0x00003FF7 //A3 = 5.2261556931080934657023772945e-03 data8 0xBFB8403D3DDA87BE //A2H = -9.4730212784752659826992271519e-02 data8 0xBC6FF7BC2AB71A8B //A2L = -1.3863786398568460929625760740e-17 @@ -333,7 +333,7 @@ data8 0xC78363FF929EFF62, 0x0000BFE4 //A14 = -1.1613199289622686725595739572e-08 LOCAL_OBJECT_END(_0p5_to_1_data) LOCAL_OBJECT_START(_1_to_2_data) -// Polynomial coefficients for the tanh(x), 1 <= |x| < 2.0 +// Polynomial coefficients for the tanh(x), 1 <= |x| < 2.0 data8 0xB3D8FB48A548D99A, 0x00003FFB //A3 = 8.7816203264683800892441646129e-02 data8 0xBFC4EFBD8FB38E3B //A2H = -1.6356629864377389416141284073e-01 data8 0xBC77687FD8087B23 //A2L = -2.0303377679446772162287121190e-17 @@ -356,7 +356,7 @@ data8 0x8672AF27EB0823B7, 0x00003FEF //A14 = 1.6027448793338500004496520337e-05 LOCAL_OBJECT_END(_1_to_2_data) LOCAL_OBJECT_START(_2_to_3p25_data) -// Polynomial coefficients for the tanh(x), 2 <= |x| < 3.25 +// Polynomial coefficients for the tanh(x), 2 <= |x| < 3.25 data8 0xD45657BEC559E366, 0x00003FFA //A3 = 5.1840155367548909799883161889e-02 data8 0xBFA41B109CA6AB81 //A2H = -3.9268988726084870510835145296e-02 data8 0xBC2C3D708A4E56C5 //A2L = 
-7.6544669252238280132415018518e-19 @@ -379,7 +379,7 @@ data8 0xE1851A2D00737A5D, 0x00003FF2 //A14 = 2.1507256570895163202182573369e-04 LOCAL_OBJECT_END(_2_to_3p25_data) LOCAL_OBJECT_START(_4_to_6p5_data) -// Polynomial coefficients for the tanh(x), 4 <= |x| < 6.5 +// Polynomial coefficients for the tanh(x), 4 <= |x| < 6.5 data8 0x896FDBD321A0BE58, 0x00003FF5 //A3 = 1.0485606995331904734870550114e-03 data8 0xBF39C522B95A37D6 //A2H = -3.9321992640217512306882730044e-04 data8 0xBBA9B3EC39A45338 //A2L = -2.7213922673282819034134988241e-21 @@ -402,7 +402,7 @@ data8 0x922EC6F3CFE0496E, 0x0000BFF4 //A14 = -5.5764283474946207558456581668e-04 LOCAL_OBJECT_END(_4_to_6p5_data) LOCAL_OBJECT_START(_8_to_13_data) -// Polynomial coefficients for the tanh(x), 8 <= |x| < 13 +// Polynomial coefficients for the tanh(x), 8 <= |x| < 13 data8 0xDD6050A898303460, 0x00003FE6 //A3 = 5.1543170295688189081352133793e-08 data8 0xBE44C1078FDBADC0 //A2H = -9.6643444318955652627581125180e-09 data8 0xBAF95FCAA6DBBA6F //A2L = -1.3118146684038113473094275420e-24 @@ -425,7 +425,7 @@ data8 0x82DEDAA539A3A3F1, 0x0000BFF1 //A14 = -6.2403928644276709411156885292e-05 LOCAL_OBJECT_END(_8_to_13_data) LOCAL_OBJECT_START(_16_to_22p8_data) -// Polynomial coefficients for the tanh(x), 16 <= |x| < 22.88 +// Polynomial coefficients for the tanh(x), 16 <= |x| < 22.88 data8 0x992C00F33DDE804D, 0x00003FCE //A3 = 2.1256869805798788337547274131e-15 data8 0x3C8D42EA28102760 //A2H = 5.0760412270332007485198379096e-17 data8 0x391A747B43B072DD //A2L = 1.2737621993898125881520341053e-33 @@ -448,7 +448,7 @@ data8 0xDA2470DE110B293E, 0x00003FF1 //A14 = 1.0401837693241806604296821650e-04 LOCAL_OBJECT_END(_16_to_22p8_data) LOCAL_OBJECT_START(_3p25_to_4_data) -// Polynomial coefficients for the tanh(x), 3.25 <= |x| < 4 +// Polynomial coefficients for the tanh(x), 3.25 <= |x| < 4 data8 0xE9E07240432926E6, 0x00003FF7 //A3 = 7.1373517862636557382403555215e-03 data8 0xBF75F495227AF306 //A2H = -5.3602052282115727338540622782e-03 
data8 0xBBBE92D355A6B716 //A2L = -6.4741983326810209847018826624e-21 @@ -471,7 +471,7 @@ data8 0x8987DF26A6789CCF, 0x00003FEE //A14 = 8.1974714257536543772040700977e-06 LOCAL_OBJECT_END(_3p25_to_4_data) LOCAL_OBJECT_START(_6p5_to_8_data) -// Polynomial coefficients for the tanh(x), 6.5 <= |x| < 8.0 +// Polynomial coefficients for the tanh(x), 6.5 <= |x| < 8.0 data8 0xA11C8A63815E5657, 0x00003FEF //A3 = 1.9205985861286093001394561449e-05 data8 0xBEDE355AD6CB61D8 //A2H = -7.2022479400070228499307345427e-06 data8 0xBB8E6B50B8468A63 //A2L = -8.0518953122203408718779840543e-22 @@ -494,7 +494,7 @@ data8 0xBE25D0FD069D0A93, 0x0000BFEE //A14 = -1.1333687314965721384777951065e-05 LOCAL_OBJECT_END(_6p5_to_8_data) LOCAL_OBJECT_START(_13_to_16_data) -// Polynomial coefficients for the tanh(x), 13 <= |x| < 16 +// Polynomial coefficients for the tanh(x), 13 <= |x| < 16 data8 0x98176FD2075BDBD5, 0x00003FDB //A3 = 1.7290807363028159200235264756e-11 data8 0xBD8C8464F76162D1 //A2H = -3.2420263805679445515400340441e-12 data8 0xBA2D56B508E0F1FD //A2L = -1.8515322669984580704502445180e-28 @@ -519,7 +519,7 @@ LOCAL_OBJECT_END(_13_to_16_data) //////// "Tail" tables ////////// LOCAL_OBJECT_START(_0p125_to_0p25_data_tail) -// Polynomial coefficients for the erf(x), 1/8 <= |x| < 1/4 +// Polynomial coefficients for the erf(x), 1/8 <= |x| < 1/4 data8 0x9D7D206E97ADC83A, 0x0000BFCC //A13 = -5.4639895428711257047470806445e-16 data8 0xA8972B666A845810, 0x00003FD3 //A12 = 7.4869224589947988668562043110e-14 data8 0x9A5B31511C9F4698, 0x0000BFD4 //A11 = -1.3709586467430093373657009487e-13 @@ -533,7 +533,7 @@ data8 0xE7C2AE92CB36769B, 0x00003FEF //A4 = 2.7628001723157068127646694830e-05 LOCAL_OBJECT_END(_0p125_to_0p25_data_tail) LOCAL_OBJECT_START(_0p25_to_0p5_data_tail) -// Polynomial coefficients for the tanh(x), 1/4 <= |x| < 1/2 +// Polynomial coefficients for the tanh(x), 1/4 <= |x| < 1/2 data8 0x9E2972C008B9965E, 0x0000BFDC //A13 = -3.5961854154738002253192260213e-11 data8 0xC3EABA3D219BEA8A, 
0x00003FDB //A12 = 2.2273173303628274478819473067e-11 data8 0xC50FB68D960D5CD9, 0x00003FE1 //A11 = 1.4338102430978399800743148719e-09 @@ -547,7 +547,7 @@ data8 0xAC262F3F8CF49C02, 0x00003FF4 //A4 = 6.5669692402266433496312492412e-04 LOCAL_OBJECT_END(_0p25_to_0p5_data_tail) LOCAL_OBJECT_START(_0p5_to_1_data_tail) -// Polynomial coefficients for the tanh(x), 1/2 <= |x| < 1 +// Polynomial coefficients for the tanh(x), 1/2 <= |x| < 1 data8 0xDF67FB36FFA2A538, 0x00003FE7 //A13 = 1.0403160796697495720021114635e-07 data8 0xB7FB80FB5AFA63A4, 0x0000BFE8 //A12 = -1.7134699677764282023124981753e-07 data8 0xC87625A0BA7D6C5F, 0x0000BFEA //A11 = -7.4677732458471897291461679095e-07 @@ -561,7 +561,7 @@ data8 0xCC4AB2EC0965499B, 0x00003FF7 //A4 = 6.2344907419841579664122448353e-03 LOCAL_OBJECT_END(_0p5_to_1_data_tail) LOCAL_OBJECT_START(_1_to_2_data_tail) -// Polynomial coefficients for the tanh(x), 1 <= |x| < 2.0 +// Polynomial coefficients for the tanh(x), 1 <= |x| < 2.0 data8 0xCCAEE174EAC17F78, 0x0000BFEE //A13 = -1.2200065117856038355953618829e-05 data8 0xA39DD0981D1A2776, 0x0000BFF0 //A12 = -3.9009204899026604074167603200e-05 data8 0xB7104FA27FAF80D0, 0x00003FF2 //A11 = 1.7458316338540792661905876072e-04 @@ -575,7 +575,7 @@ data8 0xCC8286B331BD8AAA, 0x0000BFF9 //A4 = -2.4964583478826523250880337777e-02 LOCAL_OBJECT_END(_1_to_2_data_tail) LOCAL_OBJECT_START(_2_to_3p25_data_tail) -// Polynomial coefficients for the tanh(x), 2 <= |x| < 3.25 +// Polynomial coefficients for the tanh(x), 2 <= |x| < 3.25 data8 0x92E1711A3BD6408B, 0x0000BFF4 //A13 = -5.6030514548041036913731470443e-04 data8 0x8B9BD885FF3E98C5, 0x00003FF5 //A12 = 1.0651304064581604055612602669e-03 data8 0xD041356C7FA26A22, 0x0000BFF5 //A11 = -1.5888574328066952147023520244e-03 @@ -589,7 +589,7 @@ data8 0xD01D077B42E7ED76, 0x0000BFFA //A4 = -5.0808934425896607486919526567e-02 LOCAL_OBJECT_END(_2_to_3p25_data_tail) LOCAL_OBJECT_START(_4_to_6p5_data_tail) -// Polynomial coefficients for the tanh(x), 4 <= |x| < 6.5 +// 
Polynomial coefficients for the tanh(x), 4 <= |x| < 6.5 data8 0x870CCE8C76C52C7E, 0x00003FF5 //A13 = 1.0303499350193060915603525934e-03 data8 0xE1431E54AD2A738B, 0x0000BFF5 //A12 = -1.7186140560972621669872002486e-03 data8 0xAB20056533E28734, 0x00003FF6 //A11 = 2.6111615345168277554841545330e-03 @@ -603,7 +603,7 @@ data8 0x896E211733AD9D40, 0x0000BFF6 //A4 = -2.0970183170010094667442967500e-03 LOCAL_OBJECT_END(_4_to_6p5_data_tail) LOCAL_OBJECT_START(_8_to_13_data_tail) -// Polynomial coefficients for the tanh(x), 8 <= |x| < 13 +// Polynomial coefficients for the tanh(x), 8 <= |x| < 13 data8 0xE50C3476BED020AA, 0x00003FF0 //A13 = 5.4609221347524272615754239857e-05 data8 0xBA16F5F4EDC0EABC, 0x0000BFF0 //A12 = -4.4367239594986428539386662937e-05 data8 0x8B916C2F002C3D91, 0x00003FF0 //A11 = 3.3275617838067362533536610680e-05 @@ -617,7 +617,7 @@ data8 0xDD6050A7761D67BB, 0x0000BFE8 //A4 = -2.0617268111985310661707082242e-07 LOCAL_OBJECT_END(_8_to_13_data_tail) LOCAL_OBJECT_START(_16_to_22p8_data_tail) -// Polynomial coefficients for the tanh(x), 16 <= |x| < 22.88 +// Polynomial coefficients for the tanh(x), 16 <= |x| < 22.88 data8 0xEAF4AF87336E81B1, 0x00003FEF //A13 = 2.8008914392791730186582989654e-05 data8 0xD5B309EA768E2711, 0x00003FED //A12 = 6.3687375204024238267961143128e-06 data8 0xA4048CA537113538, 0x00003FEB //A11 = 1.2220276227448617951538196845e-06 @@ -631,7 +631,7 @@ data8 0x86BC347939478174, 0x00003FD3 //A4 = 5.9834437707863962671883176163e-14 LOCAL_OBJECT_END(_16_to_22p8_data_tail) LOCAL_OBJECT_START(_3p25_to_4_data_tail) -// Polynomial coefficients for the tanh(x), 3.25 <= |x| < 4 +// Polynomial coefficients for the tanh(x), 3.25 <= |x| < 4 data8 0xBE9A2BE19F21BA1C, 0x0000BFEE //A13 = -1.1360778336288065244475976873e-05 data8 0xF84910F515BDB014, 0x00003FED //A12 = 7.3994819819577018481862729782e-06 data8 0xC4C84FB788AA4007, 0x00003FEF //A11 = 2.3458298013663976251972482656e-05 @@ -645,7 +645,7 @@ data8 0xE93FB2F48233275B, 0x0000BFF7 //A4 = 
-7.1181892208343798194003322900e-03 LOCAL_OBJECT_END(_3p25_to_4_data_tail) LOCAL_OBJECT_START(_6p5_to_8_data_tail) -// Polynomial coefficients for the tanh(x), 6.5 <= |x| < 8.0 +// Polynomial coefficients for the tanh(x), 6.5 <= |x| < 8.0 data8 0xA6881D7D21774BFD, 0x00003FEF //A13 = 1.9852125640303530752913966680e-05 data8 0x875E983AA042E605, 0x0000BFF0 //A12 = -3.2274606306629334402383651599e-05 data8 0xCB19E01E94FC133C, 0x00003FF0 //A11 = 4.8423069963831314927026982707e-05 @@ -659,7 +659,7 @@ data8 0xA11C80E20ADA5A64, 0x0000BFF0 //A4 = -3.8411937140983728563216440713e-05 LOCAL_OBJECT_END(_6p5_to_8_data_tail) LOCAL_OBJECT_START(_13_to_16_data_tail) -// Polynomial coefficients for the tanh(x), 13 <= |x| < 16 +// Polynomial coefficients for the tanh(x), 13 <= |x| < 16 data8 0x9D6CCDA4767CA6D9, 0x00003FE5 //A13 = 1.8326683535066775712253572575e-08 data8 0xFFAF154F334BF403, 0x0000BFE4 //A12 = -1.4882762852665077172347508377e-08 data8 0xBFC68FA7C61B6C17, 0x00003FE4 //A11 = 1.1162810813806544919835662888e-08 @@ -673,7 +673,7 @@ data8 0x98176FD2074C1D77, 0x0000BFDD //A4 = -6.9163229452106125388824134881e-11 LOCAL_OBJECT_END(_13_to_16_data_tail) LOCAL_OBJECT_START(_0_to_1o8_data) -// Polynomial coefficients for the tanh(x), 0.0 <= |x| < 0.125 +// Polynomial coefficients for the tanh(x), 0.0 <= |x| < 0.125 data8 0xBA0EC1879495150B, 0x0000BFF5 // A15 = -1.4195071451378679802688367813e-03 data8 0xEB5A82898D1BCBA4, 0x00003FF6 // A13 = 3.5912102408030526706365632879e-03 data8 0x91370DAFE0B64438, 0x0000BFF8 // A11 = -8.8632234251336964576640807982e-03 @@ -688,7 +688,7 @@ LOCAL_OBJECT_END(_0_to_1o8_data) GLOBAL_LIBM_ENTRY(tanhl) { .mfi - alloc r32 = ar.pfs, 0, 21, 0, 0 + alloc r32 = ar.pfs, 0, 21, 0, 0 fmerge.se fArgAbsNorm = f1, f8 // normalized x (1.0 <= x < 2.0) addl rSignBit = 0x20000, r0 // Set sign bit for exponent } @@ -699,26 +699,26 @@ GLOBAL_LIBM_ENTRY(tanhl) { .mfi getf.exp rArgExp = f8 // Get arg exponent - fclass.m p6,p0 = f8, 0xEF // Filter 0, denormals and 
specials + fclass.m p6,p0 = f8, 0xEF // Filter 0, denormals and specials // 0xEF = @qnan|@snan|@pos|@neg|@zero|@unorm|@inf - addl rBias = 0xfffc, r0 // Value to subtract from exp + addl rBias = 0xfffc, r0 // Value to subtract from exp // to get actual interval number } { .mfi ld8 rDataPtr = [rDataPtr] // Get real common data pointer fma.s1 fArgSqr = f8, f8, f0 // x^2 (for [0;1/8] path) - addl r2to4 = 0x10000, r0 // unbiased exponent + addl r2to4 = 0x10000, r0 // unbiased exponent // for [2;4] binary interval };; { .mfi - getf.sig rArgSig = f8 // Get arg significand + getf.sig rArgSig = f8 // Get arg significand fcmp.lt.s1 p15, p14 = f8, f0 // Is arg negative/positive? addl rSaturation = 0xb70, r0 // First 12 bits of // saturation value signif. } { .mfi - setf.d f1p5 = r1p5 // 1.5 construction + setf.d f1p5 = r1p5 // 1.5 construction fma.s1 f2p0 = f1,f1,f1 // 2.0 construction addl r1625Sign = 0xd01, r0 // First 12 bits of // 1.625 value signif. @@ -733,13 +733,13 @@ GLOBAL_LIBM_ENTRY(tanhl) { .mfb addl rTiny = 0xf000, r0 // Tiny value for saturation path nop.f 0 -(p6) br.cond.spnt tanhl_spec // Branch to zero, denorm & specs +(p6) br.cond.spnt tanhl_spec // Branch to zero, denorm & specs };; { .mfi sub rInterval = rArgExp, rBias // Get actual interval number nop.f 0 - shr.u rArgSig = rArgSig, 52 // Leave only 12 bits of sign. + shr.u rArgSig = rArgSig, 52 // Leave only 12 bits of sign. } { .mfi adds rShiftedDataPtr = 0x10, rDataPtr // Second ptr to data @@ -748,10 +748,10 @@ GLOBAL_LIBM_ENTRY(tanhl) };; { .mfi -(p8) cmp.le p8, p10 = r1625Sign, rArgSig // If signd is greater +(p8) cmp.le p8, p10 = r1625Sign, rArgSig // If signd is greater // than 1.625? 
(arg is at one of binary subranges) nop.f 0 - shl rOffset = rInterval, 8 // Make offset from + shl rOffset = rInterval, 8 // Make offset from // interval number } { .mfi @@ -762,30 +762,30 @@ GLOBAL_LIBM_ENTRY(tanhl) };; { .mfi -(p8) adds rOffset = 0x400, rOffset // Add additional offset +(p8) adds rOffset = 0x400, rOffset // Add additional offset // (arg is at one of binary subranges) fma.s1 fArgCube = fArgSqr, f8, f0 // x^3 (for [0;1/8] path) shl rTailOffset = rInterval, 7 // Make offset to "tail" data // from interval number } { .mib - setf.exp fTiny = rTiny // Construct "tiny" value + setf.exp fTiny = rTiny // Construct "tiny" value // for saturation path cmp.ltu p11, p0 = 0x7, rInterval // if arg > 32 -(p9) br.cond.spnt _0_to_1o8 +(p9) br.cond.spnt _0_to_1o8 };; { .mfi - add rAddr1 = rDataPtr, rOffset // Get address for - // interval data + add rAddr1 = rDataPtr, rOffset // Get address for + // interval data nop.f 0 shl rTailAddOffset = rInterval, 5 // Offset to interval - // "tail" data + // "tail" data } { .mib add rAddr2 = rShiftedDataPtr, rOffset // Get second - // address for interval data -(p7) cmp.leu p11, p0 = rSaturation, rArgSig // if arg is + // address for interval data +(p7) cmp.leu p11, p0 = rSaturation, rArgSig // if arg is // in [22.8;32] interval (p11) br.cond.spnt _saturation // Branch to Saturation path };; @@ -813,14 +813,14 @@ GLOBAL_LIBM_ENTRY(tanhl) .pred.rel "mutex",p8,p10 { .mfi ldfe fA18 = [rAddr1], 16 // Load A18 -(p8) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f2p0 // Add 2.0 +(p8) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f2p0 // Add 2.0 // (arg is at one of binary subranges) adds rTailAddr2 = 0x10, rTailAddr1 // First tail // data address } { .mfi - ldfe fA25 = [rAddr2], 16 // Load A25 -(p10) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f1p5 // Add 1.5 + ldfe fA25 = [rAddr2], 16 // Load A25 +(p10) fms.s1 fArgAbsNorm = fArgAbsNorm, f1, f1p5 // Add 1.5 // to normalized arg nop.i 0 };; @@ -928,9 +928,9 @@ GLOBAL_LIBM_ENTRY(tanhl) fma.s1 fA23 = fA24, 
fArgAbsNorm, fA23 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 - fma.s1 fA21 = fA22, fArgAbsNorm, fA21 // Polynomial tail + fma.s1 fA21 = fA22, fArgAbsNorm, fA21 // Polynomial tail nop.i 0 };; @@ -946,7 +946,7 @@ GLOBAL_LIBM_ENTRY(tanhl) fma.s1 fRes3L = fRes3L, f1, fTH // (A3*x+A2)*x^2 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA19 = fA20, fArgAbsNorm, fA19 // Polynomial tail nop.i 0 @@ -957,7 +957,7 @@ GLOBAL_LIBM_ENTRY(tanhl) fma.s1 fRes1H = fTH2, f1, fA0H // A1*x+A0 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fms.s1 fTL2 = fA1H, fArgAbsNorm, fTH2 // A1*x+A0 nop.i 0 @@ -968,7 +968,7 @@ GLOBAL_LIBM_ENTRY(tanhl) fma.s1 fA8 = fA9, fArgAbsNorm, fA8 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA10 = fA11, fArgAbsNorm, fA10 // Polynomial tail nop.i 0 @@ -990,7 +990,7 @@ GLOBAL_LIBM_ENTRY(tanhl) fms.s1 fArgAbsNorm11 = fArgAbsNorm4, fArgAbsNorm4, f0 // x^8 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA4 = fA5, fArgAbsNorm, fA4 // Polynomial tail nop.i 0 @@ -1001,7 +1001,7 @@ GLOBAL_LIBM_ENTRY(tanhl) fma.s1 fRes3L = fRes3L, f1, fA2L // (A3*x+A2)*x^2 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA6 = fA7, fArgAbsNorm, fA6 // Polynomial tail nop.i 0 @@ -1012,7 +1012,7 @@ GLOBAL_LIBM_ENTRY(tanhl) fma.s1 fTL2 = fTL2, f1, fTT2 // A1*x+A0 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fms.s1 fRes1L = fA0H, f1, fRes1H // A1*x+A0 nop.i 0 @@ -1023,7 +1023,7 @@ GLOBAL_LIBM_ENTRY(tanhl) fma.s1 fA23 = fA25, fArgAbsNorm2, fA23 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA12 = fA14, fArgAbsNorm2, fA12 // Polynomial tail nop.i 0 @@ -1034,7 +1034,7 @@ GLOBAL_LIBM_ENTRY(tanhl) fma.s1 fA19 = fA21, fArgAbsNorm2, fA19 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA8 = fA10, fArgAbsNorm2, fA8 // Polynomial tail nop.i 0 @@ -1045,7 +1045,7 @@ GLOBAL_LIBM_ENTRY(tanhl) fma.s1 fA15 = fA17, fArgAbsNorm2, fA15 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fms.s1 fArgAbsNorm11 = fArgAbsNorm11, fArgAbsNorm3, f0 // x^11 nop.i 0 @@ -1056,7 +1056,7 @@ 
GLOBAL_LIBM_ENTRY(tanhl) fma.s1 fTT = fRes3L, fArgAbsNorm2, f0 // (A3*x+A2)*x^2 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA4 = fA6, fArgAbsNorm2, fA4 // Polynomial tail nop.i 0 @@ -1078,7 +1078,7 @@ GLOBAL_LIBM_ENTRY(tanhl) fma.s1 fA19 = fA23, fArgAbsNorm4, fA19 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA8 = fA12, fArgAbsNorm4, fA8 // Polynomial tail nop.i 0 @@ -1131,7 +1131,7 @@ GLOBAL_LIBM_ENTRY(tanhl) fms.s1 fRes2L = fRes3H, fArgAbsNorm2, fRes2H // (A3*x+A2)*x^2 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fResH = fRes2H, f1, fRes1H // High result nop.i 0 @@ -1148,12 +1148,12 @@ GLOBAL_LIBM_ENTRY(tanhl) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 fRes2L = fRes2L, f1, fTT // (A3*x+A2)*x^2 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fms.s1 fResL = fRes1H, f1, fResH // Low result nop.i 0 @@ -1165,7 +1165,7 @@ GLOBAL_LIBM_ENTRY(tanhl) // .s0 - for symmetry issue resolving at +/-inf rounding mode nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fResL = fResL, f1, fRes2H // Low result nop.i 0 @@ -1185,12 +1185,12 @@ GLOBAL_LIBM_ENTRY(tanhl) };; .pred.rel "mutex",p14,p15 -{ .mfi +{ .mfi nop.m 0 (p14) fma.s0 f8 = fResL, f1, fResH// Add high and low results nop.i 0 } -{ .mfb +{ .mfb nop.m 0 (p15) fms.s0 f8 = fResL, f1, fResH // Add high and low results br.ret.sptk b0 // Main path return @@ -1200,12 +1200,12 @@ GLOBAL_LIBM_ENTRY(tanhl) _saturation: .pred.rel "mutex",p14,p15 -{ .mfi +{ .mfi nop.m 0 (p14) fms.s0 f8 = f1, f1, fTiny // Saturation result r = 1-tiny nop.i 0 };; -{ .mfb +{ .mfb nop.m 0 (p15) fnma.s0 f8 = f1, f1, fTiny // Saturation result r = tiny-1 br.ret.sptk b0 // Saturation path return @@ -1215,46 +1215,46 @@ _saturation: // 0, denormals and special IEEE numbers path ///////////////////////////////// tanhl_spec: -{ .mfi +{ .mfi nop.m 0 fclass.m p6,p0 = f8, 0x23 // To filter infinities - // 0x23 = @pos|@neg|@inf + // 0x23 = @pos|@neg|@inf nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fclass.m p7,p0 = f8, 0xC7 // To filter NaNs & Zeros // 0xC7 = @pos|@neg|@zero|@qnan|@snan 
nop.i 0 };; -{ .mfb +{ .mfb nop.m 0 -(p6) fmerge.s f8 = f8, f1 // +/-1 for INF args +(p6) fmerge.s f8 = f8, f1 // +/-1 for INF args (p6) br.ret.spnt b0 // exit for x = INF };; -{ .mfb +{ .mfb nop.m 0 -(p7) fma.s0 f8 = f8, f1, f8 // +/-0 for 0 args +(p7) fma.s0 f8 = f8, f1, f8 // +/-0 for 0 args // and NaNs for NaNs (p7) br.ret.spnt b0 // exit for x = NaN or +/-0 };; -{ .mfi +{ .mfi nop.m 0 fnorm.s0 f8 = f8 // Normalize arg nop.i 0 };; .pred.rel "mutex",p14,p15 -{ .mfi +{ .mfi nop.m 0 (p14) fnma.s0 f8 = f8, f8, f8 // res = r-r^2 nop.i 0 } -{ .mfb +{ .mfb nop.m 0 (p15) fma.s0 f8 = f8, f8, f8 // res = r+r^2 br.ret.sptk b0 // 0, denormals, IEEE specials return @@ -1264,83 +1264,83 @@ tanhl_spec: // 0 < |x| < 1/8 path ///////////////////////////////////////////////////////// _0_to_1o8: -{ .mmi +{ .mmi adds rAddr1 = 0x11e0, rDataPtr // Ptr 1 to coeffs adds rAddr2 = 0x11f0, rDataPtr // Ptr 2 to coeffs nop.i 0 };; -{ .mmi +{ .mmi ldfe fA15 = [rAddr1], 32 // Load A15 ldfe fA13 = [rAddr2], 32 // Load A13 nop.i 0 };; -{ .mmi +{ .mmi ldfe fA11 = [rAddr1], 32 // Load A11 ldfe fA9 = [rAddr2], 32 // Load A9 nop.i 0 };; -{ .mmi +{ .mmi ldfe fA7 = [rAddr1], 32 // Load A7 ldfe fA5 = [rAddr2] // Load A5 nop.i 0 };; -{ .mfi +{ .mfi ldfe fA3 = [rAddr1] // Load A3 fma.s1 fA11 = fA13, fArgSqr, fA11 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 - fma.s1 fArgFour = fArgSqr, fArgSqr, f0 // a^4 + fma.s1 fArgFour = fArgSqr, fArgSqr, f0 // a^4 nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 fA3 = fA5, fArgSqr, fA3 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fA7 = fA9, fArgSqr, fA7 // Polynomial tail nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 fA11 = fA15, fArgFour, fA11 // Polynomial tail nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 fA3 = fA7, fArgFour, fA3 // Polynomial tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 fArgEight = fArgFour, fArgFour, f0 // a^8 nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 fRes = fA11, fArgEight, fA3 //Polynomial tail result nop.i 0 };; -{ .mfb +{ .mfb 
nop.m 0 fma.s0 f8 = fRes, fArgCube, f8 // (Polynomial tail)*x^3 br.ret.sptk b0 // [0;1/8] interval return };; - + GLOBAL_LIBM_END(tanhl) diff --git a/sysdeps/ia64/fpu/s_tanl.S b/sysdeps/ia64/fpu/s_tanl.S index 607a271..b59936c 100644 --- a/sysdeps/ia64/fpu/s_tanl.S +++ b/sysdeps/ia64/fpu/s_tanl.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // //********************************************************************* // -// History: +// History: // // 02/02/00 (hand-optimized) // 04/04/00 Unwind support added diff --git a/sysdeps/ia64/fpu/w_tgamma.S b/sysdeps/ia64/fpu/w_tgamma.S index 24f3d11..e4b39cc 100644 --- a/sysdeps/ia64/fpu/w_tgamma.S +++ b/sysdeps/ia64/fpu/w_tgamma.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT // LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL, // EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// // Intel Corporation is the author of this code,and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // //********************************************************************* // -// History: +// History: // 10/12/01 Initial version // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align @@ -74,8 +74,8 @@ // IEEE Special Conditions: // // tgamma(+inf) = +inf -// tgamma(-inf) = QNaN -// tgamma(+/-0) = +/-inf +// tgamma(-inf) = QNaN +// tgamma(+/-0) = +/-inf // tgamma(x<0, x - integer) = QNaN // tgamma(SNaN) = QNaN // tgamma(QNaN) = QNaN @@ -85,7 +85,7 @@ // Overview // // The method consists of three cases. -// +// // If 2 <= x < OVERFLOW_BOUNDARY use case tgamma_regular; // else if 0 < x < 2 use case tgamma_from_0_to_2; // else if -(i+1) < x < -i, i = 0...184 use case tgamma_negatives; @@ -110,9 +110,9 @@ // r = x - N, note 0 <= r < 1 // // n = N & ~0xF - index of table that contains coefficient of -// polynomial approximation +// polynomial approximation // i = N & 0xF - is used in recursive formula -// +// // // Step 2: Approximation // --------------------- @@ -124,7 +124,7 @@ // ----------------- // In case when i > 0 we need to multiply P22n(r) by product // R(i)=(x-1)*(x-2)*...*(x-i). To reduce number of fp-instructions -// we can calculate R as follow: +// we can calculate R as follow: // R(i) = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-1))*(x-i)) if i is // even or R = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-2))*(x-(i-1)))* // *(i-1) if i is odd. In both cases we need to calculate @@ -145,7 +145,7 @@ // if 1.25 <= x < 1.5 than GAMMA(x) = P15(x-x_min) where // x_min is point of local minimum on [1; 2] interval. 
// if 1.5 <= x < 2.0 than GAMMA(x) = P15(x-1.5) -// and +// and // if 0 < x < 1 than GAMMA(x) = GAMMA(x+1)/x // // Case -(i+1) < x < -i, i = 0...184 @@ -156,14 +156,14 @@ // // Step 1: Reduction // ----------------- -// Note that period of sin(PI*x) is 2 and range reduction for -// sin(PI*x) is like to range reduction for GAMMA(x) +// Note that period of sin(PI*x) is 2 and range reduction for +// sin(PI*x) is like to range reduction for GAMMA(x) // i.e r = x - [x] with exception of cases // when r > 0.5 (in such cases r = 1 - (x - [x])). // // Step 2: Approximation // --------------------- -// To approximate sin(PI*x)/PI = sin(PI*(2*n+r))/PI = +// To approximate sin(PI*x)/PI = sin(PI*(2*n+r))/PI = // = (-1)^n*sin(PI*r)/PI Taylor series is used. // sin(PI*r)/PI ~ S21(r). // @@ -171,7 +171,7 @@ // ---------------- // To calculate 1/(x*GAMMA(x)*S21(r)) we use frcpa instruction // with following Newton-Raphson interations. -// +// // //********************************************************************* @@ -807,7 +807,7 @@ GLOBAL_LIBM_ENTRY(tgamma) } { .mfb ldfe FR_C01 = [GR_ad_Co],32 -(p7) fms.s1 FR_r02 = FR_r02,f1,f1 +(p7) fms.s1 FR_r02 = FR_r02,f1,f1 // jump if x is NaTVal, NaN, +/-0, +/-INF (p10) br.cond.spnt tgamma_spec };; @@ -882,7 +882,7 @@ GLOBAL_LIBM_ENTRY(tgamma) { .mfi ldfe FR_C30 = [GR_ad_Co],32 fma.s1 FR_Rq3 = FR_Rq3,FR_6,FR_X2pX // (x-5)*(x-6) - nop.i 0 + nop.i 0 };; { .mfi ldfe FR_C40 = [GR_ad_Ce],32 @@ -902,7 +902,7 @@ GLOBAL_LIBM_ENTRY(tgamma) } { .mfi ldfe FR_C70 = [GR_ad_Co7],32 - fma.s1 FR_rs = f0,f0,FR_r // reduced arg for sin(pi*x) + fma.s1 FR_rs = f0,f0,FR_r // reduced arg for sin(pi*x) add GR_ad_Co = 0x550,GR_ad_Data };; { .mfi @@ -1036,12 +1036,12 @@ GLOBAL_LIBM_ENTRY(tgamma) nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_C01 = FR_C01,FR_C11,f0 nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_C21 = FR_C21,FR_C31,f0 nop.i 0 } @@ -1051,9 +1051,9 @@ GLOBAL_LIBM_ENTRY(tgamma) (p12) cmp.lt.unc p7,p0 = 2,GR_Sig2 // should mul by FR_Rq2? 
};; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_C41 = FR_C41,FR_C51,f0 - nop.i 0 + nop.i 0 } { .mfi nop.m 0 @@ -1061,7 +1061,7 @@ GLOBAL_LIBM_ENTRY(tgamma) (p12) cmp.lt.unc p9,p0 = 6,GR_Sig2 // should mul by FR_Rq4? };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_C61 = FR_C61,FR_C71,f0 (p15) cmp.eq p11,p0 = r0,r0 } @@ -1071,7 +1071,7 @@ GLOBAL_LIBM_ENTRY(tgamma) (p12) cmp.lt.unc p8,p0 = 10,GR_Sig2 // should mul by FR_Rq6? };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_C81 = FR_C81,FR_C91,f0 nop.i 0 } @@ -1081,8 +1081,8 @@ GLOBAL_LIBM_ENTRY(tgamma) (p14) cmp.ltu p0,p11 = 0x9,GR_Tbl_Ind };; { .mfi - nop.m 0 - fcvt.xf FR_RqLin = FR_Xt + nop.m 0 + fcvt.xf FR_RqLin = FR_Xt nop.i 0 } { .mfi @@ -1101,12 +1101,12 @@ GLOBAL_LIBM_ENTRY(tgamma) nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_C01 = FR_C01,FR_C21,f0 nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_rs4 = FR_rs2,FR_rs2,f0 (p12) cmp.lt.unc p8,p0 = 4,GR_Sig2 // should mul by FR_Rq3? };; @@ -1121,19 +1121,19 @@ GLOBAL_LIBM_ENTRY(tgamma) (p12) cmp.lt.unc p9,p0 = 12,GR_Sig2 // should mul by FR_Rq7? };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_C41 = FR_C41,FR_C61,f0 nop.i 0 } { .mfi - nop.m 0 + nop.m 0 (p9) fma.s1 FR_Rq5 = FR_Rq5,FR_Rq7,f0 - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_C81 = FR_C81,FR_CA1,f0 - nop.i 0 + nop.i 0 } { .mfi nop.m 0 @@ -1161,12 +1161,12 @@ GLOBAL_LIBM_ENTRY(tgamma) mov GR_ExpOf1 = 0x2FFFF } { .mfi - nop.m 0 + nop.m 0 (p6) fms.s1 FR_RqLin = FR_AbsX,f1,FR_RqLin (p12) cmp.lt.unc p8,p0 = 8,GR_Sig2 // should mul by FR_Rq5? 
};; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_C01 = FR_C01,FR_C41,f0 nop.i 0 } @@ -1192,7 +1192,7 @@ GLOBAL_LIBM_ENTRY(tgamma) } { .mfi nop.m 0 -(p15) fcmp.lt.unc.s1 p0,p10 = FR_AbsX,FR_OvfBound // x >= overflow_boundary +(p15) fcmp.lt.unc.s1 p0,p10 = FR_AbsX,FR_OvfBound // x >= overflow_boundary nop.i 0 };; { .mfi @@ -1217,7 +1217,7 @@ GLOBAL_LIBM_ENTRY(tgamma) (p15) cmp.eq.unc p0,p11 = r0,GR_SigRqLin } { .mfb - nop.m 0 + nop.m 0 fma.s1 FR_GAMMA = FR_C01,FR_C81,f0 (p11) br.cond.spnt tgamma_positives };; @@ -1233,12 +1233,12 @@ GLOBAL_LIBM_ENTRY(tgamma) };; .pred.rel "mutex",p8,p9 { .mfi - nop.m 0 + nop.m 0 (p9) fma.s1 FR_GAMMA = FR_GAMMA,FR_Rq1,f0 tbit.z p6,p7 = GR_Sig,0 // p6 if sin<0, p7 if sin>0 } { .mfi - nop.m 0 + nop.m 0 (p8) fma.s1 FR_GAMMA = FR_GAMMA,FR_RqLin,f0 nop.i 0 };; @@ -1249,12 +1249,12 @@ GLOBAL_LIBM_ENTRY(tgamma) };; .pred.rel "mutex",p6,p7 { .mfi - nop.m 0 -(p6) fnma.s1 FR_GAMMA = FR_GAMMA,FR_S21,f0 - nop.i 0 + nop.m 0 +(p6) fnma.s1 FR_GAMMA = FR_GAMMA,FR_S21,f0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 (p7) fma.s1 FR_GAMMA = FR_GAMMA,FR_S21,f0 mov GR_Sig2 = 1 };; @@ -1292,7 +1292,7 @@ GLOBAL_LIBM_ENTRY(tgamma) { .mfi nop.m 0 fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0 - nop.i 0 + nop.i 0 };; // NR method: ineration #2 { .mfi @@ -1340,12 +1340,12 @@ GLOBAL_LIBM_ENTRY(tgamma) tgamma_positives: .pred.rel "mutex",p8,p9 { .mfi - nop.m 0 + nop.m 0 (p9) fma.d.s0 f8 = FR_GAMMA,FR_Rq1,f0 nop.i 0 } { .mfb - nop.m 0 + nop.m 0 (p8) fma.d.s0 f8 = FR_GAMMA,FR_RqLin,f0 br.ret.sptk b0 };; @@ -1366,7 +1366,7 @@ tgamma_from_0_to_2: };; { .mfi (p6) getf.sig GR_Sig = FR_NormX - nop.f 0 + nop.f 0 (p6) shl GR_Sig2 = GR_Sig2,63 } { .mfi @@ -1375,9 +1375,9 @@ tgamma_from_0_to_2: (p6) mov GR_NzOvfBound = 0xFBFF };; { .mfi - cmp.eq p8,p0 = GR_Sign_Exp,GR_ExpOf05 // r02 >= 1/2 + cmp.eq p8,p0 = GR_Sign_Exp,GR_ExpOf05 // r02 >= 1/2 nop.f 0 - cmp.eq p9,p10 = GR_Sign_Exp,GR_ExpOf025 // r02 >= 1/4 + cmp.eq p9,p10 = GR_Sign_Exp,GR_ExpOf025 // r02 >= 1/4 } { .mfi (p6) cmp.ltu.unc p11,p0 = 
GR_Sign_Exp,GR_NzOvfBound // p11 <- overflow @@ -1396,83 +1396,83 @@ tgamma_from_0_to_2: (p11) br.cond.spnt tgamma_ovf_near_0 //tgamma_spec_res };; { .mfi - ldfe FR_A15 = [GR_ad_Co],32 + ldfe FR_A15 = [GR_ad_Co],32 nop.f 0 (p12) cmp.eq.unc p13,p0 = GR_Sig,GR_Sig2 } { .mfb - ldfe FR_A14 = [GR_ad_Ce],32 + ldfe FR_A14 = [GR_ad_Ce],32 nop.f 0 (p13) br.cond.spnt tgamma_ovf_near_0_boundary //tgamma_spec_res };; { .mfi - ldfe FR_A13 = [GR_ad_Co],32 + ldfe FR_A13 = [GR_ad_Co],32 nop.f 0 nop.i 0 } { .mfi - ldfe FR_A12 = [GR_ad_Ce],32 + ldfe FR_A12 = [GR_ad_Ce],32 nop.f 0 nop.i 0 };; .pred.rel "mutex",p9,p10 { .mfi - ldfe FR_A11 = [GR_ad_Co],32 -(p10) fma.s1 FR_r2 = FR_r02,FR_r02,f0 + ldfe FR_A11 = [GR_ad_Co],32 +(p10) fma.s1 FR_r2 = FR_r02,FR_r02,f0 nop.i 0 } { .mfi - ldfe FR_A10 = [GR_ad_Ce],32 -(p9) fma.s1 FR_r2 = FR_r,FR_r,f0 + ldfe FR_A10 = [GR_ad_Ce],32 +(p9) fma.s1 FR_r2 = FR_r,FR_r,f0 nop.i 0 };; { .mfi - ldfe FR_A9 = [GR_ad_Co],32 + ldfe FR_A9 = [GR_ad_Co],32 (p6) fma.s1 FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0 nop.i 0 } { .mfi - ldfe FR_A8 = [GR_ad_Ce],32 + ldfe FR_A8 = [GR_ad_Ce],32 (p10) fma.s1 FR_r = f0,f0,FR_r02 nop.i 0 };; { .mfi - ldfe FR_A7 = [GR_ad_Co],32 + ldfe FR_A7 = [GR_ad_Co],32 nop.f 0 nop.i 0 } { .mfi - ldfe FR_A6 = [GR_ad_Ce],32 - nop.f 0 + ldfe FR_A6 = [GR_ad_Ce],32 + nop.f 0 nop.i 0 };; { .mfi - ldfe FR_A5 = [GR_ad_Co],32 + ldfe FR_A5 = [GR_ad_Co],32 nop.f 0 nop.i 0 } { .mfi - ldfe FR_A4 = [GR_ad_Ce],32 + ldfe FR_A4 = [GR_ad_Ce],32 nop.f 0 nop.i 0 };; { .mfi - ldfe FR_A3 = [GR_ad_Co],32 + ldfe FR_A3 = [GR_ad_Co],32 nop.f 0 nop.i 0 } { .mfi - ldfe FR_A2 = [GR_ad_Ce],32 + ldfe FR_A2 = [GR_ad_Ce],32 nop.f 0 nop.i 0 };; { .mfi - ldfe FR_A1 = [GR_ad_Co],32 - fma.s1 FR_r4 = FR_r2,FR_r2,f0 + ldfe FR_A1 = [GR_ad_Co],32 + fma.s1 FR_r4 = FR_r2,FR_r2,f0 nop.i 0 } { .mfi - ldfe FR_A0 = [GR_ad_Ce],32 + ldfe FR_A0 = [GR_ad_Ce],32 nop.f 0 nop.i 0 };; @@ -1493,7 +1493,7 @@ tgamma_from_0_to_2: };; { .mfi nop.m 0 - fma.s1 FR_r8 = FR_r4,FR_r4,f0 + fma.s1 FR_r8 = 
FR_r4,FR_r4,f0 nop.i 0 };; { .mfi @@ -1573,17 +1573,17 @@ tgamma_from_0_to_2: };; .pred.rel "mutex",p6,p7 { .mfi - nop.m 0 + nop.m 0 (p6) fma.s1 FR_A15 = FR_A15,FR_r8,FR_A7 nop.i 0 } { .mfi - nop.m 0 + nop.m 0 (p7) fma.d.s0 f8 = FR_A15,FR_r8,FR_A7 nop.i 0 };; { .mfb - nop.m 0 + nop.m 0 (p6) fma.d.s0 f8 = FR_A15,FR_Rcp3,f0 br.ret.sptk b0 };; @@ -1606,7 +1606,7 @@ tgamma_ovf_near_0_boundary: { .mfi nop.m 0 nop.f 0 - shl r8 = r8,52 + shl r8 = r8,52 };; { .mfi sub r8 = r8,r0,1 @@ -1616,12 +1616,12 @@ tgamma_ovf_near_0_boundary: .pred.rel "mutex",p14,p15 { .mfi // set p8 to 0 in case of overflow and to 1 otherwise - // for negative arg: + // for negative arg: // no overflow if rounding mode either Z or +Inf, i.e. // GR_fpsr > 1 (p14) cmp.lt p8,p0 = 1,GR_fpsr nop.f 0 - // for positive arg: + // for positive arg: // no overflow if rounding mode either Z or -Inf, i.e. // (GR_fpsr & 1) == 0 (p15) tbit.z p0,p8 = GR_fpsr,0 @@ -1636,7 +1636,7 @@ tgamma_ovf_near_0_boundary: tgamma_ovf_near_0: { .mfi mov r8 = 0x1FFFE - nop.f 0 + nop.f 0 nop.i 0 };; { .mfi @@ -1646,12 +1646,12 @@ tgamma_ovf_near_0: };; .pred.rel "mutex",p14,p15 { .mfi - nop.m 0 + nop.m 0 (p15) fma.d.s0 f8 = f9,f9,f0 // Set I,O and +INF result - nop.i 0 + nop.i 0 } { .mfb - nop.m 0 + nop.m 0 (p14) fnma.d.s0 f8 = f9,f9,f0 // Set I,O and -INF result br.cond.sptk tgamma_libm_err };; @@ -1671,7 +1671,7 @@ tgamma_spec_res: };; { .mfb (p11) cmp.ltu.unc p7,p8 = GR_0x30033,GR_Sign_Exp - nop.f 0 + nop.f 0 (p10) br.cond.spnt tgamma_singularity };; .pred.rel "mutex",p7,p8 @@ -1686,7 +1686,7 @@ tgamma_spec_res: mov GR_TAG = 258 // overflow } { .mfb - nop.m 0 + nop.m 0 (p15) fma.d.s0 f8 = f9,f9,f0 // Set I,O and +INF result br.cond.sptk tgamma_libm_err };; @@ -1764,7 +1764,7 @@ tgamma_spec: { .mfi (p7) mov GR_TAG = 259 // negative (p7) frcpa.s0 f8,p0 = f1,f8 - nop.i 0 + nop.i 0 } { .mib nop.m 0 @@ -1788,10 +1788,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) add GR_Parameter_Y=-32,sp // Parameter 2 value nop.f 0 .save 
ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs // Save ar.pfs + mov GR_SAVE_PFS=ar.pfs // Save ar.pfs } { .mfi -.fframe 64 +.fframe 64 add sp=-64,sp // Create new stack nop.f 0 mov GR_SAVE_GP=gp // Save gp @@ -1799,18 +1799,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mmi stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack add GR_Parameter_X = 16,sp // Parameter 1 address -.save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 // Save b0 +.save b0, GR_SAVE_B0 + mov GR_SAVE_B0=b0 // Save b0 };; .body { .mib - stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack - add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address + stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack + add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address nop.b 0 } { .mib stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack - add GR_Parameter_Y = -16,GR_Parameter_Y + add GR_Parameter_Y = -16,GR_Parameter_Y br.call.sptk b0=__libm_error_support# // Call error handling function };; { .mmi @@ -1825,10 +1825,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov b0 = GR_SAVE_B0 // Restore return address };; { .mib - mov gp = GR_SAVE_GP // Restore gp + mov gp = GR_SAVE_GP // Restore gp mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs br.ret.sptk b0 // Return -};; +};; LOCAL_LIBM_END(__libm_error_region) .type __libm_error_support#,@function diff --git a/sysdeps/ia64/fpu/w_tgammaf.S b/sysdeps/ia64/fpu/w_tgammaf.S index dda0d0f..ffd7daa 100644 --- a/sysdeps/ia64/fpu/w_tgammaf.S +++ b/sysdeps/ia64/fpu/w_tgammaf.S @@ -21,25 +21,25 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT // LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL, // EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code,and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// //********************************************************************* // -// History: +// History: // 11/30/01 Initial version // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align @@ -75,8 +75,8 @@ // IEEE Special Conditions: // // tgammaf(+inf) = +inf -// tgammaf(-inf) = QNaN -// tgammaf(+/-0) = +/-inf +// tgammaf(-inf) = QNaN +// tgammaf(+/-0) = +/-inf // tgammaf(x<0, x - integer) = QNaN // tgammaf(SNaN) = QNaN // tgammaf(QNaN) = QNaN @@ -86,7 +86,7 @@ // Overview // // The method consists of three cases. -// +// // If 2 <= x < OVERFLOW_BOUNDARY use case tgamma_regular; // else if 0 < x < 2 use case tgamma_from_0_to_2; // else if -(i+1) < x < -i, i = 0...43 use case tgamma_negatives; @@ -111,9 +111,9 @@ // r = x - N, note 0 <= r < 1 // // n = N & ~0xF - index of table that contains coefficient of -// polynomial approximation +// polynomial approximation // i = N & 0xF - is used in recursive formula -// +// // // Step 2: Approximation // --------------------- @@ -125,7 +125,7 @@ // ----------------- // In case when i > 0 we need to multiply P12n(r) by product // R(i,x)=(x-1)*(x-2)*...*(x-i). To reduce number of fp-instructions -// we can calculate R as follow: +// we can calculate R as follow: // R(i,x) = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-1))*(x-i)) if i is // even or R = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-2))*(x-(i-1)))* // *(i-1) if i is odd. In both cases we need to calculate @@ -147,7 +147,7 @@ // x_min is point of local minimum on [1; 2] interval. 
// if 1.5 <= x < 1.75 than GAMMA(x) = P7(x-1.5) // if 1.75 <= x < 2.0 than GAMMA(x) = P7(x-1.5) -// and +// and // if 0 < x < 1 than GAMMA(x) = GAMMA(x+1)/x // // Case -(i+1) < x < -i, i = 0...43 @@ -158,13 +158,13 @@ // // Step 1: Reduction // ----------------- -// Note that period of sin(PI*x) is 2 and range reduction for -// sin(PI*x) is like to range reduction for GAMMA(x) +// Note that period of sin(PI*x) is 2 and range reduction for +// sin(PI*x) is like to range reduction for GAMMA(x) // i.e rs = x - round(x) and |rs| <= 0.5. // // Step 2: Approximation // --------------------- -// To approximate sin(PI*x)/PI = sin(PI*(2*n+rs))/PI = +// To approximate sin(PI*x)/PI = sin(PI*(2*n+rs))/PI = // = (-1)^n*sin(PI*rs)/PI Taylor series is used. // sin(PI*rs)/PI ~ S17(rs). // @@ -172,7 +172,7 @@ // ---------------- // To calculate 1/x and 1/(GAMMA(x)*S12(rs)) we use frcpa // instruction with following Newton-Raphson interations. -// +// // //********************************************************************* @@ -218,7 +218,7 @@ FR_X = f10 FR_Y = f1 FR_RESULT = f8 -FR_iXt = f11 +FR_iXt = f11 FR_Xt = f12 FR_r = f13 FR_r2 = f14 @@ -671,7 +671,7 @@ GLOBAL_LIBM_ENTRY(tgammaf) { .mfi nop.m 0 (p14) fma.s1 FR_rs2 = FR_rs,FR_rs,f0 - nop.i 0 + nop.i 0 } { .mfb nop.m 0 @@ -680,12 +680,12 @@ GLOBAL_LIBM_ENTRY(tgammaf) (p7) br.cond.spnt tgammaf_overflow_near0_bound };; { .mfi - nop.m 0 + nop.m 0 (p6) fnma.s1 FR_Rq1 = FR_Rq1,FR_Rq0,f0 - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 (p10) fma.s1 FR_Rq2 = FR_Rq2,FR_Rq3,f0 and GR_Sig = 0x7,GR_Sig };; @@ -730,24 +730,24 @@ GLOBAL_LIBM_ENTRY(tgammaf) nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 (p9) fma.s1 FR_Rq1 = FR_Rq1,FR_Rq2,f0 nop.i 0 } { .mfi nop.m 0 fma.s1 FR_C51 = FR_C51,FR_r,FR_C50 - nop.i 0 + nop.i 0 };; { .mfi (p14) getf.exp GR_SignExp = FR_rs fma.s1 FR_C01 = FR_C01,FR_C11,f0 - nop.i 0 + nop.i 0 } { .mfi nop.m 0 (p14) fma.s1 FR_S01 = FR_S01,FR_rs2,FR_S00 - nop.i 0 + nop.i 0 };; { .mfi nop.m 0 @@ -763,7 +763,7 @@ 
GLOBAL_LIBM_ENTRY(tgammaf) { .mfi nop.m 0 (p14) fma.s1 FR_S11 = FR_S11,FR_rs2,FR_S10 -(p14) tbit.z.unc p11,p12 = GR_SignExp,17 +(p14) tbit.z.unc p11,p12 = GR_SignExp,17 } { .mfi nop.m 0 @@ -788,7 +788,7 @@ GLOBAL_LIBM_ENTRY(tgammaf) { .mfi nop.m 0 (p7) fma.s1 FR_An = FR_Rq1,FR_An,f0 - nop.i 0 + nop.i 0 };; { .mfb nop.m 0 @@ -841,7 +841,7 @@ GLOBAL_LIBM_ENTRY(tgammaf) };; { .mfi - nop.m 0 + nop.m 0 (p14) fma.s1 FR_GAMMA = FR_C01,FR_C41,f0 (p14) tbit.z.unc p6,p7 = GR_Sig,0 } @@ -954,7 +954,7 @@ tgammaf_from_0_to_1: { .mfi cmp.gt p9,p0 = GR_Arg,GR_ExpOf05 fma.s1 FR_r = f0,f0,FR_NormX // reduced arg for (0;1) - mov GR_ExpOf025 = 0x7FA + mov GR_ExpOf025 = 0x7FA };; { .mfi getf.s GR_ArgNz = f8 @@ -973,7 +973,7 @@ tgammaf_from_0_to_1: (p6) mov GR_Tbl12Offs = 0x40 // 0.25 <= x < 0.5 } { .mfi - add GR_ad_Ce = 0x2C0,GR_ad_Data + add GR_ad_Ce = 0x2C0,GR_ad_Data nop.f 0 add GR_ad_Co = 0x2A0,GR_ad_Data };; @@ -992,7 +992,7 @@ tgammaf_from_0_to_1: ldfpd FR_A7,FR_A6 = [GR_ad_Co],16 ldfpd FR_A5,FR_A4 = [GR_ad_Ce],16 // jump if argument is close to 0 positive -(p12) br.cond.spnt tgammaf_overflow +(p12) br.cond.spnt tgammaf_overflow };; { .mfi ldfpd FR_A3,FR_A2 = [GR_ad_Co],16 @@ -1003,7 +1003,7 @@ tgammaf_from_0_to_1: { .mfb ldfpd FR_A1,FR_A0 = [GR_ad_Ce],16 nop.f 0 - br.cond.sptk tgamma_from_0_to_2 + br.cond.sptk tgamma_from_0_to_2 };; // here if 1 < x < 2 @@ -1023,7 +1023,7 @@ tgammaf_from_1_to_2: { .mfi nop.m 0 nop.f 0 - and GR_TblOffs = GR_TblOffs,GR_TblOffsMask + and GR_TblOffs = GR_TblOffs,GR_TblOffsMask };; { .mfi shladd GR_ad_Co = GR_TblOffs,3,GR_ad_Co @@ -1106,17 +1106,17 @@ tgamma_from_0_to_2: nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 (p10) fma.s1 FR_GAMMA = FR_A7,FR_r4,FR_A3 nop.i 0 } { .mfi - nop.m 0 + nop.m 0 (p11) fma.s.s0 f8 = FR_A7,FR_r4,FR_A3 nop.i 0 };; { .mfb - nop.m 0 + nop.m 0 (p10) fma.s.s0 f8 = FR_GAMMA,FR_Rcp2,f0 br.ret.sptk b0 };; @@ -1140,7 +1140,7 @@ tgammaf_overflow_near0_bound: { .mfi nop.m 0 nop.f 0 - shl r8 = r8,20 + shl r8 = r8,20 };; { .mfi sub r8 = 
r8,r0,1 @@ -1150,12 +1150,12 @@ tgammaf_overflow_near0_bound: .pred.rel "mutex",p14,p15 { .mfi // set p8 to 0 in case of overflow and to 1 otherwise - // for negative arg: + // for negative arg: // no overflow if rounding mode either Z or +Inf, i.e. // GR_fpsr > 1 (p14) cmp.lt p8,p0 = 1,GR_fpsr nop.f 0 - // for positive arg: + // for positive arg: // no overflow if rounding mode either Z or -Inf, i.e. // (GR_fpsr & 1) == 0 (p15) tbit.z p0,p8 = GR_fpsr,0 @@ -1186,7 +1186,7 @@ tgammaf_overflow: mov GR_TAG = 261 // overflow } { .mfb - nop.m 0 + nop.m 0 (p15) fma.s.s0 f8 = f9,f9,f0 // set I,O and +INF result br.cond.sptk tgammaf_libm_err };; @@ -1259,7 +1259,7 @@ tgammaf_spec_args: { .mfi (p7) mov GR_TAG = 262 // negative (p7) frcpa.s0 f8,p0 = f1,f8 - nop.i 0 + nop.i 0 } { .mib nop.m 0 @@ -1283,10 +1283,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region) add GR_Parameter_Y=-32,sp // Parameter 2 value nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs // Save ar.pfs + mov GR_SAVE_PFS=ar.pfs // Save ar.pfs } { .mfi -.fframe 64 +.fframe 64 add sp=-64,sp // Create new stack nop.f 0 mov GR_SAVE_GP=gp // Save gp @@ -1294,18 +1294,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region) { .mmi stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack add GR_Parameter_X = 16,sp // Parameter 1 address -.save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 // Save b0 +.save b0, GR_SAVE_B0 + mov GR_SAVE_B0=b0 // Save b0 };; .body { .mib - stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack - add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address - nop.b 0 + stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack + add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address + nop.b 0 } { .mib stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack - add GR_Parameter_Y = -16,GR_Parameter_Y + add GR_Parameter_Y = -16,GR_Parameter_Y br.call.sptk b0=__libm_error_support# // Call error handling function };; { .mmi @@ -1320,10 +1320,10 @@ 
LOCAL_LIBM_ENTRY(__libm_error_region) mov b0 = GR_SAVE_B0 // Restore return address };; { .mib - mov gp = GR_SAVE_GP // Restore gp + mov gp = GR_SAVE_GP // Restore gp mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs br.ret.sptk b0 // Return -};; +};; LOCAL_LIBM_END(__libm_error_region) .type __libm_error_support#,@function diff --git a/sysdeps/ia64/fpu/w_tgammal.S b/sysdeps/ia64/fpu/w_tgammal.S index f64e213..ca60eed 100644 --- a/sysdeps/ia64/fpu/w_tgammal.S +++ b/sysdeps/ia64/fpu/w_tgammal.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -56,7 +56,7 @@ // Floating-Point Registers: f8-f15 // f32-f127 // -// General Purpose Registers: r32-r67 +// General Purpose Registers: r32-r67 // // Predicate Registers: p6-p15 // @@ -65,8 +65,8 @@ // IEEE Special Conditions: // // tgammal(+inf) = +inf -// tgammal(-inf) = QNaN -// tgammal(+/-0) = +/-inf +// tgammal(-inf) = QNaN +// tgammal(+/-0) = +/-inf // tgammal(x<0, x - integer) = QNaN // tgammal(SNaN) = QNaN // tgammal(QNaN) = QNaN @@ -78,23 +78,23 @@ // Algorithm description // --------------------- // -// There are 3 main paths in the implementation +// There are 3 main paths in the implementation // (and additional special values branches) // // 1) |X| >= 13 - Stirling formula computation // a) Positive arguments: -// TGAMMAL(X) = exp((X-0.5)*ln(X) - X + C + S(Z)), -// where C = 0.5*ln(2*Pi) , Z = 1/Z, S(Z) - Bernulli polynomial +// TGAMMAL(X) = exp((X-0.5)*ln(X) - X + C + S(Z)), +// where C = 0.5*ln(2*Pi) , Z = 1/Z, S(Z) - Bernulli polynomial // (up to 'B18' term). -// Some of these calculation done in multiprecision. -// Ln returns multiprecision result too +// Some of these calculation done in multiprecision. +// Ln returns multiprecision result too // and exp also accepts and returns pair of values. -// +// // b) Negative arguments // TGAMMAL(-X) = PI/(X*TGAMMAL(X)*sin(PI*X)). // (X*sin(PI*X))/PI calculated in parallel with TGAMMAL. // Here we use polynomial of 9th degree with 2 multiprecision steps. 
-// Argument range reduction is: +// Argument range reduction is: // N = [x] with round to nearest, r = x - N, -0.5 <= r < 0.5 // After ((X-0.5)*ln(X) - X + C + S(Z)) completed we just invert // its result and compute exp with negative argument (1/exp(x)=exp(-x)) @@ -108,9 +108,9 @@ // and first 6 multiprecision computations. // Range reduction looks like // N = [x] with truncate, r = x - N - 0.5, -0.5 <= r < 0.5 -// For odd intervals we use reccurent formula: +// For odd intervals we use reccurent formula: // TGAMMAL(X) = TGAMMA(X-1)*(X-1) -// [1;2] interval is splitted to 3 subranges: +// [1;2] interval is splitted to 3 subranges: // [1;1.25], [1.25;1.75], [1.75;2] with the same polynomial forms // // b) Negative arguments @@ -121,7 +121,7 @@ // // 3) 0 < |X| < 1 - Near 0 part // a) Here we use reccurent formula TGAMMAL(X) = TGAMMAL(X+1)/X -// TGAMMAL(X+1) calculated as shown above, +// TGAMMAL(X+1) calculated as shown above, // 1/X result obtained in parallel. Then we just multiply these values. // There is only additional separated subrange: [0;0.125] with specific // polynomial constants set. 
@@ -136,7 +136,7 @@ RODATA .align 16 LOCAL_OBJECT_START(Constants_Tgammal_log_80_Q) -// log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1 +// log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1 data4 0x00000000,0xB1721800,0x00003FFE,0x00000000 data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000 data4 0xA51BE0AF,0x92492453,0x00003FFC,0x00000000 @@ -144,133 +144,133 @@ data4 0xA0CFD29F,0xAAAAAB73,0x0000BFFC,0x00000000 data4 0xCCCE3872,0xCCCCCCCC,0x00003FFC,0x00000000 data4 0xFFFFB4FB,0xFFFFFFFF,0x0000BFFC,0x00000000 data4 0xAAAAAAAB,0xAAAAAAAA,0x00003FFD,0x00000000 -data4 0x00000000,0x80000000,0x0000BFFE,0x00000000 +data4 0x00000000,0x80000000,0x0000BFFE,0x00000000 LOCAL_OBJECT_END(Constants_Tgammal_log_80_Q) .align 64 LOCAL_OBJECT_START(Constants_Tgammal_log_80_Z_G_H_h1) -// Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 IEEE double +// Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 IEEE double data4 0x00008000,0x3F800000,0x00000000,0x00000000 -data4 0x00000000,0x00000000,0x00000000,0x00000000 +data4 0x00000000,0x00000000,0x00000000,0x00000000 data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000 data4 0xEBA0E0D1,0x8B1D330B,0x00003FDA,0x00000000 data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000 data4 0x9EADD553,0xE2AF365E,0x00003FE2,0x00000000 data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000 -data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000 +data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000 data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000 -data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000 -data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000 -data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000 -data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000 +data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000 +data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000 +data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000 +data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000 data4 0x457978A1,0x8718789F,0x00003FE2,0x00000000 -data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000 -data4 
0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000 -data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000 -data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000 -data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000 -data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000 -data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000 -data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000 -data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000 -data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000 +data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000 +data4 0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000 +data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000 +data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000 +data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000 +data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000 +data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000 +data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000 +data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000 +data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000 data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000 -data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000 -data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000 -data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000 -data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000 -data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000 +data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000 +data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000 +data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000 +data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000 +data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000 data4 0x00004211,0x3F042108,0x3F29516A,0x00000000 -data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000 +data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000 LOCAL_OBJECT_END(Constants_Tgammal_log_80_Z_G_H_h1) .align 64 LOCAL_OBJECT_START(Constants_Tgammal_log_80_Z_G_H_h2) // Z2 - 16 bit fixed, G2 and H2 IEEE single, h2 IEEE double -data4 0x00008000,0x3F800000,0x00000000,0x00000000 -data4 0x00000000,0x00000000,0x00000000,0x00000000 -data4 
0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000 +data4 0x00008000,0x3F800000,0x00000000,0x00000000 +data4 0x00000000,0x00000000,0x00000000,0x00000000 +data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000 data4 0x211398BF,0xAD08B116,0x00003FDB,0x00000000 -data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000 -data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000 -data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000 -data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000 -data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000 -data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000 -data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000 +data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000 +data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000 +data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000 +data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000 +data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000 +data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000 +data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000 data4 0x049EB22F,0xD1B87D3C,0x00003FDE,0x00000000 -data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000 -data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000 +data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000 +data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000 data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000 -data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000 -data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000 -data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000 -data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000 -data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000 -data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000 -data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000 -data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000 -data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000 -data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000 -data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000 -data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000 -data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000 -data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000 -data4 
0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000 -data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000 -data4 0xBE11C424,0xA49C8DB5,0x0000BFE0,0x00000000 +data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000 +data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000 +data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000 +data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000 +data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000 +data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000 +data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000 +data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000 +data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000 +data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000 +data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000 +data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000 +data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000 +data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000 +data4 0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000 +data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000 +data4 0xBE11C424,0xA49C8DB5,0x0000BFE0,0x00000000 LOCAL_OBJECT_END(Constants_Tgammal_log_80_Z_G_H_h2) .align 64 LOCAL_OBJECT_START(Constants_Tgammal_log_80_h3_G_H) -// h3 IEEE double extended, H3 and G3 IEEE single -data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00 +// h3 IEEE double extended, H3 and G3 IEEE single +data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00 data4 0x9B7FAD21,0x90051030,0x00003FD8,0x3F7FF400 -data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00 -data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400 +data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00 +data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400 data4 0xCA964D95,0xCE65C1D8,0x0000BFD8,0x3F7FDC00 -data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400 -data4 0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08 -data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408 -data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10 -data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410 -data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18 +data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400 +data4 
0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08 +data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408 +data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10 +data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410 +data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18 data4 0x2F053150,0xB25CA912,0x0000BFDA,0x3F7FA420 -data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20 -data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428 -data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30 -data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438 -data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40 -data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448 -data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50 -data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458 -data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68 -data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470 -data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78 +data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20 +data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428 +data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30 +data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438 +data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40 +data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448 +data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50 +data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458 +data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68 +data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470 +data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78 data4 0x1F34A7EB,0x9A995A97,0x0000BFDC,0x3F7F4488 -data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90 -data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0 -data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8 -data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8 -data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8 -data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8 -data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0 -data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0 -data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H's start here -data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D 
+data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90 +data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0 +data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8 +data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8 +data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8 +data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8 +data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0 +data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0 +data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H's start here +data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D data4 0x3A87F20B,0x3A97F68B,0x3AA7EB86,0x3AB7E101 -data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED -data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766 -data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6 -data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620 -data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D +data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED +data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766 +data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6 +data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620 +data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D LOCAL_OBJECT_END(Constants_Tgammal_log_80_h3_G_H) -.align 64 +.align 64 LOCAL_OBJECT_START(Constants_Tgammal_stirling) //0.5*ln(2*Pi)=9.1893853320467266954096885e-01 + 7.2239360881843238220057778e-17 data8 0x3FED67F1C864BEB4, 0x3C94D252F2400510 -// Bernulli numbers +// Bernulli numbers data8 0xAAAAAAAAAAAAAAAB, 0x00003FFB //B2 = 8.3333333333333333333333333333e-02 data8 0xBF66C16C16C16C17 //B4 = -2.7777777777777777777777777778e-03 data8 0x3F4A01A01A01A01A //B6 = 7.9365079365079365079365079365e-04 @@ -283,9 +283,9 @@ data8 0x3FC6FE96381E0680 //B18 = 1.7964437236883057316493849002e-01 data8 0x3FE0000000000000 // 0.5 LOCAL_OBJECT_END(Constants_Tgammal_stirling) -.align 64 +.align 64 LOCAL_OBJECT_START(Constants_Tgammal_sin) -// Polynomial coefficients for the sin(Pi*x)/Pi, 0 <= |x| < 0.5 +// Polynomial coefficients for the sin(Pi*x)/Pi, 0 <= |x| < 0.5 //A2 = 8.1174242528335360802316245099e-01 + 
5.1302254650266899774269946201e-18 data8 0x3FE9F9CB402BC46C, 0x3C57A8B3819B7CEC //A1 = -1.6449340668482264060656916627e+00 + -3.0210280454695477893051351574e-17 @@ -299,7 +299,7 @@ data8 0xD63402E798FEC896, 0x00003FF9 //A4 = 2.6147847817611456327417812320e-02 data8 0xC354723906D95E92, 0x0000BFFC //A3 = -1.9075182412208257558294507774e-01 LOCAL_OBJECT_END(Constants_Tgammal_sin) -.align 64 +.align 64 LOCAL_OBJECT_START(Constants_Tgammal_exp_64_Arg) data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000 // L_hi = hi part log(2)/2^12 data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000 // L_lo = lo part log(2)/2^12 @@ -312,8 +312,8 @@ data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000 // A1 LOCAL_OBJECT_END(Constants_Tgammal_exp_64_A) LOCAL_OBJECT_START(Constants_Tgammal_exp_64_T1) -data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29 -data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5 +data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29 +data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5 data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA @@ -331,21 +331,21 @@ data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C LOCAL_OBJECT_END(Constants_Tgammal_exp_64_T1) LOCAL_OBJECT_START(Constants_Tgammal_exp_64_T2) -data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4 -data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7 -data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E -data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349 -data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987 -data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA -data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610 -data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A -data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8 -data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA -data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50 -data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA -data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07 -data4 
0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269 -data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE +data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4 +data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7 +data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E +data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349 +data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987 +data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA +data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610 +data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A +data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8 +data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA +data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50 +data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA +data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07 +data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269 +data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37 LOCAL_OBJECT_END(Constants_Tgammal_exp_64_T2) @@ -423,7 +423,7 @@ LOCAL_OBJECT_END(Constants_Tgammal_exp_64_W2) LOCAL_OBJECT_START(Constants_Tgammal_poly) -// Polynomial coefficients for the tgammal(x), 2 <= |x| < 3 +// Polynomial coefficients for the tgammal(x), 2 <= |x| < 3 //A5 = 2.8360780594841213109180699803e-02 + 2.2504152891014320704380000000e-19 data8 0x3F9D0A9BC49353D2, 0x3C109AEA0F23CE2D //A4 = 1.0967323400216015538699565468e-01 + 9.9225166000430644587276000000e-18 @@ -452,7 +452,7 @@ data8 0xC5C7E6D62A6991D8, 0x00003FF4 //A8 = 7.5447412886334708803357581519e-04 data8 0xD2AF690725C62D88, 0x00003FF5 //A7 = 1.6074004848394703022110823298e-03 data8 0xAA44E635D4B7B682, 0x00003FF8 //A6 = 1.0392403425906843901680697839e-02 // -// Polynomial coefficients for the tgammal(x), 4 <= |x| < 5 +// Polynomial coefficients for the tgammal(x), 4 <= |x| < 5 //A5 = 1.1600674810589555185913468449e+00 + 3.0229979112715124660731000000e-17 data8 0x3FF28FA2EB44D22E, 0x3C816D285234C815 //A4 = 3.1374268565470946334983182169e+00 + 1.3694868953995008497659600000e-16 @@ -481,7 +481,7 @@ 
data8 0xDA38E39C13819D2A, 0x00003FF9 //A8 = 2.6638454961912040754759086920e-02 data8 0xD696DF8D8389FE53, 0x00003FFB //A7 = 1.0477995539298934056097943975e-01 data8 0xBDD5C153048BC435, 0x00003FFD //A6 = 3.7077144754791605130056406006e-01 // -// Polynomial coefficients for the tgammal(x), 6 <= |x| < 7 +// Polynomial coefficients for the tgammal(x), 6 <= |x| < 7 //A5 = 6.7169398121054200601065531373e+01 + 2.9481001527213915901489600000e-15 data8 0x4050CAD76B377BA0, 0x3CEA8DDB2B2DE93E //A4 = 1.6115104376855398982115730178e+02 + 1.3422421925418824418257300000e-14 @@ -510,7 +510,7 @@ data8 0x84FBDE0947718B58, 0x00004000 //A8 = 2.0778727617851237754568261869e+00 data8 0xEEC1371E265A2C3A, 0x00004001 //A7 = 7.4610858525146049022238037342e+00 data8 0xBF514B9BE68ED59D, 0x00004003 //A6 = 2.3914694993947572859629197920e+01 // -// Polynomial coefficients for the tgammal(x), 8 <= |x| < 9 +// Polynomial coefficients for the tgammal(x), 8 <= |x| < 9 //A5 = 5.8487447114416836484451778233e+03 + 4.7365465221455983144182900000e-13 data8 0x40B6D8BEA568B6FD, 0x3D60AA4D44C2589B //A4 = 1.2796464063087094473303295672e+04 + 1.2373341702514898266244200000e-12 @@ -539,7 +539,7 @@ data8 0xE5AA8BB1BF02934D, 0x00004006 //A8 = 2.2966619406617480799195651466e+02 data8 0xBF6CFEFD67F59845, 0x00004008 //A7 = 7.6570306334640770654588802417e+02 data8 0x8DB5D2F001635C29, 0x0000400A //A6 = 2.2673639984182571062068713002e+03 // -// Polynomial coefficients for the tgammal(x), 10 <= |x| < 11 +// Polynomial coefficients for the tgammal(x), 10 <= |x| < 11 //A5 = 7.2546009516580589115619659424e+05 + 1.0343348865365065212891728822e-10 data8 0x412623A830B99290, 0x3DDC6E7C157611C4 //A4 = 1.4756292870840241666883230209e+06 + 8.1516565365333844166705674775e-11 @@ -568,7 +568,7 @@ data8 0x872390769650FBE2, 0x0000400E //A8 = 3.4595564309496661629764193479e+04 data8 0xD3E5E8D6923910C1, 0x0000400F //A7 = 1.0849181904819284819615140521e+05 data8 0x930D70602F50B754, 0x00004011 //A6 = 3.0116351174131169193070583741e+05 // 
-// Polynomial coefficients for the tgammal(x), 12 <= |x| < 13 +// Polynomial coefficients for the tgammal(x), 12 <= |x| < 13 //A5 = 1.2249876249976964294910430908e+08 + 6.0051348061679753770848000000e-09 data8 0x419D34BB29FFC39D, 0x3E39CAB72E01818D //A4 = 2.3482765927605420351028442383e+08 + 1.1874729051592862323641700000e-08 @@ -601,7 +601,7 @@ LOCAL_OBJECT_END(Constants_Tgammal_poly) LOCAL_OBJECT_START(Constants_Tgammal_poly_splitted) -// Polynomial coefficients for the tgammal(x), 1 <= |x| < 1.25 +// Polynomial coefficients for the tgammal(x), 1 <= |x| < 1.25 //A5 = -9.8199506890310417350775651357e-01+ -3.2546247786122976510752200000e-17 data8 0xBFEF6C80EC38B509, 0xBC82C2FA7A3DE3BD //A4 = 9.8172808683439960475425323239e-01 + 4.4847611775298520359811400000e-17 @@ -630,7 +630,7 @@ data8 0xFF83DAC83119B52C, 0x00003FFE //A8 = 9.9810569179053383842734164901e-01 data8 0xFEF9F8AB891ABB24, 0x0000BFFE //A7 = -9.9600176036720260345608796766e-01 data8 0xFE3F0537573C8235, 0x00003FFE //A6 = 9.9314911461918778676646301341e-01 // -// Polynomial coefficients for the tgammal(x), 1.25 <= |x| < 1.75 +// Polynomial coefficients for the tgammal(x), 1.25 <= |x| < 1.75 //A5 = -7.7523052299853054125655660300e-02+ -1.2693512521686721504433600000e-17 data8 0xBFB3D88CFE50601B, 0xBC6D44ED60EE2170 //A4 = 1.4464535904462152982041800442e-01 + 2.5426820829345729856648800000e-17 @@ -659,7 +659,7 @@ data8 0xD3A963578BE291E3, 0x00003FF9 //A8 = 2.5837606456090186343624210891e-02 data8 0x9BA7EAE64C42FDF7, 0x0000BFFA //A7 = -3.8001935555045161419575037512e-02 data8 0xF0115BA1A77607E7, 0x00003FFA //A6 = 5.8610303817173477119764956736e-02 // -// Polynomial coefficients for the tgammal(x), 1.75 <= |x| < 2.0 +// Polynomial coefficients for the tgammal(x), 1.75 <= |x| < 2.0 //A5 = 2.6698206874501426502654943818e-04 + 3.4033756836921062797887300000e-20 data8 0x3F317F3740FE2A68, 0x3BE417093234B06E //A4 = 7.4249010753513894345090307070e-02 + 3.9810018444482764697014200000e-18 @@ -688,7 +688,7 @@ data8 
0x89E224E42F93F005, 0x00003FF6 //A8 = 2.1039333407187324139473634747e-03 data8 0xBAF374824937A323, 0x00003FF6 //A7 = 2.8526458211545152218493600470e-03 data8 0xB6BF7564F52140C6, 0x00003FF8 //A6 = 1.1154045718131014476684982178e-02 // -// Polynomial coefficients for the tgammal(x), 0.0 <= |x| < 0.125 +// Polynomial coefficients for the tgammal(x), 0.0 <= |x| < 0.125 //A5 = -9.8199506890314514073736518185e-01+ -5.9363811993837985890950900000e-17 data8 0xBFEF6C80EC38B67A, 0xBC911C46B447C81F //A4 = 9.8172808683440015986576554496e-01 + 2.7457414262802803699834200000e-17 @@ -739,7 +739,7 @@ GR_l_Log_Table1 = r34 GR_l_BIAS = r34 GR_l_Index1 = r35 GR_l_Index2 = r36 -GR_l_signif_Z = r37 +GR_l_signif_Z = r37 GR_l_X_0 = r38 GR_l_X_1 = r39 GR_l_X_2 = r40 @@ -788,33 +788,33 @@ FR_l_poly = f51 FR_l_rsq = f52 FR_l_Y_lo_res = f53 -FR_l_Y0 = f55 -FR_l_Q0 = f56 -FR_l_E0 = f57 -FR_l_E2 = f58 -FR_l_E1 = f59 -FR_l_Y1 = f60 -FR_l_E3 = f61 -FR_l_Y2 = f62 - -FR_l_Z = f63 -FR_l_Z2 = f64 -FR_l_Z4 = f65 -FR_l_Z8 = f66 - -FR_l_CH = f67 -FR_l_CL = f68 - -FR_l_B2 = f69 -FR_l_B4 = f70 -FR_l_B6 = f71 -FR_l_B8 = f72 -FR_l_B10 = f73 -FR_l_B12 = f74 -FR_l_B14 = f75 -FR_l_B16 = f76 -FR_l_B18 = f77 -FR_l_Half = f78 +FR_l_Y0 = f55 +FR_l_Q0 = f56 +FR_l_E0 = f57 +FR_l_E2 = f58 +FR_l_E1 = f59 +FR_l_Y1 = f60 +FR_l_E3 = f61 +FR_l_Y2 = f62 + +FR_l_Z = f63 +FR_l_Z2 = f64 +FR_l_Z4 = f65 +FR_l_Z8 = f66 + +FR_l_CH = f67 +FR_l_CL = f68 + +FR_l_B2 = f69 +FR_l_B4 = f70 +FR_l_B6 = f71 +FR_l_B8 = f72 +FR_l_B10 = f73 +FR_l_B12 = f74 +FR_l_B14 = f75 +FR_l_B16 = f76 +FR_l_B18 = f77 +FR_l_Half = f78 FR_l_SS = f79 FR_l_AbsX_m_Half = f80 FR_l_CXH = f81 @@ -856,17 +856,17 @@ FR_n_TT = f97 FR_n_TH = f98 FR_n_TL = f99 -FR_n_A2H = f100 -FR_n_A2L = f101 -FR_n_A1H = f102 -FR_n_A1L = f103 -FR_n_A9 = f104 -FR_n_A8 = f105 -FR_n_A7 = f106 -FR_n_A6 = f107 -FR_n_A5 = f108 -FR_n_A4 = f109 -FR_n_A3 = f110 +FR_n_A2H = f100 +FR_n_A2L = f101 +FR_n_A1H = f102 +FR_n_A1L = f103 +FR_n_A9 = f104 +FR_n_A8 = f105 +FR_n_A7 = f106 +FR_n_A6 = f107 
+FR_n_A5 = f108 +FR_n_A4 = f109 +FR_n_A3 = f110 FR_n_PolyH = f111 FR_n_PolyL = f112 @@ -881,17 +881,17 @@ FR_n_NegOne = f116 FR_n_Y0 = f117 -FR_n_Q0 = f118 -FR_n_E0 = f119 - -FR_n_E2 = f120 -FR_n_E1 = f121 - -FR_n_Y1 = f55 -FR_n_E3 = f56 - -FR_n_Y2 = f57 -FR_n_R0 = f58 +FR_n_Q0 = f118 +FR_n_E0 = f119 + +FR_n_E2 = f120 +FR_n_E1 = f121 + +FR_n_Y1 = f55 +FR_n_E3 = f56 + +FR_n_Y2 = f57 +FR_n_R0 = f58 FR_n_E4 = f59 FR_n_RcpResH = f60 @@ -1020,71 +1020,71 @@ FR_p_1p5 = f34 FR_p_AbsXM1 = f35 FR_p_2 = f36 -FR_p_A20 = f37 -FR_p_A19 = f38 -FR_p_A18 = f39 -FR_p_A17 = f40 -FR_p_A16 = f41 -FR_p_A15 = f42 -FR_p_A14 = f43 -FR_p_A13 = f44 -FR_p_A12 = f45 -FR_p_A11 = f46 -FR_p_A10 = f47 -FR_p_A9 = f48 -FR_p_A8 = f49 -FR_p_A7 = f50 -FR_p_A6 = f51 -FR_p_A5H = f52 -FR_p_A5L = f53 -FR_p_A4H = f54 -FR_p_A4L = f55 -FR_p_A3H = f56 -FR_p_A3L = f57 -FR_p_A2H = f58 -FR_p_A2L = f59 -FR_p_A1H = f60 -FR_p_A1L = f61 -FR_p_A0H = f62 -FR_p_A0L = f63 +FR_p_A20 = f37 +FR_p_A19 = f38 +FR_p_A18 = f39 +FR_p_A17 = f40 +FR_p_A16 = f41 +FR_p_A15 = f42 +FR_p_A14 = f43 +FR_p_A13 = f44 +FR_p_A12 = f45 +FR_p_A11 = f46 +FR_p_A10 = f47 +FR_p_A9 = f48 +FR_p_A8 = f49 +FR_p_A7 = f50 +FR_p_A6 = f51 +FR_p_A5H = f52 +FR_p_A5L = f53 +FR_p_A4H = f54 +FR_p_A4L = f55 +FR_p_A3H = f56 +FR_p_A3L = f57 +FR_p_A2H = f58 +FR_p_A2L = f59 +FR_p_A1H = f60 +FR_p_A1L = f61 +FR_p_A0H = f62 +FR_p_A0L = f63 FR_p_XR = f64 -FR_p_XR2 = f65 -FR_p_XR2L = f52 - -FR_p_XR3 = f58 -FR_p_XR3L = f38 - -FR_p_XR4 = f42 -FR_p_XR6 = f40 -FR_p_XR8 = f37 - -FR_p_Poly5H = f66 -FR_p_Poly5L = f67 -FR_p_Poly4H = f53 -FR_p_Poly4L = f44 -FR_p_Poly3H = f41 -FR_p_Poly3L = f47 -FR_p_Poly2H = f68 -FR_p_Poly2L = f54 -FR_p_Poly1H = f55 -FR_p_Poly1L = f46 -FR_p_Poly0H = f39 -FR_p_Poly0L = f43 - -FR_p_Temp5H = f69 -FR_p_Temp5L = f70 -FR_p_Temp4H = f71 -FR_p_Temp4L = f60 -FR_p_Temp2H = f72 -FR_p_Temp2L = f73 -FR_p_Temp1H = f59 -FR_p_Temp1L = f61 -FR_p_Temp0H = f49 -FR_p_Temp0L = f48 -FR_p_PolyTail = f45 -FR_p_OddPoly0H = f56 -FR_p_OddPoly0L = f51 +FR_p_XR2 = f65 
+FR_p_XR2L = f52 + +FR_p_XR3 = f58 +FR_p_XR3L = f38 + +FR_p_XR4 = f42 +FR_p_XR6 = f40 +FR_p_XR8 = f37 + +FR_p_Poly5H = f66 +FR_p_Poly5L = f67 +FR_p_Poly4H = f53 +FR_p_Poly4L = f44 +FR_p_Poly3H = f41 +FR_p_Poly3L = f47 +FR_p_Poly2H = f68 +FR_p_Poly2L = f54 +FR_p_Poly1H = f55 +FR_p_Poly1L = f46 +FR_p_Poly0H = f39 +FR_p_Poly0L = f43 + +FR_p_Temp5H = f69 +FR_p_Temp5L = f70 +FR_p_Temp4H = f71 +FR_p_Temp4L = f60 +FR_p_Temp2H = f72 +FR_p_Temp2L = f73 +FR_p_Temp1H = f59 +FR_p_Temp1L = f61 +FR_p_Temp0H = f49 +FR_p_Temp0L = f48 +FR_p_PolyTail = f45 +FR_p_OddPoly0H = f56 +FR_p_OddPoly0L = f51 FR_p_0p25 = f73 @@ -1092,83 +1092,83 @@ FR_p_0p25 = f73 //======================================================= // Negative polynomial part registers // General Purpose Registers -GR_r_sin_Table = r47 -GR_r_sin_Table2 = r60 +GR_r_sin_Table = r47 +GR_r_sin_Table2 = r60 // Floating Point Registers -FR_r_IXNS = FR_n_IXNS -FR_r_IXN = FR_n_IXN +FR_r_IXNS = FR_n_IXNS +FR_r_IXN = FR_n_IXN FR_r_AbsX = FR_l_AbsX -FR_r_A9 = f74 -FR_r_A8 = f75 -FR_r_A7 = f76 -FR_r_A6 = f77 -FR_r_A5 = f78 -FR_r_A4 = f79 -FR_r_A3 = f80 -FR_r_A2H = f81 -FR_r_A2L = f82 -FR_r_A1H = f83 -FR_r_A1L = f84 - -FR_r_XNS = f85 -FR_r_XS = f86 -FR_r_XS2 = f87 -FR_r_XS2L = f88 -FR_r_XS4 = f89 -FR_r_XS7 = f90 -FR_r_XS8 = f91 +FR_r_A9 = f74 +FR_r_A8 = f75 +FR_r_A7 = f76 +FR_r_A6 = f77 +FR_r_A5 = f78 +FR_r_A4 = f79 +FR_r_A3 = f80 +FR_r_A2H = f81 +FR_r_A2L = f82 +FR_r_A1H = f83 +FR_r_A1L = f84 + +FR_r_XNS = f85 +FR_r_XS = f86 +FR_r_XS2 = f87 +FR_r_XS2L = f88 +FR_r_XS4 = f89 +FR_r_XS7 = f90 +FR_r_XS8 = f91 FR_r_Tail = f92 -FR_r_TT = f93 -FR_r_TH = f94 -FR_r_TL = f95 +FR_r_TT = f93 +FR_r_TH = f94 +FR_r_TL = f95 FR_r_ResH = f96 FR_r_ResL = f97 -FR_r_Res3H = f98 -FR_r_Res3L = f99 - -FR_r_Res1H = f100 -FR_r_Res1L = f101 - - - -FR_r_Y0 = f102 -FR_r_Q0 = f103 -FR_r_E0 = f104 -FR_r_E2 = f105 -FR_r_E1 = f106 -FR_r_Y1 = f107 -FR_r_E3 = f108 -FR_r_Y2 = f109 -FR_r_R0 = f110 -FR_r_E4 = f111 -FR_r_ZH = f112 -FR_r_Y3 = f113 -FR_r_R1 = f114 
+FR_r_Res3H = f98 +FR_r_Res3L = f99 + +FR_r_Res1H = f100 +FR_r_Res1L = f101 + + + +FR_r_Y0 = f102 +FR_r_Q0 = f103 +FR_r_E0 = f104 +FR_r_E2 = f105 +FR_r_E1 = f106 +FR_r_Y1 = f107 +FR_r_E3 = f108 +FR_r_Y2 = f109 +FR_r_R0 = f110 +FR_r_E4 = f111 +FR_r_ZH = f112 +FR_r_Y3 = f113 +FR_r_R1 = f114 FR_r_ZHN = f115 FR_r_ZL = f115 FR_r_NegOne = f116 -FR_z_Y0 = f102 -FR_z_Q0 = f103 -FR_z_E0 = f104 -FR_z_E2 = f105 -FR_z_E1 = f106 -FR_z_Y1 = f107 -FR_z_E3 = f108 -FR_z_Y2 = f109 -FR_z_R0 = f110 -FR_z_E4 = f111 -FR_z_ZH = f112 -FR_z_Y3 = f113 -FR_z_R1 = f114 -FR_z_ZL = f115 +FR_z_Y0 = f102 +FR_z_Q0 = f103 +FR_z_E0 = f104 +FR_z_E2 = f105 +FR_z_E1 = f106 +FR_z_Y1 = f107 +FR_z_E3 = f108 +FR_z_Y2 = f109 +FR_z_R0 = f110 +FR_z_E4 = f111 +FR_z_ZH = f112 +FR_z_Y3 = f113 +FR_z_R1 = f114 +FR_z_ZL = f115 // General Purpose Registers @@ -1176,9 +1176,9 @@ GR_SAVE_PFS = r32 GR_DenOverflow = r33 GR_u_XN = r34 -GR_SAVE_B0 = r35 -GR_SAVE_GP = r36 -GR_SAVE_SP = r37 +GR_SAVE_B0 = r35 +GR_SAVE_GP = r36 +GR_SAVE_SP = r37 // Floating Point Registers FR_u_IXN = f34 @@ -1191,7 +1191,7 @@ GR_Parameter_RESULT = r66 GR_Parameter_TAG = r67 FR_RESULT = f8 -FR_X = f32 +FR_X = f32 FR_Y = f1 @@ -1200,9 +1200,9 @@ GLOBAL_LIBM_ENTRY(tgammal) { .mfi alloc r32 = ar.pfs,0,32,4,0 fabs FR_l_AbsX = f8 // Get absolute value of X - addl GR_n_sin_Table = @ltoff(Constants_Tgammal_sin), gp + addl GR_n_sin_Table = @ltoff(Constants_Tgammal_sin), gp } -{ .mfi +{ .mfi addl GR_l_Log_Table=@ltoff(Constants_Tgammal_log_80_Z_G_H_h1#),gp nop.f 0 addl GR_l_Stirling_Table = @ltoff(Constants_Tgammal_stirling), gp @@ -1220,7 +1220,7 @@ GLOBAL_LIBM_ENTRY(tgammal) };; { .mfi - ld8 GR_n_sin_Table = [GR_n_sin_Table] + ld8 GR_n_sin_Table = [GR_n_sin_Table] fclass.m p6,p0 = f8,0x1EF // Check x for NaN, 0, INF, denorm // NatVal. 
addl GR_c_NegSingularity = 0x1003E, r0 @@ -1241,9 +1241,9 @@ GLOBAL_LIBM_ENTRY(tgammal) };; { .mfi - ld8 GR_p_Table = [GR_p_Table] + ld8 GR_p_Table = [GR_p_Table] fcmp.lt.s1 p15, p14 = f8,f0 // p14 - positive arg, p15 - negative - shl GR_l_Index1 = GR_l_Index1,5 // Adjust Index1 ptr (x32) + shl GR_l_Index1 = GR_l_Index1,5 // Adjust Index1 ptr (x32) } { .mfb adds GR_c_NegUnderflow = 1765, r0 @@ -1263,7 +1263,7 @@ GLOBAL_LIBM_ENTRY(tgammal) andcm GR_c_X = GR_c_X, GR_c_SignBit // Remove sign };; -{ .mfi +{ .mfi addl GR_l_Log_Table = @ltoff(Constants_Tgammal_log_80_Z_G_H_h2#), gp fcmp.lt.s1 p10, p0 = FR_l_AbsX, f1 // If |X|<1 then p10 = 1 nop.i 0 @@ -1273,21 +1273,21 @@ GLOBAL_LIBM_ENTRY(tgammal) movl GR_l_BIAS = 0x000000000000FFFF // Bias for exponent };; -{ .mfi +{ .mfi ld8 GR_l_Log_Table = [GR_l_Log_Table] frcpa.s1 FR_l_Y0, p0 = f1, FR_l_AbsX // y = frcpa(x) nop.i 0 } { .mfi - ldfs FR_l_G_1 = [GR_l_Index1],4 // Load G_1 + ldfs FR_l_G_1 = [GR_l_Index1],4 // Load G_1 fsub.s1 FR_l_W = FR_l_AbsX, f1 // W = |X|-1 nop.i 0 };; -{ .mfi +{ .mfi getf.exp GR_l_N_Unbiased= FR_l_AbsX // exponent of |X| fmerge.se FR_l_S = f1, FR_l_AbsX // S = merging of X and 1.0 - cmp.gtu p11, p0 = GR_c_13, GR_c_X // If 1 <= |X| < 13 + cmp.gtu p11, p0 = GR_c_13, GR_c_X // If 1 <= |X| < 13 // then p11 = 1 } { .mfb @@ -1296,150 +1296,150 @@ GLOBAL_LIBM_ENTRY(tgammal) (p10) br.cond.spnt tgamma_lt_1 // Branch to |X| < 1 path /////////////////// };; -{ .mfi - ldfpd FR_n_A2H, FR_n_A2L = [GR_n_sin_Table], 16 +{ .mfi + ldfpd FR_n_A2H, FR_n_A2L = [GR_n_sin_Table], 16 nop.f 0 pmpyshr2.u GR_l_X_1 = GR_l_X_0,GR_l_Z_1,15 // Adjust Index2 (x32) } -{ .mfb - ldfe FR_l_B2 = [GR_l_Stirling_Table], 16 +{ .mfb + ldfe FR_l_B2 = [GR_l_Stirling_Table], 16 nop.f 0 (p11) br.cond.spnt tgamma_lt_13 // Branch to 1 <= |X| < 13 path /////////////// };; -{ .mfi - ldfe FR_l_h_1 = [GR_l_Index1],0 +{ .mfi + ldfe FR_l_h_1 = [GR_l_Index1],0 nop.f 0 sub GR_l_N = GR_l_N_Unbiased, GR_l_BIAS // N - BIAS } -{ .mib +{ .mib ldfpd 
FR_l_B4,FR_l_B6= [GR_l_Stirling_Table], 16 // Load C (p15) cmp.geu.unc p8,p0 = GR_l_N_Unbiased, GR_c_NegSingularity (p8) br.cond.spnt tgammal_singularity // Singularity for arg < to -2^63 ////// };; -{ .mmi -(p15) ldfpd FR_n_A1H, FR_n_A1L = [GR_n_sin_Table], 16 +{ .mmi +(p15) ldfpd FR_n_A1H, FR_n_A1L = [GR_n_sin_Table], 16 ldfpd FR_l_B8, FR_l_B10 = [GR_l_Stirling_Table], 16 - add GR_c_Table = 0x20, GR_c_Table + add GR_c_Table = 0x20, GR_c_Table };; { .mfi -(p15) ldfe FR_n_A9 = [GR_n_sin_Table], 16 - fma.s1 FR_l_Q0 = f1,FR_l_Y0,f0 // Q0 = Y0 +(p15) ldfe FR_n_A9 = [GR_n_sin_Table], 16 + fma.s1 FR_l_Q0 = f1,FR_l_Y0,f0 // Q0 = Y0 nop.i 0 } -{ .mfi - ldfpd FR_l_B12, FR_l_B14 = [GR_l_Stirling_Table], 16 - fnma.s1 FR_l_E0 = FR_l_Y0,FR_l_AbsX,f1 // e = 1-b*y +{ .mfi + ldfpd FR_l_B12, FR_l_B14 = [GR_l_Stirling_Table], 16 + fnma.s1 FR_l_E0 = FR_l_Y0,FR_l_AbsX,f1 // e = 1-b*y nop.i 0 };; -{ .mfi -(p15) ldfe FR_n_A8 = [GR_n_sin_Table], 16 +{ .mfi +(p15) ldfe FR_n_A8 = [GR_n_sin_Table], 16 fcvt.xf FR_c_XN = FR_n_IXN // Convert to FP repr. 
of int X - extr.u GR_l_Index2 = GR_l_X_1, 6, 4 // Extract Index2 + extr.u GR_l_Index2 = GR_l_X_1, 6, 4 // Extract Index2 } -{ .mfi +{ .mfi ldfpd FR_l_B16, FR_l_B18 = [GR_l_Stirling_Table], 16 nop.f 0 nop.i 0 };; -{ .mfi -(p15) ldfe FR_n_A7 = [GR_n_sin_Table], 16 +{ .mfi +(p15) ldfe FR_n_A7 = [GR_n_sin_Table], 16 fms.s1 FR_l_CXH = FR_l_CH, f1, FR_l_AbsX // CXH = CH+|X| shl GR_l_Index2 = GR_l_Index2,5 } -{ .mfi +{ .mfi ldfd FR_l_Half = [GR_l_Stirling_Table] // Load 0.5 nop.f 0 nop.i 0 };; -{ .mfi +{ .mfi add GR_l_Index2 = GR_l_Index2, GR_l_Log_Table // Add offset nop.f 0 nop.i 0 } -{ .mfi -(p15) ldfe FR_n_A6 = [GR_n_sin_Table], 16 +{ .mfi +(p15) ldfe FR_n_A6 = [GR_n_sin_Table], 16 (p15) fma.s1 FR_n_XS = FR_l_AbsX , f1, FR_n_XNS // xs = x - int(x) nop.i 0 };; -{ .mmi - ld2 GR_l_Z_2 = [GR_l_Index2],4 +{ .mmi + ld2 GR_l_Z_2 = [GR_l_Index2],4 addl GR_l_Log_Table = @ltoff(Constants_Tgammal_log_80_h3_G_H#),gp nop.i 0 };; -{ .mfi +{ .mfi ld8 GR_l_Log_Table = [GR_l_Log_Table] fma.s1 FR_l_E2 = FR_l_E0,FR_l_E0,FR_l_E0 // e2 = e+e^2 nop.i 0 } -{ .mfi - ldfs FR_l_G_2 = [GR_l_Index2],4 +{ .mfi + ldfs FR_l_G_2 = [GR_l_Index2],4 fma.s1 FR_l_E1 = FR_l_E0,FR_l_E0,f0 // e1 = e^2 nop.i 0 };; -{ .mmi - ldfs FR_l_H_2 = [GR_l_Index2],8 -(p15) ldfe FR_n_A5 = [GR_n_sin_Table], 16 +{ .mmi + ldfs FR_l_H_2 = [GR_l_Index2],8 +(p15) ldfe FR_n_A5 = [GR_n_sin_Table], 16 nop.i 0 };; -{ .mfi +{ .mfi setf.sig FR_l_float_N = GR_l_N // float_N = Make N a fp number nop.f 0 - pmpyshr2.u GR_l_X_2 = GR_l_X_1,GR_l_Z_2,15 // X_2 = X_1 * Z_2 + pmpyshr2.u GR_l_X_2 = GR_l_X_1,GR_l_Z_2,15 // X_2 = X_1 * Z_2 } -{ .mfi - ldfe FR_l_h_2 = [GR_l_Index2],0 +{ .mfi + ldfe FR_l_h_2 = [GR_l_Index2],0 fma.s1 FR_l_CXL = FR_l_AbsX, f1, FR_l_CXH // CXL = |X|+CXH add GR_l_Log_Table1= 0x200, GR_l_Log_Table };; -{ .mfi -(p15) ldfe FR_n_A4 = [GR_n_sin_Table], 16 +{ .mfi +(p15) ldfe FR_n_A4 = [GR_n_sin_Table], 16 (p15) fcmp.eq.unc.s1 p9,p0 = FR_l_AbsX, FR_c_XN //if argument is integer // and negative nop.i 0 } -{ .mfi +{ .mfi 
ldfe FR_c_PosOverflow = [GR_c_Table],16 //Load pos overflow value (p15) fma.s1 FR_n_XS2 = FR_n_XS, FR_n_XS, f0 // xs^2 = xs*xs nop.i 0 };; -{ .mfi -(p15) ldfe FR_n_A3 = [GR_n_sin_Table], 16 - nop.f 0 +{ .mfi +(p15) ldfe FR_n_A3 = [GR_n_sin_Table], 16 + nop.f 0 nop.i 0 };; -{ .mfi +{ .mfi (p15) getf.sig GR_n_XN = FR_n_IXN // int(x) to general reg fma.s1 FR_l_Y1 = FR_l_Y0,FR_l_E2,FR_l_Y0 // y1 = y+y*e2 - nop.i 0 + nop.i 0 } -{ .mfb - nop.m 0 +{ .mfb + nop.m 0 fma.s1 FR_l_E3 = FR_l_E1,FR_l_E1,FR_l_E0 // e3 = e+e1^2 (p9) br.cond.spnt tgammal_singularity // Singularity for integer ///////////// // and negative arguments ////////////// };; -{ .mfi +{ .mfi nop.m 0 fms.s1 FR_l_AbsX_m_Half = FR_l_AbsX, f1, FR_l_Half // |x|-0.5 extr.u GR_l_Index2 = GR_l_X_2, 1, 5 // Get Index3 };; -{ .mfi - shladd GR_l_Log_Table1= GR_l_Index2, 2, GR_l_Log_Table1 +{ .mfi + shladd GR_l_Log_Table1= GR_l_Index2, 2, GR_l_Log_Table1 nop.f 0 shladd GR_l_Index3 = GR_l_Index2,4, GR_l_Log_Table // Index3 } @@ -1450,10 +1450,10 @@ GLOBAL_LIBM_ENTRY(tgammal) // at underflow domain (X < -1765) ////// };; -{ .mfi - addl GR_l_Log_Table = @ltoff(Constants_Tgammal_log_80_Q#), gp +{ .mfi + addl GR_l_Log_Table = @ltoff(Constants_Tgammal_log_80_Q#), gp (p15) fma.s1 FR_n_TT = FR_n_A2L, FR_n_XS2, f0 // T=A2L*x^2 - tbit.nz.unc p13, p12 = GR_n_XN, 0x0 // whether [X] odd or even + tbit.nz.unc p13, p12 = GR_n_XN, 0x0 // whether [X] odd or even } { .mfi nop.m 0 @@ -1461,128 +1461,128 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi - ld8 GR_l_Log_Table = [GR_l_Log_Table] +{ .mfi + ld8 GR_l_Log_Table = [GR_l_Log_Table] (p15) fma.s1 FR_n_A7 = FR_n_A8, FR_n_XS2, FR_n_A7 // poly tail - nop.i 0 + nop.i 0 } -{ .mfi - ldfe FR_l_h_3 = [GR_l_Index3],12 +{ .mfi + ldfe FR_l_h_3 = [GR_l_Index3],12 (p15) fma.s1 FR_n_XS4 = FR_n_XS2, FR_n_XS2, f0 // xs^4 = xs^2*xs^2 - nop.i 0 + nop.i 0 };; -{ .mfi - ldfs FR_l_H_3 = [GR_l_Log_Table1], 0 +{ .mfi + ldfs FR_l_H_3 = [GR_l_Log_Table1], 0 fma.s1 FR_l_Y2 = FR_l_Y1, FR_l_E3, FR_l_Y0 // y2 = 
y+y1*e3 - nop.i 0 + nop.i 0 } -{ .mfi - ldfs FR_l_G_3 = [GR_l_Index3], 0 +{ .mfi + ldfs FR_l_G_3 = [GR_l_Index3], 0 fnma.s1 FR_l_Z = FR_l_AbsX,FR_l_Q0,f1 // r = a-b*q - nop.i 0 + nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fmpy.s1 FR_l_G = FR_l_G_1, FR_l_G_2 // G = G1 * G_2 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fadd.s1 FR_l_H = FR_l_H_1, FR_l_H_2 // H = H_1 + H_2 nop.i 0 };; -{ .mfi +{ .mfi ldfe FR_l_log2_hi = [GR_l_Log_Table],16 // load log2_hi part - fadd.s1 FR_l_h = FR_l_h_1, FR_l_h_2 // h = h_1 + h_2 + fadd.s1 FR_l_h = FR_l_h_1, FR_l_h_2 // h = h_1 + h_2 nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fcvt.xf FR_l_float_N = FR_l_float_N // int(N) - nop.i 0 + nop.i 0 };; -{ .mfi +{ .mfi ldfe FR_l_log2_lo = [GR_l_Log_Table],16 // Load log2_lo part fma.s1 FR_l_CXL = FR_l_CXL, f1, FR_l_CL - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 (p15) fma.s1 FR_n_TT = FR_n_A2H, FR_n_XS2L, FR_n_TT // T=A2H*x2L+T - nop.i 0 + nop.i 0 };; -{ .mfi - ldfe FR_l_Q_6 = [GR_l_Log_Table],16 +{ .mfi + ldfe FR_l_Q_6 = [GR_l_Log_Table],16 (p15) fma.s1 FR_n_A3 = FR_n_A4, FR_n_XS2, FR_n_A3 // poly tail - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 (p15) fma.s1 FR_n_A5 = FR_n_A6, FR_n_XS2, FR_n_A5 // poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - ldfe FR_l_Q_5 = [GR_l_Log_Table],16 +{ .mfi + ldfe FR_l_Q_5 = [GR_l_Log_Table],16 (p15) fabs FR_n_XS = FR_n_XS // abs(xs) - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_l_Z = FR_l_Z,FR_l_Y2,FR_l_Q0 // x_hi = q+r*y2 - nop.i 0 + nop.i 0 };; -{ .mfi - ldfe FR_l_Q_4 = [GR_l_Log_Table],16 +{ .mfi + ldfe FR_l_Q_4 = [GR_l_Log_Table],16 (p15) fma.s1 FR_n_A7 = FR_n_A9, FR_n_XS4, FR_n_A7 // poly tail - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 (p15) fma.s1 FR_n_XS7 = FR_n_XS4, FR_n_XS2, f0 // = x^4*x^2 - nop.i 0 + nop.i 0 };; -{ .mfi - ldfe FR_l_Q_3 = [GR_l_Log_Table],16 +{ .mfi + ldfe FR_l_Q_3 = [GR_l_Log_Table],16 fneg FR_n_NegOne = f1 // -1.0 - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 (p15) fma.s1 FR_n_XS8 = FR_n_XS4, FR_n_XS4, f0 // xs^8 = xs^4*xs^4 - 
nop.i 0 + nop.i 0 };; -{ .mfi - ldfe FR_l_Q_2 = [GR_l_Log_Table],16 - fadd.s1 FR_l_h = FR_l_h, FR_l_h_3 // h = h_1 + h_2 + h_3 - nop.i 0 +{ .mfi + ldfe FR_l_Q_2 = [GR_l_Log_Table],16 + fadd.s1 FR_l_h = FR_l_h, FR_l_h_3 // h = h_1 + h_2 + h_3 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 (p15) fma.s1 FR_n_TH = FR_n_A2H, FR_n_XS2, FR_n_TT // A2H*xs2+T - nop.i 0 + nop.i 0 };; -{ .mfi - ldfe FR_l_Q_1 = [GR_l_Log_Table],16 - fmpy.s1 FR_l_G = FR_l_G, FR_l_G_3 // G = G_1 * G_2 * G_3 - nop.i 0 +{ .mfi + ldfe FR_l_Q_1 = [GR_l_Log_Table],16 + fmpy.s1 FR_l_G = FR_l_G, FR_l_G_3 // G = G_1 * G_2 * G_3 + nop.i 0 } { .mfi nop.m 0 - fadd.s1 FR_l_H = FR_l_H, FR_l_H_3 // H = H_1 + H_2 + H_3 - nop.i 0 + fadd.s1 FR_l_H = FR_l_H, FR_l_H_3 // H = H_1 + H_2 + H_3 + nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_l_Z2 = FR_l_Z, FR_l_Z, f0 // Z^2 nop.i 0 @@ -1593,19 +1593,19 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 (p14) fcmp.gt.unc.s1 p7,p0 = FR_l_AbsX, FR_c_PosOverflow //X > 1755.5483 // (overflow domain, result cannot be represented by normal value) nop.i 0 } -{ .mfi +{ .mfi nop.m 0 (p15) fma.s1 FR_n_XS7 = FR_n_XS7, FR_n_XS, f0 // x^7 construction nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 (p15) fms.s1 FR_n_TL = FR_n_A2H, FR_n_XS2, FR_n_TH // A2H*xs2+TH nop.i 0 @@ -1613,10 +1613,10 @@ GLOBAL_LIBM_ENTRY(tgammal) { .mfi nop.m 0 (p15) fma.s1 FR_n_PolyH = FR_n_TH, f1, FR_n_A1H // PolyH=TH+A1H - nop.i 0 + nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fmpy.s1 FR_l_GS_hi = FR_l_G, FR_l_S // GS_hi = G*S nop.i 0 @@ -1627,18 +1627,18 @@ GLOBAL_LIBM_ENTRY(tgammal) (p7) br.cond.spnt tgammal_overflow // Overflow path for arg > 1755.5483 ////// };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_l_B14 = FR_l_B16, FR_l_Z2, FR_l_B14// bernulli tail nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_l_Z4 = FR_l_Z2, FR_l_Z2, f0 // Z^4 = Z^2*Z^2 nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_l_B2 = FR_l_B4, FR_l_Z2, FR_l_B2 // bernulli tail nop.i 0 @@ -1649,7 +1649,7 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi 
nop.m 0 fma.s1 FR_l_B10 = FR_l_B12, FR_l_Z2, FR_l_B10// bernulli tail nop.i 0 @@ -1660,7 +1660,7 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 (p15) fma.s1 FR_n_TL = FR_n_TL, f1, FR_n_TT // TL = TL+T nop.i 0 @@ -1671,7 +1671,7 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_l_poly_lo = FR_l_r, FR_l_Q_6, FR_l_Q_5 // Q_5+r*Q_6 nop.i 0 @@ -1679,10 +1679,10 @@ GLOBAL_LIBM_ENTRY(tgammal) { .mfi nop.m 0 fsub.s1 FR_l_r_cor = FR_l_GS_hi, f1 // r_cor = GS_hi -1 - nop.i 0 + nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fms.s1 FR_l_GS_lo = FR_l_G, FR_l_S, FR_l_GS_hi // G*S-GS_hi nop.i 0 @@ -1690,10 +1690,10 @@ GLOBAL_LIBM_ENTRY(tgammal) { .mfi nop.m 0 fma.s1 FR_l_poly = FR_l_r, FR_l_Q_2, FR_l_Q_1 //poly=r*Q2+Q1 - nop.i 0 + nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fmpy.s1 FR_l_rsq = FR_l_r, FR_l_r // rsq = r * r nop.i 0 @@ -1702,33 +1702,33 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.m 0 fma.s1 FR_l_G = FR_l_float_N, FR_l_log2_hi, FR_l_H // Tbl = // float_N*log2_hi + H - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_l_Y_lo = FR_l_float_N, FR_l_log2_lo, FR_l_h // Y_lo= // float_N*log2_lo + h - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_l_B14 = FR_l_B18, FR_l_Z4, FR_l_B14 //bernulli tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_l_B2 = FR_l_B6, FR_l_Z4, FR_l_B2 //bernulli tail - nop.i 0 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_l_Z8 = FR_l_Z4, FR_l_Z4, f0 //bernulli tail - nop.i 0 + nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_l_poly_lo = FR_l_r, FR_l_poly_lo, FR_l_Q_4 // poly_lo = // Q_4 + r * poly_lo @@ -1737,10 +1737,10 @@ GLOBAL_LIBM_ENTRY(tgammal) { .mfi nop.m 0 fsub.s1 FR_l_r_cor = FR_l_r_cor, FR_l_r // r_cor = r_cor - r - nop.i 0 + nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 (p15) fma.s1 FR_n_PolyL = FR_n_PolyL, f1, FR_n_TH // polyL+TH nop.i 0 @@ -1751,19 +1751,19 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 - fadd.s1 FR_l_logl_YHi = FR_l_G, FR_l_r // Y_hi = Tbl + r 
- nop.i 0 + fadd.s1 FR_l_logl_YHi = FR_l_G, FR_l_r // Y_hi = Tbl + r + nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_l_B10 = FR_l_B14, FR_l_Z4, FR_l_B10 //bernulli tail - nop.i 0 + nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_l_poly_lo = FR_l_r, FR_l_poly_lo, FR_l_Q_3 // poly_lo = // Q_3 + r * poly_lo @@ -1772,18 +1772,18 @@ GLOBAL_LIBM_ENTRY(tgammal) { .mfi nop.m 0 fadd.s1 FR_l_r_cor = FR_l_r_cor, FR_l_GS_lo // r_cor=r_cor+GS_lo - nop.i 0 + nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 (p15) fma.s1 FR_n_PolyL = FR_n_PolyL, f1, FR_n_TT // polyL+TT nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 - fsub.s1 FR_l_Y_lo_res = FR_l_G, FR_l_logl_YHi // Y_lo = Tbl - Y_hi + fsub.s1 FR_l_Y_lo_res = FR_l_G, FR_l_logl_YHi // Y_lo = Tbl - Y_hi nop.i 0 } { .mfi @@ -1793,13 +1793,13 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_l_SS = FR_l_B10, FR_l_Z8, FR_l_B2 // bernulli tail nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fadd.s1 FR_l_r_cor = FR_l_r_cor, FR_l_Y_lo // r_cor = r_cor+Y_lo nop.i 0 @@ -1808,16 +1808,16 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.m 0 fma.s1 FR_l_poly = FR_l_rsq, FR_l_poly_lo, FR_l_poly //poly= // r^2*polyLo+poly - nop.i 0 + nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 (p15) fma.s1 FR_n_TT = FR_n_PolyL, FR_n_XS2, f0 // T=polyL*xs^2 - nop.i 0 + nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fadd.s1 FR_l_Y_lo = FR_l_Y_lo_res, FR_l_r // Y_lo = Y_lo + r nop.i 0 @@ -1829,7 +1829,7 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_l_SSCXH = FR_l_SS, FR_l_Z, FR_l_CXH // SS*Z+CXH nop.i 0 @@ -1841,7 +1841,7 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mlx +{ .mlx nop.m 0 movl GR_e_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51) } @@ -1852,14 +1852,14 @@ GLOBAL_LIBM_ENTRY(tgammal) { .mfi nop.m 0 - fma.s1 FR_l_poly = FR_l_rsq, FR_l_poly, FR_l_r_cor // poly = + fma.s1 FR_l_poly = FR_l_rsq, FR_l_poly, FR_l_r_cor // poly = // rsq * poly + r_cor - nop.i 0 + nop.i 0 };; -{ .mfi +{ .mfi addl GR_e_ad_Arg = @ltoff(Constants_Tgammal_exp_64_Arg#),gp -(p15) fma.s1 FR_n_TT = 
FR_n_PolyH, FR_n_XS2L, FR_n_TT +(p15) fma.s1 FR_n_TT = FR_n_PolyH, FR_n_XS2L, FR_n_TT mov GR_e_exp_mask = 0x1FFFF // Form exponent mask } { .mlx @@ -1868,25 +1868,25 @@ GLOBAL_LIBM_ENTRY(tgammal) };; -{ .mmi +{ .mmi setf.sig FR_e_INV_LN2_2TO63 = GR_e_sig_inv_ln2 // form 1/ln2 * 2^63 setf.d FR_e_RSHF_2TO51 = GR_e_rshf_2to51 // 1.1000 * 2^(63+51) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fms.s1 FR_l_SSCXL = FR_l_CXH, f1, FR_l_SSCXH // CXH+SS*CXH nop.i 0 } { .mfi nop.m 0 - fma.s1 FR_e_expl_Input_AbsX = FR_l_XYH, f1, FR_l_SSCXH // HI EXP + fma.s1 FR_e_expl_Input_AbsX = FR_l_XYH, f1, FR_l_SSCXH // HI EXP nop.i 0 };; .pred.rel "mutex",p14,p15 -{ .mfi +{ .mfi nop.m 0 (p14) fma.s1 FR_e_expl_Input_X = FR_l_XYH, f1, FR_l_SSCXH // HI EXP mov GR_e_exp_bias = 0x0FFFF // Set exponent bias @@ -1897,13 +1897,13 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fadd.s1 FR_l_logl_YLo = FR_l_Y_lo, FR_l_poly // YLo = YLo+poly nop.i 0 };; -{ .mfi +{ .mfi setf.exp FR_e_2TOM51 = GR_e_exp_2tom51 //2^-51 for scaling float_N (p15) fma.s1 FR_n_TH = FR_n_PolyH, FR_n_XS2, FR_n_TT // TH= // polyH*xs^2+T @@ -1915,7 +1915,7 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.b 0 };; -{ .mfi +{ .mfi add GR_e_ad_A = 0x20, GR_e_ad_Arg // Point to A table nop.f 0 add GR_e_ad_T1 = 0x50, GR_e_ad_Arg // Point to T1 table @@ -1926,7 +1926,7 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_l_SSCXL = FR_l_SS, FR_l_Z, FR_l_SSCXL nop.i 0 @@ -1937,22 +1937,22 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi ldfe FR_e_L_hi = [GR_e_ad_Arg],16 // Get L_hi nop.f 0 nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_l_XYL = FR_l_logl_YLo, FR_l_AbsX_m_Half, FR_l_XYL // XYL = YLo*|x-0.5|+XYL nop.i 0 };; -{ .mfi +{ .mfi ldfe FR_e_L_lo = [GR_e_ad_Arg],16 // Get L_lo -(p15) fms.s1 FR_n_TL = FR_n_PolyH, FR_n_XS2, FR_n_TH // TL = +(p15) fms.s1 FR_n_TL = FR_n_PolyH, FR_n_XS2, FR_n_TH // TL = // = polyH*xs^2-TH add GR_e_ad_W1 = 0x100, GR_e_ad_T2 // Point to W1 table } @@ -1962,13 +1962,13 @@ 
GLOBAL_LIBM_ENTRY(tgammal) add GR_e_ad_W2 = 0x300, GR_e_ad_T2 // Point to W2 table };; -{ .mmi +{ .mmi getf.exp GR_e_signexp_x = FR_e_expl_Input_X // Extract sign and exp - ldfe FR_e_A3 = [GR_e_ad_A],16 // Get A3 + ldfe FR_e_A3 = [GR_e_ad_A],16 // Get A3 nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_l_SSCXL = FR_l_SSCXL, f1, FR_l_CXL nop.i 0 @@ -1979,19 +1979,19 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_e_N_signif=FR_e_expl_Input_X,FR_e_INV_LN2_2TO63,FR_e_RSHF_2TO51 and GR_e_exp_x = GR_e_signexp_x, GR_e_exp_mask };; -{ .mmi +{ .mmi sub GR_e_exp_x = GR_e_exp_x, GR_e_exp_bias // Get exponent ldfe FR_e_A2 = [GR_e_ad_A],16 // Get A2 for main path nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 (p15) fma.s1 FR_n_PolyH = FR_n_Poly1H, FR_n_XS, f0//sin(Pi*x) poly nop.i 0 @@ -2002,13 +2002,13 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 (p15) fma.s1 FR_n_TL = FR_n_TL, f1, FR_n_TT//sin(Pi*x) poly nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_l_Temp = FR_l_XYL, f1, FR_l_SSCXL // XYL+SS*CXL nop.i 0 @@ -2020,19 +2020,19 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi ldfe FR_e_A1 = [GR_e_ad_A],16 // Get A1 nop.f 0 nop.i 0 } { .mfi nop.m 0 - fms.s1 FR_e_float_N = FR_e_N_signif, FR_e_2TOM51, FR_e_RSHF + fms.s1 FR_e_float_N = FR_e_N_signif, FR_e_2TOM51, FR_e_RSHF // Get float N = signd*2^51-RSHIFTER nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 (p15) fma.s1 FR_n_Poly1L = FR_n_Poly1L, f1, FR_n_TH //sin(Pi*x) poly nop.i 0 @@ -2043,54 +2043,54 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi getf.sig GR_e_N_fix = FR_e_N_signif // Get N from significand nop.f 0 nop.i 0 };; .pred.rel "mutex",p14,p15 -{ .mfi +{ .mfi nop.m 0 -(p14) fma.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, f1, FR_l_Temp +(p14) fma.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, f1, FR_l_Temp nop.i 0 } -{ .mfi +{ .mfi nop.m 0 -(p15) fms.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, f1, FR_l_Temp +(p15) fms.s1 FR_e_expl_Input_Y = FR_e_expl_Input_Y, f1, FR_l_Temp // arguments 
for exp computation nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fnma.s1 FR_e_r = FR_e_L_hi, FR_e_float_N, FR_e_expl_Input_X // r = -L_hi * float_N + x extr.u GR_e_M1 = GR_e_N_fix, 6, 6 // Extract index M_1 };; -{ .mfi +{ .mfi nop.m 0 (p15) fma.s1 FR_n_Poly1L = FR_n_Poly1L, f1, FR_n_TL //sin(Pi*x) poly nop.i 0 };; -{ .mmf +{ .mmf nop.m 0 nop.m 0 - fma.s1 FR_e_r = FR_e_r, f1, FR_e_expl_Input_Y + fma.s1 FR_e_r = FR_e_r, f1, FR_e_expl_Input_Y // r = r + FR_e_expl_Input_Y };; -{ .mmi +{ .mmi shladd GR_e_ad_W1 = GR_e_M1,3,GR_e_ad_W1 // Point to W1 shladd GR_e_ad_T1 = GR_e_M1,2,GR_e_ad_T1 // Point to T1 extr.u GR_e_M2 = GR_e_N_fix, 0, 6 // Extract index M_2 };; -{ .mfi +{ .mfi ldfs FR_e_T1 = [GR_e_ad_T1],0 // Get T1 nop.f 0 extr GR_e_K = GR_e_N_fix, 12, 32 //Extract limit range K @@ -2102,7 +2102,7 @@ GLOBAL_LIBM_ENTRY(tgammal) shladd GR_e_ad_W2 = GR_e_M2,3,GR_e_ad_W2 // Point to W2 };; -{ .mfi +{ .mfi ldfs FR_e_T2 = [GR_e_ad_T2],0 // Get T2 nop.f 0 add GR_e_exp_2_k = GR_e_exp_bias, GR_e_K // exp of 2^k @@ -2113,27 +2113,27 @@ GLOBAL_LIBM_ENTRY(tgammal) sub GR_e_exp_2_mk = GR_e_exp_bias, GR_e_K // exp of 2^-k };; -{ .mmi +{ .mmi ldfd FR_e_W2 = [GR_e_ad_W2],0 // Get W2 nop.m 0 nop.i 0 };; -{ .mmf +{ .mmf setf.exp FR_e_scale = GR_e_exp_2_k // Set scale = 2^k setf.exp FR_e_2_mk = GR_e_exp_2_mk // Form 2^-k - fnma.s1 FR_e_r = FR_e_L_lo, FR_e_float_N, FR_e_r + fnma.s1 FR_e_r = FR_e_L_lo, FR_e_float_N, FR_e_r // r = -L_lo * float_N + r };; -{ .mfi +{ .mfi nop.m 0 -(p15) fma.s1 FR_n_PolyL = FR_n_Tail, FR_n_XS7, FR_n_PolyL +(p15) fma.s1 FR_n_PolyL = FR_n_Tail, FR_n_XS7, FR_n_PolyL //sin(Pi*x) poly nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_e_poly = FR_e_r, FR_e_A3, FR_e_A2 // poly=r*A3+A2 nop.i 0 @@ -2144,7 +2144,7 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fmpy.s1 FR_e_T = FR_e_T1, FR_e_T2 // T = T1 * T2 nop.i 0 @@ -2155,54 +2155,54 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 (p15) fma.s1 FR_n_TT = FR_n_PolyL, FR_l_AbsX, f0 //sin(Pi*x) 
poly nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 - fma.s1 FR_e_poly = FR_e_r, FR_e_poly, FR_e_A1 + fma.s1 FR_e_poly = FR_e_r, FR_e_poly, FR_e_A1 // poly = r * poly + A1 nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_e_T_scale = FR_e_T, FR_e_scale, f0 // T_scale=T*scale nop.i 0 } { .mfi nop.m 0 - fma.s1 FR_e_W = FR_e_W2, FR_e_W1_p1, FR_e_W1 + fma.s1 FR_e_W = FR_e_W2, FR_e_W1_p1, FR_e_W1 // W = W2 * (W1+1.0) + W1 nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 -(p15) fma.s1 FR_n_SinxH = FR_n_PolyH, FR_l_AbsX, FR_n_TT +(p15) fma.s1 FR_n_SinxH = FR_n_PolyH, FR_l_AbsX, FR_n_TT // sin(Pi*x) poly nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 mov FR_e_Y_hi = FR_e_T // Assume Y_hi = T nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 - fma.s1 FR_e_poly = FR_e_rsq, FR_e_poly, FR_e_r + fma.s1 FR_e_poly = FR_e_rsq, FR_e_poly, FR_e_r // poly = rsq * poly + r nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 - fma.s1 FR_e_Wp1_T_scale = FR_e_W, FR_e_T_scale, FR_e_T_scale + fma.s1 FR_e_Wp1_T_scale = FR_e_W, FR_e_T_scale, FR_e_T_scale // (W+1)*T*scale nop.i 0 } @@ -2212,51 +2212,51 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 (p15) fms.s1 FR_n_SinxL = FR_n_PolyH, FR_l_AbsX, FR_n_SinxH // Low part of sin nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 (p15) frcpa.s1 FR_n_Y0, p0 = f1, FR_n_SinxH // y = frcpa(b) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_e_result_lo = FR_e_Wp1_T_scale, FR_e_poly, FR_e_W_T_scale // Low part of exp result nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 (p15) fma.s1 FR_n_SinxL = FR_n_SinxL, f1, FR_n_TT // sin low result nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 (p15) fma.s1 FR_n_Q0 = f1,FR_n_Y0,f0 // q = y nop.i 0 } -{ .mfi +{ .mfi nop.m 0 -(p15) fnma.s1 FR_n_E0 = FR_n_Y0, FR_n_SinxH, f1 // e = 1-b*y +(p15) fnma.s1 FR_n_E0 = FR_n_Y0, FR_n_SinxH, f1 // e = 1-b*y nop.i 0 };; -{ .mfb +{ .mfb nop.m 0 (p14) fma.s0 f8 = FR_e_Y_hi, FR_e_scale, FR_e_result_lo (p14) br.ret.spnt b0 // Exit for positive Stirling path ////////////////////// };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_e_expl_Output_X = FR_e_Y_hi, FR_e_scale, f0 // exp 
result nop.i 0 @@ -2267,107 +2267,107 @@ GLOBAL_LIBM_ENTRY(tgammal) nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_n_E2 = FR_n_E0,FR_n_E0,FR_n_E0 // e2 = e+e^2 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_n_E1 = FR_n_E0,FR_n_E0,f0 // e1 = e^2 nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_n_Y1 = FR_n_Y0,FR_n_E2,FR_n_Y0 // y1 = y+y*e2 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_n_E3 = FR_n_E1,FR_n_E1,FR_n_E0 // e3 = e+e1^2 nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_n_Y2 = FR_n_Y1,FR_n_E3,FR_n_Y0 // y2 = y+y1*e3 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fnma.s1 FR_n_R0 = FR_n_SinxH,FR_n_Q0,f1 // r = a-b*q nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fnma.s1 FR_n_E4 = FR_n_SinxH,FR_n_Y2,f1 // e4 = 1-b*y2 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_n_RcpResH = FR_n_R0,FR_n_Y2,FR_n_Q0 // x = q+r*y2 nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_n_Y3 = FR_n_Y2,FR_n_E4,FR_n_Y2 // y3 = y2+y2*e4 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fnma.s1 FR_n_R1 = FR_n_SinxH,FR_n_RcpResH,f1 // r1 = a-b*x nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 - fnma.s1 FR_n_R1 = FR_n_SinxL,FR_n_RcpResH,FR_n_R1 + fnma.s1 FR_n_R1 = FR_n_SinxL,FR_n_RcpResH,FR_n_R1 // r1 = r1 - b_lo*X nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_n_RcpResL = FR_n_R1,FR_n_Y3,f0 // x_lo = r1*y3 nop.i 0 } -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_n_Temp = FR_n_RcpResH, FR_e_expl_Output_Y, f0 // Multiplying exp and sin result nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_n_Temp = FR_n_RcpResL, FR_e_expl_Output_X, FR_n_Temp // Multiplying exp and sin result nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_n_ResH = FR_n_RcpResH, FR_e_expl_Output_X, FR_n_Temp // Multiplying exp and sin result nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fms.s1 FR_n_ResL = FR_n_RcpResH, FR_e_expl_Output_X, FR_n_ResH // Multiplying exp and sin result nop.i 0 } -{ .mfi +{ .mfi nop.m 0 (p12) fma.s1 FR_n_ResH = FR_n_ResH, FR_n_NegOne, f0 // Negate nop.i 0 };; -{ .mfi +{ .mfi nop.m 0 fma.s1 FR_n_ResL = FR_n_ResL, f1, FR_n_Temp // Multiplying exp and sin result - low result obtained @@ -2375,12 
+2375,12 @@ GLOBAL_LIBM_ENTRY(tgammal) };; .pred.rel "mutex",p12,p13 -{ .mfi +{ .mfi nop.m 0 (p13) fma.s0 f8 = FR_n_ResH, f1, FR_n_ResL // For odd nop.i 0 } -{ .mfb +{ .mfb nop.m 0 (p12) fms.s0 f8 = FR_n_ResH, f1, FR_n_ResL // For even br.ret.sptk b0 // Exit for negative Stirling path ////////////////////// @@ -2391,928 +2391,928 @@ GLOBAL_LIBM_ENTRY(tgammal) //------------------------------------------------------------------------------ .align 64 tgamma_lt_13: -{ .mfi +{ .mfi getf.sig GR_p_XN = FR_p_IXN // Get significand fcvt.xf FR_p_XN = FR_p_IXN // xn = [x] add GR_r_sin_Table2= 0x40, GR_r_sin_Table // Shifted table addr. -} +} { .mfi ldfpd FR_p_0p5, FR_p_1p5 = [GR_c_Table], 16 // 0.5 & 1.5 fms.s1 FR_p_AbsXM1 = FR_p_AbsX, f1, f1 // X-1 add GR_p_Table2 = 0xB0, GR_p_Table -};; +};; -{ .mfi +{ .mfi add GR_r_sin_Table = -16, GR_r_sin_Table // For compensation fcvt.xf FR_r_XNS = FR_r_IXNS // Convert int repr to float shr.u GR_p_X_Sgnd = GR_p_X_Sgnd, 59 // Get only 5 bit of signd -};; +};; -{ .mfi +{ .mfi ldfpd FR_r_A2H,FR_r_A2L = [GR_r_sin_Table], 16 // Load A2 nop.f 0 - add GR_p_Int = -2, GR_p_XN // int = int - 2 -} + add GR_p_Int = -2, GR_p_XN // int = int - 2 +} { .mfi - ldfe FR_r_A6 = [GR_r_sin_Table2], 16 + ldfe FR_r_A6 = [GR_r_sin_Table2], 16 nop.f 0 cmp.gtu p11, p12 = 0x2, GR_p_XN // p11: x < 2 (splitted intervals), // p12: x > 2 (base intervals) -};; +};; -{ .mfi - ldfpd FR_r_A1H, FR_r_A1L = [GR_r_sin_Table], 16 +{ .mfi + ldfpd FR_r_A1H, FR_r_A1L = [GR_r_sin_Table], 16 nop.f 0 shr GR_p_Int = GR_p_Int, 1 // int/2 -} +} { .mfi - ldfe FR_r_A5 = [GR_r_sin_Table2], 16 + ldfe FR_r_A5 = [GR_r_sin_Table2], 16 nop.f 0 (p11) cmp.gtu.unc p10, p11 = 0x1C, GR_p_X_Sgnd // sgnd(x) < 0.75 -};; +};; -{ .mfi - ldfe FR_r_A9 = [GR_r_sin_Table], 16 +{ .mfi + ldfe FR_r_A9 = [GR_r_sin_Table], 16 nop.f 0 shl GR_p_Offset = GR_p_Int, 4 // offset = int*16 -} +} { .mfi - ldfe FR_r_A4 = [GR_r_sin_Table2], 16 + ldfe FR_r_A4 = [GR_r_sin_Table2], 16 nop.f 0 (p10) cmp.gtu.unc p9, p10 = 
0x14, GR_p_X_Sgnd // sgnd(x) < 0.25 -};; +};; -{ .mfi - ldfe FR_r_A8 = [GR_r_sin_Table], 16 +{ .mfi + ldfe FR_r_A8 = [GR_r_sin_Table], 16 nop.f 0 (p12) tbit.nz.unc p13, p12 = GR_p_XN, 0x0 // p13: reccurent computations // X is at [3;4], [5;6], [7;8]... interval -} +} { .mfi - ldfe FR_r_A3 = [GR_r_sin_Table2], 16 + ldfe FR_r_A3 = [GR_r_sin_Table2], 16 nop.f 0 shladd GR_p_Offset = GR_p_Int, 2, GR_p_Offset // +int*4 -};; +};; .pred.rel "mutex",p9,p11 -{ .mfi - add GR_p_Offset = GR_p_Int, GR_p_Offset +{ .mfi + add GR_p_Offset = GR_p_Int, GR_p_Offset // +int, so offset = int*21 (p9) fms.s1 FR_p_XR = FR_p_AbsX, f1, f1 // r = x-1 - nop.i 0 -} + nop.i 0 +} { .mfi - ldfe FR_r_A7 = [GR_r_sin_Table], 16 -(p11) fms.s1 FR_p_XR = FR_p_2, f1, FR_p_AbsX + ldfe FR_r_A7 = [GR_r_sin_Table], 16 +(p11) fms.s1 FR_p_XR = FR_p_2, f1, FR_p_AbsX // r = 2-x for 1.75 < x < 2 - nop.i 0 -};; + nop.i 0 +};; .pred.rel "mutex",p9,p10 .pred.rel "mutex",p10,p11 .pred.rel "mutex",p9,p11 -{ .mfi +{ .mfi (p9) add GR_p_Offset = 126, r0 // 1.0 < x < 1.25 table -(p15) fcmp.eq.unc.s1 p7,p0 = FR_p_AbsX, FR_p_XN +(p15) fcmp.eq.unc.s1 p7,p0 = FR_p_AbsX, FR_p_XN // If arg is integer and negative - singularity branch - nop.i 0 + nop.i 0 } -{ .mfi +{ .mfi (p10) add GR_p_Offset = 147, r0 // 1.25 < x < 1.75 table nop.f 0 (p11) add GR_p_Offset = 168, r0 // 1.75 < x < 2.0 table -};; +};; -{ .mmf - shladd GR_p_Table = GR_p_Offset, 4, GR_p_Table +{ .mmf + shladd GR_p_Table = GR_p_Offset, 4, GR_p_Table shladd GR_p_Table2 = GR_p_Offset, 4, GR_p_Table2 fma.s1 FR_r_XS = FR_r_AbsX , f1, FR_r_XNS // xs = x - [x] -};; +};; -{ .mmb - ldfpd FR_p_A5H, FR_p_A5L = [GR_p_Table], 16 - ldfpd FR_p_A2H, FR_p_A2L = [GR_p_Table2], 16 +{ .mmb + ldfpd FR_p_A5H, FR_p_A5L = [GR_p_Table], 16 + ldfpd FR_p_A2H, FR_p_A2L = [GR_p_Table2], 16 (p7) br.cond.spnt tgammal_singularity // Singularity for integer ///////////// // and negative argument /////////////// };; -{ .mfi - ldfpd FR_p_A4H, FR_p_A4L = [GR_p_Table], 16 +{ .mfi + ldfpd FR_p_A4H, 
FR_p_A4L = [GR_p_Table], 16 fma.s1 FR_p_XN = FR_p_XN, f1, FR_p_0p5 // xn = xn+0.5 - nop.i 0 + nop.i 0 } -{ .mfi - ldfpd FR_p_A1H, FR_p_A1L = [GR_p_Table2], 16 +{ .mfi + ldfpd FR_p_A1H, FR_p_A1L = [GR_p_Table2], 16 (p10) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_1p5 // r = x - 1.5 - nop.i 0 + nop.i 0 };; -{ .mmi - ldfpd FR_p_A3H, FR_p_A3L = [GR_p_Table], 16 - ldfpd FR_p_A0H, FR_p_A0L = [GR_p_Table2], 16 +{ .mmi + ldfpd FR_p_A3H, FR_p_A3L = [GR_p_Table], 16 + ldfpd FR_p_A0H, FR_p_A0L = [GR_p_Table2], 16 nop.i 0 };; -{ .mmi - ldfe FR_p_A20 = [GR_p_Table], 16 - ldfe FR_p_A12 = [GR_p_Table2], 16 +{ .mmi + ldfe FR_p_A20 = [GR_p_Table], 16 + ldfe FR_p_A12 = [GR_p_Table2], 16 nop.i 0 };; -{ .mmf - ldfe FR_p_A19 = [GR_p_Table], 16 - ldfe FR_p_A11 = [GR_p_Table2], 16 +{ .mmf + ldfe FR_p_A19 = [GR_p_Table], 16 + ldfe FR_p_A11 = [GR_p_Table2], 16 fma.s1 FR_r_XS2 = FR_r_XS, FR_r_XS, f0 // xs2 = xs*xs };; -{ .mmi - ldfe FR_p_A18 = [GR_p_Table], 16 - ldfe FR_p_A10 = [GR_p_Table2], 16 +{ .mmi + ldfe FR_p_A18 = [GR_p_Table], 16 + ldfe FR_p_A10 = [GR_p_Table2], 16 nop.i 0 };; .pred.rel "mutex",p12,p13 -{ .mfi - ldfe FR_p_A17 = [GR_p_Table], 16 +{ .mfi + ldfe FR_p_A17 = [GR_p_Table], 16 (p12) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_XN // r = x - xn - nop.i 0 + nop.i 0 } -{ .mfi +{ .mfi ldfe FR_p_A9 = [GR_p_Table2], 16 (p13) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_XN - nop.i 0 + nop.i 0 };; -{ .mmi - ldfe FR_p_A16 = [GR_p_Table], 16 - ldfe FR_p_A8 = [GR_p_Table2], 16 +{ .mmi + ldfe FR_p_A16 = [GR_p_Table], 16 + ldfe FR_p_A8 = [GR_p_Table2], 16 (p9) cmp.eq p12, p0 = r0, r0 // clear p12 };; -{ .mmi - ldfe FR_p_A15 = [GR_p_Table], 16 - ldfe FR_p_A7 = [GR_p_Table2], 16 +{ .mmi + ldfe FR_p_A15 = [GR_p_Table], 16 + ldfe FR_p_A7 = [GR_p_Table2], 16 (p10) cmp.eq p12, p0 = r0, r0 // clear p12 };; -{ .mfi - ldfe FR_p_A14 = [GR_p_Table], 16 +{ .mfi + ldfe FR_p_A14 = [GR_p_Table], 16 fma.s1 FR_r_TH = FR_r_A2H, FR_r_XS2, f0 // sin for neg (p11) cmp.eq p12, p0 = r0, r0 // clear p12 } -{ .mfi +{ .mfi ldfe 
FR_p_A6 = [GR_p_Table2], 16 fma.s1 FR_r_TL = FR_r_A2L, FR_r_XS2, f0 // sin for neg - nop.i 0 + nop.i 0 };; -{ .mfi +{ .mfi ldfe FR_p_A13 = [GR_p_Table], 16 fms.s1 FR_r_XS2L = FR_r_XS, FR_r_XS, FR_r_XS2 // x2Lo part - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp5H = FR_p_A5H, FR_p_XR, f0 // A5H*r // 'Low poly' - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_XR2 = FR_p_XR, FR_p_XR, f0 // r^2 = r*r - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fabs FR_r_XS = FR_r_XS // abs(xs) - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 - fma.s1 FR_p_Temp2H = FR_p_A2H, FR_p_XR, f0 // A2H*r +{ .mfi + nop.m 0 + fma.s1 FR_p_Temp2H = FR_p_A2H, FR_p_XR, f0 // A2H*r // 'High poly' - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_r_TT = FR_r_A2H, FR_r_XS2, FR_r_TH // sin for neg - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResH = FR_r_TH, f1, FR_r_A1H // sin for neg - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TL = FR_r_A2H, FR_r_XS2L, FR_r_TL // sin for neg - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Temp5L = FR_p_A5H,FR_p_XR,FR_p_Temp5H //A5H*r delta // 'Low poly' - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly5H = FR_p_Temp5H, f1, FR_p_A4H // A5H*r+A4H // 'Low poly' - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Temp2L = FR_p_A2H, FR_p_XR, FR_p_Temp2H//A2H*r delta //'High poly' - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly2H = FR_p_Temp2H, f1, FR_p_A1H // A2H*r+A1H //'High poly' - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_XR3 = FR_p_XR2, FR_p_XR, f0 // r^3 = r^2*r - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_XR2L = FR_p_XR, FR_p_XR, FR_p_XR2 // r^2 delta - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A18 = FR_p_A19, FR_p_XR, FR_p_A18 // Poly tail - nop.i 0 + nop.i 0 } -{ 
.mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A14 = FR_p_A15, FR_p_XR, FR_p_A14 // Poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_XR4 = FR_p_XR2, FR_p_XR2, f0 // r^4 = r^2*r^2 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp5L = FR_p_A5L, FR_p_XR, FR_p_Temp5L// Low part // of A5*r+A4 - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Poly5L = FR_p_A4H, f1, FR_p_Poly5H // Low part // of A5*r+A4 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp4H = FR_p_Poly5H, FR_p_XR, f0 // (A5H*r+A4H)*r - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp2L = FR_p_A2L, FR_p_XR, FR_p_Temp2L // A2*r low - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Poly2L = FR_p_A1H, f1, FR_p_Poly2H // High poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp1H = FR_p_Poly2H, FR_p_XR, f0 // High poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_XR3L = FR_p_XR2, FR_p_XR, FR_p_XR3 // x^3 delta - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A16 = FR_p_A17, FR_p_XR, FR_p_A16 // Poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_r_ResL = FR_r_A1H, f1, FR_r_ResH // sin for neg - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // sin for neg - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp5L = FR_p_Temp5L, f1, FR_p_A4L // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5H // Low poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Temp4L = FR_p_Poly5H,FR_p_XR,FR_p_Temp4H //Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly4H = FR_p_Temp4H, f1, FR_p_A3H // Low poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp2L = FR_p_Temp2L, 
f1, FR_p_A1L // High poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2H // High poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Temp1L = FR_p_Poly2H,FR_p_XR,FR_p_Temp1H //High poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly1H = FR_p_Temp1H, f1, FR_p_A0H // High poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A12 = FR_p_A13, FR_p_XR, FR_p_A12 // Poly tail - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_XR3L = FR_p_XR2L, FR_p_XR, FR_p_XR3L // x^3 low - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5L // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A10 = FR_p_A11, FR_p_XR, FR_p_A10 // Poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Poly4L = FR_p_A3H, f1, FR_p_Poly4H // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A6 = FR_p_A7, FR_p_XR, FR_p_A6 // Poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A8 = FR_p_A9, FR_p_XR, FR_p_A8 // Poly tail - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_XR6 = FR_p_XR4, FR_p_XR2, f0 // Poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2L // High poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Poly1L = FR_p_A0H, f1, FR_p_Poly1H // High poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TH // sin for neg - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TT = FR_r_TL, f1, FR_r_A1L // sin for neg - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp4L = FR_p_Poly5L,FR_p_XR,FR_p_Temp4L // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A18 = 
FR_p_A20, FR_p_XR2, FR_p_A18 // Poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4H // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A14 = FR_p_A16, FR_p_XR2, FR_p_A14 // Poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A6 = FR_p_A8, FR_p_XR2, FR_p_A6 // Poly tail - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A10 = FR_p_A12, FR_p_XR2, FR_p_A10 // Poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp1L = FR_p_Poly2L,FR_p_XR,FR_p_Temp1L //High poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1H // High poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TT // sin for neg - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TH = FR_r_ResH, FR_r_XS2, f0 // sin for neg - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp4L = FR_p_Temp4L, f1, FR_p_A3L // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly3H = FR_p_Poly4H, FR_p_XR3, f0 // Low poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A14 = FR_p_A18, FR_p_XR4, FR_p_A14 // Poly tail - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_XR8 = FR_p_XR4, FR_p_XR4, f0 // Poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TL = FR_r_ResH, FR_r_XS2L, f0 // sin for neg - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp1L = FR_p_Temp1L, f1, FR_p_A0L // High poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A6 = FR_p_A10, FR_p_XR4, FR_p_A6 // Poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_r_TT = FR_r_ResH, FR_r_XS2, FR_r_TH // sin for neg - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 
fma.s1 FR_r_Res3H = FR_r_TH, f1, f1 // sin for neg - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4L // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly3L = FR_p_Poly4H, FR_p_XR3L, f0 // Low poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly0H = FR_p_Poly3H,f1,FR_p_Poly1H //Low & High add - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_A7 = FR_r_A8, FR_r_XS2, FR_r_A7 // sin for neg - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TL = FR_r_ResL, FR_r_XS2, FR_r_TL // sin for neg - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_XS4 = FR_r_XS2, FR_r_XS2, f0 // sin for neg - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1L // High poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_PolyTail = FR_p_A14, FR_p_XR8, FR_p_A6 // Poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_r_Res3L = f1, f1, FR_r_Res3H // sin for neg - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResH = FR_r_Res3H, FR_r_XS, f0 // sin for neg - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Temp0L = FR_p_Poly4H,FR_p_XR3,FR_p_Poly3H //Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly3L = FR_p_Poly4L,FR_p_XR3,FR_p_Poly3L //Low poly - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fms.s1 FR_p_Poly0L = FR_p_Poly1H,f1,FR_p_Poly0H //Low & High add - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 -(p13) fma.s1 FR_p_OddPoly0H = FR_p_Poly0H, FR_p_AbsXM1, f0 +{ .mfi + nop.m 0 +(p13) fma.s1 FR_p_OddPoly0H = FR_p_Poly0H, FR_p_AbsXM1, f0 // Reccurent computations - multiplying by X-1 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // sin for neg - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + 
nop.m 0 fma.s1 FR_r_A3 = FR_r_A4, FR_r_XS2, FR_r_A3 // sin for neg - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly1L = FR_p_PolyTail,FR_p_XR6,FR_p_Poly1L//High - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_A5 = FR_r_A6, FR_r_XS2, FR_r_A5 // sin for neg - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TH // sin for neg - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_r_ResL = FR_r_Res3H, FR_r_XS, FR_r_ResH//sin for neg - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_p_Poly3L = FR_p_Poly3L, f1, FR_p_Temp0L // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_A7 = FR_r_A9, FR_r_XS4, FR_r_A7 // sin for neg - nop.i 0 + nop.i 0 };; { .mfi - nop.m 0 + nop.m 0 fma.s1 FR_p_Poly0L = FR_p_Poly0L,f1,FR_p_Poly3H //Low & High add - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 (p13) fms.s1 FR_p_OddPoly0L = FR_p_Poly0H, FR_p_AbsXM1, FR_p_OddPoly0H // Reccurent computations - multiplying by X-1 (low part) - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_A3 = FR_r_A5, FR_r_XS4, FR_r_A3 // sin for neg - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_XS7 = FR_r_XS4, FR_r_XS2, f0 // xs^6 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TL // sin for neg - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_XS8 = FR_r_XS4, FR_r_XS4, f0 // sin for neg - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp0H = FR_p_Poly3L,f1,FR_p_Poly1L //Low & High add - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_XS7 = FR_r_XS7, FR_r_XS, f0 // xs^7 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResL = FR_r_Res3L, FR_r_XS, FR_r_ResL//sin for neg - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Tail = FR_r_A7, FR_r_XS8, FR_r_A3 
// sin tail res - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly0L = FR_p_Poly0L,f1,FR_p_Temp0H //Low & High add - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResL = FR_r_Tail,FR_r_XS7,FR_r_ResL //sin for neg - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 (p13) fma.s1 FR_p_OddPoly0L = FR_p_Poly0L, FR_p_AbsXM1, FR_p_OddPoly0L // Reccurent computations - multiplying by X-1 (low part) - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TT = FR_r_ResL, FR_r_AbsX, f0 // X*sin - nop.i 0 + nop.i 0 };; .pred.rel "mutex",p12,p13 -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 (p12) fma.s0 f8 = FR_p_Poly0H, f1, FR_p_Poly0L // Even - nop.i 0 + nop.i 0 } -{ .mfb - nop.m 0 +{ .mfb + nop.m 0 (p13) fma.s0 f8 = FR_p_OddPoly0H, f1, FR_p_OddPoly0L // Odd (p14) br.ret.spnt b0 // Exit for 1 <= |X| < 13 path (positive arguments)///// };; -{ .mfi - nop.m 0 -(p13) fma.s1 FR_p_Poly0H = FR_p_OddPoly0H, f1, f0 +{ .mfi + nop.m 0 +(p13) fma.s1 FR_p_Poly0H = FR_p_OddPoly0H, f1, f0 // Reccurent computations - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 -(p13) fma.s1 FR_p_Poly0L = FR_p_OddPoly0L, f1, f0 +{ .mfi + nop.m 0 +(p13) fma.s1 FR_p_Poly0L = FR_p_OddPoly0L, f1, f0 // Reccurent computations - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Res1H = FR_r_ResH, FR_r_AbsX, FR_r_TT // X*sin (p11) cmp.eq p13, p12 = r0, r0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_r_Res1L = FR_r_ResH,FR_r_AbsX,FR_r_Res1H// X*sin (p9) cmp.eq p13, p12 = r0, r0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Res1L = FR_r_Res1L, f1, FR_r_TT // sin for neg (p10) cmp.eq p13, p12 = r0, r0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TL = FR_p_Poly0L, FR_r_Res1H, f0 // mult by sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TL = FR_p_Poly0H,FR_r_Res1L,FR_r_TL//mult by sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResH = FR_p_Poly0H,FR_r_Res1H,FR_r_TL//mult 
by sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_r_ResL = FR_p_Poly0H,FR_r_Res1H,FR_r_ResH//sin mult - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 frcpa.s1 FR_r_Y0,p0 = f1,FR_r_ResH // y = frcpa(b) - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fneg FR_r_NegOne = f1 // Form -1.0 - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TL //Low result of mult - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Q0 = f1,FR_r_Y0,f0 // q = a*y - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 - fnma.s1 FR_r_E0 = FR_r_Y0,FR_r_ResH,f1 // e = 1-b*y - nop.i 0 +{ .mfi + nop.m 0 + fnma.s1 FR_r_E0 = FR_r_Y0,FR_r_ResH,f1 // e = 1-b*y + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_E2 = FR_r_E0,FR_r_E0,FR_r_E0 // e2 = e+e^2 - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_E1 = FR_r_E0,FR_r_E0,f0 // e1 = e^2 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Y1 = FR_r_Y0,FR_r_E2,FR_r_Y0 // y1 = y+y*e2 - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_E3 = FR_r_E1,FR_r_E1,FR_r_E0 // e3 = e+e1^2 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Y2 = FR_r_Y1,FR_r_E3,FR_r_Y0 // y2 = y+y1*e3 - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fnma.s1 FR_r_R0 = FR_r_ResH,FR_r_Q0,f1 // r = a-b*q - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fnma.s1 FR_r_E4 = FR_r_ResH,FR_r_Y2,f1 // e4 = 1-b*y2 - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ZH = FR_r_R0,FR_r_Y2,FR_r_Q0 // x = q+r*y2 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Y3 = FR_r_Y2,FR_r_E4,FR_r_Y2 // y3 = y2+y2*e4 - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fnma.s1 FR_r_R1 = FR_r_ResH,FR_r_ZH,f1 // r1 = a-b*x - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fnma.s1 FR_r_R1 = FR_r_ResL,FR_r_ZH,FR_r_R1 // r1=r1-b_lo*X - nop.i 0 + nop.i 0 } 
-{ .mfi - nop.m 0 +{ .mfi + nop.m 0 (p12) fma.s1 FR_r_ZHN = FR_r_ZH,FR_r_NegOne, f0 // Negate for evens - nop.i 0 + nop.i 0 };; .pred.rel "mutex",p13,p12 -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 (p13) fma.s0 f8 = FR_r_R1,FR_r_Y3,FR_r_ZH // Final result - nop.i 0 + nop.i 0 } -{ .mfb - nop.m 0 +{ .mfb + nop.m 0 (p12) fnma.s0 f8 = FR_r_R1,FR_r_Y3,FR_r_ZHN // Final result br.ret.sptk b0 // Exit for 1 <= |X| < 13 path (negative arguments)////// };; @@ -3322,935 +3322,935 @@ tgamma_lt_13: //------------------------------------------------------------------------------ .align 64 tgamma_lt_1: -{ .mfi +{ .mfi getf.exp GR_p_Exp = FR_p_AbsX // exp of abs X fma.s1 FR_z_Q0 = f1,FR_z_Y0,f0 // q = a*y add GR_r_sin_Table2= 0x50, GR_r_sin_Table -} +} { .mfi - ldfpd FR_p_0p5, FR_p_1p5 = [GR_c_Table], 16 - fnma.s1 FR_z_E0 = FR_z_Y0,f8,f1 // e = 1-b*y + ldfpd FR_p_0p5, FR_p_1p5 = [GR_c_Table], 16 + fnma.s1 FR_z_E0 = FR_z_Y0,f8,f1 // e = 1-b*y add GR_p_Table2 = 0xB0, GR_p_Table -};; +};; -{ .mfi +{ .mfi ldfd FR_p_0p25 = [GR_c_Table] fcvt.xf FR_r_XNS = FR_r_IXNS // Convert int repr to float - shr.u GR_p_X_Sgnd = GR_p_X_Sgnd, 60 + shr.u GR_p_X_Sgnd = GR_p_X_Sgnd, 60 // Obtain only 4 bits of significand } -{ .mfi +{ .mfi nop.m 0 nop.f 0 add GR_p_Bias = 0xffff, r0 // Set bias -};; +};; -{ .mfi - ldfpd FR_r_A2H, FR_r_A2L = [GR_r_sin_Table], 16 +{ .mfi + ldfpd FR_r_A2H, FR_r_A2L = [GR_r_sin_Table], 16 nop.f 0 shl GR_p_XN = GR_p_Exp, 4 // Shift exp to 4 bits left to set place for significand -} +} { .mlx ldfe FR_r_A6 = [GR_r_sin_Table2], 16 movl GR_p_0p75 = 0xfffec // 0.75 -};; +};; -{ .mfi - ldfpd FR_r_A1H, FR_r_A1L = [GR_r_sin_Table], 16 +{ .mfi + ldfpd FR_r_A1H, FR_r_A1L = [GR_r_sin_Table], 16 nop.f 0 - or GR_p_XN = GR_p_XN, GR_p_X_Sgnd + or GR_p_XN = GR_p_XN, GR_p_X_Sgnd // Combine exp with 4 high bits of significand -} +} { .mfi - ldfe FR_r_A5 = [GR_r_sin_Table2], 16 + ldfe FR_r_A5 = [GR_r_sin_Table2], 16 nop.f 0 sub GR_p_Exp = GR_p_Exp, GR_p_Bias // Unbiased exp -};; +};; -{ .mmi - ldfe 
FR_r_A9 = [GR_r_sin_Table], 16 - ldfe FR_r_A4 = [GR_r_sin_Table2], 16 +{ .mmi + ldfe FR_r_A9 = [GR_r_sin_Table], 16 + ldfe FR_r_A4 = [GR_r_sin_Table2], 16 cmp.gtu.unc p10, p11 = GR_p_0p75, GR_p_XN // sgnd(x) < 0.75 -};; +};; -{ .mfi - ldfe FR_r_A8 = [GR_r_sin_Table], 16 +{ .mfi + ldfe FR_r_A8 = [GR_r_sin_Table], 16 fma.s1 FR_z_E2 = FR_z_E0,FR_z_E0,FR_z_E0 // e2 = e+e^2 (p10) cmp.gt.unc p9, p10 = -2, GR_p_Exp // x < 0.25 -} +} { .mfi - ldfe FR_r_A3 = [GR_r_sin_Table2], 16 + ldfe FR_r_A3 = [GR_r_sin_Table2], 16 fma.s1 FR_z_E1 = FR_z_E0,FR_z_E0,f0 // e1 = e^2 (p11) add GR_p_Offset = 168, r0 // [0.75;1] interval -};; +};; -{ .mmi +{ .mmi (p10) add GR_p_Offset = 147, r0 // [0.25;0.75] interval - ldfe FR_r_A7 = [GR_r_sin_Table], 16 + ldfe FR_r_A7 = [GR_r_sin_Table], 16 (p9) cmp.gt.unc p8, p9 = -3, GR_p_Exp // x < 0.125 -};; +};; .pred.rel "mutex",p9,p8 -{ .mmi +{ .mmi (p9) add GR_p_Offset = 126, r0 // [0.125;0.25] interval (p8) add GR_p_Offset = 189, r0 // [0.;0.125] interval - nop.i 0 -};; + nop.i 0 +};; -{ .mmf +{ .mmf shladd GR_p_Table = GR_p_Offset, 4, GR_p_Table //Make addresses shladd GR_p_Table2 = GR_p_Offset, 4, GR_p_Table2 fma.s1 FR_r_XS = FR_r_AbsX , f1, FR_r_XNS // xs = |x|-[x] -};; +};; .pred.rel "mutex",p8,p11 -{ .mfi - ldfpd FR_p_A5H, FR_p_A5L = [GR_p_Table], 16 +{ .mfi + ldfpd FR_p_A5H, FR_p_A5L = [GR_p_Table], 16 (p11) fms.s1 FR_p_XR = f1, f1, FR_p_AbsX // r = 1 - |x| // for [0.75;1] interval - nop.i 0 + nop.i 0 } -{ .mfi - ldfpd FR_p_A2H, FR_p_A2L = [GR_p_Table2], 16 +{ .mfi + ldfpd FR_p_A2H, FR_p_A2L = [GR_p_Table2], 16 (p8) fms.s1 FR_p_XR = FR_p_AbsX, f1, f0 // r = |x| // for [0.;0.125] interval - nop.i 0 + nop.i 0 };; -{ .mfi - ldfpd FR_p_A4H, FR_p_A4L = [GR_p_Table], 16 +{ .mfi + ldfpd FR_p_A4H, FR_p_A4L = [GR_p_Table], 16 fma.s1 FR_z_Y1 = FR_z_Y0,FR_z_E2,FR_z_Y0 // y1 = y+y*e2 - nop.i 0 + nop.i 0 } -{ .mfi - ldfpd FR_p_A1H, FR_p_A1L = [GR_p_Table2], 16 +{ .mfi + ldfpd FR_p_A1H, FR_p_A1L = [GR_p_Table2], 16 fma.s1 FR_z_E3 = FR_z_E1,FR_z_E1,FR_z_E0 // 
e3 = e+e1^2 - nop.i 0 + nop.i 0 };; .pred.rel "mutex",p9,p10 -{ .mfi - ldfpd FR_p_A3H, FR_p_A3L = [GR_p_Table], 16 +{ .mfi + ldfpd FR_p_A3H, FR_p_A3L = [GR_p_Table], 16 (p9) fms.s1 FR_p_XR = FR_p_AbsX, f1, f0 // r = |x| // for [0.125;0.25] interval - nop.i 0 + nop.i 0 } -{ .mfi - ldfpd FR_p_A0H, FR_p_A0L = [GR_p_Table2], 16 +{ .mfi + ldfpd FR_p_A0H, FR_p_A0L = [GR_p_Table2], 16 (p10) fms.s1 FR_p_XR = FR_p_AbsX, f1, FR_p_0p5 // r = |x| - 0.5 // for [0.25;0.75] interval - nop.i 0 + nop.i 0 };; -{ .mmi - ldfe FR_p_A20 = [GR_p_Table], 16 - ldfe FR_p_A12 = [GR_p_Table2], 16 +{ .mmi + ldfe FR_p_A20 = [GR_p_Table], 16 + ldfe FR_p_A12 = [GR_p_Table2], 16 nop.i 0 };; -{ .mfi - ldfe FR_p_A19 = [GR_p_Table], 16 +{ .mfi + ldfe FR_p_A19 = [GR_p_Table], 16 fma.s1 FR_r_XS2 = FR_r_XS, FR_r_XS, f0 // xs^2 - nop.i 0 + nop.i 0 } -{ .mfi - ldfe FR_p_A11 = [GR_p_Table2], 16 +{ .mfi + ldfe FR_p_A11 = [GR_p_Table2], 16 nop.f 0 - nop.i 0 + nop.i 0 };; -{ .mmi - ldfe FR_p_A18 = [GR_p_Table], 16 - ldfe FR_p_A10 = [GR_p_Table2], 16 +{ .mmi + ldfe FR_p_A18 = [GR_p_Table], 16 + ldfe FR_p_A10 = [GR_p_Table2], 16 nop.i 0 };; .pred.rel "mutex",p12,p13 -{ .mfi - ldfe FR_p_A17 = [GR_p_Table], 16 +{ .mfi + ldfe FR_p_A17 = [GR_p_Table], 16 fma.s1 FR_z_Y2 = FR_z_Y1,FR_z_E3,FR_z_Y0 // y2 = y+y1*e3 - nop.i 0 + nop.i 0 } -{ .mfi - ldfe FR_p_A9 = [GR_p_Table2], 16 +{ .mfi + ldfe FR_p_A9 = [GR_p_Table2], 16 fnma.s1 FR_z_R0 = f8,FR_z_Q0,f1 // r = a-b*q - nop.i 0 + nop.i 0 };; -{ .mmi - ldfe FR_p_A16 = [GR_p_Table], 16 - ldfe FR_p_A8 = [GR_p_Table2], 16 - nop.i 0 +{ .mmi + ldfe FR_p_A16 = [GR_p_Table], 16 + ldfe FR_p_A8 = [GR_p_Table2], 16 + nop.i 0 };; -{ .mmi - ldfe FR_p_A15 = [GR_p_Table], 16 - ldfe FR_p_A7 = [GR_p_Table2], 16 +{ .mmi + ldfe FR_p_A15 = [GR_p_Table], 16 + ldfe FR_p_A7 = [GR_p_Table2], 16 nop.i 0 };; -{ .mfi - ldfe FR_p_A14 = [GR_p_Table], 16 +{ .mfi + ldfe FR_p_A14 = [GR_p_Table], 16 fma.s1 FR_r_TH = FR_r_A2H, FR_r_XS2, f0 // neg sin - nop.i 0 + nop.i 0 } -{ .mfi - ldfe FR_p_A6 = 
[GR_p_Table2], 16 +{ .mfi + ldfe FR_p_A6 = [GR_p_Table2], 16 fma.s1 FR_r_TL = FR_r_A2L, FR_r_XS2, f0 // neg sin - nop.i 0 + nop.i 0 };; -{ .mfi - ldfe FR_p_A13 = [GR_p_Table], 16 +{ .mfi + ldfe FR_p_A13 = [GR_p_Table], 16 fms.s1 FR_r_XS2L = FR_r_XS, FR_r_XS, FR_r_XS2 // xs^2 delta - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp5H = FR_p_A5H, FR_p_XR, f0 // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_XR2 = FR_p_XR, FR_p_XR, f0 // poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fabs FR_r_XS = FR_r_XS // Absolute value of xs - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp2H = FR_p_A2H, FR_p_XR, f0 // High poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fnma.s1 FR_z_E4 = f8,FR_z_Y2,f1 // e4 = 1-b*y2 - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_z_ZH = FR_z_R0,FR_z_Y2,FR_z_Q0 // 1/x = q+r*y2 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_r_TT = FR_r_A2H, FR_r_XS2, FR_r_TH // neg sin - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResH = FR_r_TH, f1, FR_r_A1H // neg sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TL = FR_r_A2H, FR_r_XS2L, FR_r_TL // neg sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Temp5L = FR_p_A5H, FR_p_XR, FR_p_Temp5H // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly5H = FR_p_Temp5H, f1, FR_p_A4H // Low poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Temp2L = FR_p_A2H, FR_p_XR, FR_p_Temp2H // High poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly2H = FR_p_Temp2H, f1, FR_p_A1H // High poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_XR3 = FR_p_XR2, FR_p_XR, f0 // r^3 - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_XR2L = FR_p_XR, FR_p_XR, FR_p_XR2 // r^2 
delta - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A18 = FR_p_A19, FR_p_XR, FR_p_A18 // poly tail - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A14 = FR_p_A15, FR_p_XR, FR_p_A14 // poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_XR4 = FR_p_XR2, FR_p_XR2, f0 // poly tail - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_z_Y3 = FR_z_Y2,FR_z_E4,FR_z_Y2 // y3 = y2+y2*e4 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp5L = FR_p_A5L, FR_p_XR, FR_p_Temp5L // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Poly5L = FR_p_A4H, f1, FR_p_Poly5H // Low poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp4H = FR_p_Poly5H, FR_p_XR, f0 // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp2L = FR_p_A2L, FR_p_XR, FR_p_Temp2L // High poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Poly2L = FR_p_A1H, f1, FR_p_Poly2H // High poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp1H = FR_p_Poly2H, FR_p_XR, f0 // High poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_XR3L = FR_p_XR2, FR_p_XR, FR_p_XR3 // x^3 delta - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A16 = FR_p_A17, FR_p_XR, FR_p_A16 //poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_r_ResL = FR_r_A1H, f1, FR_r_ResH // neg sin - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // neg sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp5L = FR_p_Temp5L, f1, FR_p_A4L // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5H //Low poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Temp4L = FR_p_Poly5H, FR_p_XR, 
FR_p_Temp4H//Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly4H = FR_p_Temp4H, f1, FR_p_A3H // Low poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp2L = FR_p_Temp2L, f1, FR_p_A1L // High poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2H // High poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Temp1L = FR_p_Poly2H,FR_p_XR,FR_p_Temp1H //High poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly1H = FR_p_Temp1H, f1, FR_p_A0H // High poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A12 = FR_p_A13, FR_p_XR, FR_p_A12 // poly tail - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_XR3L = FR_p_XR2L, FR_p_XR, FR_p_XR3L // x^3 low - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly5L = FR_p_Poly5L, f1, FR_p_Temp5L //Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A10 = FR_p_A11, FR_p_XR, FR_p_A10 //poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Poly4L = FR_p_A3H, f1, FR_p_Poly4H /// Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A6 = FR_p_A7, FR_p_XR, FR_p_A6 // poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A8 = FR_p_A9, FR_p_XR, FR_p_A8 // poly tail - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_XR6 = FR_p_XR4, FR_p_XR2, f0 // r^6 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly2L = FR_p_Poly2L, f1, FR_p_Temp2L // High poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Poly1L = FR_p_A0H, f1, FR_p_Poly1H // High poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TH // neg sin - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TT = FR_r_TL, f1, 
FR_r_A1L // neg sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp4L = FR_p_Poly5L,FR_p_XR,FR_p_Temp4L //Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A18 = FR_p_A20, FR_p_XR2, FR_p_A18 // poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4H // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A14 = FR_p_A16, FR_p_XR2, FR_p_A14 // poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A6 = FR_p_A8, FR_p_XR2, FR_p_A6 // poly tail - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A10 = FR_p_A12, FR_p_XR2, FR_p_A10 // poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp1L = FR_p_Poly2L,FR_p_XR,FR_p_Temp1L //High poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1H // High poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TT // neg sin - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TH = FR_r_ResH, FR_r_XS2, f0 // neg sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp4L = FR_p_Temp4L, f1, FR_p_A3L // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly3H = FR_p_Poly4H, FR_p_XR3, f0 // Low poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A14 = FR_p_A18, FR_p_XR4, FR_p_A14 // poly tail - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_XR8 = FR_p_XR4, FR_p_XR4, f0 // r^8 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TL = FR_r_ResH, FR_r_XS2L, f0 // neg sin - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fnma.s1 FR_z_R1 = f8,FR_z_ZH,f1 // r1 = a-b*x - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp1L = FR_p_Temp1L, f1, FR_p_A0L // High 
poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_A6 = FR_p_A10, FR_p_XR4, FR_p_A6 // poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_r_TT = FR_r_ResH, FR_r_XS2, FR_r_TH // neg sin - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Res3H = FR_r_TH, f1, f1 // neg sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly4L = FR_p_Poly4L, f1, FR_p_Temp4L // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly3L = FR_p_Poly4H, FR_p_XR3L, f0 // Low poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly0H = FR_p_Poly3H, f1, FR_p_Poly1H // Result - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_A7 = FR_r_A8, FR_r_XS2, FR_r_A7 // neg sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TL = FR_r_ResL, FR_r_XS2, FR_r_TL // neg sin - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_XS4 = FR_r_XS2, FR_r_XS2, f0 // xs^4 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly1L = FR_p_Poly1L, f1, FR_p_Temp1L // High poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_PolyTail = FR_p_A14, FR_p_XR8, FR_p_A6 // poly tail - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_r_Res3L = f1, f1, FR_r_Res3H // neg sin - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResH = FR_r_Res3H, FR_r_XS, f0 // neg sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Temp0L = FR_p_Poly4H,FR_p_XR3,FR_p_Poly3H //Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly3L = FR_p_Poly4L,FR_p_XR3,FR_p_Poly3L //Low poly - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_p_Poly0L = FR_p_Poly1H, f1, FR_p_Poly0H // Result - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_z_ZL = FR_z_R1,FR_z_Y3, f0 // x_lo = r1*y3 - nop.i 0 + 
nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TL = FR_r_TL, f1, FR_r_TT // neg sin - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_A3 = FR_r_A4, FR_r_XS2, FR_r_A3 /// neg sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly1L = FR_p_PolyTail,FR_p_XR6,FR_p_Poly1L // High - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_A5 = FR_r_A6, FR_r_XS2, FR_r_A5 // neg sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TH // neg sin - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fms.s1 FR_r_ResL = FR_r_Res3H, FR_r_XS, FR_r_ResH // neg sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly3L = FR_p_Poly3L, f1, FR_p_Temp0L // Low poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_A7 = FR_r_A9, FR_r_XS4, FR_r_A7 // neg sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly0L = FR_p_Poly0L, f1, FR_p_Poly3H // result - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 (p14) fma.s1 f8 = FR_p_Poly0H, FR_z_ZH, f0 // z*poly - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp1L = FR_p_Poly0H, FR_z_ZL, f0 // z*poly low - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_A3 = FR_r_A5, FR_r_XS4, FR_r_A3 // sin tail - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_XS7 = FR_r_XS4, FR_r_XS2, f0 // xs^6 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Res3L = FR_r_Res3L, f1, FR_r_TL // sin low - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_XS8 = FR_r_XS4, FR_r_XS4, f0 // xs^8 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Temp0H = FR_p_Poly3L, f1, FR_p_Poly1L // result - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 (p14) fms.s1 FR_p_Temp1H = FR_p_Poly0H, FR_z_ZH, f8 // hi result - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 
+{ .mfi + nop.m 0 fma.s1 FR_r_XS7 = FR_r_XS7, FR_r_XS, f0 // xs^7 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResL = FR_r_Res3L, FR_r_XS, FR_r_ResL // lo result - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Tail = FR_r_A7, FR_r_XS8, FR_r_A3 // tail result - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_p_Poly0L = FR_p_Poly0L, f1, FR_p_Temp0H // lo result - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResL = FR_r_Tail, FR_r_XS7, FR_r_ResL // lo result - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 (p14) fma.s1 FR_p_Temp1L = FR_p_Poly0L,FR_z_ZH,FR_p_Temp1L //hi result - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TT = FR_r_ResL, f1, f0 // for low result - nop.i 0 + nop.i 0 };; .pred.rel "mutex",p12,p13 -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 (p14) fma.s1 FR_p_Temp1L = FR_p_Temp1L, f1, FR_p_Temp1H // for lo res - nop.i 0 + nop.i 0 };; -{ .mfi +{ .mfi (p10) cmp.eq p13, p12 = r0, r0 // set p13, clear p12 fma.s1 FR_r_Res1H = FR_r_ResH, f1, FR_r_TT // hi res - nop.i 0 + nop.i 0 };; -{ .mfb +{ .mfb (p9) cmp.eq p13, p12 = r0, r0 // set p13, clear p12 (p14) fma.s0 f8 = f8, f1, FR_p_Temp1L // Final result (p14) br.ret.spnt b0 // Exit for 0 < |X| < 1 path (positive arguments)/////// };; -{ .mfi +{ .mfi (p11) cmp.eq p13, p12 = r0, r0 // set p13, clear p12 fms.s1 FR_r_Res1L = FR_r_ResH, f1, FR_r_Res1H // Low sin result - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Res1L = FR_r_Res1L, f1, FR_r_TT // Low sin result - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TL = FR_p_Poly0L,FR_r_Res1H,f0 //Low sin result - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_TL = FR_p_Poly0H, FR_r_Res1L, FR_r_TL //Low sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResH = FR_p_Poly0H, FR_r_Res1H, FR_r_TL //High sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + 
nop.m 0 fms.s1 FR_r_ResL = FR_p_Poly0H,FR_r_Res1H,FR_r_ResH //Low res - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 frcpa.s1 FR_r_Y0,p0 = f1,FR_r_ResH // y = frcpa(b) - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fneg FR_r_NegOne = f1 // Construct -1.0 - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ResL = FR_r_ResL, f1, FR_r_TL // low sin - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Q0 = f1,FR_r_Y0,f0 // q = a*y - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 - fnma.s1 FR_r_E0 = FR_r_Y0,FR_r_ResH,f1 // e = 1-b*y - nop.i 0 +{ .mfi + nop.m 0 + fnma.s1 FR_r_E0 = FR_r_Y0,FR_r_ResH,f1 // e = 1-b*y + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_E2 = FR_r_E0,FR_r_E0,FR_r_E0 // e2 = e+e^2 - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_E1 = FR_r_E0,FR_r_E0,f0 // e1 = e^2 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Y1 = FR_r_Y0,FR_r_E2,FR_r_Y0 // y1 = y+y*e2 - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_E3 = FR_r_E1,FR_r_E1,FR_r_E0 // e3 = e+e1^2 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Y2 = FR_r_Y1,FR_r_E3,FR_r_Y0 // y2 = y+y1*e3 - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fnma.s1 FR_r_R0 = FR_r_ResH,FR_r_Q0,f1 // r = a-b*q - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fnma.s1 FR_r_E4 = FR_r_ResH,FR_r_Y2,f1 // e4 = 1-b*y2 - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ZH = FR_r_R0,FR_r_Y2,FR_r_Q0 // x = q+r*y2 - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_Y3 = FR_r_Y2,FR_r_E4,FR_r_Y2 // y3 = y2+y2*e4 - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fnma.s1 FR_r_R1 = FR_r_ResH,FR_r_ZH,f1 // r1 = a-b*x - nop.i 0 + nop.i 0 };; -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fnma.s1 FR_r_R1 = FR_r_ResL,FR_r_ZH,FR_r_R1 // r1=r1 - b_lo*X - nop.i 0 + nop.i 0 } -{ .mfi - nop.m 0 +{ .mfi + nop.m 0 fma.s1 FR_r_ZHN = 
FR_r_ZH,FR_r_NegOne, f0 // Negate - nop.i 0 + nop.i 0 };; .pred.rel "mutex",p13,p12 -{ .mfb - nop.m 0 +{ .mfb + nop.m 0 fnma.s0 f8 = FR_r_R1,FR_r_Y3,FR_r_ZHN // Result for neg br.ret.sptk b0 // Exit for 0 < |X| < 1 path (negative arguments)////// };; @@ -4263,7 +4263,7 @@ tgamma_lt_1: .align 32 tgammal_spec: { .mlx - nop.m 0 + nop.m 0 movl GR_DenOverflow = 0x2000000000000001 } { .mfi @@ -4283,20 +4283,20 @@ tgammal_spec: } { .mfi -(p9) cmp.ltu.unc p10,p11 = GR_l_signif_Z, GR_DenOverflow +(p9) cmp.ltu.unc p10,p11 = GR_l_signif_Z, GR_DenOverflow (p9) fnorm.s0 f8 = f8 - nop.i 0 + nop.i 0 };; { .mfb - nop.m 0 + nop.m 0 (p9) fcvt.fx.trunc.s1 FR_n_IXN = FR_l_AbsX // Round by truncate (p11) br.cond.sptk tgamma_lt_1 // Return to gamma ('good' denormal)//////////// };; { .mfb - nop.m 0 - nop.f 0 + nop.m 0 + nop.f 0 (p10) br.cond.spnt tgammal_overflow // "Bad" denormal - overflow! ///////////// };; @@ -4314,17 +4314,17 @@ tgammal_spec: { .mfi (p7) mov GR_Parameter_TAG = 256 // negative (p7) frcpa.s0 f8,p0 = f1,f8 // Raise V flag - nop.i 0 + nop.i 0 } { .mfb nop.m 0 - nop.f 0 + nop.f 0 (p8) br.cond.spnt tgammal_singularity // Branch for +ZERO //////////////////// };; { .mfb - nop.m 0 - nop.f 0 + nop.m 0 + nop.f 0 br.cond.spnt tgammal_libm_err // Branch for -ZERO /////////////////////// };; @@ -4358,7 +4358,7 @@ tgammal_overflow: { .mfi addl r8 = 0x1FFFE, r0 // Exp of INF fcmp.lt.s1 p15,p14 = f8,f0 // p14 - pos arg, p15 - neg arg - nop.i 0 + nop.i 0 };; { .mfi @@ -4369,12 +4369,12 @@ tgammal_overflow: .pred.rel "mutex",p14,p15 { .mfi - nop.m 0 + nop.m 0 (p14) fma.s0 f8 = f9,f9,f0 // Set I,O and +INF result - nop.i 0 + nop.i 0 } { .mfb - nop.m 0 + nop.m 0 (p15) fnma.s0 f8 = f9,f9,f0 // Set I,O and -INF result br.cond.sptk tgammal_libm_err // Call error handler ///////////////////// // with overflow error //////////////////// @@ -4389,9 +4389,9 @@ tgammal_overflow: .align 32 tgammal_underflow: { .mfi - nop.m 0 + nop.m 0 fcvt.fx.trunc.s1 FR_u_IXN = f8 // Convert arg to int 
repres. in FR - nop.i 0 + nop.i 0 };; { .mmi diff --git a/sysdeps/ia64/softpipe.h b/sysdeps/ia64/softpipe.h index cf0eb53..d71af73 100644 --- a/sysdeps/ia64/softpipe.h +++ b/sysdeps/ia64/softpipe.h @@ -18,7 +18,7 @@ /* The latency of a memory load assumed by the assembly implementation of the mem and str functions. Since we don't have any clue about - where the data might be, let's assume it's in the L2 cache. + where the data might be, let's assume it's in the L2 cache. Assuming L3 would be too pessimistic :-) Some functions define MEMLAT as 2, because they expect their data diff --git a/sysdeps/ia64/strchr.S b/sysdeps/ia64/strchr.S index 63db7ff..fc55ad1 100644 --- a/sysdeps/ia64/strchr.S +++ b/sysdeps/ia64/strchr.S @@ -51,7 +51,7 @@ ENTRY(strchr) .save ar.lc, saved_lc mov saved_lc = ar.lc // save the loop counter .body - mov ret0 = str + mov ret0 = str and tmp = 7, str // tmp = str % 8 mux1 chrx8 = chr, @brcst extr.u chr = chr, 0, 8 // retain only the last byte @@ -74,9 +74,9 @@ ENTRY(strchr) ld8 val1 = [ret0], 8;; nop.b 0 nop.b 0 -.l2: +.l2: ld8.s val2 = [ret0], 8 // don't bomb out here - czx1.r pos0 = val1 + czx1.r pos0 = val1 xor tmp = val1, chrx8 // if val1 contains chr, tmp will ;; // contain a zero in its position czx1.r poschr = tmp @@ -87,7 +87,7 @@ ENTRY(strchr) (p6) br.cond.spnt .notfound chk.s val2, .recovery .back: - mov val1 = val2 + mov val1 = val2 br.cond.dptk .l2 .foundit: (p6) cmp.lt p8, p0 = pos0, poschr // we found chr and null in the word diff --git a/sysdeps/ia64/strlen.S b/sysdeps/ia64/strlen.S index 518d86b..ad6a7fb 100644 --- a/sysdeps/ia64/strlen.S +++ b/sysdeps/ia64/strlen.S @@ -52,7 +52,7 @@ ENTRY(strlen) .save ar.lc, saved_lc mov saved_lc = ar.lc // save the loop counter .body - mov str = in0 + mov str = in0 mov len = r0 // len = 0 and tmp = 7, in0 // tmp = str % 8 ;; @@ -74,13 +74,13 @@ ENTRY(strlen) nop.b 0 nop.b 0 .l2: ld8.s val2 = [str], 8 // don't bomb out here - czx1.r pos0 = val1 + czx1.r pos0 = val1 ;; cmp.ne p6, p0 = 8, pos0 
(p6) br.cond.spnt .foundit chk.s val2, .recovery .back: - mov val1 = val2 + mov val1 = val2 br.cond.dptk .l2 .foundit: sub tmp = str, origadd // tmp = crt address - orig diff --git a/sysdeps/ia64/strncmp.S b/sysdeps/ia64/strncmp.S index 743121d..faa0d10 100644 --- a/sysdeps/ia64/strncmp.S +++ b/sysdeps/ia64/strncmp.S @@ -58,5 +58,5 @@ ENTRY(strncmp) sub ret0 = val1, val2 .restore_and_exit: br.ret.sptk.many b0 -END(strncmp) +END(strncmp) libc_hidden_builtin_def (strncmp) diff --git a/sysdeps/unix/sysv/linux/ia64/register-dump.h b/sysdeps/unix/sysv/linux/ia64/register-dump.h index 513db97..dad3451 100644 --- a/sysdeps/unix/sysv/linux/ia64/register-dump.h +++ b/sysdeps/unix/sysv/linux/ia64/register-dump.h @@ -147,7 +147,7 @@ register_dump (int fd, struct sigcontext *ctx) for (i = 0; i < 3; ++i) ADD_MEM (bpregs[i], sizeof (bpregs[0]) - 1); - + ADD_STRING ("\n\n IP: "); ADD_MEM (spregs[0], sizeof (spregs[0])); ADD_STRING (" RSC: "); -- 2.7.4