From 58142d7ee7491faf4ebf7cf8df88ec71f5ea0a46 Mon Sep 17 00:00:00 2001 From: Tomas Frydrych Date: Thu, 6 Dec 2007 16:21:48 +0000 Subject: [PATCH] 2007-12-06 Tomas Frydrych * clutter/clutter-fixed.c: * clutter/clutter-fixed.h: (clutter_sqrti): Added extra iteration to the Newton-Raphson algorithm for arguments less than 342 to improve precision. --- ChangeLog | 8 ++++++++ clutter/clutter-fixed.c | 19 ++++++++++++++++--- clutter/clutter-fixed.h | 2 +- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 96821fe..bc24d1f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2007-12-06 Tomas Frydrych + + * clutter/clutter-fixed.c: + * clutter/clutter-fixed.h: + (clutter_sqrti): + Added extra iteration to the Newton-Raphson algorithm for arguments + less than 342 to improve precision. + 2007-12-06 Emmanuele Bassi * clutter/clutter-main.c: diff --git a/clutter/clutter-fixed.c b/clutter/clutter-fixed.c index 1eaa313..fefe44a 100644 --- a/clutter/clutter-fixed.c +++ b/clutter/clutter-fixed.c @@ -606,8 +606,8 @@ clutter_sqrtx (ClutterFixed x) * * Very fast fixed point implementation of square root for integers. * - * This function is about 10x faster than clib sqrt() on x86, and (this is - * not a typo!) more than 800x faster on ARM without FPU. It's error is < 5% + * This function is at least 6x faster than clib sqrt() on x86, and (this is + * not a typo!) about 500x faster on ARM without FPU. Its error is < 5% * for arguments < #CLUTTER_SQRTI_ARG_5_PERCENT and < 10% for arguments < * #CLUTTER_SQRTI_ARG_10_PERCENT. The maximum argument that can be passed to * this function is CLUTTER_SQRTI_ARG_MAX. 
@@ -673,13 +673,26 @@ clutter_sqrti (gint number) flt2.f = flt.f + 2.0; flt2.i &= 0x7FFFFF; - /* Now we correct the estimate, only single iterration is needed */ + /* Now we correct the estimate */ y_1 = (flt2.i >> 11) * (flt2.i >> 11); y_1 = (y_1 >> 8) * (x >> 8); y_1 = f - y_1; flt2.i = (flt2.i >> 11) * (y_1 >> 11); + /* If the original argument is less than 342, we do another + * iteration to improve precision (for arguments >= 342, the single + * iteration produces generally better results). + */ + if (x < 171) + { + y_1 = (flt2.i >> 11) * (flt2.i >> 11); + y_1 = (y_1 >> 8) * (x >> 8); + + y_1 = f - y_1; + flt2.i = (flt2.i >> 11) * (y_1 >> 11); + } + /* Invert, round and convert from 10.22 to an integer * 0x1e3c68 is a magical rounding constant that produces slightly * better results than 0x200000. diff --git a/clutter/clutter-fixed.h b/clutter/clutter-fixed.h index feac5d6..88ac4f6 100644 --- a/clutter/clutter-fixed.h +++ b/clutter/clutter-fixed.h @@ -338,7 +338,7 @@ ClutterFixed clutter_tani (ClutterAngle angle); * Since: 0.6 */ #ifndef __SSE2__ -#define CLUTTER_SQRTI_ARG_5_PERCENT 131 +#define CLUTTER_SQRTI_ARG_5_PERCENT 210 #else #define CLUTTER_SQRTI_ARG_5_PERCENT INT_MAX #endif -- 2.7.4