From f684a78d9ea988883c9b2c7bcc4ea4d5e68bd998 Mon Sep 17 00:00:00 2001 From: mtklein Date: Thu, 30 Jul 2015 09:29:37 -0700 Subject: [PATCH] Runtime CPU detection for rsqrt(). This enables the NEON sk_float_rsqrt() code for configurations that have NEON at run-time but not compile-time. These devices will see about a 2x (1.26 -> 2.33) slowdown in sk_float_rsqrt(), but it should be more precise than our portable fallback. (When inlined, the portable fallback and the NEON code are almost identical in speed. The only difference is precision. Going through a function pointer is causing all this slowdown. This is a good example of a place where Skia really benefits from compile-time NEON.) BUG=skia:4117,skia:4114 No public API changes. TBR=reed@google.com Review URL: https://codereview.chromium.org/1264893002 --- include/core/SkFloatingPoint.h | 13 ++++--------- src/core/SkOpts.cpp | 15 ++++++++++++++- src/core/SkOpts.h | 5 ++++- src/opts/SkOpts_neon.cpp | 2 ++ 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/include/core/SkFloatingPoint.h b/include/core/SkFloatingPoint.h index 73eb26c0db..5ca4d103d0 100644 --- a/include/core/SkFloatingPoint.h +++ b/include/core/SkFloatingPoint.h @@ -127,6 +127,8 @@ extern const uint32_t gIEEENegativeInfinity; #define SK_FloatInfinity (*SkTCast(&gIEEEInfinity)) #define SK_FloatNegativeInfinity (*SkTCast(&gIEEENegativeInfinity)) +namespace SkOpts { extern float (*rsqrt)(float); } + // Fast, approximate inverse square root. // Compare to name-brand "1.0f / sk_float_sqrt(x)". Should be around 10x faster on SSE, 2x on NEON. static inline float sk_float_rsqrt(const float x) { @@ -149,15 +151,8 @@ static inline float sk_float_rsqrt(const float x) { estimate = vmul_f32(estimate, vrsqrts_f32(xx, estimate_sq)); return vget_lane_f32(estimate, 0); // 1 will work fine too; the answer's in both places. #else - // Get initial estimate. - int i = *SkTCast(&x); - i = 0x5F1FFFF9 - (i>>1); - float estimate = *SkTCast(&i); - - // One step of Newton's method to refine. - const float estimate_sq = estimate*estimate; - estimate *= 0.703952253f*(2.38924456f-x*estimate_sq); - return estimate; + // Perhaps runtime-detected NEON, or a portable fallback. + return SkOpts::rsqrt(x); #endif } diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp index 4f7c5e9345..7da306c99d 100644 --- a/src/core/SkOpts.cpp +++ b/src/core/SkOpts.cpp @@ -20,8 +20,21 @@ #include #endif +static float rsqrt_portable(float x) { + // Get initial estimate. + int i = *SkTCast(&x); + i = 0x5F1FFFF9 - (i>>1); + float estimate = *SkTCast(&i); + + // One step of Newton's method to refine. + const float estimate_sq = estimate*estimate; + estimate *= 0.703952253f*(2.38924456f-x*estimate_sq); + return estimate; +} + namespace SkOpts { - // (Define default function pointer values here...) + // Define default function pointer values here... + decltype(rsqrt) rsqrt = rsqrt_portable; // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp. void Init_sse2(); diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h index 71abae5d7a..f02ec97553 100644 --- a/src/core/SkOpts.h +++ b/src/core/SkOpts.h @@ -16,7 +16,10 @@ namespace SkOpts { // Called by SkGraphics::Init(), and automatically #if SK_ALLOW_STATIC_GLOBAL_INITIALIZERS. void Init(); - // (Function pointers go here). + // Declare function pointers here... + + // Returns a fast approximation of 1.0f/sqrtf(x). + extern float (*rsqrt)(float); } #endif//SkOpts_DEFINED diff --git a/src/opts/SkOpts_neon.cpp b/src/opts/SkOpts_neon.cpp index 3508b35318..ef667dc065 100644 --- a/src/opts/SkOpts_neon.cpp +++ b/src/opts/SkOpts_neon.cpp @@ -6,9 +6,11 @@ */ #include "SkOpts.h" +#include "SkFloatingPoint.h" namespace SkOpts { void Init_neon() { + rsqrt = sk_float_rsqrt; // This copy of sk_float_rsqrt will take the NEON path. } } -- 2.34.1