From a766ca87bfb1129a8cf4f8a428121caf60726de4 Mon Sep 17 00:00:00 2001 From: mtklein Date: Tue, 26 Jan 2016 07:40:30 -0800 Subject: [PATCH] de-proc sk_float_rsqrt This is the first of many little baby steps to have us stop runtime-detecting NEON. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1616013003 CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Committed: https://skia.googlesource.com/skia/+/efcc125acd2d71eb077caf6db65fdd6b9eb1dc0d Review URL: https://codereview.chromium.org/1616013003 --- include/private/SkFloatingPoint.h | 23 +++++++++++++++-------- src/core/SkOpts.cpp | 2 -- src/core/SkOpts.h | 3 --- src/opts/SkFloatingPoint_opts.h | 35 ----------------------------------- src/opts/SkOpts_neon.cpp | 2 -- tests/MathTest.cpp | 12 +++++++----- 6 files changed, 22 insertions(+), 55 deletions(-) delete mode 100644 src/opts/SkFloatingPoint_opts.h diff --git a/include/private/SkFloatingPoint.h b/include/private/SkFloatingPoint.h index f7ee816..ffed5c0 100644 --- a/include/private/SkFloatingPoint.h +++ b/include/private/SkFloatingPoint.h @@ -127,20 +127,28 @@ extern const uint32_t gIEEENegativeInfinity; #define SK_FloatInfinity (*SkTCast<const float*>(&gIEEEInfinity)) #define SK_FloatNegativeInfinity (*SkTCast<const float*>(&gIEEENegativeInfinity)) -// We forward declare this to break an #include cycle. -// (SkScalar -> SkFloatingPoint -> SkOpts.h -> SkXfermode -> SkColor -> SkScalar) -namespace SkOpts { extern float (*rsqrt)(float); } +static inline float sk_float_rsqrt_portable(float x) { + // Get initial estimate. + int i = *SkTCast<int*>(&x); + i = 0x5F1FFFF9 - (i>>1); + float estimate = *SkTCast<float*>(&i); + + // One step of Newton's method to refine. + const float estimate_sq = estimate*estimate; + estimate *= 0.703952253f*(2.38924456f-x*estimate_sq); + return estimate; +} // Fast, approximate inverse square root. // Compare to name-brand "1.0f / sk_float_sqrt(x)".
Should be around 10x faster on SSE, 2x on NEON. -static inline float sk_float_rsqrt(const float x) { +static inline float sk_float_rsqrt(float x) { // We want all this inlined, so we'll inline SIMD and just take the hit when we don't know we've got // it at compile time. This is going to be too fast to productively hide behind a function pointer. // -// We do one step of Newton's method to refine the estimates in the NEON and null paths. No +// We do one step of Newton's method to refine the estimates in the NEON and portable paths. No // refinement is faster, but very innacurate. Two steps is more accurate, but slower than 1/sqrt. // -// Optimized constants in the null path courtesy of http://rrrola.wz.cz/inv_sqrt.html +// Optimized constants in the portable path courtesy of http://rrrola.wz.cz/inv_sqrt.html #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1 return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x))); #elif defined(SK_ARM_HAS_NEON) @@ -153,8 +161,7 @@ static inline float sk_float_rsqrt(const float x) { estimate = vmul_f32(estimate, vrsqrts_f32(xx, estimate_sq)); return vget_lane_f32(estimate, 0); // 1 will work fine too; the answer's in both places. #else - // Perhaps runtime-detected NEON, or a portable fallback. - return SkOpts::rsqrt(x); + return sk_float_rsqrt_portable(x); #endif } diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp index 28dd1af..669401b 100644 --- a/src/core/SkOpts.cpp +++ b/src/core/SkOpts.cpp @@ -13,7 +13,6 @@ #include "SkBlitRow_opts.h" #include "SkBlurImageFilter_opts.h" #include "SkColorCubeFilter_opts.h" -#include "SkFloatingPoint_opts.h" #include "SkMatrix_opts.h" #include "SkMorphologyImageFilter_opts.h" #include "SkSwizzler_opts.h" @@ -55,7 +54,6 @@ namespace SkOpts { // If our global compile options are set high enough, these defaults might even be // CPU-specialized, e.g. a typical x86-64 machine might start with SSE2 defaults. // They'll still get a chance to be replaced with even better ones, e.g. using SSE4.1. 
- decltype(rsqrt) rsqrt = sk_default::rsqrt; decltype(memset16) memset16 = sk_default::memset16; decltype(memset32) memset32 = sk_default::memset32; decltype(create_xfermode) create_xfermode = sk_default::create_xfermode; diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h index 1a9820b..41ad8eb 100644 --- a/src/core/SkOpts.h +++ b/src/core/SkOpts.h @@ -23,9 +23,6 @@ namespace SkOpts { // Declare function pointers here... - // Returns a fast approximation of 1.0f/sqrtf(x). - extern float (*rsqrt)(float); - // See SkUtils.h extern void (*memset16)(uint16_t[], uint16_t, int); extern void (*memset32)(uint32_t[], uint32_t, int); diff --git a/src/opts/SkFloatingPoint_opts.h b/src/opts/SkFloatingPoint_opts.h deleted file mode 100644 index 8b6536a..0000000 --- a/src/opts/SkFloatingPoint_opts.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2015 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#ifndef SkFloatingPoint_opts_DEFINED -#define SkFloatingPoint_opts_DEFINED - -#include "SkFloatingPoint.h" - -namespace SK_OPTS_NS { - -#if defined(SK_ARM_HAS_NEON) - static float rsqrt(float x) { - return sk_float_rsqrt(x); // This sk_float_rsqrt copy will take the NEON compile-time path. - } -#else - static float rsqrt(float x) { - // Get initial estimate. - int i = *SkTCast<int*>(&x); - i = 0x5F1FFFF9 - (i>>1); - float estimate = *SkTCast<float*>(&i); - - // One step of Newton's method to refine.
- const float estimate_sq = estimate*estimate; - estimate *= 0.703952253f*(2.38924456f-x*estimate_sq); - return estimate; - } -#endif - -} // namespace SK_OPTS_NS - -#endif//SkFloatingPoint_opts_DEFINED diff --git a/src/opts/SkOpts_neon.cpp b/src/opts/SkOpts_neon.cpp index 9cff229..dcb057e 100644 --- a/src/opts/SkOpts_neon.cpp +++ b/src/opts/SkOpts_neon.cpp @@ -12,7 +12,6 @@ #include "SkBlitRow_opts.h" #include "SkBlurImageFilter_opts.h" #include "SkColorCubeFilter_opts.h" -#include "SkFloatingPoint_opts.h" #include "SkMatrix_opts.h" #include "SkMorphologyImageFilter_opts.h" #include "SkSwizzler_opts.h" @@ -22,7 +21,6 @@ namespace SkOpts { void Init_neon() { - rsqrt = sk_neon::rsqrt; memset16 = sk_neon::memset16; memset32 = sk_neon::memset32; create_xfermode = sk_neon::create_xfermode; diff --git a/tests/MathTest.cpp b/tests/MathTest.cpp index 24e46f3..de7ad1d 100644 --- a/tests/MathTest.cpp +++ b/tests/MathTest.cpp @@ -382,14 +382,15 @@ static void unittest_half(skiatest::Reporter* reporter) { } -static void test_rsqrt(skiatest::Reporter* reporter) { +template <typename RSqrtFn> +static void test_rsqrt(skiatest::Reporter* reporter, RSqrtFn rsqrt) { const float maxRelativeError = 6.50196699e-4f; // test close to 0 up to 1 float input = 0.000001f; for (int i = 0; i < 1000; ++i) { float exact = 1.0f/sk_float_sqrt(input); - float estimate = sk_float_rsqrt(input); + float estimate = rsqrt(input); float relativeError = sk_float_abs(exact - estimate)/exact; REPORTER_ASSERT(reporter, relativeError <= maxRelativeError); input += 0.001f; @@ -399,7 +400,7 @@ static void test_rsqrt(skiatest::Reporter* reporter) { input = 1.0f; for (int i = 0; i < 1000; ++i) { float exact = 1.0f/sk_float_sqrt(input); - float estimate = sk_float_rsqrt(input); + float estimate = rsqrt(input); float relativeError = sk_float_abs(exact - estimate)/exact; REPORTER_ASSERT(reporter, relativeError <= maxRelativeError); input += 0.01f; @@ -409,7 +410,7 @@ static void test_rsqrt(skiatest::Reporter* reporter) { input =
1000000.0f; for (int i = 0; i < 100; ++i) { float exact = 1.0f/sk_float_sqrt(input); - float estimate = sk_float_rsqrt(input); + float estimate = rsqrt(input); float relativeError = sk_float_abs(exact - estimate)/exact; REPORTER_ASSERT(reporter, relativeError <= maxRelativeError); input += 754326.f; @@ -555,7 +556,8 @@ DEF_TEST(Math, reporter) { unittest_fastfloat(reporter); unittest_isfinite(reporter); unittest_half(reporter); - test_rsqrt(reporter); + test_rsqrt(reporter, sk_float_rsqrt); + test_rsqrt(reporter, sk_float_rsqrt_portable); for (i = 0; i < 10000; i++) { SkFixed numer = rand.nextS(); -- 2.7.4