From b42b785d1cbc98bd34aceae338060831b974f9c5 Mon Sep 17 00:00:00 2001 From: reed Date: Mon, 11 Jul 2016 13:17:35 -0700 Subject: [PATCH] try to speed-up maprect + round2i + contains We call roundOut in a few places. If we can get SkNx::Ceil we could efficiently implement that as well. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2133413002 CQ_INCLUDE_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Review-Url: https://codereview.chromium.org/2133413002 --- bench/RectBench.cpp | 90 +++++++++++++++++++++++++++++++ gyp/core.gypi | 2 +- gyp/tools.gyp | 1 + include/core/SkRect.h | 24 ++++++++- {src/core => include/private}/SkNx.h | 4 +- {src/opts => include/private}/SkNx_neon.h | 0 {src/opts => include/private}/SkNx_sse.h | 0 tests/RectTest.cpp | 32 +++++++++++ 8 files changed, 149 insertions(+), 4 deletions(-) rename {src/core => include/private}/SkNx.h (99%) rename {src/opts => include/private}/SkNx_neon.h (100%) rename {src/opts => include/private}/SkNx_sse.h (100%) diff --git a/bench/RectBench.cpp b/bench/RectBench.cpp index 46a515d..0f757a2 100644 --- a/bench/RectBench.cpp +++ b/bench/RectBench.cpp @@ -302,3 +302,93 @@ DEF_BENCH(return new BlitMaskBench(SkCanvas::kPoints_PointMode, DEF_BENCH(return new BlitMaskBench(SkCanvas::kPoints_PointMode, BlitMaskBench::KMaskShader, "maskshader");) + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef int (*RectMathProc)(const SkMatrix&, const SkRect[], const SkIRect[], int count); + +class RectMathBench : public Benchmark { /* runs a RectMathProc over N rects for OUTER matrices */ + SkString fName; + SkRandom fRand; + SkString fSuffix; + RectMathProc fProc; + +public: + enum { + N = 300, + OUTER = 10000, + }; + SkRect fRects[N]; + SkIRect fIRects[N]; + volatile int fCounter = 0; /* zero-init; onDraw() accumulates into it with += */ + + RectMathBench(RectMathProc proc, const char* suffix) { + fProc = proc; + fSuffix.set(suffix); + SkRandom rand; + for (int i = 0; i < N; ++i) { + fRects[i].setXYWH(rand.nextUScalar1() * 100, 
rand.nextUScalar1() * 100, + rand.nextUScalar1() * 100, rand.nextUScalar1() * 100); + fIRects[i].setXYWH(i, i, 10, 10); + } + } + + bool isVisual() override { return false; } + +protected: + const char* onGetName() override { + fName.printf("rect_math_%s", fSuffix.c_str()); + return fName.c_str(); + } + + void onDraw(int loops, SkCanvas* canvas) override { + SkMatrix mat; + for (int j = 0; j < OUTER; ++j) { + mat.setScaleTranslate(fRand.nextUScalar1(), fRand.nextUScalar1(), + fRand.nextUScalar1(), fRand.nextUScalar1()); + fCounter += fProc(mat, fRects, fIRects, N); + } + } + +private: + typedef Benchmark INHERITED; +}; + +static int rectmath0(const SkMatrix& mat, const SkRect rr[], const SkIRect ir[], int count) { + int counter = 0; + for (int i = 0; i < count; ++i) { + SkRect dst; + mat.mapRectScaleTranslate(&dst, rr[i]); + counter += dst.round().contains(ir[i]); + } + return counter; +} + +static int rectmath1(const SkMatrix& mat, const SkRect rr[], const SkIRect ir[], int count) { + int counter = 0; + for (int i = 0; i < count; ++i) { + SkRect dst; + mat.mapRectScaleTranslate(&dst, rr[i]); + counter += dst.round2i().contains(ir[i]); + } + return counter; +} + +#if 0 +static bool contains(SkIRect outer, SkIRect inner) { + Sk4i le(outer.fLeft, outer.fTop, inner.fRight, inner.fBottom); + Sk4i ge(inner.fLeft, inner.fTop, outer.fRight, outer.fBottom); + return (le <= ge).allTrue(); +} + +static int rectmath3(const SkMatrix& mat, const SkRect rr[], const SkIRect ir[], int count) { + int counter = 0; + for (int i = 0; i < count; ++i) { + counter += contains(mat.mapRectScaleTranslate(rr[i]).round2i(), ir[i]); + } + return counter; +} +#endif + +DEF_BENCH(return new RectMathBench(rectmath0, "0");) +DEF_BENCH(return new RectMathBench(rectmath1, "1");) diff --git a/gyp/core.gypi b/gyp/core.gypi index 97e38c2..d35edff 100644 --- a/gyp/core.gypi +++ b/gyp/core.gypi @@ -192,7 +192,6 @@ '<(skia_src_path)/core/SkNinePatchIter.h', '<(skia_src_path)/core/SkNormalSource.cpp', 
'<(skia_src_path)/core/SkNormalSource.h', - '<(skia_src_path)/core/SkNx.h', '<(skia_src_path)/core/SkOpts.cpp', '<(skia_src_path)/core/SkOpts.h', '<(skia_src_path)/core/SkOrderedReadBuffer.h', @@ -426,6 +425,7 @@ '<(skia_include_path)/private/SkGpuFenceSync.h', '<(skia_include_path)/private/SkMiniRecorder.h', '<(skia_include_path)/private/SkMutex.h', + '<(skia_include_path)/private/SkNx.h', '<(skia_include_path)/private/SkOnce.h', '<(skia_include_path)/private/SkRecords.h', '<(skia_include_path)/private/SkSemaphore.h', diff --git a/gyp/tools.gyp b/gyp/tools.gyp index 9d02ee1..e2f0bd8 100644 --- a/gyp/tools.gyp +++ b/gyp/tools.gyp @@ -387,6 +387,7 @@ '../tools/picture_utils.h', ], 'include_dirs': [ + '../include/private', '../src/core/', ], 'dependencies': [ diff --git a/include/core/SkRect.h b/include/core/SkRect.h index 39cbb33..c71d712 100644 --- a/include/core/SkRect.h +++ b/include/core/SkRect.h @@ -10,6 +10,7 @@ #include "SkPoint.h" #include "SkSize.h" +#include "../private/SkNx.h" struct SkRect; @@ -869,7 +870,28 @@ public: this->roundOut(&ir); return ir; } - + + /** + * Round the rect's values and return the result as a new SkIRect. + * Computes floor(edge + 0.5) per edge, following the semantics of SkScalarRoundToInt(). + */ + SkIRect round2i() const { + SkIRect dst; + Sk4s rd = (Sk4s::Load(&fLeft) + Sk4s(0.5)).floor(); + SkNx_cast<int>(rd).store(&dst.fLeft); + return dst; + } + + /** + * Round the rect's values and return the result as a new SkRect. + * Computes floor(edge + 0.5) per edge, following the semantics of SkScalarRoundToScalar(). + */ + SkRect round2s() const { + SkRect dst; + (Sk4s::Load(&fLeft) + Sk4s(0.5)).floor().store(&dst.fLeft); + return dst; + } + /** * Swap top/bottom or left/right if there are flipped (i.e. if width() * or height() would have returned a negative value.) 
This should be called diff --git a/src/core/SkNx.h b/include/private/SkNx.h similarity index 99% rename from src/core/SkNx.h rename to include/private/SkNx.h index d0c7f41..8d1b29d 100644 --- a/src/core/SkNx.h +++ b/include/private/SkNx.h @@ -296,9 +296,9 @@ typedef SkNx<4, int> Sk4i; // Include platform specific specializations if available. #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 - #include "../opts/SkNx_sse.h" + #include "SkNx_sse.h" #elif !defined(SKNX_NO_SIMD) && defined(SK_ARM_HAS_NEON) - #include "../opts/SkNx_neon.h" + #include "SkNx_neon.h" #endif SI void Sk4f_ToBytes(uint8_t p[16], const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) { diff --git a/src/opts/SkNx_neon.h b/include/private/SkNx_neon.h similarity index 100% rename from src/opts/SkNx_neon.h rename to include/private/SkNx_neon.h diff --git a/src/opts/SkNx_sse.h b/include/private/SkNx_sse.h similarity index 100% rename from src/opts/SkNx_sse.h rename to include/private/SkNx_sse.h diff --git a/tests/RectTest.cpp b/tests/RectTest.cpp index 820586c..719b5b9 100644 --- a/tests/RectTest.cpp +++ b/tests/RectTest.cpp @@ -88,3 +88,35 @@ DEF_TEST(Rect, reporter) { test_stroke_width_clipping(reporter); test_skbug4406(reporter); } + +DEF_TEST(Rect_round, reporter) { /* checks round2s()/round()/round2i() against SkScalarRoundTo*() */ + SkRandom rand; + + for (int i = 0; i < 100000; ++i) { + SkRect src = SkRect::MakeXYWH(rand.nextSScalar1() * 1000, + rand.nextSScalar1() * 1000, + rand.nextUScalar1() * 1000, + rand.nextUScalar1() * 1000); + SkRect rd0 = { + SkScalarRoundToScalar(src.fLeft), + SkScalarRoundToScalar(src.fTop), + SkScalarRoundToScalar(src.fRight), + SkScalarRoundToScalar(src.fBottom) + }; + SkRect rd1 = src.round2s(); + + REPORTER_ASSERT(reporter, rd0 == rd1); + + SkIRect ir0 = { + SkScalarRoundToInt(src.fLeft), + SkScalarRoundToInt(src.fTop), + SkScalarRoundToInt(src.fRight), + SkScalarRoundToInt(src.fBottom) + }; + SkIRect ir1 = src.round(); + SkIRect ir2 = src.round2i(); + + REPORTER_ASSERT(reporter, ir0 == ir1); + 
REPORTER_ASSERT(reporter, ir0 == ir2); + } +} -- 2.7.4