From: mtklein Date: Wed, 13 May 2015 15:02:14 +0000 (-0700) Subject: Turn on Sk4px xfermodes when we have NEON too. X-Git-Tag: submit/tizen/20180928.044319~2419 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=04d24a3f86b6f2382e5c6ffaf161ffc734a4d02a;p=platform%2Fupstream%2FlibSkiaSharp.git Turn on Sk4px xfermodes when we have NEON too. For SSE, Sk4px is better than Sk4f is better than SkXfermodes_opts_SSE2 (where implemented). For NEON, Sk4px is better than SkXfermodes_opts_arm_neon is better than Sk4f (where implemented). This is a 1.6-1.9x speedup for Plus, Modulate, and Screen for NEON. BUG=skia: Review URL: https://codereview.chromium.org/1128053004 --- diff --git a/src/core/SkXfermode.cpp b/src/core/SkXfermode.cpp index ee000233d5..a2ab65b0a3 100644 --- a/src/core/SkXfermode.cpp +++ b/src/core/SkXfermode.cpp @@ -19,19 +19,17 @@ #include "SkUtilsArm.h" #include "SkWriteBuffer.h" -#ifndef SK_SUPPORT_LEGACY_SCALAR_XFERMODES +// When implemented, the Sk4f and Sk4px xfermodes beat src/opts/SkXfermodes_opts_SSE2's. +// When implemented, the Sk4px, but not Sk4f, xfermodes beat src/opts/SkXfermodes_arm_neon's. #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 - /* - * To be conservative, we only enable the new code path (using SkPMFloat) when we - * "know" we're faster, which at the moment is only when we have SSE2 or better. 
- */ -#else - #define SK_SUPPORT_LEGACY_SCALAR_XFERMODES -#endif + #define SK_4F_XFERMODES_ARE_FAST + #define SK_4PX_XFERMODES_ARE_FAST +#elif defined(SK_ARM_HAS_NEON) + #define SK_4PX_XFERMODES_ARE_FAST #endif #if !SK_ARM_NEON_IS_NONE -#include "SkXfermode_opts_arm_neon.h" + #include "SkXfermode_opts_arm_neon.h" #endif #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) @@ -1196,7 +1194,6 @@ void SkDstInXfermode::toString(SkString* str) const { { screen_modeproc, SkXfermode::kOne_Coeff, SkXfermode::kISC_Coeff }, */ -#ifndef SK_SUPPORT_LEGACY_SCALAR_XFERMODES static const float gInv255 = 0.0039215683f; // (1.0f / 255) - ULP == SkBits2Float(0x3B808080) static Sk4f ramp(const Sk4f& v0, const Sk4f& v1, const Sk4f& t) { @@ -1412,7 +1409,6 @@ private: typedef SkProcCoeffXfermode INHERITED; }; -#endif /////////////////////////////////////////////////////////////////////////////// @@ -1474,57 +1470,32 @@ SkXfermode* create_mode(int iMode) { rec.fProc = pp; } - SkXfermode* xfer = NULL; - -#ifndef SK_SUPPORT_LEGACY_SCALAR_XFERMODES +#if defined(SK_4PX_XFERMODES_ARE_FAST) && !defined(SK_PREFER_LEGACY_FLOAT_XFERMODES) switch (mode) { - case SkXfermode::kSrcATop_Mode: - xfer = SkT4fXfermode::Create(rec); - break; - case SkXfermode::kDstATop_Mode: - xfer = SkT4fXfermode::Create(rec); - break; - case SkXfermode::kXor_Mode: - xfer = SkT4fXfermode::Create(rec); - break; - #ifdef SK_PREFER_LEGACY_FLOAT_XFERMODES - case SkXfermode::kPlus_Mode: - xfer = SkT4fXfermode::Create(rec); - break; - case SkXfermode::kModulate_Mode: - xfer = SkT4fXfermode::Create(rec); - break; - case SkXfermode::kScreen_Mode: - xfer = SkT4fXfermode::Create(rec); - break; - #else - case SkXfermode::kPlus_Mode: - xfer = SkT4pxXfermode::Create(rec); - break; - case SkXfermode::kModulate_Mode: - xfer = SkT4pxXfermode::Create(rec); - break; - case SkXfermode::kScreen_Mode: - xfer = SkT4pxXfermode::Create(rec); - break; - #endif - case SkXfermode::kMultiply_Mode: - xfer = SkT4fXfermode::Create(rec); - break; - 
case SkXfermode::kDifference_Mode: - xfer = SkT4fXfermode::Create(rec); - break; - case SkXfermode::kExclusion_Mode: - xfer = SkT4fXfermode::Create(rec); - break; - default: - break; + case SkXfermode::kPlus_Mode: return SkT4pxXfermode::Create(rec); + case SkXfermode::kModulate_Mode: return SkT4pxXfermode::Create(rec); + case SkXfermode::kScreen_Mode: return SkT4pxXfermode::Create(rec); + default: break; } - if (xfer) { - return xfer; +#endif + +#if defined(SK_4F_XFERMODES_ARE_FAST) + switch (mode) { + case SkXfermode::kSrcATop_Mode: return SkT4fXfermode::Create(rec); + case SkXfermode::kDstATop_Mode: return SkT4fXfermode::Create(rec); + case SkXfermode::kXor_Mode: return SkT4fXfermode::Create(rec); + case SkXfermode::kPlus_Mode: return SkT4fXfermode::Create(rec); + case SkXfermode::kModulate_Mode: return SkT4fXfermode::Create(rec); + case SkXfermode::kScreen_Mode: return SkT4fXfermode::Create(rec); + case SkXfermode::kMultiply_Mode: return SkT4fXfermode::Create(rec); + case SkXfermode::kDifference_Mode: return SkT4fXfermode::Create(rec); + case SkXfermode::kExclusion_Mode: return SkT4fXfermode::Create(rec); + default: break; } #endif + SkXfermode* xfer = NULL; + // check if we have a platform optim for that SkProcCoeffXfermode* xfm = SkPlatformXfermodeFactory(rec, mode); if (xfm != NULL) { diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h index b9d4357e5a..e4dbec9083 100644 --- a/src/opts/SkNx_neon.h +++ b/src/opts/SkNx_neon.h @@ -355,6 +355,8 @@ public: void store(uint8_t vals[16]) const { vst1q_u8(vals, fVec); } + SkNi saturatedAdd(const SkNi& o) const { return vqaddq_u8(fVec, o.fVec); } + SkNi operator + (const SkNi& o) const { return vaddq_u8(fVec, o.fVec); } SkNi operator - (const SkNi& o) const { return vsubq_u8(fVec, o.fVec); } SkNi operator * (const SkNi& o) const { return vmulq_u8(fVec, o.fVec); }