From 654ad19fbd5a295c4bd3072dc8a7b88f212bbd49 Mon Sep 17 00:00:00 2001 From: mtklein Date: Tue, 21 Jul 2015 12:08:41 -0700 Subject: [PATCH] Revert of De-templatize Sk4pxXfermode code a bit. (patchset #2 id:20001 of https://codereview.chromium.org/1242743004/) Reason for revert: http://build.chromium.org/p/client.skia.compile/builders/Build-Ubuntu-GCC-Arm7-Debug-Android_NoNeon/builds/1168/steps/build%20most/logs/stdio Original issue's description: > De-templatize Sk4pxXfermode code a bit. > > This deduplicates a few pieces of code: > - we end up with one copy of each xfer32() driver loop instead of one per xfermode; > - we end up with two* copies of each xfermode implementation instead of ten**. > > * For a given Mode: Mode() itself and xfer_aa(). > ** From unrolling: twice at a stride of 8, once at 4, once at 2, and once at 1, then all again for when we have AA. > > This decreases the size of SkXfermode.o from 1.5M to 620K on x86-64 and from 1.3M to 680K on ARMv7+NEON. > > If we wanted to, we could eliminate the xfer_aa() copy by tagging each Mode() function as __attribute__((noinline)) or its equivalent. This would result in another ~100K space savings. > > Performance is affected in proportion to the original xfermode speed: > fast modes like Plus take the largest proportional hit, and slow modes > like HardLight or SoftLight see essentially no hit at all. > > This adds SK_VECTORCALL to help keep this code fast on ARMv7 and Windows. I've looked at the ARMv7 generated code... it looks good, even pretty. > > For compatibility with SK_VECTORCALL, we now pass the vector-sized arguments by value instead of by reference. Some refactoring now allows us to declare each mode as just a static function instead of a struct, which simplifies things. > > TBR=reed@google.com > No public API changes. 
> > BUG=skia: > > Committed: https://skia.googlesource.com/skia/+/e617e1525916d7ee684142728c0905828caf49da TBR=msarett@google.com,mtklein@chromium.org NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true BUG=skia: Review URL: https://codereview.chromium.org/1245273005 --- include/core/SkPostConfig.h | 8 --- src/core/Sk4pxXfermode.h | 127 ++++++++++++++++++++++---------------------- 2 files changed, 64 insertions(+), 71 deletions(-) diff --git a/include/core/SkPostConfig.h b/include/core/SkPostConfig.h index 2074230..73c8fa1 100644 --- a/include/core/SkPostConfig.h +++ b/include/core/SkPostConfig.h @@ -297,14 +297,6 @@ # endif #endif -#if defined(SK_BUILD_FOR_WIN) - #define SK_VECTORCALL __vectorcall -#elif defined(SK_CPU_ARM32) - #define SK_VECTORCALL __attribute__((pcs("aapcs-vfp"))) -#else - #define SK_VECTORCALL -#endif - ////////////////////////////////////////////////////////////////////// #if defined(__clang__) || defined(__GNUC__) diff --git a/src/core/Sk4pxXfermode.h b/src/core/Sk4pxXfermode.h index d89dbf8..0c8dcb5 100644 --- a/src/core/Sk4pxXfermode.h +++ b/src/core/Sk4pxXfermode.h @@ -17,7 +17,12 @@ namespace { // Most xfermodes can be done most efficiently 4 pixels at a time in 8 or 16-bit fixed point. 
-#define XFERMODE(Name) static Sk4px SK_VECTORCALL Name(Sk4px s, Sk4px d) +#define XFERMODE(Name) \ + struct Name { \ + static Sk4px Xfer(const Sk4px&, const Sk4px&); \ + static const SkXfermode::Mode kMode = SkXfermode::k##Name##_Mode; \ + }; \ + inline Sk4px Name::Xfer(const Sk4px& s, const Sk4px& d) XFERMODE(Clear) { return Sk4px::DupPMColor(0); } XFERMODE(Src) { return s; } @@ -25,13 +30,13 @@ XFERMODE(Dst) { return d; } XFERMODE(SrcIn) { return s.approxMulDiv255(d.alphas() ); } XFERMODE(SrcOut) { return s.approxMulDiv255(d.alphas().inv()); } XFERMODE(SrcOver) { return s + d.approxMulDiv255(s.alphas().inv()); } -XFERMODE(DstIn) { return SrcIn (d,s); } -XFERMODE(DstOut) { return SrcOut (d,s); } -XFERMODE(DstOver) { return SrcOver(d,s); } +XFERMODE(DstIn) { return SrcIn ::Xfer(d,s); } +XFERMODE(DstOut) { return SrcOut ::Xfer(d,s); } +XFERMODE(DstOver) { return SrcOver::Xfer(d,s); } // [ S * Da + (1 - Sa) * D] XFERMODE(SrcATop) { return (s * d.alphas() + d * s.alphas().inv()).div255(); } -XFERMODE(DstATop) { return SrcATop(d,s); } +XFERMODE(DstATop) { return SrcATop::Xfer(d,s); } //[ S * (1 - Da) + (1 - Sa) * D ] XFERMODE(Xor) { return (s * d.alphas().inv() + d * s.alphas().inv()).div255(); } // [S + D ] @@ -81,7 +86,7 @@ XFERMODE(HardLight) { auto colors = (both + isLite.thenElse(lite, dark)).div255(); return alphas.zeroColors() + colors.zeroAlphas(); } -XFERMODE(Overlay) { return HardLight(d,s); } +XFERMODE(Overlay) { return HardLight::Xfer(d,s); } XFERMODE(Darken) { auto sa = s.alphas(), @@ -112,7 +117,12 @@ XFERMODE(Lighten) { #undef XFERMODE // Some xfermodes use math like divide or sqrt that's best done in floats 1 pixel at a time. 
-#define XFERMODE(Name) static SkPMFloat SK_VECTORCALL Name(SkPMFloat s, SkPMFloat d) +#define XFERMODE(Name) \ + struct Name { \ + static SkPMFloat Xfer(const SkPMFloat&, const SkPMFloat&); \ + static const SkXfermode::Mode kMode = SkXfermode::k##Name##_Mode; \ + }; \ + inline SkPMFloat Name::Xfer(const SkPMFloat& s, const SkPMFloat& d) XFERMODE(ColorDodge) { auto sa = s.alphas(), @@ -175,15 +185,15 @@ XFERMODE(SoftLight) { // A reasonable fallback mode for doing AA is to simply apply the transfermode first, // then linearly interpolate the AA. -template <Sk4px (SK_VECTORCALL *Mode)(Sk4px, Sk4px)> -static Sk4px SK_VECTORCALL xfer_aa(Sk4px s, Sk4px d, Sk4px aa) { - Sk4px bw = Mode(s, d); +template <typename Mode> +static Sk4px xfer_aa(const Sk4px& s, const Sk4px& d, const Sk4px& aa) { + Sk4px bw = Mode::Xfer(s, d); return (bw * aa + d * aa.inv()).div255(); } // For some transfermodes we specialize AA, either for correctness or performance. #define XFERMODE_AA(Name) \ - template <> Sk4px SK_VECTORCALL xfer_aa<Name>(Sk4px s, Sk4px d, Sk4px aa) + template <> Sk4px xfer_aa<Name>(const Sk4px& s, const Sk4px& d, const Sk4px& aa) // Plus' clamp needs to happen after AA. 
skia:3852 XFERMODE_AA(Plus) { // [ clamp( (1-AA)D + (AA)(S+D) ) == clamp(D + AA*S) ] @@ -192,47 +202,44 @@ XFERMODE_AA(Plus) { // [ clamp( (1-AA)D + (AA)(S+D) ) == clamp(D + AA*S) ] #undef XFERMODE_AA -class Sk4pxXfermode : public SkProcCoeffXfermode { +template <typename ProcType> +class SkT4pxXfermode : public SkProcCoeffXfermode { public: - typedef Sk4px (SK_VECTORCALL *Proc4)(Sk4px, Sk4px); - typedef Sk4px (SK_VECTORCALL *AAProc4)(Sk4px, Sk4px, Sk4px); - - Sk4pxXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, Proc4 proc4, AAProc4 aaproc4) - : INHERITED(rec, mode) - , fProc4(proc4) - , fAAProc4(aaproc4) {} + static SkProcCoeffXfermode* Create(const ProcCoeff& rec) { + return SkNEW_ARGS(SkT4pxXfermode, (rec)); + } void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override { if (NULL == aa) { Sk4px::MapDstSrc(n, dst, src, [&](const Sk4px& dst4, const Sk4px& src4) { - return fProc4(src4, dst4); + return ProcType::Xfer(src4, dst4); }); } else { Sk4px::MapDstSrcAlpha(n, dst, src, aa, [&](const Sk4px& dst4, const Sk4px& src4, const Sk4px& alpha) { - return fAAProc4(src4, dst4, alpha); + return xfer_aa<ProcType>(src4, dst4, alpha); }); } } private: - Proc4 fProc4; - AAProc4 fAAProc4; + SkT4pxXfermode(const ProcCoeff& rec) : INHERITED(rec, ProcType::kMode) {} + typedef SkProcCoeffXfermode INHERITED; }; -class SkPMFloatXfermode : public SkProcCoeffXfermode { +template <typename ProcType> +class SkTPMFloatXfermode : public SkProcCoeffXfermode { public: - typedef SkPMFloat (SK_VECTORCALL *ProcF)(SkPMFloat, SkPMFloat); - SkPMFloatXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf) - : INHERITED(rec, mode) - , fProcF(procf) {} + static SkProcCoeffXfermode* Create(const ProcCoeff& rec) { + return SkNEW_ARGS(SkTPMFloatXfermode, (rec)); + } void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override { for (int i = 0; i < n; i++) { SkPMFloat s(src[i]), d(dst[i]), - b(fProcF(s,d)); + b(ProcType::Xfer(s,d)); if (aa) { // We do aa in full 
float precision before going back down to bytes, because we can! SkPMFloat a = Sk4f(aa[i]) * Sk4f(1.0f/255); @@ -243,46 +250,40 @@ public: } private: - ProcF fProcF; + SkTPMFloatXfermode(const ProcCoeff& rec) : INHERITED(rec, ProcType::kMode) {} + typedef SkProcCoeffXfermode INHERITED; }; static SkProcCoeffXfermode* SkCreate4pxXfermode(const ProcCoeff& rec, SkXfermode::Mode mode) { #if !defined(SK_CPU_ARM32) || defined(SK_ARM_HAS_NEON) switch (mode) { - #define CASE(Mode) case SkXfermode::k##Mode##_Mode: \ - return SkNEW_ARGS(Sk4pxXfermode, (rec, mode, &Mode, &xfer_aa<Mode>)) - CASE(Clear); - CASE(Src); - CASE(Dst); - CASE(SrcOver); - CASE(DstOver); - CASE(SrcIn); - CASE(DstIn); - CASE(SrcOut); - CASE(DstOut); - CASE(SrcATop); - CASE(DstATop); - CASE(Xor); - CASE(Plus); - CASE(Modulate); - CASE(Screen); - CASE(Multiply); - CASE(Difference); - CASE(Exclusion); - CASE(HardLight); - CASE(Overlay); - CASE(Darken); - CASE(Lighten); - #undef CASE - - #define CASE(Mode) case SkXfermode::k##Mode##_Mode: \ - return SkNEW_ARGS(SkPMFloatXfermode, (rec, mode, &Mode)) - CASE(ColorDodge); - CASE(ColorBurn); - CASE(SoftLight); - #undef CASE - + case SkXfermode::kClear_Mode: return SkT4pxXfermode<Clear>::Create(rec); + case SkXfermode::kSrc_Mode: return SkT4pxXfermode<Src>::Create(rec); + case SkXfermode::kDst_Mode: return SkT4pxXfermode<Dst>::Create(rec); + case SkXfermode::kSrcOver_Mode: return SkT4pxXfermode<SrcOver>::Create(rec); + case SkXfermode::kDstOver_Mode: return SkT4pxXfermode<DstOver>::Create(rec); + case SkXfermode::kSrcIn_Mode: return SkT4pxXfermode<SrcIn>::Create(rec); + case SkXfermode::kDstIn_Mode: return SkT4pxXfermode<DstIn>::Create(rec); + case SkXfermode::kSrcOut_Mode: return SkT4pxXfermode<SrcOut>::Create(rec); + case SkXfermode::kDstOut_Mode: return SkT4pxXfermode<DstOut>::Create(rec); + case SkXfermode::kSrcATop_Mode: return SkT4pxXfermode<SrcATop>::Create(rec); + case SkXfermode::kDstATop_Mode: return SkT4pxXfermode<DstATop>::Create(rec); + case SkXfermode::kXor_Mode: return SkT4pxXfermode<Xor>::Create(rec); + case SkXfermode::kPlus_Mode: return 
SkT4pxXfermode<Plus>::Create(rec); + case SkXfermode::kModulate_Mode: return SkT4pxXfermode<Modulate>::Create(rec); + case SkXfermode::kScreen_Mode: return SkT4pxXfermode<Screen>::Create(rec); + case SkXfermode::kMultiply_Mode: return SkT4pxXfermode<Multiply>::Create(rec); + case SkXfermode::kDifference_Mode: return SkT4pxXfermode<Difference>::Create(rec); + case SkXfermode::kExclusion_Mode: return SkT4pxXfermode<Exclusion>::Create(rec); + case SkXfermode::kHardLight_Mode: return SkT4pxXfermode<HardLight>::Create(rec); + case SkXfermode::kOverlay_Mode: return SkT4pxXfermode<Overlay>::Create(rec); + case SkXfermode::kDarken_Mode: return SkT4pxXfermode<Darken>::Create(rec); + case SkXfermode::kLighten_Mode: return SkT4pxXfermode<Lighten>::Create(rec); + + case SkXfermode::kColorDodge_Mode: return SkTPMFloatXfermode<ColorDodge>::Create(rec); + case SkXfermode::kColorBurn_Mode: return SkTPMFloatXfermode<ColorBurn>::Create(rec); + case SkXfermode::kSoftLight_Mode: return SkTPMFloatXfermode<SoftLight>::Create(rec); default: break; } #endif -- 2.7.4