ARM Skia NEON patches - 28 - Xfermode: SIMD modeprocs
authorcommit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>
Wed, 9 Oct 2013 14:39:46 +0000 (14:39 +0000)
committercommit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>
Wed, 9 Oct 2013 14:39:46 +0000 (14:39 +0000)
Xfermode: allow for SIMD modeprocs

    This patch introduces the ability to have SIMD Xfermode modeprocs.
    In the NEON implementation, SIMD modeprocs will process 8 pixels
    at a time.

Signed-off-by: Kévin PETIT <kevin.petit@arm.com>
BUG=

Committed: http://code.google.com/p/skia/source/detail?r=11654

R=djsollen@google.com, mtklein@google.com, reed@google.com

Author: kevin.petit.arm@gmail.com

Review URL: https://codereview.chromium.org/23644006

git-svn-id: http://skia.googlecode.com/svn/trunk@11669 2bbb7eff-a529-9590-31e7-b0007b416f81

gyp/opts.gyp
include/core/SkXfermode.h
src/core/SkXfermode.cpp
src/core/SkXfermode_proccoeff.h [new file with mode: 0644]
src/opts/SkXfermode_opts_arm.cpp [new file with mode: 0644]
src/opts/SkXfermode_opts_none.cpp [new file with mode: 0644]

index 4f5eda8..04966ba 100644 (file)
@@ -52,6 +52,7 @@
             '../src/opts/SkBlitRow_opts_SSE2.cpp',
             '../src/opts/SkBlitRect_opts_SSE2.cpp',
             '../src/opts/SkUtils_opts_SSE2.cpp',
+            '../src/opts/SkXfermode_opts_none.cpp',
           ],
         }],
         [ 'skia_arch_type == "arm" and arm_version >= 7', {
@@ -76,6 +77,7 @@
             '../src/opts/SkBlitMask_opts_arm.cpp',
             '../src/opts/SkBlitRow_opts_arm.cpp',
             '../src/opts/SkBlitRow_opts_arm.h',
+            '../src/opts/SkXfermode_opts_arm.cpp',
           ],
           'conditions': [
             [ 'arm_neon == 1 or arm_neon_optional == 1', {
             '../src/opts/SkBlitMask_opts_none.cpp',
             '../src/opts/SkBlitRow_opts_none.cpp',
             '../src/opts/SkUtils_opts_none.cpp',
+            '../src/opts/SkXfermode_opts_none.cpp',
           ],
         }],
       ],
index 04f3bfe..ed07bd5 100644 (file)
@@ -275,6 +275,10 @@ protected:
         fProc = proc;
     }
 
+    SkXfermodeProc getProc() const {
+        return fProc;
+    }
+
 private:
     SkXfermodeProc  fProc;
 
index 8c6eb2c..6f2fee6 100644 (file)
@@ -8,6 +8,7 @@
 
 
 #include "SkXfermode.h"
+#include "SkXfermode_proccoeff.h"
 #include "SkColorPriv.h"
 #include "SkFlattenableBuffers.h"
 #include "SkMathPriv.h"
@@ -624,16 +625,7 @@ static SkPMColor luminosity_modeproc(SkPMColor src, SkPMColor dst) {
     return SkPackARGB32(a, r, g, b);
 }
 
-
-struct ProcCoeff {
-    SkXfermodeProc      fProc;
-    SkXfermode::Coeff   fSC;
-    SkXfermode::Coeff   fDC;
-};
-
-#define CANNOT_USE_COEFF    SkXfermode::Coeff(-1)
-
-static const ProcCoeff gProcCoeffs[] = {
+const ProcCoeff gProcCoeffs[] = {
     { clear_modeproc,   SkXfermode::kZero_Coeff,    SkXfermode::kZero_Coeff },
     { src_modeproc,     SkXfermode::kOne_Coeff,     SkXfermode::kZero_Coeff },
     { dst_modeproc,     SkXfermode::kZero_Coeff,    SkXfermode::kOne_Coeff },
@@ -1345,83 +1337,51 @@ GrEffectRef* XferEffect::TestCreate(SkRandom* rand,
 ///////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////
 
-class SkProcCoeffXfermode : public SkProcXfermode {
-public:
-    SkProcCoeffXfermode(const ProcCoeff& rec, Mode mode)
-            : INHERITED(rec.fProc) {
-        fMode = mode;
-        // these may be valid, or may be CANNOT_USE_COEFF
-        fSrcCoeff = rec.fSC;
-        fDstCoeff = rec.fDC;
+bool SkProcCoeffXfermode::asMode(Mode* mode) const {
+    if (mode) {
+        *mode = fMode;
     }
+    return true;
+}
 
-    virtual bool asMode(Mode* mode) const SK_OVERRIDE {
-        if (mode) {
-            *mode = fMode;
-        }
-        return true;
+bool SkProcCoeffXfermode::asCoeff(Coeff* sc, Coeff* dc) const {
+    if (CANNOT_USE_COEFF == fSrcCoeff) {
+        return false;
     }
 
-    virtual bool asCoeff(Coeff* sc, Coeff* dc) const SK_OVERRIDE {
-        if (CANNOT_USE_COEFF == fSrcCoeff) {
-            return false;
-        }
-
-        if (sc) {
-            *sc = fSrcCoeff;
-        }
-        if (dc) {
-            *dc = fDstCoeff;
-        }
-        return true;
+    if (sc) {
+        *sc = fSrcCoeff;
+    }
+    if (dc) {
+        *dc = fDstCoeff;
     }
+    return true;
+}
 
 #if SK_SUPPORT_GPU
-    virtual bool asNewEffectOrCoeff(GrContext*,
-                                    GrEffectRef** effect,
-                                    Coeff* src,
-                                    Coeff* dst,
-                                    GrTexture* background) const SK_OVERRIDE {
-        if (this->asCoeff(src, dst)) {
-            return true;
-        }
-        if (XferEffect::IsSupportedMode(fMode)) {
-            if (NULL != effect) {
-                *effect = XferEffect::Create(fMode, background);
-                SkASSERT(NULL != *effect);
-            }
-            return true;
+bool SkProcCoeffXfermode::asNewEffectOrCoeff(GrContext*,
+                                             GrEffectRef** effect,
+                                             Coeff* src,
+                                             Coeff* dst,
+                                             GrTexture* background) const {
+    if (this->asCoeff(src, dst)) {
+        return true;
+    }
+    if (XferEffect::IsSupportedMode(fMode)) {
+        if (NULL != effect) {
+            *effect = XferEffect::Create(fMode, background);
+            SkASSERT(NULL != *effect);
         }
-        return false;
+        return true;
     }
+    return false;
+}
 #endif
 
-    SK_DEVELOPER_TO_STRING()
-    SK_DECLARE_PUBLIC_FLATTENABLE_DESERIALIZATION_PROCS(SkProcCoeffXfermode)
-
-protected:
-    SkProcCoeffXfermode(SkFlattenableReadBuffer& buffer) : INHERITED(buffer) {
-        fMode = (SkXfermode::Mode)buffer.read32();
-
-        const ProcCoeff& rec = gProcCoeffs[fMode];
-        // these may be valid, or may be CANNOT_USE_COEFF
-        fSrcCoeff = rec.fSC;
-        fDstCoeff = rec.fDC;
-        // now update our function-ptr in the super class
-        this->INHERITED::setProc(rec.fProc);
-    }
-
-    virtual void flatten(SkFlattenableWriteBuffer& buffer) const SK_OVERRIDE {
-        this->INHERITED::flatten(buffer);
-        buffer.write32(fMode);
-    }
-
-private:
-    Mode    fMode;
-    Coeff   fSrcCoeff, fDstCoeff;
-
-    typedef SkProcXfermode INHERITED;
-};
+void SkProcCoeffXfermode::flatten(SkFlattenableWriteBuffer& buffer) const {
+    this->INHERITED::flatten(buffer);
+    buffer.write32(fMode);
+}
 
 const char* SkXfermode::ModeName(Mode mode) {
     SkASSERT((unsigned) mode <= (unsigned)kLastMode);
@@ -1693,6 +1653,9 @@ void SkXfermode::Term() {
     }
 }
 
+extern SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
+                                                      SkXfermode::Mode mode);
+
 SkXfermode* SkXfermode::Create(Mode mode) {
     SkASSERT(SK_ARRAY_COUNT(gProcCoeffs) == kModeCount);
     SkASSERT(SK_ARRAY_COUNT(gCachedXfermodes) == kModeCount);
@@ -1714,29 +1677,36 @@ SkXfermode* SkXfermode::Create(Mode mode) {
     SkXfermode* xfer = gCachedXfermodes[mode];
     if (NULL == xfer) {
         const ProcCoeff& rec = gProcCoeffs[mode];
-        // All modes can in theory be represented by the ProcCoeff rec, since
-        // it contains function ptrs. However, a few modes are both simple and
-        // commonly used, so we call those out for their own subclasses here.
-        switch (mode) {
-            case kClear_Mode:
-                xfer = SkNEW_ARGS(SkClearXfermode, (rec));
-                break;
-            case kSrc_Mode:
-                xfer = SkNEW_ARGS(SkSrcXfermode, (rec));
-                break;
-            case kSrcOver_Mode:
-                SkASSERT(false);    // should not land here
-                break;
-            case kDstIn_Mode:
-                xfer = SkNEW_ARGS(SkDstInXfermode, (rec));
-                break;
-            case kDstOut_Mode:
-                xfer = SkNEW_ARGS(SkDstOutXfermode, (rec));
-                break;
-            default:
-                // no special-case, just rely in the rec and its function-ptrs
-                xfer = SkNEW_ARGS(SkProcCoeffXfermode, (rec, mode));
-                break;
+
+        // check if we have a platform optim for that
+        SkProcCoeffXfermode* xfm = SkPlatformXfermodeFactory(rec, mode);
+        if (xfm != NULL) {
+            xfer = xfm;
+        } else {
+            // All modes can in theory be represented by the ProcCoeff rec, since
+            // it contains function ptrs. However, a few modes are both simple and
+            // commonly used, so we call those out for their own subclasses here.
+            switch (mode) {
+                case kClear_Mode:
+                    xfer = SkNEW_ARGS(SkClearXfermode, (rec));
+                    break;
+                case kSrc_Mode:
+                    xfer = SkNEW_ARGS(SkSrcXfermode, (rec));
+                    break;
+                case kSrcOver_Mode:
+                    SkASSERT(false);    // should not land here
+                    break;
+                case kDstIn_Mode:
+                    xfer = SkNEW_ARGS(SkDstInXfermode, (rec));
+                    break;
+                case kDstOut_Mode:
+                    xfer = SkNEW_ARGS(SkDstOutXfermode, (rec));
+                    break;
+                default:
+                    // no special-case, just rely in the rec and its function-ptrs
+                    xfer = SkNEW_ARGS(SkProcCoeffXfermode, (rec, mode));
+                    break;
+            }
         }
         gCachedXfermodes[mode] = xfer;
     }
diff --git a/src/core/SkXfermode_proccoeff.h b/src/core/SkXfermode_proccoeff.h
new file mode 100644 (file)
index 0000000..60ebe3f
--- /dev/null
@@ -0,0 +1,63 @@
+#ifndef SkXfermode_proccoeff_DEFINED
+#define SkXfermode_proccoeff_DEFINED
+
+#include "SkXfermode.h"
+#include "SkFlattenableBuffers.h"
+
+struct ProcCoeff {
+    SkXfermodeProc      fProc;
+    SkXfermode::Coeff   fSC;
+    SkXfermode::Coeff   fDC;
+};
+
+#define CANNOT_USE_COEFF    SkXfermode::Coeff(-1)
+
+extern  const ProcCoeff gProcCoeffs[];
+
+class SkProcCoeffXfermode : public SkProcXfermode {
+public:
+    SkProcCoeffXfermode(const ProcCoeff& rec, Mode mode)
+            : INHERITED(rec.fProc) {
+        fMode = mode;
+        // these may be valid, or may be CANNOT_USE_COEFF
+        fSrcCoeff = rec.fSC;
+        fDstCoeff = rec.fDC;
+    }
+
+    virtual bool asMode(Mode* mode) const SK_OVERRIDE;
+
+    virtual bool asCoeff(Coeff* sc, Coeff* dc) const SK_OVERRIDE;
+
+#if SK_SUPPORT_GPU
+    virtual bool asNewEffectOrCoeff(GrContext*,
+                                    GrEffectRef** effect,
+                                    Coeff* src,
+                                    Coeff* dst,
+                                    GrTexture* background) const SK_OVERRIDE;
+#endif
+
+    SK_DEVELOPER_TO_STRING()
+    SK_DECLARE_PUBLIC_FLATTENABLE_DESERIALIZATION_PROCS(SkProcCoeffXfermode)
+
+protected:
+    SkProcCoeffXfermode(SkFlattenableReadBuffer& buffer) : INHERITED(buffer) {
+        fMode = (SkXfermode::Mode)buffer.read32();
+
+        const ProcCoeff& rec = gProcCoeffs[fMode];
+        // these may be valid, or may be CANNOT_USE_COEFF
+        fSrcCoeff = rec.fSC;
+        fDstCoeff = rec.fDC;
+        // now update our function-ptr in the super class
+        this->INHERITED::setProc(rec.fProc);
+    }
+
+    virtual void flatten(SkFlattenableWriteBuffer& buffer) const SK_OVERRIDE;
+
+private:
+    Mode    fMode;
+    Coeff   fSrcCoeff, fDstCoeff;
+
+    typedef SkProcXfermode INHERITED;
+};
+
+#endif // #ifndef SkXfermode_proccoeff_DEFINED
diff --git a/src/opts/SkXfermode_opts_arm.cpp b/src/opts/SkXfermode_opts_arm.cpp
new file mode 100644 (file)
index 0000000..db5d531
--- /dev/null
@@ -0,0 +1,158 @@
+#include "SkXfermode.h"
+#include "SkXfermode_proccoeff.h"
+#include "SkColorPriv.h"
+#include "SkUtilsArm.h"
+
+#if !SK_ARM_NEON_IS_NONE
+
+#include <arm_neon.h>
+
+////////////////////////////////////////////////////////////////////////////////
+
+typedef uint8x8x4_t (*SkXfermodeProcSIMD)(uint8x8x4_t src, uint8x8x4_t dst);
+
+class SkNEONProcCoeffXfermode : public SkProcCoeffXfermode {
+public:
+    SkNEONProcCoeffXfermode(const ProcCoeff& rec, SkXfermode::Mode mode,
+                            SkXfermodeProcSIMD procSIMD)
+            : INHERITED(rec, mode), fProcSIMD(procSIMD) {}
+
+    virtual void xfer32(SkPMColor dst[], const SkPMColor src[], int count,
+                        const SkAlpha aa[]) const SK_OVERRIDE;
+
+    SK_DEVELOPER_TO_STRING()
+    SK_DECLARE_PUBLIC_FLATTENABLE_DESERIALIZATION_PROCS(SkNEONProcCoeffXfermode)
+
+private:
+    SkNEONProcCoeffXfermode(SkFlattenableReadBuffer& buffer)
+        : INHERITED(buffer) {
+
+        fProcSIMD = NULL;
+        if (!buffer.isCrossProcess()) {
+            fProcSIMD = (SkXfermodeProcSIMD)buffer.readFunctionPtr();
+        }
+    }
+
+    virtual void flatten(SkFlattenableWriteBuffer& buffer) const SK_OVERRIDE;
+
+    SkXfermodeProcSIMD fProcSIMD;
+    typedef SkProcCoeffXfermode INHERITED;
+};
+
+
+void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
+                                     int count, const SkAlpha aa[]) const {
+    SkASSERT(dst && src && count >= 0);
+
+    SkXfermodeProc proc = this->getProc();
+    SkXfermodeProcSIMD procSIMD = fProcSIMD;
+
+    if (NULL == aa) {
+        // Unrolled NEON code
+        while (count >= 8) {
+            uint8x8x4_t vsrc, vdst, vres;
+
+            asm volatile (
+                "vld4.u8    %h[vsrc], [%[src]]!  \t\n"
+                "vld4.u8    %h[vdst], [%[dst]]   \t\n"
+                : [vsrc] "=w" (vsrc), [vdst] "=w" (vdst)
+                : [src] "r" (src), [dst] "r" (dst)
+                :
+            );
+
+            vres = procSIMD(vsrc, vdst);
+
+            vst4_u8((uint8_t*)dst, vres);
+
+            count -= 8;
+            dst += 8;
+        }
+        // Leftovers
+        for (int i = 0; i < count; i++) {
+            dst[i] = proc(src[i], dst[i]);
+        }
+    } else {
+        for (int i = count - 1; i >= 0; --i) {
+            unsigned a = aa[i];
+            if (0 != a) {
+                SkPMColor dstC = dst[i];
+                SkPMColor C = proc(src[i], dstC);
+                if (a != 0xFF) {
+                    C = SkFourByteInterp(C, dstC, a);
+                }
+                dst[i] = C;
+            }
+        }
+    }
+}
+
+#ifdef SK_DEVELOPER
+void SkNEONProcCoeffXfermode::toString(SkString* str) const {
+    this->INHERITED::toString(str);
+}
+#endif
+
+void SkNEONProcCoeffXfermode::flatten(SkFlattenableWriteBuffer& buffer) const {
+    this->INHERITED::flatten(buffer);
+    if (!buffer.isCrossProcess()) {
+        buffer.writeFunctionPtr((void*)fProcSIMD);
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+SkXfermodeProcSIMD gNEONXfermodeProcs[] = {
+    [SkXfermode::kClear_Mode]   = NULL,
+    [SkXfermode::kSrc_Mode]     = NULL,
+    [SkXfermode::kDst_Mode]     = NULL,
+    [SkXfermode::kSrcOver_Mode] = NULL,
+    [SkXfermode::kDstOver_Mode] = NULL,
+    [SkXfermode::kSrcIn_Mode]   = NULL,
+    [SkXfermode::kDstIn_Mode]   = NULL,
+    [SkXfermode::kSrcOut_Mode]  = NULL,
+    [SkXfermode::kDstOut_Mode]  = NULL,
+    [SkXfermode::kSrcATop_Mode] = NULL,
+    [SkXfermode::kDstATop_Mode] = NULL,
+    [SkXfermode::kXor_Mode]     = NULL,
+    [SkXfermode::kPlus_Mode]    = NULL,
+    [SkXfermode::kModulate_Mode]= NULL,
+    [SkXfermode::kScreen_Mode]  = NULL,
+
+    [SkXfermode::kOverlay_Mode]    = NULL,
+    [SkXfermode::kDarken_Mode]     = NULL,
+    [SkXfermode::kLighten_Mode]    = NULL,
+    [SkXfermode::kColorDodge_Mode] = NULL,
+    [SkXfermode::kColorBurn_Mode]  = NULL,
+    [SkXfermode::kHardLight_Mode]  = NULL,
+    [SkXfermode::kSoftLight_Mode]  = NULL,
+    [SkXfermode::kDifference_Mode] = NULL,
+    [SkXfermode::kExclusion_Mode]  = NULL,
+    [SkXfermode::kMultiply_Mode]   = NULL,
+
+    [SkXfermode::kHue_Mode]        = NULL,
+    [SkXfermode::kSaturation_Mode] = NULL,
+    [SkXfermode::kColor_Mode]      = NULL,
+    [SkXfermode::kLuminosity_Mode] = NULL,
+};
+
+SK_COMPILE_ASSERT(
+    SK_ARRAY_COUNT(gNEONXfermodeProcs) == SkXfermode::kLastMode + 1,
+    mode_count_arm
+);
+
+#endif
+
+SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
+                                               SkXfermode::Mode mode) {
+#if !SK_ARM_NEON_IS_NONE
+    #if SK_ARM_NEON_IS_DYNAMIC
+    if ((sk_cpu_arm_has_neon()) && (gNEONXfermodeProcs[mode] != NULL)) {
+    #elif SK_ARM_NEON_IS_ALWAYS
+    if (gNEONXfermodeProcs[mode] != NULL) {
+    #endif
+        return SkNEW_ARGS(SkNEONProcCoeffXfermode,
+                          (rec, mode, gNEONXfermodeProcs[mode]));
+    }
+#endif
+    return NULL;
+}
diff --git a/src/opts/SkXfermode_opts_none.cpp b/src/opts/SkXfermode_opts_none.cpp
new file mode 100644 (file)
index 0000000..ca53fa0
--- /dev/null
@@ -0,0 +1,11 @@
+#include "SkXfermode.h"
+#include "SkXfermode_proccoeff.h"
+
+// The prototype below is for Clang
+extern SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
+                                                      SkXfermode::Mode mode);
+
+SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
+                                               SkXfermode::Mode mode) {
+    return NULL;
+}