arm: dynamic NEON support for SkBitmapProcState functions.
author: digit@google.com <digit@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
Mon, 13 Aug 2012 14:06:34 +0000 (14:06 +0000)
committer: digit@google.com <digit@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>
Mon, 13 Aug 2012 14:06:34 +0000 (14:06 +0000)
This patch does the following:

  - Move the NEON-specific code from src/core/SkBitmapProcState_filter.h
    to src/opts/SkBitmapProcState_filter_neon.h

  - Implement the NEON-specific functions in the new source file
    src/opts/SkBitmapProcState_arm_neon.cpp, added to the "opts_neon"
    static library target. All functions now use the _neon suffix, even
    in full-NEON builds.

  - Move most of the content of src/core/SkBitmapProcState.cpp to a
    new header: src/core/SkBitmapProcState_procs.h

This header is included by two source files:

  src/core/SkBitmapProcState.cpp, to define the regular functions.
  src/opts/SkBitmapProcState_arm_neon.cpp, to define the NEON ones.

This is to deal with the fact that all NEON functions now
use the _neon suffix, even in SK_ARM_NEON_IS_ALWAYS mode,
and to be able to include the same header twice in the
SK_ARM_NEON_IS_DYNAMIC case.
Review URL: https://codereview.appspot.com/6449117

git-svn-id: http://skia.googlecode.com/svn/trunk@5055 2bbb7eff-a529-9590-31e7-b0007b416f81

gyp/opts.gyp
src/core/SkBitmapProcState.cpp
src/core/SkBitmapProcState_filter.h
src/core/SkBitmapProcState_procs.h [new file with mode: 0644]
src/core/SkBitmapProcState_shaderproc.h
src/opts/SkBitmapProcState_arm_neon.cpp [new file with mode: 0644]
src/opts/SkBitmapProcState_filter_neon.h [new file with mode: 0644]

index a9f85cc..2ac395c 100644 (file)
       'sources': [
         '../src/opts/memset16_neon.S',
         '../src/opts/memset32_neon.S',
+        '../src/opts/SkBitmapProcState_arm_neon.cpp',
         '../src/opts/SkBitmapProcState_matrixProcs_neon.cpp',
         '../src/opts/SkBitmapProcState_matrix_clamp_neon.h',
         '../src/opts/SkBitmapProcState_matrix_repeat_neon.h',
index 8326a32..01ef5a5 100644 (file)
  * found in the LICENSE file.
  */
 #include "SkBitmapProcState.h"
-#include "SkBitmapProcState_filter.h"
 #include "SkColorPriv.h"
 #include "SkFilterProc.h"
 #include "SkPaint.h"
 #include "SkShader.h"   // for tilemodes
+#include "SkUtilsArm.h"
+
+#if !SK_ARM_NEON_IS_NONE
+// These are defined in src/opts/SkBitmapProcState_arm_neon.cpp
+extern const SkBitmapProcState::SampleProc16 gSkBitmapProcStateSample16_neon[];
+extern const SkBitmapProcState::SampleProc32 gSkBitmapProcStateSample32_neon[];
+extern void  S16_D16_filter_DX_neon(const SkBitmapProcState&, const uint32_t*, int, uint16_t*);
+extern void  Clamp_S16_D16_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint16_t*, int);
+extern void  Repeat_S16_D16_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint16_t*, int);
+extern void  SI8_opaque_D32_filter_DX_neon(const SkBitmapProcState&, const uint32_t*, int, SkPMColor*);
+extern void  SI8_opaque_D32_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint32_t*, int);
+extern void  Clamp_SI8_opaque_D32_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint32_t*, int);
+#endif
 
-// returns expanded * 5bits
-static inline uint32_t Filter_565_Expanded(unsigned x, unsigned y,
-                                           uint32_t a00, uint32_t a01,
-                                           uint32_t a10, uint32_t a11) {
-    SkASSERT((unsigned)x <= 0xF);
-    SkASSERT((unsigned)y <= 0xF);
-    
-    a00 = SkExpand_rgb_16(a00);
-    a01 = SkExpand_rgb_16(a01);
-    a10 = SkExpand_rgb_16(a10);
-    a11 = SkExpand_rgb_16(a11);
-    
-    int xy = x * y >> 3;
-    return  a00 * (32 - 2*y - 2*x + xy) +
-            a01 * (2*x - xy) +
-            a10 * (2*y - xy) +
-            a11 * xy;
-}
-
-// turn an expanded 565 * 5bits into SkPMColor
-// g:11 | r:10 | x:1 | b:10
-static inline SkPMColor SkExpanded_565_To_PMColor(uint32_t c) {
-    unsigned r = (c >> 13) & 0xFF;
-    unsigned g = (c >> 24);
-    unsigned b = (c >> 2) & 0xFF;
-    return SkPackARGB32(0xFF, r, g, b);
-}
-
-// returns answer in SkPMColor format
-static inline SkPMColor Filter_4444_D32(unsigned x, unsigned y,
-                                        uint32_t a00, uint32_t a01,
-                                        uint32_t a10, uint32_t a11) {
-    SkASSERT((unsigned)x <= 0xF);
-    SkASSERT((unsigned)y <= 0xF);
-    
-    a00 = SkExpand_4444(a00);
-    a01 = SkExpand_4444(a01);
-    a10 = SkExpand_4444(a10);
-    a11 = SkExpand_4444(a11);
-
-    int xy = x * y >> 4;
-    uint32_t result =   a00 * (16 - y - x + xy) +
-                        a01 * (x - xy) +
-                        a10 * (y - xy) +
-                        a11 * xy;
-
-    return SkCompact_8888(result);
-}
-
-static inline U8CPU Filter_8(unsigned x, unsigned y,
-                             U8CPU a00, U8CPU a01,
-                             U8CPU a10, U8CPU a11) {
-    SkASSERT((unsigned)x <= 0xF);
-    SkASSERT((unsigned)y <= 0xF);
-    
-    int xy = x * y;
-    unsigned result =   a00 * (256 - 16*y - 16*x + xy) +
-                        a01 * (16*x - xy) +
-                        a10 * (16*y - xy) +
-                        a11 * xy;
-    
-    return result >> 8;
-}
-
-/*****************************************************************************
- *
- *  D32 functions
- *
- */
-
-// SRC == 8888
-
-#define FILTER_PROC(x, y, a, b, c, d, dst)   Filter_32_opaque(x, y, a, b, c, d, dst)
-
-#define MAKENAME(suffix)        S32_opaque_D32 ## suffix
-#define DSTSIZE                 32
-#define SRCTYPE                 SkPMColor
-#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \
-                                SkASSERT(state.fAlphaScale == 256)
-#define RETURNDST(src)          src
-#define SRC_TO_FILTER(src)      src
-#include "SkBitmapProcState_sample.h"
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst)   Filter_32_alpha(x, y, a, b, c, d, dst, alphaScale)
-
-#define MAKENAME(suffix)        S32_alpha_D32 ## suffix
-#define DSTSIZE                 32
-#define SRCTYPE                 SkPMColor
-#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \
-                                SkASSERT(state.fAlphaScale < 256)
-#define PREAMBLE(state)         unsigned alphaScale = state.fAlphaScale
-#define RETURNDST(src)          SkAlphaMulQ(src, alphaScale)
-#define SRC_TO_FILTER(src)      src
-#include "SkBitmapProcState_sample.h"
-
-// SRC == 565
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) \
-    do {                                                        \
-        uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d);   \
-        *(dst) = SkExpanded_565_To_PMColor(tmp);                \
-    } while (0)
-
-#define MAKENAME(suffix)        S16_opaque_D32 ## suffix
-#define DSTSIZE                 32
-#define SRCTYPE                 uint16_t
-#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config); \
-                                SkASSERT(state.fAlphaScale == 256)
-#define RETURNDST(src)          SkPixel16ToPixel32(src)
-#define SRC_TO_FILTER(src)      src
-#include "SkBitmapProcState_sample.h"
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) \
-    do {                                                                    \
-        uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d);               \
-        *(dst) = SkAlphaMulQ(SkExpanded_565_To_PMColor(tmp), alphaScale);   \
-    } while (0)
-
-#define MAKENAME(suffix)        S16_alpha_D32 ## suffix
-#define DSTSIZE                 32
-#define SRCTYPE                 uint16_t
-#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config); \
-                                SkASSERT(state.fAlphaScale < 256)
-#define PREAMBLE(state)         unsigned alphaScale = state.fAlphaScale
-#define RETURNDST(src)          SkAlphaMulQ(SkPixel16ToPixel32(src), alphaScale)
-#define SRC_TO_FILTER(src)      src
-#include "SkBitmapProcState_sample.h"
-
-// SRC == Index8
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst)   Filter_32_opaque(x, y, a, b, c, d, dst)
-
-#define MAKENAME(suffix)        SI8_opaque_D32 ## suffix
-#define DSTSIZE                 32
-#define SRCTYPE                 uint8_t
-#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \
-                                SkASSERT(state.fAlphaScale == 256)
-#define PREAMBLE(state)         const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors()
-#define RETURNDST(src)          table[src]
-#define SRC_TO_FILTER(src)      table[src]
-#define POSTAMBLE(state)        state.fBitmap->getColorTable()->unlockColors(false)
-#include "SkBitmapProcState_sample.h"
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst)   Filter_32_alpha(x, y, a, b, c, d, dst, alphaScale)
-
-#define MAKENAME(suffix)        SI8_alpha_D32 ## suffix
-#define DSTSIZE                 32
-#define SRCTYPE                 uint8_t
-#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \
-                                SkASSERT(state.fAlphaScale < 256)
-#define PREAMBLE(state)         unsigned alphaScale = state.fAlphaScale; \
-                                const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors()
-#define RETURNDST(src)          SkAlphaMulQ(table[src], alphaScale)
-#define SRC_TO_FILTER(src)      table[src]
-#define POSTAMBLE(state)        state.fBitmap->getColorTable()->unlockColors(false)
-#include "SkBitmapProcState_sample.h"
-
-// SRC == 4444
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst)  *(dst) = Filter_4444_D32(x, y, a, b, c, d)
-
-#define MAKENAME(suffix)        S4444_opaque_D32 ## suffix
-#define DSTSIZE                 32
-#define SRCTYPE                 SkPMColor16
-#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_4444_Config); \
-                                SkASSERT(state.fAlphaScale == 256)
-#define RETURNDST(src)          SkPixel4444ToPixel32(src)
-#define SRC_TO_FILTER(src)      src
-#include "SkBitmapProcState_sample.h"
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst)  \
-    do {                                                    \
-        uint32_t tmp = Filter_4444_D32(x, y, a, b, c, d);   \
-        *(dst) = SkAlphaMulQ(tmp, alphaScale);              \
-    } while (0)
-
-#define MAKENAME(suffix)        S4444_alpha_D32 ## suffix
-#define DSTSIZE                 32
-#define SRCTYPE                 SkPMColor16
-#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_4444_Config); \
-                                SkASSERT(state.fAlphaScale < 256)
-#define PREAMBLE(state)         unsigned alphaScale = state.fAlphaScale
-#define RETURNDST(src)          SkAlphaMulQ(SkPixel4444ToPixel32(src), alphaScale)
-#define SRC_TO_FILTER(src)      src
-#include "SkBitmapProcState_sample.h"
-
-// SRC == A8
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) \
-    do {                                                        \
-        unsigned tmp = Filter_8(x, y, a, b, c, d);              \
-        *(dst) = SkAlphaMulQ(pmColor, SkAlpha255To256(tmp));    \
-    } while (0)
-
-#define MAKENAME(suffix)        SA8_alpha_D32 ## suffix
-#define DSTSIZE                 32
-#define SRCTYPE                 uint8_t
-#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kA8_Config);
-#define PREAMBLE(state)         const SkPMColor pmColor = state.fPaintPMColor;
-#define RETURNDST(src)          SkAlphaMulQ(pmColor, SkAlpha255To256(src))
-#define SRC_TO_FILTER(src)      src
-#include "SkBitmapProcState_sample.h"
-
-/*****************************************************************************
- *
- *  D16 functions
- *
- */
-
-// SRC == 8888
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) \
-    do {                                                \
-        SkPMColor dstColor;                             \
-        Filter_32_opaque(x, y, a, b, c, d, &dstColor);  \
-        (*dst) = SkPixel32ToPixel16(dstColor);          \
-    } while (0)
-
-#define MAKENAME(suffix)        S32_D16 ## suffix
-#define DSTSIZE                 16
-#define SRCTYPE                 SkPMColor
-#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \
-                                SkASSERT(state.fBitmap->isOpaque())
-#define RETURNDST(src)          SkPixel32ToPixel16(src)
-#define SRC_TO_FILTER(src)      src
-#include "SkBitmapProcState_sample.h"
-
-// SRC == 565
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) \
-    do {                                                        \
-        uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d);   \
-        *(dst) = SkCompact_rgb_16((tmp) >> 5);                  \
-    } while (0)
-
-#define MAKENAME(suffix)        S16_D16 ## suffix
-#define DSTSIZE                 16
-#define SRCTYPE                 uint16_t
-#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config)
-#define RETURNDST(src)          src
-#define SRC_TO_FILTER(src)      src
-#include "SkBitmapProcState_sample.h"
-
-// SRC == Index8
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) \
-    do {                                                        \
-        uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d);   \
-        *(dst) = SkCompact_rgb_16((tmp) >> 5);                  \
-    } while (0)
-
-#define MAKENAME(suffix)        SI8_D16 ## suffix
-#define DSTSIZE                 16
-#define SRCTYPE                 uint8_t
-#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \
-                                SkASSERT(state.fBitmap->isOpaque())
-#define PREAMBLE(state)         const uint16_t* SK_RESTRICT table = state.fBitmap->getColorTable()->lock16BitCache()
-#define RETURNDST(src)          table[src]
-#define SRC_TO_FILTER(src)      table[src]
-#define POSTAMBLE(state)        state.fBitmap->getColorTable()->unlock16BitCache()
-#include "SkBitmapProcState_sample.h"
-
-///////////////////////////////////////////////////////////////////////////////
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst) \
-    do {                                                        \
-        uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d);   \
-        *(dst) = SkCompact_rgb_16((tmp) >> 5);                  \
-    } while (0)
-
-
-// clamp
-
-#define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
-#define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
-#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
-#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
-
-#define MAKENAME(suffix)        Clamp_S16_D16 ## suffix
-#define SRCTYPE                 uint16_t
-#define DSTTYPE                 uint16_t
-#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config)
-#define SRC_TO_FILTER(src)      src
-#include "SkBitmapProcState_shaderproc.h"
-
-
-#define TILEX_PROCF(fx, max)    (((fx) & 0xFFFF) * ((max) + 1) >> 16)
-#define TILEY_PROCF(fy, max)    (((fy) & 0xFFFF) * ((max) + 1) >> 16)
-#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
-#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
-
-#define MAKENAME(suffix)        Repeat_S16_D16 ## suffix
-#define SRCTYPE                 uint16_t
-#define DSTTYPE                 uint16_t
-#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config)
-#define SRC_TO_FILTER(src)      src
-#include "SkBitmapProcState_shaderproc.h"
-
-
-#define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
-#define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
-#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
-#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
-
-#undef FILTER_PROC
-#define FILTER_PROC(x, y, a, b, c, d, dst)   Filter_32_opaque(x, y, a, b, c, d, dst)
-#define MAKENAME(suffix)        Clamp_SI8_opaque_D32 ## suffix
-#define SRCTYPE                 uint8_t
-#define DSTTYPE                 uint32_t
-#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config)
-#define PREAMBLE(state)         const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors()
-#define SRC_TO_FILTER(src)      table[src]
-#define POSTAMBLE(state)        state.fBitmap->getColorTable()->unlockColors(false)
-#include "SkBitmapProcState_shaderproc.h"
+#if !SK_ARM_NEON_IS_ALWAYS
+#define   NAME_WRAP(x)  x
+#include "SkBitmapProcState_filter.h"
+#include "SkBitmapProcState_procs.h"
+#endif
 
 ///////////////////////////////////////////////////////////////////////////////
 
@@ -448,7 +139,8 @@ bool SkBitmapProcState::chooseProcs(const SkMatrix& inv, const SkPaint& paint) {
             return false;
     }
 
-    static const SampleProc32 gSample32[] = {
+#if !SK_ARM_NEON_IS_ALWAYS
+    static const SampleProc32 gSkBitmapProcStateSample32[] = {
         S32_opaque_D32_nofilter_DXDY,
         S32_alpha_D32_nofilter_DXDY,
         S32_opaque_D32_nofilter_DX,
@@ -496,7 +188,7 @@ bool SkBitmapProcState::chooseProcs(const SkMatrix& inv, const SkPaint& paint) {
         SA8_alpha_D32_filter_DX
     };
     
-    static const SampleProc16 gSample16[] = {
+    static const SampleProc16 gSkBitmapProcStateSample16[] = {
         S32_D16_nofilter_DXDY,
         S32_D16_nofilter_DX,
         S32_D16_filter_DXDY,
@@ -517,21 +209,22 @@ bool SkBitmapProcState::chooseProcs(const SkMatrix& inv, const SkPaint& paint) {
         // Don't support A8 -> 565
         NULL, NULL, NULL, NULL
     };
+#endif
 
-    fSampleProc32 = gSample32[index];
+    fSampleProc32 = SK_ARM_NEON_WRAP(gSkBitmapProcStateSample32)[index];
     index >>= 1;    // shift away any opaque/alpha distinction
-    fSampleProc16 = gSample16[index];
+    fSampleProc16 = SK_ARM_NEON_WRAP(gSkBitmapProcStateSample16)[index];
 
     // our special-case shaderprocs
-    if (S16_D16_filter_DX == fSampleProc16) {
+    if (SK_ARM_NEON_WRAP(S16_D16_filter_DX) == fSampleProc16) {
         if (clamp_clamp) {
-            fShaderProc16 = Clamp_S16_D16_filter_DX_shaderproc;
+            fShaderProc16 = SK_ARM_NEON_WRAP(Clamp_S16_D16_filter_DX_shaderproc);
         } else if (SkShader::kRepeat_TileMode == fTileModeX &&
                    SkShader::kRepeat_TileMode == fTileModeY) {
-            fShaderProc16 = Repeat_S16_D16_filter_DX_shaderproc;
+            fShaderProc16 = SK_ARM_NEON_WRAP(Repeat_S16_D16_filter_DX_shaderproc);
         }
-    } else if (SI8_opaque_D32_filter_DX == fSampleProc32 && clamp_clamp) {
-        fShaderProc32 = Clamp_SI8_opaque_D32_filter_DX_shaderproc;
+    } else if (SK_ARM_NEON_WRAP(SI8_opaque_D32_filter_DX) == fSampleProc32 && clamp_clamp) {
+        fShaderProc32 = SK_ARM_NEON_WRAP(Clamp_SI8_opaque_D32_filter_DX_shaderproc);
     }
 
     // see if our platform has any accelerated overrides
index f69e17a..7fcf754 100644 (file)
     the drawing pipeline may rely on this (e.g. which blitrow proc to use).
  */
 
-#if defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN)
-static inline void Filter_32_opaque_neon(unsigned x, unsigned y, 
+static inline void Filter_32_opaque(unsigned x, unsigned y,
                                     SkPMColor a00, SkPMColor a01,
                                     SkPMColor a10, SkPMColor a11,
-                                    SkPMColor *dst) {
-    asm volatile(
-                 "vdup.8         d0, %[y]                \n\t"   // duplicate y into d0
-                 "vmov.u8        d16, #16                \n\t"   // set up constant in d16
-                 "vsub.u8        d1, d16, d0             \n\t"   // d1 = 16-y
-                 
-                 "vdup.32        d4, %[a00]              \n\t"   // duplicate a00 into d4
-                 "vdup.32        d5, %[a10]              \n\t"   // duplicate a10 into d5
-                 "vmov.32        d4[1], %[a01]           \n\t"   // set top of d4 to a01
-                 "vmov.32        d5[1], %[a11]           \n\t"   // set top of d5 to a11
-                 
-                 "vmull.u8       q3, d4, d1              \n\t"   // q3 = [a01|a00] * (16-y)
-                 "vmull.u8       q0, d5, d0              \n\t"   // q0 = [a11|a10] * y
-                 
-                 "vdup.16        d5, %[x]                \n\t"   // duplicate x into d5
-                 "vmov.u16       d16, #16                \n\t"   // set up constant in d16
-                 "vsub.u16       d3, d16, d5             \n\t"   // d3 = 16-x
-                 
-                 "vmul.i16       d4, d7, d5              \n\t"   // d4  = a01 * x
-                 "vmla.i16       d4, d1, d5              \n\t"   // d4 += a11 * x
-                 "vmla.i16       d4, d6, d3              \n\t"   // d4 += a00 * (16-x)
-                 "vmla.i16       d4, d0, d3              \n\t"   // d4 += a10 * (16-x)
-                 "vshrn.i16      d0, q2, #8              \n\t"   // shift down result by 8
-                 "vst1.32        {d0[0]}, [%[dst]]       \n\t"   // store result
-                 :
-                 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst)
-                 : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
-                 );
-}
-
-static inline void Filter_32_alpha_neon(unsigned x, unsigned y,
-                                          SkPMColor a00, SkPMColor a01,
-                                          SkPMColor a10, SkPMColor a11,
-                                          SkPMColor *dst, uint16_t scale) {
-    asm volatile(
-                 "vdup.8         d0, %[y]                \n\t"   // duplicate y into d0
-                 "vmov.u8        d16, #16                \n\t"   // set up constant in d16
-                 "vsub.u8        d1, d16, d0             \n\t"   // d1 = 16-y
-                 
-                 "vdup.32        d4, %[a00]              \n\t"   // duplicate a00 into d4
-                 "vdup.32        d5, %[a10]              \n\t"   // duplicate a10 into d5
-                 "vmov.32        d4[1], %[a01]           \n\t"   // set top of d4 to a01
-                 "vmov.32        d5[1], %[a11]           \n\t"   // set top of d5 to a11
-                 
-                 "vmull.u8       q3, d4, d1              \n\t"   // q3 = [a01|a00] * (16-y)
-                 "vmull.u8       q0, d5, d0              \n\t"   // q0 = [a11|a10] * y
-                 
-                 "vdup.16        d5, %[x]                \n\t"   // duplicate x into d5
-                 "vmov.u16       d16, #16                \n\t"   // set up constant in d16
-                 "vsub.u16       d3, d16, d5             \n\t"   // d3 = 16-x
-                 
-                 "vmul.i16       d4, d7, d5              \n\t"   // d4  = a01 * x
-                 "vmla.i16       d4, d1, d5              \n\t"   // d4 += a11 * x
-                 "vmla.i16       d4, d6, d3              \n\t"   // d4 += a00 * (16-x)
-                 "vmla.i16       d4, d0, d3              \n\t"   // d4 += a10 * (16-x)
-                 "vdup.16        d3, %[scale]            \n\t"   // duplicate scale into d3
-                 "vshr.u16       d4, d4, #8              \n\t"   // shift down result by 8
-                 "vmul.i16       d4, d4, d3              \n\t"   // multiply result by scale
-                 "vshrn.i16      d0, q2, #8              \n\t"   // shift down result by 8
-                 "vst1.32        {d0[0]}, [%[dst]]       \n\t"   // store result
-                 :
-                 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale)
-                 : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
-                 );
-}
-#define Filter_32_opaque    Filter_32_opaque_neon
-#define Filter_32_alpha     Filter_32_alpha_neon
-#else
-static inline void Filter_32_opaque_portable(unsigned x, unsigned y,
-                                             SkPMColor a00, SkPMColor a01,
-                                             SkPMColor a10, SkPMColor a11,
-                                             SkPMColor* dstColor) {
+                                    SkPMColor* dstColor) {
     SkASSERT((unsigned)x <= 0xF);
     SkASSERT((unsigned)y <= 0xF);
     
@@ -118,11 +46,11 @@ static inline void Filter_32_opaque_portable(unsigned x, unsigned y,
     *dstColor = ((lo >> 8) & mask) | (hi & ~mask);
 }
 
-static inline void Filter_32_alpha_portable(unsigned x, unsigned y,
-                                            SkPMColor a00, SkPMColor a01,
-                                            SkPMColor a10, SkPMColor a11,
-                                            SkPMColor* dstColor,
-                                            unsigned alphaScale) {
+static inline void Filter_32_alpha(unsigned x, unsigned y,
+                                   SkPMColor a00, SkPMColor a01,
+                                   SkPMColor a10, SkPMColor a11,
+                                   SkPMColor* dstColor,
+                                   unsigned alphaScale) {
     SkASSERT((unsigned)x <= 0xF);
     SkASSERT((unsigned)y <= 0xF);
     SkASSERT(alphaScale <= 256);
@@ -150,7 +78,4 @@ static inline void Filter_32_alpha_portable(unsigned x, unsigned y,
 
     *dstColor = ((lo >> 8) & mask) | (hi & ~mask);
 }
-#define Filter_32_opaque    Filter_32_opaque_portable
-#define Filter_32_alpha     Filter_32_alpha_portable
-#endif
 
diff --git a/src/core/SkBitmapProcState_procs.h b/src/core/SkBitmapProcState_procs.h
new file mode 100644 (file)
index 0000000..1b9328e
--- /dev/null
@@ -0,0 +1,343 @@
+
+/*
+ * Copyright 2011 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+// Define NAME_WRAP(x) before including this header to perform name-wrapping.
+// E.g. for ARM NEON, define it as 'x ## _neon' to ensure all important
+// identifiers have a _neon suffix.
+#ifndef NAME_WRAP
+#error "Please define NAME_WRAP() before including this file"
+#endif
+
+// returns expanded * 5bits
+static inline uint32_t Filter_565_Expanded(unsigned x, unsigned y,
+                                           uint32_t a00, uint32_t a01,
+                                           uint32_t a10, uint32_t a11) {
+    SkASSERT((unsigned)x <= 0xF);
+    SkASSERT((unsigned)y <= 0xF);
+    
+    a00 = SkExpand_rgb_16(a00);
+    a01 = SkExpand_rgb_16(a01);
+    a10 = SkExpand_rgb_16(a10);
+    a11 = SkExpand_rgb_16(a11);
+    
+    int xy = x * y >> 3;
+    return  a00 * (32 - 2*y - 2*x + xy) +
+            a01 * (2*x - xy) +
+            a10 * (2*y - xy) +
+            a11 * xy;
+}
+
+// turn an expanded 565 * 5bits into SkPMColor
+// g:11 | r:10 | x:1 | b:10
+static inline SkPMColor SkExpanded_565_To_PMColor(uint32_t c) {
+    unsigned r = (c >> 13) & 0xFF;
+    unsigned g = (c >> 24);
+    unsigned b = (c >> 2) & 0xFF;
+    return SkPackARGB32(0xFF, r, g, b);
+}
+
+// returns answer in SkPMColor format
+static inline SkPMColor Filter_4444_D32(unsigned x, unsigned y,
+                                        uint32_t a00, uint32_t a01,
+                                        uint32_t a10, uint32_t a11) {
+    SkASSERT((unsigned)x <= 0xF);
+    SkASSERT((unsigned)y <= 0xF);
+    
+    a00 = SkExpand_4444(a00);
+    a01 = SkExpand_4444(a01);
+    a10 = SkExpand_4444(a10);
+    a11 = SkExpand_4444(a11);
+
+    int xy = x * y >> 4;
+    uint32_t result =   a00 * (16 - y - x + xy) +
+                        a01 * (x - xy) +
+                        a10 * (y - xy) +
+                        a11 * xy;
+
+    return SkCompact_8888(result);
+}
+
+static inline U8CPU Filter_8(unsigned x, unsigned y,
+                             U8CPU a00, U8CPU a01,
+                             U8CPU a10, U8CPU a11) {
+    SkASSERT((unsigned)x <= 0xF);
+    SkASSERT((unsigned)y <= 0xF);
+    
+    int xy = x * y;
+    unsigned result =   a00 * (256 - 16*y - 16*x + xy) +
+                        a01 * (16*x - xy) +
+                        a10 * (16*y - xy) +
+                        a11 * xy;
+    
+    return result >> 8;
+}
+
+/*****************************************************************************
+ *
+ *  D32 functions
+ *
+ */
+
+// SRC == 8888
+
+#define FILTER_PROC(x, y, a, b, c, d, dst)   NAME_WRAP(Filter_32_opaque)(x, y, a, b, c, d, dst)
+
+#define MAKENAME(suffix)        NAME_WRAP(S32_opaque_D32 ## suffix)
+#define DSTSIZE                 32
+#define SRCTYPE                 SkPMColor
+#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \
+                                SkASSERT(state.fAlphaScale == 256)
+#define RETURNDST(src)          src
+#define SRC_TO_FILTER(src)      src
+#include "SkBitmapProcState_sample.h"
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst)   NAME_WRAP(Filter_32_alpha)(x, y, a, b, c, d, dst, alphaScale)
+
+#define MAKENAME(suffix)        NAME_WRAP(S32_alpha_D32 ## suffix)
+#define DSTSIZE                 32
+#define SRCTYPE                 SkPMColor
+#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \
+                                SkASSERT(state.fAlphaScale < 256)
+#define PREAMBLE(state)         unsigned alphaScale = state.fAlphaScale
+#define RETURNDST(src)          SkAlphaMulQ(src, alphaScale)
+#define SRC_TO_FILTER(src)      src
+#include "SkBitmapProcState_sample.h"
+
+// SRC == 565
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+    do {                                                        \
+        uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d);   \
+        *(dst) = SkExpanded_565_To_PMColor(tmp);                \
+    } while (0)
+
+#define MAKENAME(suffix)        NAME_WRAP(S16_opaque_D32 ## suffix)
+#define DSTSIZE                 32
+#define SRCTYPE                 uint16_t
+#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config); \
+                                SkASSERT(state.fAlphaScale == 256)
+#define RETURNDST(src)          SkPixel16ToPixel32(src)
+#define SRC_TO_FILTER(src)      src
+#include "SkBitmapProcState_sample.h"
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+    do {                                                                    \
+        uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d);               \
+        *(dst) = SkAlphaMulQ(SkExpanded_565_To_PMColor(tmp), alphaScale);   \
+    } while (0)
+
+#define MAKENAME(suffix)        NAME_WRAP(S16_alpha_D32 ## suffix)
+#define DSTSIZE                 32
+#define SRCTYPE                 uint16_t
+#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config); \
+                                SkASSERT(state.fAlphaScale < 256)
+#define PREAMBLE(state)         unsigned alphaScale = state.fAlphaScale
+#define RETURNDST(src)          SkAlphaMulQ(SkPixel16ToPixel32(src), alphaScale)
+#define SRC_TO_FILTER(src)      src
+#include "SkBitmapProcState_sample.h"
+
+// SRC == Index8
+
+// Index8 source -> 32-bit destination; CHECKSTATE below asserts
+// fAlphaScale == 256. Source bytes are indices into the bitmap's color table,
+// so both RETURNDST and SRC_TO_FILTER look the index up in `table`.
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst)   NAME_WRAP(Filter_32_opaque)(x, y, a, b, c, d, dst)
+
+// PREAMBLE obtains `table` via lockColors() and POSTAMBLE releases it with
+// unlockColors(false); the `false` presumably signals that the colors were
+// not modified -- confirm against SkColorTable.
+#define MAKENAME(suffix)        NAME_WRAP(SI8_opaque_D32 ## suffix)
+#define DSTSIZE                 32
+#define SRCTYPE                 uint8_t
+#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \
+                                SkASSERT(state.fAlphaScale == 256)
+#define PREAMBLE(state)         const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors()
+#define RETURNDST(src)          table[src]
+#define SRC_TO_FILTER(src)      table[src]
+#define POSTAMBLE(state)        state.fBitmap->getColorTable()->unlockColors(false)
+#include "SkBitmapProcState_sample.h"
+
+// Index8 source -> 32-bit destination with alpha scaling; CHECKSTATE below
+// asserts fAlphaScale < 256. FILTER_PROC forwards alphaScale to the
+// Filter_32_alpha helper selected through NAME_WRAP.
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst)   NAME_WRAP(Filter_32_alpha)(x, y, a, b, c, d, dst, alphaScale)
+
+// PREAMBLE declares both locals the other macros rely on: alphaScale and the
+// locked color table `table`. POSTAMBLE releases the table again.
+#define MAKENAME(suffix)        NAME_WRAP(SI8_alpha_D32 ## suffix)
+#define DSTSIZE                 32
+#define SRCTYPE                 uint8_t
+#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \
+                                SkASSERT(state.fAlphaScale < 256)
+#define PREAMBLE(state)         unsigned alphaScale = state.fAlphaScale; \
+                                const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors()
+#define RETURNDST(src)          SkAlphaMulQ(table[src], alphaScale)
+#define SRC_TO_FILTER(src)      table[src]
+#define POSTAMBLE(state)        state.fBitmap->getColorTable()->unlockColors(false)
+#include "SkBitmapProcState_sample.h"
+
+// SRC == 4444
+
+// ARGB_4444 source -> 32-bit destination; CHECKSTATE below asserts
+// fAlphaScale == 256. FILTER_PROC stores the value returned by
+// Filter_4444_D32 directly into *dst.
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst)  *(dst) = Filter_4444_D32(x, y, a, b, c, d)
+
+// Parameters consumed by SkBitmapProcState_sample.h (not shown here).
+// RETURNDST converts one raw source pixel with SkPixel4444ToPixel32.
+#define MAKENAME(suffix)        NAME_WRAP(S4444_opaque_D32 ## suffix)
+#define DSTSIZE                 32
+#define SRCTYPE                 SkPMColor16
+#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_4444_Config); \
+                                SkASSERT(state.fAlphaScale == 256)
+#define RETURNDST(src)          SkPixel4444ToPixel32(src)
+#define SRC_TO_FILTER(src)      src
+#include "SkBitmapProcState_sample.h"
+
+// ARGB_4444 source -> 32-bit destination with alpha scaling; CHECKSTATE below
+// asserts fAlphaScale < 256. FILTER_PROC scales the Filter_4444_D32 result
+// with SkAlphaMulQ; alphaScale is the local declared by PREAMBLE.
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst)  \
+    do {                                                    \
+        uint32_t tmp = Filter_4444_D32(x, y, a, b, c, d);   \
+        *(dst) = SkAlphaMulQ(tmp, alphaScale);              \
+    } while (0)
+
+// Parameters consumed by SkBitmapProcState_sample.h (not shown here).
+// RETURNDST applies the same alphaScale to a directly converted source pixel.
+#define MAKENAME(suffix)        NAME_WRAP(S4444_alpha_D32 ## suffix)
+#define DSTSIZE                 32
+#define SRCTYPE                 SkPMColor16
+#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_4444_Config); \
+                                SkASSERT(state.fAlphaScale < 256)
+#define PREAMBLE(state)         unsigned alphaScale = state.fAlphaScale
+#define RETURNDST(src)          SkAlphaMulQ(SkPixel4444ToPixel32(src), alphaScale)
+#define SRC_TO_FILTER(src)      src
+#include "SkBitmapProcState_sample.h"
+
+// SRC == A8
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+    do {                                                        \
+        unsigned tmp = Filter_8(x, y, a, b, c, d);              \
+        *(dst) = SkAlphaMulQ(pmColor, SkAlpha255To256(tmp));    \
+    } while (0)
+
+#define MAKENAME(suffix)        NAME_WRAP(SA8_alpha_D32 ## suffix)
+#define DSTSIZE                 32
+#define SRCTYPE                 uint8_t
+#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kA8_Config);
+#define PREAMBLE(state)         const SkPMColor pmColor = state.fPaintPMColor;
+#define RETURNDST(src)          SkAlphaMulQ(pmColor, SkAlpha255To256(src))
+#define SRC_TO_FILTER(src)      src
+#include "SkBitmapProcState_sample.h"
+
+/*****************************************************************************
+ *
+ *  D16 functions
+ *
+ */
+
+// SRC == 8888
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+    do {                                                \
+        SkPMColor dstColor;                             \
+        NAME_WRAP(Filter_32_opaque)(x, y, a, b, c, d, &dstColor);  \
+        (*dst) = SkPixel32ToPixel16(dstColor);          \
+    } while (0)
+
+#define MAKENAME(suffix)        NAME_WRAP(S32_D16 ## suffix)
+#define DSTSIZE                 16
+#define SRCTYPE                 SkPMColor
+#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \
+                                SkASSERT(state.fBitmap->isOpaque())
+#define RETURNDST(src)          SkPixel32ToPixel16(src)
+#define SRC_TO_FILTER(src)      src
+#include "SkBitmapProcState_sample.h"
+
+// SRC == 565
+
+// 565 source -> 16-bit destination. FILTER_PROC shifts the
+// Filter_565_Expanded result right by 5 and repacks it with SkCompact_rgb_16.
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+    do {                                                        \
+        uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d);   \
+        *(dst) = SkCompact_rgb_16((tmp) >> 5);                  \
+    } while (0)
+
+// Parameters consumed by SkBitmapProcState_sample.h (not shown here).
+// Source and destination share a format, so RETURNDST is the identity.
+#define MAKENAME(suffix)        NAME_WRAP(S16_D16 ## suffix)
+#define DSTSIZE                 16
+#define SRCTYPE                 uint16_t
+#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config)
+#define RETURNDST(src)          src
+#define SRC_TO_FILTER(src)      src
+#include "SkBitmapProcState_sample.h"
+
+// SRC == Index8
+
+// Index8 source -> 16-bit destination; CHECKSTATE below asserts that the
+// bitmap is opaque. Indices are resolved through the color table's 16-bit
+// cache, so FILTER_PROC receives uint16_t table entries and processes them
+// with Filter_565_Expanded / SkCompact_rgb_16.
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+    do {                                                        \
+        uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d);   \
+        *(dst) = SkCompact_rgb_16((tmp) >> 5);                  \
+    } while (0)
+
+// PREAMBLE obtains `table` via lock16BitCache(); POSTAMBLE releases it with
+// unlock16BitCache().
+#define MAKENAME(suffix)        NAME_WRAP(SI8_D16 ## suffix)
+#define DSTSIZE                 16
+#define SRCTYPE                 uint8_t
+#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \
+                                SkASSERT(state.fBitmap->isOpaque())
+#define PREAMBLE(state)         const uint16_t* SK_RESTRICT table = state.fBitmap->getColorTable()->lock16BitCache()
+#define RETURNDST(src)          table[src]
+#define SRC_TO_FILTER(src)      table[src]
+#define POSTAMBLE(state)        state.fBitmap->getColorTable()->unlock16BitCache()
+#include "SkBitmapProcState_sample.h"
+
+///////////////////////////////////////////////////////////////////////////////
+
+// FILTER_PROC used by the 565 -> 565 instantiations of
+// SkBitmapProcState_shaderproc.h that follow: shift the Filter_565_Expanded
+// result right by 5 and repack it with SkCompact_rgb_16.
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+    do {                                                        \
+        uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d);   \
+        *(dst) = SkCompact_rgb_16((tmp) >> 5);                  \
+    } while (0)
+
+
+// clamp
+
+// Clamp tiling: TILE?_PROCF takes the bits of the coordinate above bit 16 and
+// limits them with SkClampMax; TILE?_LOW_BITS extracts bits 12..15, i.e. the
+// four bits just below that. `max` is unused by the LOW_BITS forms here.
+#define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
+#define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
+#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
+#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
+
+// Parameters consumed by SkBitmapProcState_shaderproc.h; unlike the sample
+// template this one takes an explicit DSTTYPE.
+#define MAKENAME(suffix)        NAME_WRAP(Clamp_S16_D16 ## suffix)
+#define SRCTYPE                 uint16_t
+#define DSTTYPE                 uint16_t
+#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config)
+#define SRC_TO_FILTER(src)      src
+#include "SkBitmapProcState_shaderproc.h"
+
+
+// Repeat tiling: keep only the low 16 bits of the coordinate, multiply by
+// (max + 1), and shift right by 16 to get the tile index (TILE?_PROCF).
+// Shifting the same product right by 12 and masking with 0xF gives the four
+// bits just below that index (TILE?_LOW_BITS).
+// NOTE(review): the product is formed in the type of fx/max; very large
+// (max + 1) values could overflow a 32-bit int -- confirm the caller's bounds.
+#define TILEX_PROCF(fx, max)    (((fx) & 0xFFFF) * ((max) + 1) >> 16)
+#define TILEY_PROCF(fy, max)    (((fy) & 0xFFFF) * ((max) + 1) >> 16)
+#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
+#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
+
+// Parameters consumed by SkBitmapProcState_shaderproc.h for the 565 -> 565
+// repeat variant; FILTER_PROC is whatever was most recently defined above.
+#define MAKENAME(suffix)        NAME_WRAP(Repeat_S16_D16 ## suffix)
+#define SRCTYPE                 uint16_t
+#define DSTTYPE                 uint16_t
+#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config)
+#define SRC_TO_FILTER(src)      src
+#include "SkBitmapProcState_shaderproc.h"
+
+
+// Clamp tiling again (same expressions as the Clamp_S16_D16 instantiation):
+// SkClampMax on the bits above bit 16, and bits 12..15 as the low bits.
+#define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
+#define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
+#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
+#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
+
+// Index8 source -> 32-bit destination shader proc. Indices are resolved
+// through the color table locked in PREAMBLE and released in POSTAMBLE, and
+// filtering is delegated to the Filter_32_opaque helper selected through
+// NAME_WRAP. Note that CHECKSTATE only asserts the bitmap config here.
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst)   NAME_WRAP(Filter_32_opaque)(x, y, a, b, c, d, dst)
+#define MAKENAME(suffix)        NAME_WRAP(Clamp_SI8_opaque_D32 ## suffix)
+#define SRCTYPE                 uint8_t
+#define DSTTYPE                 uint32_t
+#define CHECKSTATE(state)       SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config)
+#define PREAMBLE(state)         const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors()
+#define SRC_TO_FILTER(src)      table[src]
+#define POSTAMBLE(state)        state.fBitmap->getColorTable()->unlockColors(false)
+#include "SkBitmapProcState_shaderproc.h"
+
+// Drop NAME_WRAP so that whoever includes this header can define it afresh.
+#undef NAME_WRAP
\ No newline at end of file
index ead57f1..33c238c 100644 (file)
 
 #define SCALE_FILTER_NAME       MAKENAME(_filter_DX_shaderproc)
 
-static void SCALE_FILTER_NAME(const SkBitmapProcState& s, int x, int y,
-                              DSTTYPE* SK_RESTRICT colors, int count) {
+// Can't be static in the general case because some of these implementations
+// will be defined and referenced in different object files.
+void SCALE_FILTER_NAME(const SkBitmapProcState& s, int x, int y,
+                       DSTTYPE* SK_RESTRICT colors, int count) {
     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
                              SkMatrix::kScale_Mask)) == 0);
     SkASSERT(s.fInvKy == 0);
diff --git a/src/opts/SkBitmapProcState_arm_neon.cpp b/src/opts/SkBitmapProcState_arm_neon.cpp
new file mode 100644 (file)
index 0000000..d50707d
--- /dev/null
@@ -0,0 +1,92 @@
+
+/*
+ * Copyright 2012 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#include "SkBitmapProcState.h"
+#include "SkBitmapProcState_filter.h"
+#include "SkColorPriv.h"
+#include "SkFilterProc.h"
+#include "SkPaint.h"
+#include "SkShader.h"   // for tilemodes
+#include "SkUtilsArm.h"
+
+// Required to ensure the table is part of the final binary: in C++ a
+// namespace-scope const object has internal linkage unless it has been
+// declared extern, so these declarations give the definitions below external
+// linkage.
+extern const SkBitmapProcState::SampleProc32 gSkBitmapProcStateSample32_neon[];
+extern const SkBitmapProcState::SampleProc16 gSkBitmapProcStateSample16_neon[];
+
+// Every identifier the included headers build through NAME_WRAP receives a
+// _neon suffix, which is why the tables below list *_neon functions.
+#define   NAME_WRAP(x)  x ## _neon
+#include "SkBitmapProcState_filter_neon.h"
+#include "SkBitmapProcState_procs.h"
+
+// NEON sample procs that write 32-bit pixels. Entries are laid out in groups
+// of eight per source format (S32, S16, SI8, S4444, SA8); inside a group the
+// order is nofilter_DXDY, nofilter_DX, filter_DXDY, filter_DX, each as an
+// opaque/alpha pair. The indexing scheme itself is defined by the code that
+// reads this table, which is not part of this file.
+const SkBitmapProcState::SampleProc32 gSkBitmapProcStateSample32_neon[] = {
+    S32_opaque_D32_nofilter_DXDY_neon,
+    S32_alpha_D32_nofilter_DXDY_neon,
+    S32_opaque_D32_nofilter_DX_neon,
+    S32_alpha_D32_nofilter_DX_neon,
+    S32_opaque_D32_filter_DXDY_neon,
+    S32_alpha_D32_filter_DXDY_neon,
+    S32_opaque_D32_filter_DX_neon,
+    S32_alpha_D32_filter_DX_neon,
+
+    S16_opaque_D32_nofilter_DXDY_neon,
+    S16_alpha_D32_nofilter_DXDY_neon,
+    S16_opaque_D32_nofilter_DX_neon,
+    S16_alpha_D32_nofilter_DX_neon,
+    S16_opaque_D32_filter_DXDY_neon,
+    S16_alpha_D32_filter_DXDY_neon,
+    S16_opaque_D32_filter_DX_neon,
+    S16_alpha_D32_filter_DX_neon,
+
+    SI8_opaque_D32_nofilter_DXDY_neon,
+    SI8_alpha_D32_nofilter_DXDY_neon,
+    SI8_opaque_D32_nofilter_DX_neon,
+    SI8_alpha_D32_nofilter_DX_neon,
+    SI8_opaque_D32_filter_DXDY_neon,
+    SI8_alpha_D32_filter_DXDY_neon,
+    SI8_opaque_D32_filter_DX_neon,
+    SI8_alpha_D32_filter_DX_neon,
+
+    S4444_opaque_D32_nofilter_DXDY_neon,
+    S4444_alpha_D32_nofilter_DXDY_neon,
+    S4444_opaque_D32_nofilter_DX_neon,
+    S4444_alpha_D32_nofilter_DX_neon,
+    S4444_opaque_D32_filter_DXDY_neon,
+    S4444_alpha_D32_filter_DXDY_neon,
+    S4444_opaque_D32_filter_DX_neon,
+    S4444_alpha_D32_filter_DX_neon,
+
+    // A8 treats alpha/opaque the same (equally efficient), so each proc is
+    // listed twice to keep the opaque/alpha pairing of the other groups.
+    SA8_alpha_D32_nofilter_DXDY_neon,
+    SA8_alpha_D32_nofilter_DXDY_neon,
+    SA8_alpha_D32_nofilter_DX_neon,
+    SA8_alpha_D32_nofilter_DX_neon,
+    SA8_alpha_D32_filter_DXDY_neon,
+    SA8_alpha_D32_filter_DXDY_neon,
+    SA8_alpha_D32_filter_DX_neon,
+    SA8_alpha_D32_filter_DX_neon
+};
+
+// NEON sample procs that write 16-bit pixels. Entries are laid out in groups
+// of four per source format (S32, S16, SI8, then 4444 and A8), ordered
+// nofilter_DXDY, nofilter_DX, filter_DXDY, filter_DX. Unsupported source
+// formats are filled with NULL so the later groups keep their positions.
+const SkBitmapProcState::SampleProc16 gSkBitmapProcStateSample16_neon[] = {
+    S32_D16_nofilter_DXDY_neon,
+    S32_D16_nofilter_DX_neon,
+    S32_D16_filter_DXDY_neon,
+    S32_D16_filter_DX_neon,
+
+    S16_D16_nofilter_DXDY_neon,
+    S16_D16_nofilter_DX_neon,
+    S16_D16_filter_DXDY_neon,
+    S16_D16_filter_DX_neon,
+
+    SI8_D16_nofilter_DXDY_neon,
+    SI8_D16_nofilter_DX_neon,
+    SI8_D16_filter_DXDY_neon,
+    SI8_D16_filter_DX_neon,
+
+    // Don't support 4444 -> 565
+    NULL, NULL, NULL, NULL,
+    // Don't support A8 -> 565
+    NULL, NULL, NULL, NULL
+};
diff --git a/src/opts/SkBitmapProcState_filter_neon.h b/src/opts/SkBitmapProcState_filter_neon.h
new file mode 100644 (file)
index 0000000..aadab91
--- /dev/null
@@ -0,0 +1,88 @@
+
+/*
+ * Copyright 2012 The Android Open Source Project
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+
+#include "SkColorPriv.h"
+
+/*
+    Filter_32_opaque
+    
+    There is no hard-n-fast rule that the filtering must produce
+    exact results for the color components, but if the 4 incoming colors are
+    all opaque, then the output color must also be opaque. Subsequent parts of
+    the drawing pipeline may rely on this (e.g. which blitrow proc to use).
+ */
+
+static inline void Filter_32_opaque_neon(unsigned x, unsigned y, 
+                                         SkPMColor a00, SkPMColor a01,
+                                         SkPMColor a10, SkPMColor a11,
+                                         SkPMColor *dst) {
+    asm volatile(
+                 "vdup.8         d0, %[y]                \n\t"   // duplicate y into d0
+                 "vmov.u8        d16, #16                \n\t"   // set up constant in d16
+                 "vsub.u8        d1, d16, d0             \n\t"   // d1 = 16-y
+                 
+                 "vdup.32        d4, %[a00]              \n\t"   // duplicate a00 into d4
+                 "vdup.32        d5, %[a10]              \n\t"   // duplicate a10 into d5
+                 "vmov.32        d4[1], %[a01]           \n\t"   // set top of d4 to a01
+                 "vmov.32        d5[1], %[a11]           \n\t"   // set top of d5 to a11
+                 
+                 "vmull.u8       q3, d4, d1              \n\t"   // q3 = [a01|a00] * (16-y)
+                 "vmull.u8       q0, d5, d0              \n\t"   // q0 = [a11|a10] * y
+                 
+                 "vdup.16        d5, %[x]                \n\t"   // duplicate x into d5
+                 "vmov.u16       d16, #16                \n\t"   // set up constant in d16
+                 "vsub.u16       d3, d16, d5             \n\t"   // d3 = 16-x
+                 
+                 "vmul.i16       d4, d7, d5              \n\t"   // d4  = a01 * x
+                 "vmla.i16       d4, d1, d5              \n\t"   // d4 += a11 * x
+                 "vmla.i16       d4, d6, d3              \n\t"   // d4 += a00 * (16-x)
+                 "vmla.i16       d4, d0, d3              \n\t"   // d4 += a10 * (16-x)
+                 "vshrn.i16      d0, q2, #8              \n\t"   // shift down result by 8
+                 "vst1.32        {d0[0]}, [%[dst]]       \n\t"   // store result
+                 :
+                 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst)
+                 : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
+                 );
+}
+
+static inline void Filter_32_alpha_neon(unsigned x, unsigned y,
+                                        SkPMColor a00, SkPMColor a01,
+                                        SkPMColor a10, SkPMColor a11,
+                                        SkPMColor *dst, uint16_t scale) {
+    asm volatile(
+                 "vdup.8         d0, %[y]                \n\t"   // duplicate y into d0
+                 "vmov.u8        d16, #16                \n\t"   // set up constant in d16
+                 "vsub.u8        d1, d16, d0             \n\t"   // d1 = 16-y
+                 
+                 "vdup.32        d4, %[a00]              \n\t"   // duplicate a00 into d4
+                 "vdup.32        d5, %[a10]              \n\t"   // duplicate a10 into d5
+                 "vmov.32        d4[1], %[a01]           \n\t"   // set top of d4 to a01
+                 "vmov.32        d5[1], %[a11]           \n\t"   // set top of d5 to a11
+                 
+                 "vmull.u8       q3, d4, d1              \n\t"   // q3 = [a01|a00] * (16-y)
+                 "vmull.u8       q0, d5, d0              \n\t"   // q0 = [a11|a10] * y
+                 
+                 "vdup.16        d5, %[x]                \n\t"   // duplicate x into d5
+                 "vmov.u16       d16, #16                \n\t"   // set up constant in d16
+                 "vsub.u16       d3, d16, d5             \n\t"   // d3 = 16-x
+                 
+                 "vmul.i16       d4, d7, d5              \n\t"   // d4  = a01 * x
+                 "vmla.i16       d4, d1, d5              \n\t"   // d4 += a11 * x
+                 "vmla.i16       d4, d6, d3              \n\t"   // d4 += a00 * (16-x)
+                 "vmla.i16       d4, d0, d3              \n\t"   // d4 += a10 * (16-x)
+                 "vdup.16        d3, %[scale]            \n\t"   // duplicate scale into d3
+                 "vshr.u16       d4, d4, #8              \n\t"   // shift down result by 8
+                 "vmul.i16       d4, d4, d3              \n\t"   // multiply result by scale
+                 "vshrn.i16      d0, q2, #8              \n\t"   // shift down result by 8
+                 "vst1.32        {d0[0]}, [%[dst]]       \n\t"   // store result
+                 :
+                 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale)
+                 : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
+                 );
+}