From: digit@google.com Date: Mon, 13 Aug 2012 14:06:34 +0000 (+0000) Subject: arm: dynamic NEON support for SkBitmapProcState functions. X-Git-Tag: accepted/tizen/5.0/unified/20181102.025319~15215 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3ada0efdc8de8316df8113ec54ffd1a3f33ecd21;p=platform%2Fupstream%2FlibSkiaSharp.git arm: dynamic NEON support for SkBitmapProcState functions. This patch does the following: - Move the NEON-specific code from src/core/SkBitmapProcState_filter.h to src/opts/SkBitmapProcState_filter_neon.h - Implement the NEON-specific functions in the new source file src/opts/SkBitmapProcState_arm_neon.cpp, added to the "opts_neon" static library target. All functions now use the _neon suffix, even in full-NEON builds. - Move most of the content of src/core/SkBitmapProcState.cpp to a new header: src/core/SkBitmapProcState_procs.h This header is included by two source files: src/core/SkBitmapProcState.cpp, to define the regular functions. src/opts/SkBitmapProcState_arm_neon.cpp to define NEON ones. This is to deal with the fact that all NEON functions now use the _neon suffix, even in SK_ARM_NEON_IS_ALWAYS mode, and to be able to include the same header twice in the SK_ARM_NEON_IS_DYNAMIC case. 
Review URL: https://codereview.appspot.com/6449117 git-svn-id: http://skia.googlecode.com/svn/trunk@5055 2bbb7eff-a529-9590-31e7-b0007b416f81 --- diff --git a/gyp/opts.gyp b/gyp/opts.gyp index a9f85cc..2ac395c 100644 --- a/gyp/opts.gyp +++ b/gyp/opts.gyp @@ -142,6 +142,7 @@ 'sources': [ '../src/opts/memset16_neon.S', '../src/opts/memset32_neon.S', + '../src/opts/SkBitmapProcState_arm_neon.cpp', '../src/opts/SkBitmapProcState_matrixProcs_neon.cpp', '../src/opts/SkBitmapProcState_matrix_clamp_neon.h', '../src/opts/SkBitmapProcState_matrix_repeat_neon.h', diff --git a/src/core/SkBitmapProcState.cpp b/src/core/SkBitmapProcState.cpp index 8326a32..01ef5a5 100644 --- a/src/core/SkBitmapProcState.cpp +++ b/src/core/SkBitmapProcState.cpp @@ -6,338 +6,29 @@ * found in the LICENSE file. */ #include "SkBitmapProcState.h" -#include "SkBitmapProcState_filter.h" #include "SkColorPriv.h" #include "SkFilterProc.h" #include "SkPaint.h" #include "SkShader.h" // for tilemodes +#include "SkUtilsArm.h" + +#if !SK_ARM_NEON_IS_NONE +// These are defined in src/opts/SkBitmapProcState_arm_neon.cpp +extern const SkBitmapProcState::SampleProc16 gSkBitmapProcStateSample16_neon[]; +extern const SkBitmapProcState::SampleProc32 gSkBitmapProcStateSample32_neon[]; +extern void S16_D16_filter_DX_neon(const SkBitmapProcState&, const uint32_t*, int, uint16_t*); +extern void Clamp_S16_D16_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint16_t*, int); +extern void Repeat_S16_D16_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint16_t*, int); +extern void SI8_opaque_D32_filter_DX_neon(const SkBitmapProcState&, const uint32_t*, int, SkPMColor*); +extern void SI8_opaque_D32_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint32_t*, int); +extern void Clamp_SI8_opaque_D32_filter_DX_shaderproc_neon(const SkBitmapProcState&, int, int, uint32_t*, int); +#endif -// returns expanded * 5bits -static inline uint32_t Filter_565_Expanded(unsigned x, unsigned y, - 
uint32_t a00, uint32_t a01, - uint32_t a10, uint32_t a11) { - SkASSERT((unsigned)x <= 0xF); - SkASSERT((unsigned)y <= 0xF); - - a00 = SkExpand_rgb_16(a00); - a01 = SkExpand_rgb_16(a01); - a10 = SkExpand_rgb_16(a10); - a11 = SkExpand_rgb_16(a11); - - int xy = x * y >> 3; - return a00 * (32 - 2*y - 2*x + xy) + - a01 * (2*x - xy) + - a10 * (2*y - xy) + - a11 * xy; -} - -// turn an expanded 565 * 5bits into SkPMColor -// g:11 | r:10 | x:1 | b:10 -static inline SkPMColor SkExpanded_565_To_PMColor(uint32_t c) { - unsigned r = (c >> 13) & 0xFF; - unsigned g = (c >> 24); - unsigned b = (c >> 2) & 0xFF; - return SkPackARGB32(0xFF, r, g, b); -} - -// returns answer in SkPMColor format -static inline SkPMColor Filter_4444_D32(unsigned x, unsigned y, - uint32_t a00, uint32_t a01, - uint32_t a10, uint32_t a11) { - SkASSERT((unsigned)x <= 0xF); - SkASSERT((unsigned)y <= 0xF); - - a00 = SkExpand_4444(a00); - a01 = SkExpand_4444(a01); - a10 = SkExpand_4444(a10); - a11 = SkExpand_4444(a11); - - int xy = x * y >> 4; - uint32_t result = a00 * (16 - y - x + xy) + - a01 * (x - xy) + - a10 * (y - xy) + - a11 * xy; - - return SkCompact_8888(result); -} - -static inline U8CPU Filter_8(unsigned x, unsigned y, - U8CPU a00, U8CPU a01, - U8CPU a10, U8CPU a11) { - SkASSERT((unsigned)x <= 0xF); - SkASSERT((unsigned)y <= 0xF); - - int xy = x * y; - unsigned result = a00 * (256 - 16*y - 16*x + xy) + - a01 * (16*x - xy) + - a10 * (16*y - xy) + - a11 * xy; - - return result >> 8; -} - -/***************************************************************************** - * - * D32 functions - * - */ - -// SRC == 8888 - -#define FILTER_PROC(x, y, a, b, c, d, dst) Filter_32_opaque(x, y, a, b, c, d, dst) - -#define MAKENAME(suffix) S32_opaque_D32 ## suffix -#define DSTSIZE 32 -#define SRCTYPE SkPMColor -#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \ - SkASSERT(state.fAlphaScale == 256) -#define RETURNDST(src) src -#define SRC_TO_FILTER(src) src -#include 
"SkBitmapProcState_sample.h" - -#undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d, dst) Filter_32_alpha(x, y, a, b, c, d, dst, alphaScale) - -#define MAKENAME(suffix) S32_alpha_D32 ## suffix -#define DSTSIZE 32 -#define SRCTYPE SkPMColor -#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \ - SkASSERT(state.fAlphaScale < 256) -#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale -#define RETURNDST(src) SkAlphaMulQ(src, alphaScale) -#define SRC_TO_FILTER(src) src -#include "SkBitmapProcState_sample.h" - -// SRC == 565 - -#undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d, dst) \ - do { \ - uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \ - *(dst) = SkExpanded_565_To_PMColor(tmp); \ - } while (0) - -#define MAKENAME(suffix) S16_opaque_D32 ## suffix -#define DSTSIZE 32 -#define SRCTYPE uint16_t -#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config); \ - SkASSERT(state.fAlphaScale == 256) -#define RETURNDST(src) SkPixel16ToPixel32(src) -#define SRC_TO_FILTER(src) src -#include "SkBitmapProcState_sample.h" - -#undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d, dst) \ - do { \ - uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \ - *(dst) = SkAlphaMulQ(SkExpanded_565_To_PMColor(tmp), alphaScale); \ - } while (0) - -#define MAKENAME(suffix) S16_alpha_D32 ## suffix -#define DSTSIZE 32 -#define SRCTYPE uint16_t -#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config); \ - SkASSERT(state.fAlphaScale < 256) -#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale -#define RETURNDST(src) SkAlphaMulQ(SkPixel16ToPixel32(src), alphaScale) -#define SRC_TO_FILTER(src) src -#include "SkBitmapProcState_sample.h" - -// SRC == Index8 - -#undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d, dst) Filter_32_opaque(x, y, a, b, c, d, dst) - -#define MAKENAME(suffix) SI8_opaque_D32 ## suffix -#define DSTSIZE 32 -#define SRCTYPE 
uint8_t -#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \ - SkASSERT(state.fAlphaScale == 256) -#define PREAMBLE(state) const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors() -#define RETURNDST(src) table[src] -#define SRC_TO_FILTER(src) table[src] -#define POSTAMBLE(state) state.fBitmap->getColorTable()->unlockColors(false) -#include "SkBitmapProcState_sample.h" - -#undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d, dst) Filter_32_alpha(x, y, a, b, c, d, dst, alphaScale) - -#define MAKENAME(suffix) SI8_alpha_D32 ## suffix -#define DSTSIZE 32 -#define SRCTYPE uint8_t -#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \ - SkASSERT(state.fAlphaScale < 256) -#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale; \ - const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors() -#define RETURNDST(src) SkAlphaMulQ(table[src], alphaScale) -#define SRC_TO_FILTER(src) table[src] -#define POSTAMBLE(state) state.fBitmap->getColorTable()->unlockColors(false) -#include "SkBitmapProcState_sample.h" - -// SRC == 4444 - -#undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d, dst) *(dst) = Filter_4444_D32(x, y, a, b, c, d) - -#define MAKENAME(suffix) S4444_opaque_D32 ## suffix -#define DSTSIZE 32 -#define SRCTYPE SkPMColor16 -#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_4444_Config); \ - SkASSERT(state.fAlphaScale == 256) -#define RETURNDST(src) SkPixel4444ToPixel32(src) -#define SRC_TO_FILTER(src) src -#include "SkBitmapProcState_sample.h" - -#undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d, dst) \ - do { \ - uint32_t tmp = Filter_4444_D32(x, y, a, b, c, d); \ - *(dst) = SkAlphaMulQ(tmp, alphaScale); \ - } while (0) - -#define MAKENAME(suffix) S4444_alpha_D32 ## suffix -#define DSTSIZE 32 -#define SRCTYPE SkPMColor16 -#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == 
SkBitmap::kARGB_4444_Config); \ - SkASSERT(state.fAlphaScale < 256) -#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale -#define RETURNDST(src) SkAlphaMulQ(SkPixel4444ToPixel32(src), alphaScale) -#define SRC_TO_FILTER(src) src -#include "SkBitmapProcState_sample.h" - -// SRC == A8 - -#undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d, dst) \ - do { \ - unsigned tmp = Filter_8(x, y, a, b, c, d); \ - *(dst) = SkAlphaMulQ(pmColor, SkAlpha255To256(tmp)); \ - } while (0) - -#define MAKENAME(suffix) SA8_alpha_D32 ## suffix -#define DSTSIZE 32 -#define SRCTYPE uint8_t -#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kA8_Config); -#define PREAMBLE(state) const SkPMColor pmColor = state.fPaintPMColor; -#define RETURNDST(src) SkAlphaMulQ(pmColor, SkAlpha255To256(src)) -#define SRC_TO_FILTER(src) src -#include "SkBitmapProcState_sample.h" - -/***************************************************************************** - * - * D16 functions - * - */ - -// SRC == 8888 - -#undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d, dst) \ - do { \ - SkPMColor dstColor; \ - Filter_32_opaque(x, y, a, b, c, d, &dstColor); \ - (*dst) = SkPixel32ToPixel16(dstColor); \ - } while (0) - -#define MAKENAME(suffix) S32_D16 ## suffix -#define DSTSIZE 16 -#define SRCTYPE SkPMColor -#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \ - SkASSERT(state.fBitmap->isOpaque()) -#define RETURNDST(src) SkPixel32ToPixel16(src) -#define SRC_TO_FILTER(src) src -#include "SkBitmapProcState_sample.h" - -// SRC == 565 - -#undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d, dst) \ - do { \ - uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \ - *(dst) = SkCompact_rgb_16((tmp) >> 5); \ - } while (0) - -#define MAKENAME(suffix) S16_D16 ## suffix -#define DSTSIZE 16 -#define SRCTYPE uint16_t -#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config) -#define RETURNDST(src) src -#define 
SRC_TO_FILTER(src) src -#include "SkBitmapProcState_sample.h" - -// SRC == Index8 - -#undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d, dst) \ - do { \ - uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \ - *(dst) = SkCompact_rgb_16((tmp) >> 5); \ - } while (0) - -#define MAKENAME(suffix) SI8_D16 ## suffix -#define DSTSIZE 16 -#define SRCTYPE uint8_t -#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \ - SkASSERT(state.fBitmap->isOpaque()) -#define PREAMBLE(state) const uint16_t* SK_RESTRICT table = state.fBitmap->getColorTable()->lock16BitCache() -#define RETURNDST(src) table[src] -#define SRC_TO_FILTER(src) table[src] -#define POSTAMBLE(state) state.fBitmap->getColorTable()->unlock16BitCache() -#include "SkBitmapProcState_sample.h" - -/////////////////////////////////////////////////////////////////////////////// - -#undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d, dst) \ - do { \ - uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \ - *(dst) = SkCompact_rgb_16((tmp) >> 5); \ - } while (0) - - -// clamp - -#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max) -#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max) -#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF) -#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF) - -#define MAKENAME(suffix) Clamp_S16_D16 ## suffix -#define SRCTYPE uint16_t -#define DSTTYPE uint16_t -#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config) -#define SRC_TO_FILTER(src) src -#include "SkBitmapProcState_shaderproc.h" - - -#define TILEX_PROCF(fx, max) (((fx) & 0xFFFF) * ((max) + 1) >> 16) -#define TILEY_PROCF(fy, max) (((fy) & 0xFFFF) * ((max) + 1) >> 16) -#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) -#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) - -#define MAKENAME(suffix) Repeat_S16_D16 ## suffix -#define SRCTYPE uint16_t -#define DSTTYPE uint16_t -#define 
CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config) -#define SRC_TO_FILTER(src) src -#include "SkBitmapProcState_shaderproc.h" - - -#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max) -#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max) -#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF) -#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF) - -#undef FILTER_PROC -#define FILTER_PROC(x, y, a, b, c, d, dst) Filter_32_opaque(x, y, a, b, c, d, dst) -#define MAKENAME(suffix) Clamp_SI8_opaque_D32 ## suffix -#define SRCTYPE uint8_t -#define DSTTYPE uint32_t -#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config) -#define PREAMBLE(state) const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors() -#define SRC_TO_FILTER(src) table[src] -#define POSTAMBLE(state) state.fBitmap->getColorTable()->unlockColors(false) -#include "SkBitmapProcState_shaderproc.h" +#if !SK_ARM_NEON_IS_ALWAYS +#define NAME_WRAP(x) x +#include "SkBitmapProcState_filter.h" +#include "SkBitmapProcState_procs.h" +#endif /////////////////////////////////////////////////////////////////////////////// @@ -448,7 +139,8 @@ bool SkBitmapProcState::chooseProcs(const SkMatrix& inv, const SkPaint& paint) { return false; } - static const SampleProc32 gSample32[] = { +#if !SK_ARM_NEON_IS_ALWAYS + static const SampleProc32 gSkBitmapProcStateSample32[] = { S32_opaque_D32_nofilter_DXDY, S32_alpha_D32_nofilter_DXDY, S32_opaque_D32_nofilter_DX, @@ -496,7 +188,7 @@ bool SkBitmapProcState::chooseProcs(const SkMatrix& inv, const SkPaint& paint) { SA8_alpha_D32_filter_DX }; - static const SampleProc16 gSample16[] = { + static const SampleProc16 gSkBitmapProcStateSample16[] = { S32_D16_nofilter_DXDY, S32_D16_nofilter_DX, S32_D16_filter_DXDY, @@ -517,21 +209,22 @@ bool SkBitmapProcState::chooseProcs(const SkMatrix& inv, const SkPaint& paint) { // Don't support A8 -> 565 NULL, NULL, NULL, NULL }; +#endif - fSampleProc32 = 
gSample32[index]; + fSampleProc32 = SK_ARM_NEON_WRAP(gSkBitmapProcStateSample32)[index]; index >>= 1; // shift away any opaque/alpha distinction - fSampleProc16 = gSample16[index]; + fSampleProc16 = SK_ARM_NEON_WRAP(gSkBitmapProcStateSample16)[index]; // our special-case shaderprocs - if (S16_D16_filter_DX == fSampleProc16) { + if (SK_ARM_NEON_WRAP(S16_D16_filter_DX) == fSampleProc16) { if (clamp_clamp) { - fShaderProc16 = Clamp_S16_D16_filter_DX_shaderproc; + fShaderProc16 = SK_ARM_NEON_WRAP(Clamp_S16_D16_filter_DX_shaderproc); } else if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) { - fShaderProc16 = Repeat_S16_D16_filter_DX_shaderproc; + fShaderProc16 = SK_ARM_NEON_WRAP(Repeat_S16_D16_filter_DX_shaderproc); } - } else if (SI8_opaque_D32_filter_DX == fSampleProc32 && clamp_clamp) { - fShaderProc32 = Clamp_SI8_opaque_D32_filter_DX_shaderproc; + } else if (SK_ARM_NEON_WRAP(SI8_opaque_D32_filter_DX) == fSampleProc32 && clamp_clamp) { + fShaderProc32 = SK_ARM_NEON_WRAP(Clamp_SI8_opaque_D32_filter_DX_shaderproc); } // see if our platform has any accelerated overrides diff --git a/src/core/SkBitmapProcState_filter.h b/src/core/SkBitmapProcState_filter.h index f69e17a..7fcf754 100644 --- a/src/core/SkBitmapProcState_filter.h +++ b/src/core/SkBitmapProcState_filter.h @@ -18,82 +18,10 @@ the drawing pipeline may rely on this (e.g. which blitrow proc to use). 
*/ -#if defined(__ARM_HAVE_NEON) && !defined(SK_CPU_BENDIAN) -static inline void Filter_32_opaque_neon(unsigned x, unsigned y, +static inline void Filter_32_opaque(unsigned x, unsigned y, SkPMColor a00, SkPMColor a01, SkPMColor a10, SkPMColor a11, - SkPMColor *dst) { - asm volatile( - "vdup.8 d0, %[y] \n\t" // duplicate y into d0 - "vmov.u8 d16, #16 \n\t" // set up constant in d16 - "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y - - "vdup.32 d4, %[a00] \n\t" // duplicate a00 into d4 - "vdup.32 d5, %[a10] \n\t" // duplicate a10 into d5 - "vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01 - "vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11 - - "vmull.u8 q3, d4, d1 \n\t" // q3 = [a01|a00] * (16-y) - "vmull.u8 q0, d5, d0 \n\t" // q0 = [a11|a10] * y - - "vdup.16 d5, %[x] \n\t" // duplicate x into d5 - "vmov.u16 d16, #16 \n\t" // set up constant in d16 - "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x - - "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * x - "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x - "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x) - "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x) - "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8 - "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result - : - : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst) - : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16" - ); -} - -static inline void Filter_32_alpha_neon(unsigned x, unsigned y, - SkPMColor a00, SkPMColor a01, - SkPMColor a10, SkPMColor a11, - SkPMColor *dst, uint16_t scale) { - asm volatile( - "vdup.8 d0, %[y] \n\t" // duplicate y into d0 - "vmov.u8 d16, #16 \n\t" // set up constant in d16 - "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y - - "vdup.32 d4, %[a00] \n\t" // duplicate a00 into d4 - "vdup.32 d5, %[a10] \n\t" // duplicate a10 into d5 - "vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01 - "vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11 - - "vmull.u8 q3, d4, d1 \n\t" // q3 = [a01|a00] 
* (16-y) - "vmull.u8 q0, d5, d0 \n\t" // q0 = [a11|a10] * y - - "vdup.16 d5, %[x] \n\t" // duplicate x into d5 - "vmov.u16 d16, #16 \n\t" // set up constant in d16 - "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x - - "vmul.i16 d4, d7, d5 \n\t" // d4 = a01 * x - "vmla.i16 d4, d1, d5 \n\t" // d4 += a11 * x - "vmla.i16 d4, d6, d3 \n\t" // d4 += a00 * (16-x) - "vmla.i16 d4, d0, d3 \n\t" // d4 += a10 * (16-x) - "vdup.16 d3, %[scale] \n\t" // duplicate scale into d3 - "vshr.u16 d4, d4, #8 \n\t" // shift down result by 8 - "vmul.i16 d4, d4, d3 \n\t" // multiply result by scale - "vshrn.i16 d0, q2, #8 \n\t" // shift down result by 8 - "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result - : - : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale) - : "cc", "memory", "r4", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16" - ); -} -#define Filter_32_opaque Filter_32_opaque_neon -#define Filter_32_alpha Filter_32_alpha_neon -#else -static inline void Filter_32_opaque_portable(unsigned x, unsigned y, - SkPMColor a00, SkPMColor a01, - SkPMColor a10, SkPMColor a11, - SkPMColor* dstColor) { + SkPMColor* dstColor) { SkASSERT((unsigned)x <= 0xF); SkASSERT((unsigned)y <= 0xF); @@ -118,11 +46,11 @@ static inline void Filter_32_opaque_portable(unsigned x, unsigned y, *dstColor = ((lo >> 8) & mask) | (hi & ~mask); } -static inline void Filter_32_alpha_portable(unsigned x, unsigned y, - SkPMColor a00, SkPMColor a01, - SkPMColor a10, SkPMColor a11, - SkPMColor* dstColor, - unsigned alphaScale) { +static inline void Filter_32_alpha(unsigned x, unsigned y, + SkPMColor a00, SkPMColor a01, + SkPMColor a10, SkPMColor a11, + SkPMColor* dstColor, + unsigned alphaScale) { SkASSERT((unsigned)x <= 0xF); SkASSERT((unsigned)y <= 0xF); SkASSERT(alphaScale <= 256); @@ -150,7 +78,4 @@ static inline void Filter_32_alpha_portable(unsigned x, unsigned y, *dstColor = ((lo >> 8) & mask) | (hi & ~mask); } -#define Filter_32_opaque 
Filter_32_opaque_portable -#define Filter_32_alpha Filter_32_alpha_portable -#endif diff --git a/src/core/SkBitmapProcState_procs.h b/src/core/SkBitmapProcState_procs.h new file mode 100644 index 0000000..1b9328e --- /dev/null +++ b/src/core/SkBitmapProcState_procs.h @@ -0,0 +1,343 @@ + +/* + * Copyright 2011 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +// Define NAME_WRAP(x) before including this header to perform name-wrapping +// E.g. for ARM NEON, defined it as 'x ## _neon' to ensure all important +// identifiers have a _neon suffix. +#ifndef NAME_WRAP +#error "Please define NAME_WRAP() before including this file" +#endif + +// returns expanded * 5bits +static inline uint32_t Filter_565_Expanded(unsigned x, unsigned y, + uint32_t a00, uint32_t a01, + uint32_t a10, uint32_t a11) { + SkASSERT((unsigned)x <= 0xF); + SkASSERT((unsigned)y <= 0xF); + + a00 = SkExpand_rgb_16(a00); + a01 = SkExpand_rgb_16(a01); + a10 = SkExpand_rgb_16(a10); + a11 = SkExpand_rgb_16(a11); + + int xy = x * y >> 3; + return a00 * (32 - 2*y - 2*x + xy) + + a01 * (2*x - xy) + + a10 * (2*y - xy) + + a11 * xy; +} + +// turn an expanded 565 * 5bits into SkPMColor +// g:11 | r:10 | x:1 | b:10 +static inline SkPMColor SkExpanded_565_To_PMColor(uint32_t c) { + unsigned r = (c >> 13) & 0xFF; + unsigned g = (c >> 24); + unsigned b = (c >> 2) & 0xFF; + return SkPackARGB32(0xFF, r, g, b); +} + +// returns answer in SkPMColor format +static inline SkPMColor Filter_4444_D32(unsigned x, unsigned y, + uint32_t a00, uint32_t a01, + uint32_t a10, uint32_t a11) { + SkASSERT((unsigned)x <= 0xF); + SkASSERT((unsigned)y <= 0xF); + + a00 = SkExpand_4444(a00); + a01 = SkExpand_4444(a01); + a10 = SkExpand_4444(a10); + a11 = SkExpand_4444(a11); + + int xy = x * y >> 4; + uint32_t result = a00 * (16 - y - x + xy) + + a01 * (x - xy) + + a10 * (y - xy) + + a11 * xy; + + return SkCompact_8888(result); +} + +static inline U8CPU 
Filter_8(unsigned x, unsigned y, + U8CPU a00, U8CPU a01, + U8CPU a10, U8CPU a11) { + SkASSERT((unsigned)x <= 0xF); + SkASSERT((unsigned)y <= 0xF); + + int xy = x * y; + unsigned result = a00 * (256 - 16*y - 16*x + xy) + + a01 * (16*x - xy) + + a10 * (16*y - xy) + + a11 * xy; + + return result >> 8; +} + +/***************************************************************************** + * + * D32 functions + * + */ + +// SRC == 8888 + +#define FILTER_PROC(x, y, a, b, c, d, dst) NAME_WRAP(Filter_32_opaque)(x, y, a, b, c, d, dst) + +#define MAKENAME(suffix) NAME_WRAP(S32_opaque_D32 ## suffix) +#define DSTSIZE 32 +#define SRCTYPE SkPMColor +#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \ + SkASSERT(state.fAlphaScale == 256) +#define RETURNDST(src) src +#define SRC_TO_FILTER(src) src +#include "SkBitmapProcState_sample.h" + +#undef FILTER_PROC +#define FILTER_PROC(x, y, a, b, c, d, dst) NAME_WRAP(Filter_32_alpha)(x, y, a, b, c, d, dst, alphaScale) + +#define MAKENAME(suffix) NAME_WRAP(S32_alpha_D32 ## suffix) +#define DSTSIZE 32 +#define SRCTYPE SkPMColor +#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \ + SkASSERT(state.fAlphaScale < 256) +#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale +#define RETURNDST(src) SkAlphaMulQ(src, alphaScale) +#define SRC_TO_FILTER(src) src +#include "SkBitmapProcState_sample.h" + +// SRC == 565 + +#undef FILTER_PROC +#define FILTER_PROC(x, y, a, b, c, d, dst) \ + do { \ + uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \ + *(dst) = SkExpanded_565_To_PMColor(tmp); \ + } while (0) + +#define MAKENAME(suffix) NAME_WRAP(S16_opaque_D32 ## suffix) +#define DSTSIZE 32 +#define SRCTYPE uint16_t +#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config); \ + SkASSERT(state.fAlphaScale == 256) +#define RETURNDST(src) SkPixel16ToPixel32(src) +#define SRC_TO_FILTER(src) src +#include 
"SkBitmapProcState_sample.h" + +#undef FILTER_PROC +#define FILTER_PROC(x, y, a, b, c, d, dst) \ + do { \ + uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \ + *(dst) = SkAlphaMulQ(SkExpanded_565_To_PMColor(tmp), alphaScale); \ + } while (0) + +#define MAKENAME(suffix) NAME_WRAP(S16_alpha_D32 ## suffix) +#define DSTSIZE 32 +#define SRCTYPE uint16_t +#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config); \ + SkASSERT(state.fAlphaScale < 256) +#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale +#define RETURNDST(src) SkAlphaMulQ(SkPixel16ToPixel32(src), alphaScale) +#define SRC_TO_FILTER(src) src +#include "SkBitmapProcState_sample.h" + +// SRC == Index8 + +#undef FILTER_PROC +#define FILTER_PROC(x, y, a, b, c, d, dst) NAME_WRAP(Filter_32_opaque)(x, y, a, b, c, d, dst) + +#define MAKENAME(suffix) NAME_WRAP(SI8_opaque_D32 ## suffix) +#define DSTSIZE 32 +#define SRCTYPE uint8_t +#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \ + SkASSERT(state.fAlphaScale == 256) +#define PREAMBLE(state) const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors() +#define RETURNDST(src) table[src] +#define SRC_TO_FILTER(src) table[src] +#define POSTAMBLE(state) state.fBitmap->getColorTable()->unlockColors(false) +#include "SkBitmapProcState_sample.h" + +#undef FILTER_PROC +#define FILTER_PROC(x, y, a, b, c, d, dst) NAME_WRAP(Filter_32_alpha)(x, y, a, b, c, d, dst, alphaScale) + +#define MAKENAME(suffix) NAME_WRAP(SI8_alpha_D32 ## suffix) +#define DSTSIZE 32 +#define SRCTYPE uint8_t +#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \ + SkASSERT(state.fAlphaScale < 256) +#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale; \ + const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors() +#define RETURNDST(src) SkAlphaMulQ(table[src], alphaScale) +#define SRC_TO_FILTER(src) table[src] +#define 
POSTAMBLE(state) state.fBitmap->getColorTable()->unlockColors(false) +#include "SkBitmapProcState_sample.h" + +// SRC == 4444 + +#undef FILTER_PROC +#define FILTER_PROC(x, y, a, b, c, d, dst) *(dst) = Filter_4444_D32(x, y, a, b, c, d) + +#define MAKENAME(suffix) NAME_WRAP(S4444_opaque_D32 ## suffix) +#define DSTSIZE 32 +#define SRCTYPE SkPMColor16 +#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_4444_Config); \ + SkASSERT(state.fAlphaScale == 256) +#define RETURNDST(src) SkPixel4444ToPixel32(src) +#define SRC_TO_FILTER(src) src +#include "SkBitmapProcState_sample.h" + +#undef FILTER_PROC +#define FILTER_PROC(x, y, a, b, c, d, dst) \ + do { \ + uint32_t tmp = Filter_4444_D32(x, y, a, b, c, d); \ + *(dst) = SkAlphaMulQ(tmp, alphaScale); \ + } while (0) + +#define MAKENAME(suffix) NAME_WRAP(S4444_alpha_D32 ## suffix) +#define DSTSIZE 32 +#define SRCTYPE SkPMColor16 +#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_4444_Config); \ + SkASSERT(state.fAlphaScale < 256) +#define PREAMBLE(state) unsigned alphaScale = state.fAlphaScale +#define RETURNDST(src) SkAlphaMulQ(SkPixel4444ToPixel32(src), alphaScale) +#define SRC_TO_FILTER(src) src +#include "SkBitmapProcState_sample.h" + +// SRC == A8 + +#undef FILTER_PROC +#define FILTER_PROC(x, y, a, b, c, d, dst) \ + do { \ + unsigned tmp = Filter_8(x, y, a, b, c, d); \ + *(dst) = SkAlphaMulQ(pmColor, SkAlpha255To256(tmp)); \ + } while (0) + +#define MAKENAME(suffix) NAME_WRAP(SA8_alpha_D32 ## suffix) +#define DSTSIZE 32 +#define SRCTYPE uint8_t +#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kA8_Config); +#define PREAMBLE(state) const SkPMColor pmColor = state.fPaintPMColor; +#define RETURNDST(src) SkAlphaMulQ(pmColor, SkAlpha255To256(src)) +#define SRC_TO_FILTER(src) src +#include "SkBitmapProcState_sample.h" + +/***************************************************************************** + * + * D16 functions + * + */ + +// SRC == 8888 + 
+#undef FILTER_PROC  // 8888 -> 565: blend in 32-bit with Filter_32_opaque, then pack the result with SkPixel32ToPixel16
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+    do { \
+        SkPMColor dstColor; \
+        NAME_WRAP(Filter_32_opaque)(x, y, a, b, c, d, &dstColor); \
+        (*dst) = SkPixel32ToPixel16(dstColor); \
+    } while (0)
+
+#define MAKENAME(suffix) NAME_WRAP(S32_D16 ## suffix)  // names are S32_D16<suffix>, passed through NAME_WRAP
+#define DSTSIZE 16
+#define SRCTYPE SkPMColor
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kARGB_8888_Config); \
+                          SkASSERT(state.fBitmap->isOpaque())
+#define RETURNDST(src) SkPixel32ToPixel16(src)
+#define SRC_TO_FILTER(src) src
+#include "SkBitmapProcState_sample.h"  // NOTE(review): presumably consumes and #undefs the macros above, since they are redefined below without #undef -- confirm
+
+// SRC == 565: blend with Filter_565_Expanded, shift the result right by 5, then re-pack it with SkCompact_rgb_16
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+    do { \
+        uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \
+        *(dst) = SkCompact_rgb_16((tmp) >> 5); \
+    } while (0)
+
+#define MAKENAME(suffix) NAME_WRAP(S16_D16 ## suffix)
+#define DSTSIZE 16
+#define SRCTYPE uint16_t
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config)
+#define RETURNDST(src) src
+#define SRC_TO_FILTER(src) src
+#include "SkBitmapProcState_sample.h"
+
+// SRC == Index8: every index is translated through the color table's 16-bit cache, locked in PREAMBLE and released in POSTAMBLE
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+    do { \
+        uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \
+        *(dst) = SkCompact_rgb_16((tmp) >> 5); \
+    } while (0)
+
+#define MAKENAME(suffix) NAME_WRAP(SI8_D16 ## suffix)
+#define DSTSIZE 16
+#define SRCTYPE uint8_t
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config); \
+                          SkASSERT(state.fBitmap->isOpaque())
+#define PREAMBLE(state) const uint16_t* SK_RESTRICT table = state.fBitmap->getColorTable()->lock16BitCache()
+#define RETURNDST(src) table[src]
+#define SRC_TO_FILTER(src) table[src]
+#define POSTAMBLE(state) state.fBitmap->getColorTable()->unlock16BitCache()
+#include "SkBitmapProcState_sample.h"
+
+///////////////////////////////////////////////////////////////////////////////
+// Shaderproc instantiations (SkBitmapProcState_shaderproc.h). This FILTER_PROC is the same 565 blend as above.
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) \
+    do { \
+        uint32_t tmp = Filter_565_Expanded(x, y, a, b, c, d); \
+        *(dst) = SkCompact_rgb_16((tmp) >> 5); \
+    } while (0)
+
+
+// clamp: the tile index is SkClampMax(f >> 16, max); the LOW_BITS macros extract bits 12..15 of the coordinate as a 4-bit value
+
+#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max)
+#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max)
+#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
+#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
+
+#define MAKENAME(suffix) NAME_WRAP(Clamp_S16_D16 ## suffix)
+#define SRCTYPE uint16_t
+#define DSTTYPE uint16_t
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config)
+#define SRC_TO_FILTER(src) src
+#include "SkBitmapProcState_shaderproc.h"
+
+// repeat: only the low 16 bits of the coordinate are kept and scaled by (max + 1); >> 16 yields the tile index, bits 12..15 of the product the 4-bit value
+#define TILEX_PROCF(fx, max) (((fx) & 0xFFFF) * ((max) + 1) >> 16)
+#define TILEY_PROCF(fy, max) (((fy) & 0xFFFF) * ((max) + 1) >> 16)
+#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
+#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
+
+#define MAKENAME(suffix) NAME_WRAP(Repeat_S16_D16 ## suffix)
+#define SRCTYPE uint16_t
+#define DSTTYPE uint16_t
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kRGB_565_Config)
+#define SRC_TO_FILTER(src) src
+#include "SkBitmapProcState_shaderproc.h"
+
+// clamp, Index8 -> 32-bit: pixels are read from the color table's lockColors() array and blended by NAME_WRAP(Filter_32_opaque)
+#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max)
+#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max)
+#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
+#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
+
+#undef FILTER_PROC
+#define FILTER_PROC(x, y, a, b, c, d, dst) NAME_WRAP(Filter_32_opaque)(x, y, a, b, c, d, dst)
+#define MAKENAME(suffix) NAME_WRAP(Clamp_SI8_opaque_D32 ## suffix)
+#define SRCTYPE uint8_t
+#define DSTTYPE uint32_t
+#define CHECKSTATE(state) SkASSERT(state.fBitmap->config() == SkBitmap::kIndex8_Config)  // NOTE(review): unlike SI8_D16 above, isOpaque() is not asserted although the name says "opaque" -- confirm intended
+#define PREAMBLE(state) const SkPMColor* SK_RESTRICT table = state.fBitmap->getColorTable()->lockColors()
+#define SRC_TO_FILTER(src) table[src]
+#define POSTAMBLE(state) state.fBitmap->getColorTable()->unlockColors(false)
+#include "SkBitmapProcState_shaderproc.h"
+
+#undef NAME_WRAP  // NAME_WRAP is not defined in this header; presumably the including file defines it first
\ No newline at end of file
diff --git a/src/core/SkBitmapProcState_shaderproc.h b/src/core/SkBitmapProcState_shaderproc.h
index ead57f1..33c238c 100644
--- a/src/core/SkBitmapProcState_shaderproc.h
+++ b/src/core/SkBitmapProcState_shaderproc.h
@@ -10,8 +10,10 @@
 
 #define SCALE_FILTER_NAME MAKENAME(_filter_DX_shaderproc)
 
-static void SCALE_FILTER_NAME(const SkBitmapProcState& s, int x, int y,
-                              DSTTYPE* SK_RESTRICT colors, int count) {
+// Can't be static in the general case because some of these implementations
+// will be defined and referenced in different object files.
+void SCALE_FILTER_NAME(const SkBitmapProcState& s, int x, int y,
+                       DSTTYPE* SK_RESTRICT colors, int count) {
     SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
                              SkMatrix::kScale_Mask)) == 0);
     SkASSERT(s.fInvKy == 0);
diff --git a/src/opts/SkBitmapProcState_arm_neon.cpp b/src/opts/SkBitmapProcState_arm_neon.cpp
new file mode 100644
index 0000000..d50707d
--- /dev/null
+++ b/src/opts/SkBitmapProcState_arm_neon.cpp
@@ -0,0 +1,92 @@
+
+/*
+ * Copyright 2012 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#include "SkBitmapProcState.h"
+#include "SkBitmapProcState_filter.h"
+#include "SkColorPriv.h"
+#include "SkFilterProc.h"
+#include "SkPaint.h"
+#include "SkShader.h" // for tilemodes
+#include "SkUtilsArm.h"
+
+// Required to ensure the table is part of the final binary.
+extern const SkBitmapProcState::SampleProc32 gSkBitmapProcStateSample32_neon[];
+extern const SkBitmapProcState::SampleProc16 gSkBitmapProcStateSample16_neon[];
+
+#define NAME_WRAP(x) x ## _neon  // appends _neon to every name built through NAME_WRAP in SkBitmapProcState_procs.h
+#include "SkBitmapProcState_filter_neon.h"
+#include "SkBitmapProcState_procs.h"
+/*
+ * NEON sample procs with 32-bit destination (D32). Eight entries per source
+ * format; within each group the order is nofilter_DXDY, nofilter_DX,
+ * filter_DXDY, filter_DX, with the opaque variant listed before the alpha one.
+ * NOTE(review): presumably indexed by the proc-selection code, which is not
+ * visible here -- confirm before reordering or inserting entries.
+ */
+const SkBitmapProcState::SampleProc32 gSkBitmapProcStateSample32_neon[] = {
+    S32_opaque_D32_nofilter_DXDY_neon,  // S32 source
+    S32_alpha_D32_nofilter_DXDY_neon,
+    S32_opaque_D32_nofilter_DX_neon,
+    S32_alpha_D32_nofilter_DX_neon,
+    S32_opaque_D32_filter_DXDY_neon,
+    S32_alpha_D32_filter_DXDY_neon,
+    S32_opaque_D32_filter_DX_neon,
+    S32_alpha_D32_filter_DX_neon,
+
+    S16_opaque_D32_nofilter_DXDY_neon,  // S16 source
+    S16_alpha_D32_nofilter_DXDY_neon,
+    S16_opaque_D32_nofilter_DX_neon,
+    S16_alpha_D32_nofilter_DX_neon,
+    S16_opaque_D32_filter_DXDY_neon,
+    S16_alpha_D32_filter_DXDY_neon,
+    S16_opaque_D32_filter_DX_neon,
+    S16_alpha_D32_filter_DX_neon,
+
+    SI8_opaque_D32_nofilter_DXDY_neon,  // SI8 (indexed) source
+    SI8_alpha_D32_nofilter_DXDY_neon,
+    SI8_opaque_D32_nofilter_DX_neon,
+    SI8_alpha_D32_nofilter_DX_neon,
+    SI8_opaque_D32_filter_DXDY_neon,
+    SI8_alpha_D32_filter_DXDY_neon,
+    SI8_opaque_D32_filter_DX_neon,
+    SI8_alpha_D32_filter_DX_neon,
+
+    S4444_opaque_D32_nofilter_DXDY_neon,  // S4444 source
+    S4444_alpha_D32_nofilter_DXDY_neon,
+    S4444_opaque_D32_nofilter_DX_neon,
+    S4444_alpha_D32_nofilter_DX_neon,
+    S4444_opaque_D32_filter_DXDY_neon,
+    S4444_alpha_D32_filter_DXDY_neon,
+    S4444_opaque_D32_filter_DX_neon,
+    S4444_alpha_D32_filter_DX_neon,
+
+    // A8 treats alpha/opaque the same (equally efficient), so each alpha proc fills both the opaque and the alpha slot
+    SA8_alpha_D32_nofilter_DXDY_neon,
+    SA8_alpha_D32_nofilter_DXDY_neon,
+    SA8_alpha_D32_nofilter_DX_neon,
+    SA8_alpha_D32_nofilter_DX_neon,
+    SA8_alpha_D32_filter_DXDY_neon,
+    SA8_alpha_D32_filter_DXDY_neon,
+    SA8_alpha_D32_filter_DX_neon,
+    SA8_alpha_D32_filter_DX_neon
+};
+/*
+ * NEON sample procs with 16-bit destination (D16). Four entries per source
+ * format, in the order nofilter_DXDY, nofilter_DX, filter_DXDY, filter_DX;
+ * the unsupported 4444 and A8 sources are filled with NULL.
+ * NOTE(review): callers presumably must handle the NULL slots -- confirm.
+ */
+const SkBitmapProcState::SampleProc16 gSkBitmapProcStateSample16_neon[] = {
+    S32_D16_nofilter_DXDY_neon,
+    S32_D16_nofilter_DX_neon,
+    S32_D16_filter_DXDY_neon,
+    S32_D16_filter_DX_neon,
+
+    S16_D16_nofilter_DXDY_neon,
+    S16_D16_nofilter_DX_neon,
+    S16_D16_filter_DXDY_neon,
+    S16_D16_filter_DX_neon,
+
+    SI8_D16_nofilter_DXDY_neon,
+    SI8_D16_nofilter_DX_neon,
+    SI8_D16_filter_DXDY_neon,
+    SI8_D16_filter_DX_neon,
+
+    // Don't support 4444 -> 565
+    NULL, NULL, NULL, NULL,
+    // Don't support A8 -> 565
+    NULL, NULL, NULL, NULL
+};
diff --git a/src/opts/SkBitmapProcState_filter_neon.h b/src/opts/SkBitmapProcState_filter_neon.h
new file mode 100644
index 0000000..aadab91
--- /dev/null
+++ b/src/opts/SkBitmapProcState_filter_neon.h
@@ -0,0 +1,88 @@
+
+/*
+ * Copyright 2012 The Android Open Source Project
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+
+#include "SkColorPriv.h"
+
+/*
+    Filter_32_opaque
+
+    There is no hard-n-fast rule that the filtering must produce
+    exact results for the color components, but if the 4 incoming colors are
+    all opaque, then the output color must also be opaque. Subsequent parts of
+    the drawing pipeline may rely on this (e.g. which blitrow proc to use).
+ */
+
+/*
+ * Blends four 32-bit pixels with NEON and stores one pixel to *dst.
+ *
+ * Per 8-bit channel the stored value is
+ *     (a00*(16-y)*(16-x) + a01*(16-y)*x + a10*y*(16-x) + a11*y*x) >> 8
+ * so the four weights always sum to 256.
+ *
+ * x, y  : horizontal / vertical weights; only the low 16 bits of x and the
+ *         low 8 bits of y are used. NOTE(review): presumably 4-bit values
+ *         (0..15) -- confirm against the callers.
+ * a00, a01 : the two pixels weighted by (16-y); a01 is the one weighted by x.
+ * a10, a11 : the two pixels weighted by y; a11 is the one weighted by x.
+ * dst   : receives the blended pixel (32-bit store).
+ *
+ * The asm reaches general-purpose registers only through its %[...] operands,
+ * so no core register is listed as clobbered. Listing one (e.g. the
+ * callee-saved r4) would force a needless save/restore at every inlined use.
+ */
+static inline void Filter_32_opaque_neon(unsigned x, unsigned y,
+                                         SkPMColor a00, SkPMColor a01,
+                                         SkPMColor a10, SkPMColor a11,
+                                         SkPMColor *dst) {
+    asm volatile(
+                 "vdup.8 d0, %[y] \n\t" // duplicate y into d0
+                 "vmov.u8 d16, #16 \n\t" // set up constant in d16
+                 "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y
+
+                 "vdup.32 d4, %[a00] \n\t" // duplicate a00 into d4
+                 "vdup.32 d5, %[a10] \n\t" // duplicate a10 into d5
+                 "vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01
+                 "vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11
+
+                 "vmull.u8 q3, d4, d1 \n\t" // q3 (d6|d7) = [a00|a01] * (16-y), widened to 16 bits
+                 "vmull.u8 q0, d5, d0 \n\t" // q0 (d0|d1) = [a10|a11] * y, widened to 16 bits
+
+                 "vdup.16 d5, %[x] \n\t" // duplicate x into d5
+                 "vmov.u16 d16, #16 \n\t" // set up constant in d16
+                 "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x
+
+                 "vmul.i16 d4, d7, d5 \n\t" // d4 = a01*(16-y) * x
+                 "vmla.i16 d4, d1, d5 \n\t" // d4 += a11*y * x
+                 "vmla.i16 d4, d6, d3 \n\t" // d4 += a00*(16-y) * (16-x)
+                 "vmla.i16 d4, d0, d3 \n\t" // d4 += a10*y * (16-x)
+                 "vshrn.i16 d0, q2, #8 \n\t" // narrow q2 (d4|d5) >> 8; only the d4 half is stored
+                 "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result
+                 :
+                 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst)
+                 : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
+                 );
+}
+
+/*
+ * Same blend as Filter_32_opaque_neon, followed by a multiply by scale:
+ * each channel of the blended pixel (already shifted down by 8) is multiplied
+ * by the low 16 bits of scale and shifted down by 8 again before the store.
+ * NOTE(review): scale is presumably an alpha scale in 0..256 -- confirm
+ * against the callers.
+ *
+ * As above, no core register is listed as clobbered because the asm touches
+ * general-purpose registers only through its operands.
+ */
+static inline void Filter_32_alpha_neon(unsigned x, unsigned y,
+                                        SkPMColor a00, SkPMColor a01,
+                                        SkPMColor a10, SkPMColor a11,
+                                        SkPMColor *dst, uint16_t scale) {
+    asm volatile(
+                 "vdup.8 d0, %[y] \n\t" // duplicate y into d0
+                 "vmov.u8 d16, #16 \n\t" // set up constant in d16
+                 "vsub.u8 d1, d16, d0 \n\t" // d1 = 16-y
+
+                 "vdup.32 d4, %[a00] \n\t" // duplicate a00 into d4
+                 "vdup.32 d5, %[a10] \n\t" // duplicate a10 into d5
+                 "vmov.32 d4[1], %[a01] \n\t" // set top of d4 to a01
+                 "vmov.32 d5[1], %[a11] \n\t" // set top of d5 to a11
+
+                 "vmull.u8 q3, d4, d1 \n\t" // q3 (d6|d7) = [a00|a01] * (16-y), widened to 16 bits
+                 "vmull.u8 q0, d5, d0 \n\t" // q0 (d0|d1) = [a10|a11] * y, widened to 16 bits
+
+                 "vdup.16 d5, %[x] \n\t" // duplicate x into d5
+                 "vmov.u16 d16, #16 \n\t" // set up constant in d16
+                 "vsub.u16 d3, d16, d5 \n\t" // d3 = 16-x
+
+                 "vmul.i16 d4, d7, d5 \n\t" // d4 = a01*(16-y) * x
+                 "vmla.i16 d4, d1, d5 \n\t" // d4 += a11*y * x
+                 "vmla.i16 d4, d6, d3 \n\t" // d4 += a00*(16-y) * (16-x)
+                 "vmla.i16 d4, d0, d3 \n\t" // d4 += a10*y * (16-x)
+                 "vdup.16 d3, %[scale] \n\t" // duplicate scale into d3
+                 "vshr.u16 d4, d4, #8 \n\t" // shift down result by 8
+                 "vmul.i16 d4, d4, d3 \n\t" // multiply result by scale
+                 "vshrn.i16 d0, q2, #8 \n\t" // narrow q2 (d4|d5) >> 8; only the d4 half is stored
+                 "vst1.32 {d0[0]}, [%[dst]] \n\t" // store result
+                 :
+                 : [x] "r" (x), [y] "r" (y), [a00] "r" (a00), [a01] "r" (a01), [a10] "r" (a10), [a11] "r" (a11), [dst] "r" (dst), [scale] "r" (scale)
+                 : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16"
+                 );
+}