From fce02aca62525c3041226501574f740f7ea3714b Mon Sep 17 00:00:00 2001 From: "digit@google.com" Date: Wed, 1 Aug 2012 14:25:07 +0000 Subject: [PATCH] arm: dynamic NEON support for SkBitmapProcState matrix operations. This patch implements dynamic ARM NEON support for the functions implemented by src/core/SkBitmapProcState_matrixProcs.cpp. - Because the SkBitmapProcState_matrix_{clamp,repeat}.h headers are NEON-specific, they are renamed with a _neon.h suffix, and moved to src/opts/ (from src/core/) - Add a new file src/opts/SkBitmapProcState_matrixProcs_neon.cpp which implements the NEON code paths for all builds, and add it to the 'opts_neon' static library. - Modify SkBitmapProcState_matrixProcs.cpp to select the right code-path depending on our build configuration. Note that in the case where 'arm_neon == 1', we do not embed regular ARM code paths in the final binary. Only 'arm_neon_optional == 1' builds will contain both regular and NEON code paths at the same time. Note that there doesn't seem to be a simple way to put the NEON-specific selection from that currently is in SkBitmapProcState_matrixProcs.cpp into src/opts/. Doing so would require much more drastic restructuring. This is also true of the other SkBitmapProcState source files that will be touched in a future patch. Review URL: https://codereview.appspot.com/6453065 git-svn-id: http://skia.googlecode.com/svn/trunk@4888 2bbb7eff-a529-9590-31e7-b0007b416f81 --- gyp/opts.gyp | 6 +- src/core/SkBitmapProcState_matrixProcs.cpp | 128 +++++------------- src/core/SkUtilsArm.cpp | 9 +- src/opts/SkBitmapProcState_matrixProcs_neon.cpp | 145 +++++++++++++++++++++ .../SkBitmapProcState_matrix_clamp_neon.h} | 10 +- .../SkBitmapProcState_matrix_repeat_neon.h} | 45 +------ 6 files changed, 197 insertions(+), 146 deletions(-) create mode 100644 src/opts/SkBitmapProcState_matrixProcs_neon.cpp rename src/{core/SkBitmapProcState_matrix_clamp.h => opts/SkBitmapProcState_matrix_clamp_neon.h} (99%) rename src/{core/SkBitmapProcState_matrix_repeat.h => opts/SkBitmapProcState_matrix_repeat_neon.h} (93%) diff --git a/gyp/opts.gyp b/gyp/opts.gyp index ca2b1c1..8c85b9a 100644 --- a/gyp/opts.gyp +++ b/gyp/opts.gyp @@ -126,6 +126,7 @@ '../include/config', '../include/core', '../src/core', + '../src/opts', ], 'cflags!': [ '-fno-omit-frame-pointer', @@ -134,12 +135,15 @@ '-mfpu=vfpv3-d16', ], 'cflags': [ - '-fomit-frame-pointer', '-mfpu=neon', + '-fomit-frame-pointer', ], 'sources': [ '../src/opts/memset16_neon.S', '../src/opts/memset32_neon.S', + '../src/opts/SkBitmapProcState_matrixProcs_neon.cpp', + '../src/opts/SkBitmapProcState_matrix_clamp_neon.h', + '../src/opts/SkBitmapProcState_matrix_repeat_neon.h', ], }, ], diff --git a/src/core/SkBitmapProcState_matrixProcs.cpp b/src/core/SkBitmapProcState_matrixProcs.cpp index 1e12f9a..77c6200 100644 --- a/src/core/SkBitmapProcState_matrixProcs.cpp +++ b/src/core/SkBitmapProcState_matrixProcs.cpp @@ -8,6 +8,7 @@ #include "SkPerspIter.h" #include "SkShader.h" #include "SkUtils.h" +#include "SkUtilsArm.h" // Helper to ensure that when we shift down, we do it w/o sign-extension // so the caller doesn't have to manually mask off the top 16 bits @@ -67,27 +68,31 @@ static inline bool can_truncate_to_fixed_for_decal(SkFractionalInt frX, void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); +// Compile neon code paths if needed +#if !SK_ARM_NEON_IS_NONE + +// These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp +extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[]; +extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[]; + +#endif // !SK_ARM_NEON_IS_NONE + +// Compile non-neon code path if needed +#if !SK_ARM_NEON_IS_ALWAYS #define MAKENAME(suffix) ClampX_ClampY ## suffix #define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max) #define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max) #define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF) #define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF) #define CHECK_FOR_DECAL -#if defined(__ARM_HAVE_NEON) - #include "SkBitmapProcState_matrix_clamp.h" -#else - #include "SkBitmapProcState_matrix.h" -#endif +#include "SkBitmapProcState_matrix.h" #define MAKENAME(suffix) RepeatX_RepeatY ## suffix #define TILEX_PROCF(fx, max) SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1)) #define TILEY_PROCF(fy, max) SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1)) #define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) #define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) -#if defined(__ARM_HAVE_NEON) - #include "SkBitmapProcState_matrix_repeat.h" -#else - #include "SkBitmapProcState_matrix.h" +#include "SkBitmapProcState_matrix.h" #endif #define MAKENAME(suffix) GeneralXY ## suffix @@ -228,52 +233,6 @@ void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) { int i; -#if defined(__ARM_HAVE_NEON) - if (count >= 8) { - /* SkFixed is 16.16 fixed point */ - SkFixed dx2 = dx+dx; - SkFixed dx4 = dx2+dx2; - SkFixed dx8 = dx4+dx4; - - /* now build fx/fx+dx/fx+2dx/fx+3dx */ - SkFixed fx1, fx2, fx3; - int32x2_t lower, upper; - int32x4_t lbase, hbase; - uint16_t *dst16 = (uint16_t *)dst; - - fx1 = fx+dx; - fx2 = fx1+dx; - fx3 = fx2+dx; - - /* avoid an 'lbase unitialized' warning */ - lbase = vdupq_n_s32(fx); - lbase = vsetq_lane_s32(fx1, lbase, 1); - lbase = vsetq_lane_s32(fx2, lbase, 2); - lbase = vsetq_lane_s32(fx3, lbase, 3); - hbase = vaddq_s32(lbase, vdupq_n_s32(dx4)); - - /* take upper 16 of each, store, and bump everything */ - do { - int32x4_t lout, hout; - uint16x8_t hi16; - - lout = lbase; - hout = hbase; - /* gets hi's of all louts then hi's of all houts */ - asm ("vuzpq.16 %q0, %q1" : "+w" (lout), "+w" (hout)); - hi16 = vreinterpretq_u16_s32(hout); - vst1q_u16(dst16, hi16); - - /* on to the next */ - lbase = vaddq_s32 (lbase, vdupq_n_s32(dx8)); - hbase = vaddq_s32 (hbase, vdupq_n_s32(dx8)); - dst16 += 8; - count -= 8; - fx += dx8; - } while (count >= 8); - dst = (uint32_t *) dst16; - } -#else for (i = (count >> 2); i > 0; --i) { *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); @@ -282,7 +241,6 @@ void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) fx += dx+dx; } count &= 3; -#endif uint16_t* xx = (uint16_t*)dst; for (i = count; i > 0; --i) { @@ -293,42 +251,6 @@ void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) { -#if defined(__ARM_HAVE_NEON) - if (count >= 8) { - int32x4_t wide_fx; - int32x4_t wide_fx2; - int32x4_t wide_dx8 = vdupq_n_s32(dx*8); - - wide_fx = vdupq_n_s32(fx); - wide_fx = vsetq_lane_s32(fx+dx, wide_fx, 1); - wide_fx = vsetq_lane_s32(fx+dx+dx, wide_fx, 2); - wide_fx = vsetq_lane_s32(fx+dx+dx+dx, wide_fx, 3); - - wide_fx2 = vaddq_s32(wide_fx, vdupq_n_s32(dx+dx+dx+dx)); - - while (count >= 8) { - int32x4_t wide_out; - int32x4_t wide_out2; - - wide_out = vshlq_n_s32(vshrq_n_s32(wide_fx, 12), 14); - wide_out = vorrq_s32(wide_out, - vaddq_s32(vshrq_n_s32(wide_fx,16), vdupq_n_s32(1))); - - wide_out2 = vshlq_n_s32(vshrq_n_s32(wide_fx2, 12), 14); - wide_out2 = vorrq_s32(wide_out2, - vaddq_s32(vshrq_n_s32(wide_fx2,16), vdupq_n_s32(1))); - - vst1q_u32(dst, vreinterpretq_u32_s32(wide_out)); - vst1q_u32(dst+4, vreinterpretq_u32_s32(wide_out2)); - - dst += 8; - fx += dx*8; - wide_fx = vaddq_s32(wide_fx, wide_dx8); - wide_fx2 = vaddq_s32(wide_fx2, wide_dx8); - count -= 8; - } - } -#endif if (count & 1) { @@ -574,7 +496,17 @@ SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) { // clamp gets special version of filterOne fFilterOneX = SK_Fixed1; fFilterOneY = SK_Fixed1; +#if SK_ARM_NEON_IS_NONE return ClampX_ClampY_Procs[index]; +#elif SK_ARM_NEON_IS_ALWAYS + return ClampX_ClampY_Procs_neon[index]; +#else // SK_ARM_NEON_IS_DYNAMIC + if (sk_cpu_arm_has_neon()) { + return ClampX_ClampY_Procs_neon[index]; + } else { + return ClampX_ClampY_Procs[index]; + } +#endif } // all remaining procs use this form for filterOne @@ -584,9 +516,19 @@ SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) { if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) { +#if SK_ARM_NEON_IS_NONE return RepeatX_RepeatY_Procs[index]; +#elif SK_ARM_NEON_IS_ALWAYS + return RepeatX_RepeatY_Procs_neon[index]; +#else // SK_ARM_NEON_IS_DYNAMIC + if (sk_cpu_arm_has_neon()) { + return RepeatX_RepeatY_Procs_neon[index]; + } else { + return RepeatX_RepeatY_Procs[index]; + } +#endif } - + fTileProcX = choose_tile_proc(fTileModeX); fTileProcY = choose_tile_proc(fTileModeY); fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX); diff --git a/src/core/SkUtilsArm.cpp b/src/core/SkUtilsArm.cpp index 4f6ef3a..e484576 100644 --- a/src/core/SkUtilsArm.cpp +++ b/src/core/SkUtilsArm.cpp @@ -33,8 +33,7 @@ // A function used to determine at runtime if the target CPU supports // the ARM NEON instruction set. This implementation is Linux-specific. -static bool sk_cpu_arm_check_neon(void) -{ +static bool sk_cpu_arm_check_neon(void) { bool result = false; #if NEON_DEBUG @@ -164,13 +163,11 @@ static pthread_once_t sOnce; static bool sHasArmNeon; // called through pthread_once() -void sk_cpu_arm_probe_features(void) -{ +void sk_cpu_arm_probe_features(void) { sHasArmNeon = sk_cpu_arm_check_neon(); } -bool sk_cpu_arm_has_neon(void) -{ +bool sk_cpu_arm_has_neon(void) { pthread_once(&sOnce, sk_cpu_arm_probe_features); return sHasArmNeon; } diff --git a/src/opts/SkBitmapProcState_matrixProcs_neon.cpp b/src/opts/SkBitmapProcState_matrixProcs_neon.cpp new file mode 100644 index 0000000..7ebdddc --- /dev/null +++ b/src/opts/SkBitmapProcState_matrixProcs_neon.cpp @@ -0,0 +1,145 @@ +/* NEON optimized code (C) COPYRIGHT 2009 Motorola + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "SkBitmapProcState.h" +#include "SkPerspIter.h" +#include "SkShader.h" +#include "SkUtilsArm.h" + +extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[]; +extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[]; + +static void decal_nofilter_scale_neon(uint32_t dst[], SkFixed fx, SkFixed dx, int count); +static void decal_filter_scale_neon(uint32_t dst[], SkFixed fx, SkFixed dx, int count); + +static unsigned SK_USHIFT16(unsigned x) { + return x >> 16; +} + +#define MAKENAME(suffix) ClampX_ClampY ## suffix ## _neon +#define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max) +#define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max) +#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF) +#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF) +#define CHECK_FOR_DECAL +#include "SkBitmapProcState_matrix_clamp_neon.h" + +#define MAKENAME(suffix) RepeatX_RepeatY ## suffix ## _neon +#define TILEX_PROCF(fx, max) SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1)) +#define TILEY_PROCF(fy, max) SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1)) +#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) +#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) +#include "SkBitmapProcState_matrix_repeat_neon.h" + + +void decal_nofilter_scale_neon(uint32_t dst[], SkFixed fx, SkFixed dx, int count) +{ + int i; + + if (count >= 8) { + /* SkFixed is 16.16 fixed point */ + SkFixed dx2 = dx+dx; + SkFixed dx4 = dx2+dx2; + SkFixed dx8 = dx4+dx4; + + /* now build fx/fx+dx/fx+2dx/fx+3dx */ + SkFixed fx1, fx2, fx3; + int32x2_t lower, upper; + int32x4_t lbase, hbase; + uint16_t *dst16 = (uint16_t *)dst; + + fx1 = fx+dx; + fx2 = fx1+dx; + fx3 = fx2+dx; + + /* avoid an 'lbase unitialized' warning */ + lbase = vdupq_n_s32(fx); + lbase = vsetq_lane_s32(fx1, lbase, 1); + lbase = vsetq_lane_s32(fx2, lbase, 2); + lbase = vsetq_lane_s32(fx3, lbase, 3); + hbase = vaddq_s32(lbase, vdupq_n_s32(dx4)); + + /* take upper 16 of each, store, and bump everything */ + do { + int32x4_t lout, hout; + uint16x8_t hi16; + + lout = lbase; + hout = hbase; + /* gets hi's of all louts then hi's of all houts */ + asm ("vuzpq.16 %q0, %q1" : "+w" (lout), "+w" (hout)); + hi16 = vreinterpretq_u16_s32(hout); + vst1q_u16(dst16, hi16); + + /* on to the next */ + lbase = vaddq_s32 (lbase, vdupq_n_s32(dx8)); + hbase = vaddq_s32 (hbase, vdupq_n_s32(dx8)); + dst16 += 8; + count -= 8; + fx += dx8; + } while (count >= 8); + dst = (uint32_t *) dst16; + } + + uint16_t* xx = (uint16_t*)dst; + for (i = count; i > 0; --i) { + *xx++ = SkToU16(fx >> 16); fx += dx; + } +} + +void decal_filter_scale_neon(uint32_t dst[], SkFixed fx, SkFixed dx, int count) +{ + if (count >= 8) { + int32x4_t wide_fx; + int32x4_t wide_fx2; + int32x4_t wide_dx8 = vdupq_n_s32(dx*8); + + wide_fx = vdupq_n_s32(fx); + wide_fx = vsetq_lane_s32(fx+dx, wide_fx, 1); + wide_fx = vsetq_lane_s32(fx+dx+dx, wide_fx, 2); + wide_fx = vsetq_lane_s32(fx+dx+dx+dx, wide_fx, 3); + + wide_fx2 = vaddq_s32(wide_fx, vdupq_n_s32(dx+dx+dx+dx)); + + while (count >= 8) { + int32x4_t wide_out; + int32x4_t wide_out2; + + wide_out = vshlq_n_s32(vshrq_n_s32(wide_fx, 12), 14); + wide_out = vorrq_s32(wide_out, + vaddq_s32(vshrq_n_s32(wide_fx,16), vdupq_n_s32(1))); + + wide_out2 = vshlq_n_s32(vshrq_n_s32(wide_fx2, 12), 14); + wide_out2 = vorrq_s32(wide_out2, + vaddq_s32(vshrq_n_s32(wide_fx2,16), vdupq_n_s32(1))); + + vst1q_u32(dst, vreinterpretq_u32_s32(wide_out)); + vst1q_u32(dst+4, vreinterpretq_u32_s32(wide_out2)); + + dst += 8; + fx += dx*8; + wide_fx = vaddq_s32(wide_fx, wide_dx8); + wide_fx2 = vaddq_s32(wide_fx2, wide_dx8); + count -= 8; + } + } + + if (count & 1) + { + SkASSERT((fx >> (16 + 14)) == 0); + *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); + fx += dx; + } + while ((count -= 2) >= 0) + { + SkASSERT((fx >> (16 + 14)) == 0); + *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); + fx += dx; + + *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); + fx += dx; + } +} diff --git a/src/core/SkBitmapProcState_matrix_clamp.h b/src/opts/SkBitmapProcState_matrix_clamp_neon.h similarity index 99% rename from src/core/SkBitmapProcState_matrix_clamp.h rename to src/opts/SkBitmapProcState_matrix_clamp_neon.h index 06bc0fa..2e48161 100644 --- a/src/core/SkBitmapProcState_matrix_clamp.h +++ b/src/opts/SkBitmapProcState_matrix_clamp_neon.h @@ -21,10 +21,6 @@ */ -#if !defined(__ARM_HAVE_NEON) -#error this file can be used only when the NEON unit is enabled -#endif - #include /* @@ -91,7 +87,7 @@ static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s, // test if we don't need to apply the tile proc if ((unsigned)(fx >> 16) <= maxX && (unsigned)((fx + dx * (count - 1)) >> 16) <= maxX) { - decal_nofilter_scale(xy, fx, dx, count); + decal_nofilter_scale_neon(xy, fx, dx, count); return; } #endif @@ -516,7 +512,7 @@ static void SCALE_FILTER_NAME(const SkBitmapProcState& s, if (dx > 0 && (unsigned)(fx >> 16) <= maxX && (unsigned)((fx + dx * (count - 1)) >> 16) < maxX) { - decal_filter_scale(xy, fx, dx, count); + decal_filter_scale_neon(xy, fx, dx, count); } else #endif @@ -891,7 +887,7 @@ static void PERSP_FILTER_NAME(const SkBitmapProcState& s, } } -static SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = { +const SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = { SCALE_NOFILTER_NAME, SCALE_FILTER_NAME, AFFINE_NOFILTER_NAME, diff --git a/src/core/SkBitmapProcState_matrix_repeat.h b/src/opts/SkBitmapProcState_matrix_repeat_neon.h similarity index 93% rename from src/core/SkBitmapProcState_matrix_repeat.h rename to src/opts/SkBitmapProcState_matrix_repeat_neon.h index 8f32795..d05beab 100644 --- a/src/core/SkBitmapProcState_matrix_repeat.h +++ b/src/opts/SkBitmapProcState_matrix_repeat_neon.h @@ -20,10 +20,6 @@ */ -#if !defined(__ARM_HAVE_NEON) -#error this file can be used only when the NEON unit is enabled -#endif - #include /* @@ -39,11 +35,11 @@ /* SkClampMax(val,max) -- bound to 0..max */ -#define SCALE_NOFILTER_NAME MAKENAME(_nofilter_scale_neon) +#define SCALE_NOFILTER_NAME MAKENAME(_nofilter_scale) #define SCALE_FILTER_NAME MAKENAME(_filter_scale) -#define AFFINE_NOFILTER_NAME MAKENAME(_nofilter_affine_neon) +#define AFFINE_NOFILTER_NAME MAKENAME(_nofilter_affine) #define AFFINE_FILTER_NAME MAKENAME(_filter_affine) -#define PERSP_NOFILTER_NAME MAKENAME(_nofilter_persp_neon) +#define PERSP_NOFILTER_NAME MAKENAME(_nofilter_persp) #define PERSP_FILTER_NAME MAKENAME(_filter_persp) #define PACK_FILTER_X_NAME MAKENAME(_pack_filter_x) @@ -89,13 +85,12 @@ static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s, // test if we don't need to apply the tile proc if ((unsigned)(fx >> 16) <= maxX && (unsigned)((fx + dx * (count - 1)) >> 16) <= maxX) { - decal_nofilter_scale(xy, fx, dx, count); + decal_nofilter_scale_neon(xy, fx, dx, count); } else #endif { int i; -#if defined(__ARM_HAVE_NEON) /* RBE: very much like done in decal_nofilter , * but some processing of the 'fx' information * TILEX_PROCF(fx, max) (((fx) & 0xFFFF) * ((max) + 1) >> 16) @@ -152,30 +147,6 @@ static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s, } while (count >= 8); xy = (uint32_t *) dst16; } -#else - /* simple, portable way of looking at 4 at a crack; - * so gets some loop unrolling, but not full SIMD speed - */ - for (i = (count >> 2); i > 0; --i) { - unsigned a, b; - a = TILEX_PROCF(fx, maxX); fx += dx; - b = TILEX_PROCF(fx, maxX); fx += dx; -#ifdef SK_CPU_BENDIAN - *xy++ = (a << 16) | b; -#else - *xy++ = (b << 16) | a; -#endif - a = TILEX_PROCF(fx, maxX); fx += dx; - b = TILEX_PROCF(fx, maxX); fx += dx; -#ifdef SK_CPU_BENDIAN - *xy++ = (a << 16) | b; -#else - *xy++ = (b << 16) | a; -#endif - } - /* loop doesn't adjust count */ - count %= 4; -#endif uint16_t* xx = (uint16_t*)xy; for (i = count; i > 0; --i) { *xx++ = TILEX_PROCF(fx, maxX); fx += dx; @@ -214,7 +185,6 @@ static void AFFINE_NOFILTER_NAME(const SkBitmapProcState& s, SkFixed bfx = fx, bfy=fy, bdx=dx, bdy=dy; #endif -#if defined(__ARM_HAVE_NEON) if (0) { extern void rbe(void); rbe(); } @@ -298,7 +268,6 @@ static void AFFINE_NOFILTER_NAME(const SkBitmapProcState& s, SkDebugf("maxX %08x maxY %08x\n", maxX, maxY); } #endif -#endif for (int i = count; i > 0; --i) { /* fx, fy, dx, dy are all 32 bit 16.16 fixed point */ @@ -324,7 +293,6 @@ static void PERSP_NOFILTER_NAME(const SkBitmapProcState& s, while ((count = iter.next()) != 0) { const SkFixed* SK_RESTRICT srcXY = iter.getXY(); -#if defined(__ARM_HAVE_NEON) /* RBE: */ /* TILEX_PROCF(fx, max) (((fx) & 0xFFFF) * ((max) + 1) >> 16) */ /* it's a little more complicated than what I did for the @@ -417,7 +385,6 @@ static void PERSP_NOFILTER_NAME(const SkBitmapProcState& s, srcXY = (const SkFixed *) mysrc; xy = (uint32_t *) mydst; } -#endif while (--count >= 0) { *xy++ = (TILEY_PROCF(srcXY[1], maxY) << 16) | TILEX_PROCF(srcXY[0], maxX); @@ -472,7 +439,7 @@ static void SCALE_FILTER_NAME(const SkBitmapProcState& s, if (dx > 0 && (unsigned)(fx >> 16) <= maxX && (unsigned)((fx + dx * (count - 1)) >> 16) < maxX) { - decal_filter_scale(xy, fx, dx, count); + decal_filter_scale_neon(xy, fx, dx, count); } else #endif { @@ -544,7 +511,7 @@ static void PERSP_FILTER_NAME(const SkBitmapProcState& s, } } -static SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = { +const SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = { SCALE_NOFILTER_NAME, SCALE_FILTER_NAME, AFFINE_NOFILTER_NAME, -- 2.7.4