1 /* NEON optimized code (C) COPYRIGHT 2009 Motorola
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
7 #include "SkBitmapProcState.h"
8 #include "SkPerspIter.h"
11 #include "SkUtilsArm.h"
12 #include "SkBitmapProcState_utils.h"
14 /* returns 0...(n-1) given any x (positive or negative).
16 As an example, if n (which is always positive) is 5...
18 x: -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8
19 returns: 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3
21 static inline int sk_int_mod(int x, int n) {
23 if ((unsigned)x >= (unsigned)n) {
33 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
34 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
36 #include "SkBitmapProcState_matrix_template.h"
38 ///////////////////////////////////////////////////////////////////////////////
40 // Compile neon code paths if needed
41 #if !SK_ARM_NEON_IS_NONE
43 // These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
44 extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
45 extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
47 #endif // !SK_ARM_NEON_IS_NONE
49 // Compile non-neon code path if needed
50 #if !SK_ARM_NEON_IS_ALWAYS
// Macro parameters for the SkBitmapProcState_matrix.h include that ends this
// group. MAKENAME pastes the ClampX_ClampY prefix onto the given suffix.
// NOTE(review): presumably the header uses these macros to generate the
// ClampX_ClampY_filter_* procs referenced by ClampX_ClampY_Procs -- confirm.
51 #define MAKENAME(suffix) ClampX_ClampY ## suffix
// Tile a coordinate: drop its low 16 bits, then pass the result and max to
// SkClampMax.
52 #define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max)
53 #define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max)
// Extract bits 12..15 of the coordinate (a 4-bit value); max is not used.
54 #define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
55 #define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
// Defined with no value. Of the macro groups in this file, only this (clamp)
// group defines it.
56 #define CHECK_FOR_DECAL
57 #include "SkBitmapProcState_matrix.h"
// Clamp-mode tile functions, supplied as the template argument to the
// NoFilterProc_* templates used in this file.
59 struct ClampTileProcs {
// Drops the low 16 bits of fx and passes the result with max to SkClampMax.
// The SkBitmapProcState argument is unused.
60 static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
61 return SkClampMax(fx >> 16, max);
// Same computation as X, applied to the y coordinate.
63 static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
64 return SkClampMax(fy >> 16, max);
// Non-static wrapper around NoFilterProc_Scale instantiated with
// ClampTileProcs and `true` as its second template argument; all arguments are
// forwarded unchanged.
68 // Referenced in opts_check_x86.cpp
69 void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[],
70 int count, int x, int y) {
71 return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y);
// Non-static wrapper around NoFilterProc_Affine instantiated with
// ClampTileProcs; all arguments are forwarded unchanged.
73 void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s, uint32_t xy[],
74 int count, int x, int y) {
75 return NoFilterProc_Affine<ClampTileProcs>(s, xy, count, x, y);
// Table of the six clamp/clamp matrix procs. Entries come in
// (nofilter, filter) pairs, ordered scale, affine, perspective.
// SkBitmapProcState::chooseMatrixProc indexes this table.
78 static SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = {
79 // only clamp lives in the right coord space to check for decal
80 ClampX_ClampY_nofilter_scale,
81 ClampX_ClampY_filter_scale,
82 ClampX_ClampY_nofilter_affine,
83 ClampX_ClampY_filter_affine,
84 NoFilterProc_Persp<ClampTileProcs>,
85 ClampX_ClampY_filter_persp
// Macro parameters for the SkBitmapProcState_matrix.h include that ends this
// group. MAKENAME pastes the RepeatX_RepeatY prefix onto the given suffix.
// NOTE(review): presumably the header uses these macros to generate the
// RepeatX_RepeatY_filter_* procs referenced by RepeatX_RepeatY_Procs -- confirm.
88 #define MAKENAME(suffix) RepeatX_RepeatY ## suffix
// Tile a coordinate: keep its low 16 bits, multiply by (max + 1), then apply
// SK_USHIFT16 to the product.
89 #define TILEX_PROCF(fx, max) SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1))
90 #define TILEY_PROCF(fy, max) SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1))
// Same product as above, but shifted right by 12 and masked to 4 bits.
91 #define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
92 #define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
93 #include "SkBitmapProcState_matrix.h"
// Repeat-mode tile functions, supplied as the template argument to the
// NoFilterProc_* templates used in this file.
95 struct RepeatTileProcs {
// Keeps the low 16 bits of fx, multiplies by (max + 1), and applies
// SK_USHIFT16 to the product. The SkBitmapProcState argument is unused.
// NOTE(review): SK_USHIFT16 is presumably an unsigned shift right by 16 -- confirm.
96 static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
97 return SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1));
// Same computation as X, applied to the y coordinate.
99 static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
100 return SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1));
// Table of the six repeat/repeat matrix procs. Entries come in
// (nofilter, filter) pairs, ordered scale, affine, perspective.
// The nofilter entries are NoFilterProc_* templates instantiated with
// RepeatTileProcs (the scale variant takes `false` as its second argument).
104 static SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs[] = {
105 NoFilterProc_Scale<RepeatTileProcs, false>,
106 RepeatX_RepeatY_filter_scale,
107 NoFilterProc_Affine<RepeatTileProcs>,
108 RepeatX_RepeatY_filter_affine,
109 NoFilterProc_Persp<RepeatTileProcs>,
110 RepeatX_RepeatY_filter_persp
// Macro parameters for the SkBitmapProcState_matrix.h include that ends this
// group. MAKENAME pastes the GeneralXY prefix onto the given suffix. Unlike the
// clamp and repeat groups, tiling goes through function pointers read from the
// SkBitmapProcState (fTileProcX/Y and fTileLowBitsProcX/Y).
// PREAMBLE copies those four pointers into locals (each followed by a (void)
// cast of the local); PREAMBLE_PARAM_* / PREAMBLE_ARG_* spell the matching
// extra parameter and argument lists.
114 #define MAKENAME(suffix) GeneralXY ## suffix
115 #define PREAMBLE(state) SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
116 SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \
117 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \
118 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY
119 #define PREAMBLE_PARAM_X , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX
120 #define PREAMBLE_PARAM_Y , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY
121 #define PREAMBLE_ARG_X , tileProcX, tileLowBitsProcX
122 #define PREAMBLE_ARG_Y , tileProcY, tileLowBitsProcY
// Tile a coordinate: call the tile proc, multiply its result by (max + 1),
// then apply SK_USHIFT16 to the product.
123 #define TILEX_PROCF(fx, max) SK_USHIFT16(tileProcX(fx) * ((max) + 1))
124 #define TILEY_PROCF(fy, max) SK_USHIFT16(tileProcY(fy) * ((max) + 1))
// Low bits are delegated to the low-bits proc, which receives (max + 1).
125 #define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1)
126 #define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1)
127 #include "SkBitmapProcState_matrix.h"
// Tile functions for arbitrary tile-mode combinations, supplied as the template
// argument to the NoFilterProc_* templates used in this file. Unlike the clamp
// and repeat variants, these read the tile proc from the SkBitmapProcState.
129 struct GeneralTileProcs {
// Calls s.fTileProcX on fx, multiplies the result by (max + 1), and applies
// SK_USHIFT16 to the product.
130 static unsigned X(const SkBitmapProcState& s, SkFixed fx, int max) {
131 return SK_USHIFT16(s.fTileProcX(fx) * ((max) + 1));
// Same computation as X, using s.fTileProcY on the y coordinate.
133 static unsigned Y(const SkBitmapProcState& s, SkFixed fy, int max) {
134 return SK_USHIFT16(s.fTileProcY(fy) * ((max) + 1));
// Table of the six general (per-axis tile proc) matrix procs. Entries come in
// (nofilter, filter) pairs, ordered scale, affine, perspective.
// SkBitmapProcState::chooseMatrixProc returns from this table when the tile
// modes are neither clamp/clamp nor repeat/repeat.
138 static SkBitmapProcState::MatrixProc GeneralXY_Procs[] = {
139 NoFilterProc_Scale<GeneralTileProcs, false>,
140 GeneralXY_filter_scale,
141 NoFilterProc_Affine<GeneralTileProcs>,
142 GeneralXY_filter_affine,
143 NoFilterProc_Persp<GeneralTileProcs>,
144 GeneralXY_filter_persp
147 ///////////////////////////////////////////////////////////////////////////////
149 static inline U16CPU fixed_clamp(SkFixed x) {
159 static inline U16CPU fixed_repeat(SkFixed x) {
163 // Visual Studio 2010 (_MSC_VER == 1600) optimizes bit-shift code incorrectly.
164 // See http://code.google.com/p/skia/issues/detail?id=472
165 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
166 #pragma optimize("", off)
// Mirror-tiles a fixed-point coordinate, returning a value in 0..0xFFFF.
// `x << 15 >> 31` moves bit 16 of x into the top bit and shifts it back down,
// producing the mask s described below; XOR-ing x with s inverts the low
// 16 bits on odd intervals before they are masked off.
// NOTE(review): this assumes SkFixed is a signed 32-bit type with an
// arithmetic right shift. Left-shifting a signed value so that bits reach or
// pass the sign bit is undefined behavior before C++20 -- confirm, and
// consider performing the left shift on an unsigned value.
169 static inline U16CPU fixed_mirror(SkFixed x) {
170 SkFixed s = x << 15 >> 31;
171 // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
172 return (x ^ s) & 0xFFFF;
175 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
176 #pragma optimize("", on)
179 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) {
180 if (SkShader::kClamp_TileMode == m) {
183 if (SkShader::kRepeat_TileMode == m) {
186 SkASSERT(SkShader::kMirror_TileMode == m);
// Low-bits proc for clamp mode: returns bits 12..15 of x as a 4-bit value.
// The unnamed int parameter is ignored; choose_tile_lowbits_proc returns this
// function for SkShader::kClamp_TileMode.
190 static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) {
191 return (x >> 12) & 0xF;
// Low-bits proc shared by repeat and mirror modes: multiplies x by scale, then
// returns bits 12..15 of the product as a 4-bit value.
// ("mirrow" in the name is a misspelling of "mirror"; the identifier is kept
// because choose_tile_lowbits_proc refers to it.)
194 static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) {
195 return ((x * scale) >> 12) & 0xF;
// Selects the low-bits proc for tile mode m.
// Returns fixed_clamp_lowbits for kClamp_TileMode; any other mode is asserted
// to be kMirror_TileMode or kRepeat_TileMode and gets
// fixed_repeat_or_mirrow_lowbits.
198 static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) {
199 if (SkShader::kClamp_TileMode == m) {
200 return fixed_clamp_lowbits;
202 SkASSERT(SkShader::kMirror_TileMode == m ||
203 SkShader::kRepeat_TileMode == m);
204 // mirror and repeat have the same behavior for the low bits.
205 return fixed_repeat_or_mirrow_lowbits;
209 static inline U16CPU int_clamp(int x, int n) {
// Integer repeat tiling: forwards to sk_int_mod(x, n), which is documented at
// the top of this file as returning 0..(n-1) for any x, positive or negative.
219 static inline U16CPU int_repeat(int x, int n) {
220 return sk_int_mod(x, n);
223 static inline U16CPU int_mirror(int x, int n) {
224 x = sk_int_mod(x, 2 * n);
// Debug helper: prints int_mirror(i, 3) through SkDebugf for every i in -8..8.
// The only reference to it in this file is a commented-out call at the top of
// SkBitmapProcState::chooseMatrixProc.
232 static void test_int_tileprocs() {
233 for (int i = -8; i <= 8; i++) {
234 SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
239 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
240 if (SkShader::kClamp_TileMode == tm)
242 if (SkShader::kRepeat_TileMode == tm)
244 SkASSERT(SkShader::kMirror_TileMode == tm);
248 //////////////////////////////////////////////////////////////////////////////
// Writes 16-bit x indices, each computed as (fx >> 16), into dst.
//   dst   - output buffer; written as packed 32-bit words in the main loop and
//           as uint16_t values in the tail loop
//   fx    - starting fixed-point x position
//   dx    - fixed-point step between consecutive samples
//   count - number of samples
// The main loop runs (count >> 2) times and stores two pack_two_shorts words
// (four indices) per iteration; the tail loop stores one SkToU16 value at a
// time and advances fx by dx.
// NOTE(review): the statements that advance fx inside the main loop and that
// reduce count before the tail loop are not visible here -- confirm.
250 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
253 for (i = (count >> 2); i > 0; --i) {
254 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
256 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
261 uint16_t* xx = (uint16_t*)dst;
262 for (i = count; i > 0; --i) {
263 *xx++ = SkToU16(fx >> 16); fx += dx;
// Writes one 32-bit word per sample into dst. Each word is
//   (fx >> 12 << 14) | ((fx >> 16) + 1)
// so bits 14 and up hold (fx >> 12) -- the value of fx >> 16 followed by the
// four bits 12..15 of fx -- and the low 14 bits hold (fx >> 16) + 1.
// The asserts check that (fx >> 30) == 0, i.e. that fx >> 16 fits in 14 bits.
//   dst   - output buffer
//   fx    - starting fixed-point x position
//   dx    - fixed-point step between consecutive samples
//   count - number of samples; the while loop consumes them two at a time
// NOTE(review): the statements that advance fx by dx and the handling of an odd
// count are not visible here -- confirm.
267 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
269 SkASSERT((fx >> (16 + 14)) == 0);
270 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
273 while ((count -= 2) >= 0) {
274 SkASSERT((fx >> (16 + 14)) == 0);
275 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
278 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
283 ///////////////////////////////////////////////////////////////////////////////
284 // stores the same as SCALE, but is cheaper to compute. Also since there is no
285 // scale, we don't need/have a FILTER version
287 static void fill_sequential(uint16_t xptr[], int start, int count) {
289 if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
294 uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
295 uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
296 uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
298 int qcount = count >> 2;
304 } while (--qcount != 0);
305 xptr = reinterpret_cast<uint16_t*>(xxptr);
308 while (--count >= 0) {
312 for (int i = 0; i < count; i++) {
// Shared setup for the translate-only, no-filter matrix procs.
// Maps the point (x + 0.5, y + 0.5) through s.fInvProc with s.fInvMatrix,
// stores the tiled integer Y (s.fIntTileProcY applied to the mapped Y with its
// low 16 fixed-point bits dropped, and the bitmap height) into **xy, and
// advances *xy by one 32-bit entry so the caller can write X values after it.
// Returns the mapped X with its low 16 fixed-point bits dropped.
318 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
321 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
322 SkIntToScalar(y) + SK_ScalarHalf, &pt);
323 **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16,
324 s.fBitmap->height());
325 *xy += 1; // bump the ptr
326 // return our starting X position
327 return SkScalarToFixed(pt.fX) >> 16;
// Matrix proc for the translate-only, no-filter case with clamp tiling in X;
// chooseMatrixProc returns it for SkShader::kClamp_TileMode.
// Asserts that s.fInvType has no bits set other than kTranslate_Mask, lets
// nofilter_trans_preamble store the Y entry and advance xy, then writes 16-bit
// X entries: zeros, a fill_sequential run starting at xpos, and finally
// width - 1 (via sk_memset16) for whatever remains.
// NOTE(review): the conditions and counts that delimit these regions are not
// visible here -- confirm.
330 static void clampx_nofilter_trans(const SkBitmapProcState& s,
331 uint32_t xy[], int count, int x, int y) {
332 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
334 int xpos = nofilter_trans_preamble(s, &xy, x, y);
335 const int width = s.fBitmap->width();
337 // all of the following X values must be 0
338 memset(xy, 0, count * sizeof(uint16_t));
342 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
345 // fill before 0 as needed
351 memset(xptr, 0, n * sizeof(uint16_t));
360 // fill in 0..width-1 if needed
366 fill_sequential(xptr, xpos, n);
374 // fill the remaining with the max value
375 sk_memset16(xptr, width - 1, count);
// Matrix proc for the translate-only, no-filter case with repeat tiling in X;
// chooseMatrixProc returns it for SkShader::kRepeat_TileMode.
// Asserts that s.fInvType has no bits set other than kTranslate_Mask, lets
// nofilter_trans_preamble store the Y entry and advance xy, then writes 16-bit
// X entries with fill_sequential: first the n = width - start entries beginning
// at start = sk_int_mod(xpos, width), then runs of `width` entries starting at
// 0 while count >= width, then a final run of `count` entries starting at 0.
// NOTE(review): the clamping of n and the updates to xptr/count between these
// steps are not visible here -- confirm.
378 static void repeatx_nofilter_trans(const SkBitmapProcState& s,
379 uint32_t xy[], int count, int x, int y) {
380 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
382 int xpos = nofilter_trans_preamble(s, &xy, x, y);
383 const int width = s.fBitmap->width();
385 // all of the following X values must be 0
386 memset(xy, 0, count * sizeof(uint16_t));
390 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
391 int start = sk_int_mod(xpos, width);
392 int n = width - start;
396 fill_sequential(xptr, start, n);
400 while (count >= width) {
401 fill_sequential(xptr, 0, width);
407 fill_sequential(xptr, 0, count);
411 static void fill_backwards(uint16_t xptr[], int pos, int count) {
412 for (int i = 0; i < count; i++) {
// Matrix proc for the translate-only, no-filter case with mirror tiling in X;
// chooseMatrixProc returns it for SkShader::kMirror_TileMode.
// Asserts that s.fInvType has no bits set other than kTranslate_Mask, lets
// nofilter_trans_preamble store the Y entry and advance xy, then writes 16-bit
// X entries. start is xpos reduced modulo 2 * width; when it lands in the
// second half it is reflected back into 0..width-1
// (width + ~(start - width) == 2 * width - 1 - start). Runs are then emitted
// with fill_sequential or fill_backwards: a first partial run from start,
// whole runs of `width` entries while count >= width, and a final partial run.
// NOTE(review): the flag that selects the forward or backward direction and
// the updates to xptr/count between runs are not visible here -- confirm.
418 static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
419 uint32_t xy[], int count, int x, int y) {
420 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
422 int xpos = nofilter_trans_preamble(s, &xy, x, y);
423 const int width = s.fBitmap->width();
425 // all of the following X values must be 0
426 memset(xy, 0, count * sizeof(uint16_t));
430 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
431 // need to know our start, and our initial phase (forward or backward)
434 int start = sk_int_mod(xpos, 2 * width);
435 if (start >= width) {
436 start = width + ~(start - width);
438 n = start + 1; // [start .. 0]
441 n = width - start; // [start .. width)
447 fill_sequential(xptr, start, n);
449 fill_backwards(xptr, start, n);
455 while (count >= width) {
457 fill_sequential(xptr, 0, width);
459 fill_backwards(xptr, width - 1, width);
468 fill_sequential(xptr, 0, count);
470 fill_backwards(xptr, width - 1, count);
475 ///////////////////////////////////////////////////////////////////////////////
// Picks the MatrixProc for this state and sets the members that proc relies on.
//   trivial_matrix - true when there is no scale/affine/perspective; filtering
//                    is asserted to be off in that case.
// Returns one of the *_nofilter_trans procs (trivial case) or an entry of the
// ClampX_ClampY / RepeatX_RepeatY / GeneralXY proc tables.
// Side effects: sets fIntTileProcY in the trivial case; otherwise sets
// fFilterOneX/Y and, on the general path, fTileProcX/Y and fTileLowBitsProcX/Y.
477 SkBitmapProcState::MatrixProc SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
478 // test_int_tileprocs();
479 // check for our special case when there is no scale/affine/perspective
480 if (trivial_matrix) {
481 SkASSERT(SkPaint::kNone_FilterLevel == fFilterLevel);
482 fIntTileProcY = choose_int_tile_proc(fTileModeY);
// Y tiling goes through fIntTileProcY; X tiling is baked into the proc.
483 switch (fTileModeX) {
484 case SkShader::kClamp_TileMode:
485 return clampx_nofilter_trans;
486 case SkShader::kRepeat_TileMode:
487 return repeatx_nofilter_trans;
488 case SkShader::kMirror_TileMode:
489 return mirrorx_nofilter_trans;
// The table index depends on the filter level and on the perspective/affine
// bits of fInvType.
// NOTE(review): the statements that compute `index` are not visible here; the
// tables are laid out as (nofilter, filter) pairs for scale, affine, persp.
494 if (fFilterLevel != SkPaint::kNone_FilterLevel) {
497 if (fInvType & SkMatrix::kPerspective_Mask) {
499 } else if (fInvType & SkMatrix::kAffine_Mask) {
503 if (SkShader::kClamp_TileMode == fTileModeX && SkShader::kClamp_TileMode == fTileModeY) {
504 // clamp gets special version of filterOne
505 fFilterOneX = SK_Fixed1;
506 fFilterOneY = SK_Fixed1;
// NOTE(review): SK_ARM_NEON_WRAP presumably selects between this table and the
// *_neon table declared near the top of the file -- confirm.
507 return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
510 // all remaining procs use this form for filterOne
// NOTE(review): these divisions assume the bitmap width and height are
// non-zero -- confirm the caller guarantees that.
511 fFilterOneX = SK_Fixed1 / fBitmap->width();
512 fFilterOneY = SK_Fixed1 / fBitmap->height();
514 if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) {
515 return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
// Any other tile-mode combination: install per-axis tile procs for the
// GeneralXY procs to call through.
518 fTileProcX = choose_tile_proc(fTileModeX);
519 fTileProcY = choose_tile_proc(fTileModeY);
520 fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
521 fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY);
522 return GeneralXY_Procs[index];