1 /* NEON optimized code (C) COPYRIGHT 2009 Motorola
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
7 #include "SkBitmapProcState.h"
8 #include "SkPerspIter.h"
11 #include "SkUtilsArm.h"
12 #include "SkBitmapProcState_utils.h"
14 /* returns 0...(n-1) given any x (positive or negative).
16 As an example, if n (which is always positive) is 5...
18 x: -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8
19 returns: 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3
// Wraps x into [0, n) for a positive n, as tabulated in the comment above.
21 static inline int sk_int_mod(int x, int n) {
// A single unsigned comparison catches both x < 0 (which becomes a huge
// unsigned value) and x >= n; values already inside [0, n) skip this branch.
23 if ((unsigned)x >= (unsigned)n) {
// Forward declarations; both functions are defined further down in this file.
// Each takes the destination array, a starting fixed-point x (fx), the step
// added to fx per element (dx), and the element count.
33 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
34 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
36 // Compile neon code paths if needed
37 #if !SK_ARM_NEON_IS_NONE
39 // These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
// Tables of NEON matrix procs for the clamp/clamp and repeat/repeat tile
// modes. NOTE(review): presumably what SK_ARM_NEON_WRAP(...) resolves to in
// chooseMatrixProc on NEON builds -- confirm in SkUtilsArm.h.
40 extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
41 extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
43 #endif // !SK_ARM_NEON_IS_NONE
45 // Compile non-neon code path if needed
46 #if !SK_ARM_NEON_IS_ALWAYS
// Configuration macros for the clamp/clamp instantiation of
// SkBitmapProcState_matrix.h. MAKENAME pastes the ClampX_ClampY prefix onto
// each name the template builds.
47 #define MAKENAME(suffix) ClampX_ClampY ## suffix
// Tile procs: shift the coordinate right by 16 and clamp the result with
// SkClampMax(..., max).
48 #define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max)
49 #define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max)
// Low-bits procs: extract bits 12..15 of the coordinate; max is unused here.
50 #define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
51 #define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
// NOTE(review): presumably switches on the decal fast path inside the included
// template -- confirm in SkBitmapProcState_matrix.h.
52 #define CHECK_FOR_DECAL
53 #include "SkBitmapProcState_matrix.h"
// Configuration macros for the repeat/repeat instantiation of
// SkBitmapProcState_matrix.h; generated names get the RepeatX_RepeatY prefix.
55 #define MAKENAME(suffix) RepeatX_RepeatY ## suffix
// Tile procs: keep the low 16 bits of the coordinate, multiply by (max + 1),
// and pass the product through SK_USHIFT16.
56 #define TILEX_PROCF(fx, max) SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1))
57 #define TILEY_PROCF(fy, max) SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1))
// Low-bits procs: the same product, shifted right by 12 and masked to 4 bits.
58 #define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
59 #define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
60 #include "SkBitmapProcState_matrix.h"
// Configuration macros for the general instantiation of
// SkBitmapProcState_matrix.h (GeneralXY prefix). Tiling is done through the
// function pointers stored on the state rather than through inline arithmetic.
63 #define MAKENAME(suffix) GeneralXY ## suffix
// PREAMBLE copies the four tile procs from the state into locals. Each local is
// followed by a (void) cast, presumably to silence unused-variable warnings in
// variants that do not need all four.
64 #define PREAMBLE(state) SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
65 SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \
66 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \
67 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY
// Parameter-list and argument-list fragments (note the leading commas) that
// carry the X or Y pair of procs declared by PREAMBLE.
68 #define PREAMBLE_PARAM_X , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX
69 #define PREAMBLE_PARAM_Y , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY
70 #define PREAMBLE_ARG_X , tileProcX, tileLowBitsProcX
71 #define PREAMBLE_ARG_Y , tileProcY, tileLowBitsProcY
// Tile procs: multiply the tile proc's result by (max + 1) and pass the product
// through SK_USHIFT16.
72 #define TILEX_PROCF(fx, max) SK_USHIFT16(tileProcX(fx) * ((max) + 1))
73 #define TILEY_PROCF(fy, max) SK_USHIFT16(tileProcY(fy) * ((max) + 1))
// Low-bits procs: delegate to the per-mode proc, passing (max + 1) as its
// second argument.
74 #define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1)
75 #define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1)
76 #include "SkBitmapProcState_matrix.h"
// Fixed-point tile helper named for clamp mode; takes an SkFixed coordinate and
// returns a U16CPU.
// NOTE(review): presumably the proc choose_tile_proc hands back for
// kClamp_TileMode -- confirm.
78 static inline U16CPU fixed_clamp(SkFixed x)
// Fixed-point tile helper named for repeat mode; takes an SkFixed coordinate
// and returns a U16CPU.
// NOTE(review): presumably the proc choose_tile_proc hands back for
// kRepeat_TileMode -- confirm.
89 static inline U16CPU fixed_repeat(SkFixed x)
94 // Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly.
95 // See http://code.google.com/p/skia/issues/detail?id=472
96 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
97 #pragma optimize("", off)
100 static inline U16CPU fixed_mirror(SkFixed x)
102 SkFixed s = x << 15 >> 31;
103 // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
104 return (x ^ s) & 0xFFFF;
107 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
108 #pragma optimize("", on)
// Picks the FixedTileProc for tile mode m. Clamp and repeat are tested
// explicitly; any other value is asserted to be kMirror_TileMode.
111 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m)
113 if (SkShader::kClamp_TileMode == m)
115 if (SkShader::kRepeat_TileMode == m)
// Only the three tile modes above are expected here.
117 SkASSERT(SkShader::kMirror_TileMode == m);
121 static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) {
122 return (x >> 12) & 0xF;
125 static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) {
126 return ((x * scale) >> 12) & 0xF;
129 static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) {
130 if (SkShader::kClamp_TileMode == m) {
131 return fixed_clamp_lowbits;
133 SkASSERT(SkShader::kMirror_TileMode == m ||
134 SkShader::kRepeat_TileMode == m);
135 // mirror and repeat have the same behavior for the low bits.
136 return fixed_repeat_or_mirrow_lowbits;
// Integer tile helper named for clamp mode; takes a coordinate x and a size n
// and returns a U16CPU.
// NOTE(review): presumably the proc choose_int_tile_proc hands back for
// kClamp_TileMode -- confirm.
140 static inline U16CPU int_clamp(int x, int n) {
150 static inline U16CPU int_repeat(int x, int n) {
151 return sk_int_mod(x, n);
// Integer tile helper named for mirror mode.
154 static inline U16CPU int_mirror(int x, int n) {
// First wrap x into [0, 2 * n).
155 x = sk_int_mod(x, 2 * n);
// Debug helper: prints int_mirror(i, 3) for every i in [-8, 8] through
// SkDebugf. The only call to it in chooseMatrixProc is commented out.
163 static void test_int_tileprocs() {
164 for (int i = -8; i <= 8; i++) {
165 SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
// Picks the IntTileProc for tile mode tm. Clamp and repeat are tested
// explicitly; any other value is asserted to be kMirror_TileMode.
170 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
171 if (SkShader::kClamp_TileMode == tm)
173 if (SkShader::kRepeat_TileMode == tm)
// Only the three tile modes above are expected here.
175 SkASSERT(SkShader::kMirror_TileMode == tm);
179 //////////////////////////////////////////////////////////////////////////////
// Writes x coordinates (fx >> 16) into dst as 16-bit values, two per 32-bit
// word, for a run that starts at fx and steps by dx.
181 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
// Unrolled loop: one iteration per group of four coordinates (count >> 2).
185 for (i = (count >> 2); i > 0; --i)
// Each store packs the coordinates at fx and fx + dx into a single word.
187 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
189 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
// Continue over the same buffer one uint16_t at a time.
194 uint16_t* xx = (uint16_t*)dst;
195 for (i = count; i > 0; --i) {
196 *xx++ = SkToU16(fx >> 16); fx += dx;
// Writes one packed 32-bit word per sample for the filtered case. Each word
// holds (fx >> 12) shifted left by 14 in its upper bits, OR'ed with
// (fx >> 16) + 1 in its low bits.
200 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
// The packing needs fx >> 16 to fit in 14 bits: nothing may be set at bit 30
// or above.
206 SkASSERT((fx >> (16 + 14)) == 0);
207 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
// Handle two samples per iteration for as long as at least two remain.
210 while ((count -= 2) >= 0)
212 SkASSERT((fx >> (16 + 14)) == 0);
213 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
216 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
221 ///////////////////////////////////////////////////////////////////////////////
222 // stores the same as SCALE, but is cheaper to compute. Also since there is no
223 // scale, we don't need/have a FILTER version
// Fills xptr with 16-bit entries for a run that begins at the value `start`
// (the packed patterns below are built from start + 0 .. start + 3).
// NOTE(review): presumably writes start, start + 1, ... for `count` entries --
// confirm against the store statements.
225 static void fill_sequential(uint16_t xptr[], int start, int count) {
// Bit 1 of the address set: xptr is 2-byte aligned but not 4-byte aligned.
227 if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
// View the buffer as 32-bit words; each pattern holds two consecutive values.
232 uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
233 uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
234 uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
// Number of four-entry groups.
236 int qcount = count >> 2;
242 } while (--qcount != 0);
// Go back to 16-bit access from where the 32-bit pointer ended up.
243 xptr = reinterpret_cast<uint16_t*>(xxptr);
246 while (--count >= 0) {
250 for (int i = 0; i < count; i++) {
// Shared setup for the translate-only, no-filter procs. Maps the point
// (x + 1/2, y + 1/2) through the inverse matrix, stores the tiled integer Y as
// the first output word, advances *xy past it, and returns the integer X of the
// mapped point.
256 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
259 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
260 SkIntToScalar(y) + SK_ScalarHalf, &pt);
// Y is tiled against the bitmap height with the state's integer tile proc.
261 **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16,
262 s.fBitmap->height());
263 *xy += 1; // bump the ptr
264 // return our starting X position
265 return SkScalarToFixed(pt.fX) >> 16;
// Matrix proc for a translate-only inverse matrix with clamp tiling in X.
// Emits one tiled Y (through nofilter_trans_preamble) followed by 16-bit X
// indices: zeros, then a sequential run starting at xpos, then width - 1.
268 static void clampx_nofilter_trans(const SkBitmapProcState& s,
269 uint32_t xy[], int count, int x, int y) {
// The inverse matrix type may carry nothing beyond the translate bit.
270 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
// Stores Y in the first word, advances xy, and yields the first sample's X.
272 int xpos = nofilter_trans_preamble(s, &xy, x, y);
273 const int width = s.fBitmap->width();
275 // all of the following X values must be 0
276 memset(xy, 0, count * sizeof(uint16_t));
// From here on the X indices are written as 16-bit values.
280 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
283 // fill before 0 as needed
289 memset(xptr, 0, n * sizeof(uint16_t));
298 // fill in 0..width-1 if needed
304 fill_sequential(xptr, xpos, n);
312 // fill the remaining with the max value
313 sk_memset16(xptr, width - 1, count);
// Matrix proc for a translate-only inverse matrix with repeat tiling in X.
// Emits one tiled Y (through nofilter_trans_preamble) followed by 16-bit X
// indices produced by fill_sequential runs: first from `start`, then from 0.
316 static void repeatx_nofilter_trans(const SkBitmapProcState& s,
317 uint32_t xy[], int count, int x, int y) {
// The inverse matrix type may carry nothing beyond the translate bit.
318 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
// Stores Y in the first word, advances xy, and yields the first sample's X.
320 int xpos = nofilter_trans_preamble(s, &xy, x, y);
321 const int width = s.fBitmap->width();
323 // all of the following X values must be 0
324 memset(xy, 0, count * sizeof(uint16_t));
328 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
// start: xpos wrapped into [0, width). n: entries left before the first wrap.
329 int start = sk_int_mod(xpos, width);
330 int n = width - start;
// Leading partial run from start.
334 fill_sequential(xptr, start, n);
// Whole periods of `width` entries, each starting again at 0.
338 while (count >= width) {
339 fill_sequential(xptr, 0, width);
// Trailing partial period.
345 fill_sequential(xptr, 0, count);
// Counterpart of fill_sequential used by the mirror proc for reversed runs.
// NOTE(review): presumably writes pos, pos - 1, ... for `count` entries --
// confirm against the loop body.
349 static void fill_backwards(uint16_t xptr[], int pos, int count) {
350 for (int i = 0; i < count; i++) {
// Matrix proc for a translate-only inverse matrix with mirror tiling in X.
// Emits one tiled Y (through nofilter_trans_preamble) followed by 16-bit X
// indices produced by fill_sequential and fill_backwards runs.
356 static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
357 uint32_t xy[], int count, int x, int y) {
// The inverse matrix type may carry nothing beyond the translate bit.
358 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
// Stores Y in the first word, advances xy, and yields the first sample's X.
360 int xpos = nofilter_trans_preamble(s, &xy, x, y);
361 const int width = s.fBitmap->width();
363 // all of the following X values must be 0
364 memset(xy, 0, count * sizeof(uint16_t));
368 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
369 // need to know our start, and our initial phase (forward or backward)
// Wrap xpos into [0, 2 * width); the upper half is the reflected pass.
372 int start = sk_int_mod(xpos, 2 * width);
373 if (start >= width) {
// width + ~(start - width) equals 2 * width - 1 - start: the position
// reflected back into [0, width).
374 start = width + ~(start - width);
376 n = start + 1; // [start .. 0]
379 n = width - start; // [start .. width)
// Leading partial run, in whichever direction the initial phase calls for.
385 fill_sequential(xptr, start, n);
387 fill_backwards(xptr, start, n);
// Whole runs of `width` entries, ascending from 0 or descending from width - 1.
393 while (count >= width) {
395 fill_sequential(xptr, 0, width);
397 fill_backwards(xptr, width - 1, width);
// Trailing partial run.
406 fill_sequential(xptr, 0, count);
408 fill_backwards(xptr, width - 1, count);
413 ///////////////////////////////////////////////////////////////////////////////
// Selects the MatrixProc that matches the current tile modes, filter level and
// inverse-matrix type, and sets the state fields the chosen proc reads:
// fIntTileProcY for the trivial case, fFilterOneX/Y otherwise, plus the four
// fTileProc* / fTileLowBitsProc* pointers for the general procs.
415 SkBitmapProcState::MatrixProc
416 SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
417 // test_int_tileprocs();
418 // check for our special case when there is no scale/affine/perspective
419 if (trivial_matrix) {
// The trivial procs write unfiltered integer coordinates only.
420 SkASSERT(SkPaint::kNone_FilterLevel == fFilterLevel);
// Y goes through an integer tile proc; X tiling is baked into the proc chosen
// by the switch below.
421 fIntTileProcY = choose_int_tile_proc(fTileModeY);
422 switch (fTileModeX) {
423 case SkShader::kClamp_TileMode:
424 return clampx_nofilter_trans;
425 case SkShader::kRepeat_TileMode:
426 return repeatx_nofilter_trans;
427 case SkShader::kMirror_TileMode:
428 return mirrorx_nofilter_trans;
// NOTE(review): the next three tests presumably compute `index`, the slot used
// in the proc tables below -- confirm.
433 if (fFilterLevel != SkPaint::kNone_FilterLevel) {
436 if (fInvType & SkMatrix::kPerspective_Mask) {
438 } else if (fInvType & SkMatrix::kAffine_Mask) {
442 if (SkShader::kClamp_TileMode == fTileModeX &&
443 SkShader::kClamp_TileMode == fTileModeY)
445 // clamp gets special version of filterOne
446 fFilterOneX = SK_Fixed1;
447 fFilterOneY = SK_Fixed1;
448 return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
451 // all remaining procs use this form for filterOne
// SK_Fixed1 divided by the bitmap's width (resp. height).
452 fFilterOneX = SK_Fixed1 / fBitmap->width();
453 fFilterOneY = SK_Fixed1 / fBitmap->height();
455 if (SkShader::kRepeat_TileMode == fTileModeX &&
456 SkShader::kRepeat_TileMode == fTileModeY)
458 return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
// Everything else: store per-axis tile procs on the state and use the general
// procs.
461 fTileProcX = choose_tile_proc(fTileModeX);
462 fTileProcY = choose_tile_proc(fTileModeY);
463 fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
464 fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY);
465 return GeneralXY_Procs[index];