scaled_nearest_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)
-#define BMSK ((1 << BILINEAR_INTERPOLATION_BITS) - 1)
-
-#define BILINEAR_DECLARE_VARIABLES \
+#if BILINEAR_INTERPOLATION_BITS < 8
+# define BILINEAR_DECLARE_VARIABLES \
+ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
+ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
+ const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
+ const __m128i xmm_ux = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \
+ unit_x, -unit_x, unit_x, -unit_x); \
+ const __m128i xmm_zero = _mm_setzero_si128 (); \
+ __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), \
+ vx, -(vx + 1), vx, -(vx + 1))
+#else
+# define BILINEAR_DECLARE_VARIABLES \
const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
- const __m128i xmm_xorc8 = _mm_set_epi16 (0, 0, 0, 0, BMSK, BMSK, BMSK, BMSK);\
- const __m128i xmm_addc8 = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \
- const __m128i xmm_xorc7 = _mm_set_epi16 (0, BMSK, 0, BMSK, 0, BMSK, 0, BMSK);\
- const __m128i xmm_addc7 = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \
+ const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \
const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, \
- unit_x, unit_x, unit_x, unit_x); \
+ -unit_x, -unit_x, -unit_x, -unit_x); \
const __m128i xmm_zero = _mm_setzero_si128 (); \
- __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, vx, vx, vx, vx)
+ __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, \
+ -(vx + 1), -(vx + 1), -(vx + 1), -(vx + 1))
+#endif
#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \
do { \
if (BILINEAR_INTERPOLATION_BITS < 8) \
{ \
/* calculate horizontal weights */ \
- xmm_wh = _mm_add_epi16 (xmm_addc7, _mm_xor_si128 (xmm_xorc7, \
- _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \
+ xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
+ 16 - BILINEAR_INTERPOLATION_BITS)); \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
/* horizontal interpolation */ \
a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \
else \
{ \
/* calculate horizontal weights */ \
- xmm_wh = _mm_add_epi16 (xmm_addc8, _mm_xor_si128 (xmm_xorc8, \
- _mm_srli_epi16 (xmm_x, 16 - BILINEAR_INTERPOLATION_BITS))); \
+ xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \
+ 16 - BILINEAR_INTERPOLATION_BITS)); \
xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
/* horizontal interpolation */ \
xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \