From 1ad6ae6ee8a350f6fe4f30ba928aacf44d04f86e Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Sun, 20 May 2012 20:51:08 -0400 Subject: [PATCH] mmx: add scaled bilinear over_8888_8_8888 Loongson: image firefox-fishtank 1665.163 1670.370 0.17% 3/3 image firefox-fishtank 1037.738 1040.218 0.19% 3/3 ARM/iwMMXt: image firefox-fishtank 2042.723 2045.308 0.10% 3/3 image firefox-fishtank 1487.282 1492.640 0.17% 3/3 --- pixman/pixman-mmx.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c index bf66a63..0c79f3a 100644 --- a/pixman/pixman-mmx.c +++ b/pixman/pixman-mmx.c @@ -3567,6 +3567,12 @@ do { \ pix = lo; \ } while (0) +#define BILINEAR_SKIP_ONE_PIXEL() \ +do { \ + vx += unit_x; \ + mm_x = _mm_add_pi16 (mm_x, mm_ux); \ +} while(0) + static force_inline void scaled_bilinear_scanline_mmx_8888_8888_SRC (uint32_t * dst, const uint32_t * mask, @@ -3659,6 +3665,79 @@ FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8888_normal_OVER, scaled_bilinear_scanline_mmx_8888_8888_OVER, uint32_t, uint32_t, uint32_t, NORMAL, FLAG_NONE) + +static force_inline void +scaled_bilinear_scanline_mmx_8888_8_8888_OVER (uint32_t * dst, + const uint8_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + BILINEAR_DECLARE_VARIABLES; + __m64 pix1, pix2; + uint32_t m; + + while (w) + { + m = (uint32_t) *mask++; + + if (m) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + + if (m == 0xff && is_opaque (pix1)) + { + store (dst, pix1); + } + else + { + __m64 ms, md, ma, msa; + + pix2 = load (dst); + ma = expand_alpha_rev (to_m64 (m)); + ms = _mm_unpacklo_pi8 (pix1, _mm_setzero_si64 ()); + md = _mm_unpacklo_pi8 (pix2, _mm_setzero_si64 ()); + + msa = expand_alpha (ms); + + store8888 (dst, (in_over (ms, msa, ma, md))); + } + } + else + { + BILINEAR_SKIP_ONE_PIXEL (); + } + + w--; + dst++; + } + + _mm_empty (); +} + +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_cover_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + COVER, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_pad_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + PAD, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_none_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + NONE, FLAG_HAVE_NON_SOLID_MASK) +FAST_BILINEAR_MAINLOOP_COMMON (mmx_8888_8_8888_normal_OVER, + scaled_bilinear_scanline_mmx_8888_8_8888_OVER, + uint32_t, uint8_t, uint32_t, + NORMAL, FLAG_HAVE_NON_SOLID_MASK) + static uint32_t * mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) { @@ -3926,6 +4005,11 @@ static const pixman_fast_path_t mmx_fast_paths[] = SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8888 ), SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8888 ), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_8_8888 ), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_8_8888 ), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_8_8888 ), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_8_8888 ), + { PIXMAN_OP_NONE }, }; -- 2.7.4