From: Siarhei Siamashka Date: Wed, 2 Oct 2013 00:54:30 +0000 (+0000) Subject: sse2: bilinear fast path for src_x888_8888 X-Git-Tag: pixman-0.31.2~7 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a863bbcce0924f8d4004a352c670a9f774012463;p=platform%2Fupstream%2Fpixman.git sse2: bilinear fast path for src_x888_8888 Running cairo-perf-trace benchmark on Intel Core2 T7300: Before: [ 0] image t-firefox-canvas-swscroll 1.989 2.008 0.43% 8/8 [ 1] image firefox-canvas-scroll 4.574 4.609 0.50% 8/8 After: [ 0] image t-firefox-canvas-swscroll 1.404 1.418 0.51% 8/8 [ 1] image firefox-canvas-scroll 4.228 4.259 0.36% 8/8 --- diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c index 2ab2690..a6e7808 100644 --- a/pixman/pixman-sse2.c +++ b/pixman/pixman-sse2.c @@ -5751,6 +5751,66 @@ FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC, NORMAL, FLAG_NONE) static force_inline void +scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst, + const uint32_t * mask, + const uint32_t * src_top, + const uint32_t * src_bottom, + int32_t w, + int wt, + int wb, + pixman_fixed_t vx_, + pixman_fixed_t unit_x_, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + intptr_t vx = vx_; + intptr_t unit_x = unit_x_; + BILINEAR_DECLARE_VARIABLES; + uint32_t pix1, pix2; + + while (w && ((uintptr_t)dst & 15)) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + *dst++ = pix1 | 0xFF000000; + w--; + } + + while ((w -= 4) >= 0) { + __m128i xmm_src; + BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); + _mm_store_si128 ((__m128i *)dst, _mm_or_si128 (xmm_src, mask_ff000000)); + dst += 4; + } + + if (w & 2) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); + *dst++ = pix1 | 0xFF000000; + *dst++ = pix2 | 0xFF000000; + } + + if (w & 1) + { + BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); + *dst = pix1 | 0xFF000000; + } +} + +FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_cover_SRC, + scaled_bilinear_scanline_sse2_x888_8888_SRC, + uint32_t, uint32_t, uint32_t, + COVER, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_pad_SRC, + scaled_bilinear_scanline_sse2_x888_8888_SRC, + uint32_t, uint32_t, uint32_t, + PAD, FLAG_NONE) +FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_normal_SRC, + scaled_bilinear_scanline_sse2_x888_8888_SRC, + uint32_t, uint32_t, uint32_t, + NORMAL, FLAG_NONE) + +static force_inline void scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst, const uint32_t * mask, const uint32_t * src_top, @@ -6247,6 +6307,13 @@ static const pixman_fast_path_t sse2_fast_paths[] = SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, sse2_8888_8888), SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, sse2_8888_8888), + SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888), SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888), SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),