From 17acc7a4c707db4804b6bf47db30883745049fdb Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 17 May 2012 23:27:59 -0400 Subject: [PATCH] mmx: add add_0565_0565 Loongson: add_0565_0565 = L1: 15.37 L2: 14.91 M: 11.83 ( 16.06%) HT: 10.53 VT: 10.15 R: 9.74 RT: 6.19 ( 68Kops/s) add_0565_0565 = L1: 45.06 L2: 46.71 M: 27.45 ( 38.00%) HT: 23.76 VT: 22.84 R: 18.96 RT: 9.79 ( 104Kops/s) ARM/iwMMXt: add_0565_0565 = L1: 12.87 L2: 11.58 M: 10.11 ( 12.50%) HT: 9.06 VT: 8.66 R: 7.70 RT: 5.62 ( 58Kops/s) add_0565_0565 = L1: 31.14 L2: 28.87 M: 22.46 ( 28.60%) HT: 18.61 VT: 17.04 R: 15.21 RT: 9.35 ( 90Kops/s) --- pixman/pixman-mmx.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c index 70dd4e0..a692837 100644 --- a/pixman/pixman-mmx.c +++ b/pixman/pixman-mmx.c @@ -3077,6 +3077,90 @@ mmx_composite_add_8_8 (pixman_implementation_t *imp, } static void +mmx_composite_add_0565_0565 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint16_t *dst_line, *dst; + uint32_t d; + uint16_t *src_line, *src; + uint32_t s; + int dst_stride, src_stride; + int32_t w; + + CHECKPOINT (); + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w && (unsigned long)dst & 7) + { + s = *src++; + if (s) + { + d = *dst; + s = CONVERT_0565_TO_8888 (s); + if (d) + { + d = CONVERT_0565_TO_8888 (d); + UN8x4_ADD_UN8x4 (s, d); + } + *dst = CONVERT_8888_TO_0565 (s); + } + dst++; + w--; + } + + while (w >= 4) + { + __m64 vdest = *(__m64 *)dst; + __m64 vsrc = ldq_u ((__m64 *)src); + __m64 vd0, vd1; + __m64 vs0, vs1; + + expand_4xpacked565 (vdest, &vd0, &vd1, 0); + expand_4xpacked565 (vsrc, &vs0, &vs1, 0); + + vd0 = _mm_adds_pu8 (vd0, vs0); + vd1 = _mm_adds_pu8 (vd1, vs1); + + *(__m64 *)dst = pack_4xpacked565 (vd0, vd1); + + dst += 4; + src += 4; + w -= 4; + } + + while (w--) + { + s = *src++; + if (s) + { + d = *dst; + s = CONVERT_0565_TO_8888 (s); + if (d) + { + d = CONVERT_0565_TO_8888 (d); + UN8x4_ADD_UN8x4 (s, d); + } + *dst = CONVERT_8888_TO_0565 (s); + } + dst++; + } + } + + _mm_empty (); +} + +static void mmx_composite_add_8888_8888 (pixman_implementation_t *imp, pixman_composite_info_t *info) { @@ -3579,6 +3663,8 @@ static const pixman_fast_path_t mmx_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, mmx_composite_over_8888_8888 ), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, mmx_composite_over_8888_0565 ), + PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, mmx_composite_add_0565_0565 ), + PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, mmx_composite_add_0565_0565 ), PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, mmx_composite_add_8888_8888 ), PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, mmx_composite_add_8888_8888 ), PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, mmx_composite_add_8_8 ), -- 2.7.4