From 62c4bdc94f82d1e4c5dc0e58b5903382d74f3883 Mon Sep 17 00:00:00 2001
From: Matt Turner
Date: Fri, 18 May 2012 01:37:07 -0400
Subject: [PATCH] mmx: add over_reverse_n_8888

Loongson:
over_reverse_n_8888 = L1: 16.04 L2: 15.35 M: 10.20 ( 27.96%) HT: 10.95 VT: 10.45 R: 9.18 RT: 6.99 ( 76Kops/s)
over_reverse_n_8888 = L1: 27.40 L2: 26.67 M: 16.97 ( 45.78%) HT: 16.66 VT: 15.38 R: 14.15 RT: 9.44 ( 97Kops/s)

image poppler 34.106 35.500 1.48% 6/6
image poppler 29.598 30.835 1.70% 6/6

ARM/iwMMXt:
over_reverse_n_8888 = L1: 15.63 L2: 14.33 M: 10.83 ( 27.55%) HT: 9.78 VT: 9.91 R: 9.49 RT: 6.96 ( 69Kops/s)
over_reverse_n_8888 = L1: 22.79 L2: 19.40 M: 13.76 ( 34.19%) HT: 11.66 VT: 11.86 R: 11.17 RT: 7.85 ( 75Kops/s)

image poppler 38.040 38.606 1.10% 6/6
image poppler 31.686 32.278 0.80% 5/6
---
Notes (not part of the commit message):
 - Line breaks and indentation of this patch were restored by hand.  If a
   hunk is rejected because of whitespace in its context lines, apply it
   with "git apply --ignore-whitespace".
 - Compared with the patch as first generated, comments were added to the
   new function, and the lowlevel-blt-bench entry now uses 1 as its
   source flag, matching the neighbouring *_n_* entries.  The "index"
   blob ids below were not regenerated.

 pixman/pixman-mmx.c       | 80 +++++++++++++++++++++++++++++++++++++++++++++++
 test/lowlevel-blt-bench.c |  1 +
 2 files changed, 81 insertions(+)

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index a692837..bb125bf 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -3435,6 +3435,83 @@ mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
     _mm_empty ();
 }
 
+/*
+ * Fast path registered for OVER_REVERSE with a solid source and a 32-bit
+ * destination: computes over (dest, alpha (dest), solid) for every
+ * destination pixel of the rectangle and stores the result back in place.
+ */
+static void
+mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
+                                   pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint32_t src;
+    uint32_t *dst_line, *dst;
+    int32_t w;
+    int dst_stride;
+    __m64 vsrc;
+
+    CHECKPOINT ();
+
+    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+
+    /* A zero solid value: return without writing to the destination. */
+    if (src == 0)
+        return;
+
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+
+    vsrc = load8888 (&src);
+
+    while (height--)
+    {
+        dst = dst_line;
+        dst_line += dst_stride;
+        w = width;
+
+        CHECKPOINT ();
+
+        /* One pixel at a time until dst is 8-byte aligned. */
+        while (w && (unsigned long)dst & 7)
+        {
+            __m64 vdest = load8888 (dst);
+
+            store8888 (dst, over (vdest, expand_alpha (vdest), vsrc));
+
+            w--;
+            dst++;
+        }
+
+        /* Two pixels per iteration through one 64-bit load and store. */
+        while (w >= 2)
+        {
+            __m64 vdest = *(__m64 *)dst;
+            __m64 dest0 = expand8888 (vdest, 0);
+            __m64 dest1 = expand8888 (vdest, 1);
+
+            dest0 = over (dest0, expand_alpha (dest0), vsrc);
+            dest1 = over (dest1, expand_alpha (dest1), vsrc);
+
+            *(__m64 *)dst = pack8888 (dest0, dest1);
+
+            dst += 2;
+            w -= 2;
+        }
+
+        CHECKPOINT ();
+
+        /* Handle the remaining pixel, if any. */
+        if (w)
+        {
+            __m64 vdest = load8888 (dst);
+
+            store8888 (dst, over (vdest, expand_alpha (vdest), vsrc));
+        }
+    }
+
+    _mm_empty ();
+}
+
 static uint32_t *
 mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
 {
@@ -3663,6 +3740,9 @@ static const pixman_fast_path_t mmx_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, mmx_composite_over_8888_8888 ),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, mmx_composite_over_8888_0565 ),
 
+    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, mmx_composite_over_reverse_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, mmx_composite_over_reverse_n_8888),
+
     PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, mmx_composite_add_0565_0565 ),
     PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, mmx_composite_add_0565_0565 ),
     PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, mmx_composite_add_8888_8888 ),
diff --git a/test/lowlevel-blt-bench.c b/test/lowlevel-blt-bench.c
index 8a39a46..b44b9f8 100644
--- a/test/lowlevel-blt-bench.c
+++ b/test/lowlevel-blt-bench.c
@@ -661,6 +661,7 @@ tests_tbl[] =
     { "outrev_n_8888_1555_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a1r5g5b5 },
     { "outrev_n_8888_x888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_x8r8g8b8 },
     { "outrev_n_8888_8888_ca", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OUT_REV, PIXMAN_a8r8g8b8, 2, PIXMAN_a8r8g8b8 },
+    { "over_reverse_n_8888", PIXMAN_a8r8g8b8, 1, PIXMAN_OP_OVER_REVERSE, PIXMAN_null, 0, PIXMAN_a8r8g8b8 },
 };
 
 int
-- 
2.7.4