From a3e914407e354df70b9200e263608f1fc2e686cf Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 18 Jun 2015 15:05:49 +0300 Subject: [PATCH] vmx: implement fast path composite_add_8888_8888 Copied impl. from sse2 file and edited to use vmx functions It was benchmarked against commit id 2be523b from pixman/master POWER8, 16 cores, 3.4GHz, ppc64le : reference memcpy speed = 27036.4MB/s (6759.1MP/s for 32bpp fills) Before After Change --------------------------------------------- L1 248.76 3284.48 +1220.34% L2 264.09 2826.47 +970.27% M 261.24 2405.06 +820.63% HT 217.27 857.3 +294.58% VT 213.78 980.09 +358.46% R 176.61 442.95 +150.81% RT 107.54 150.08 +39.56% Kops/s 917 1125 +22.68% Signed-off-by: Oded Gabbay Acked-by: Siarhei Siamashka --- pixman/pixman-vmx.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c index e49e8aa..641c487 100644 --- a/pixman/pixman-vmx.c +++ b/pixman/pixman-vmx.c @@ -2765,6 +2765,31 @@ vmx_composite_add_8_8 (pixman_implementation_t *imp, } } +static void +vmx_composite_add_8888_8888 (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + + PIXMAN_IMAGE_GET_LINE ( + src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE ( + dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + + vmx_combine_add_u (imp, op, dst, src, NULL, width); + } +} + static const pixman_fast_path_t vmx_fast_paths[] = { PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888), @@ -2774,6 +2799,8 @@ static const pixman_fast_path_t vmx_fast_paths[] = /* PIXMAN_OP_ADD */ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, 
vmx_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, vmx_composite_add_8888_8888), { PIXMAN_OP_NONE }, }; -- 2.34.1