vmx: implement fast path composite_add_8888_8888
authorOded Gabbay <oded.gabbay@gmail.com>
Thu, 18 Jun 2015 12:05:49 +0000 (15:05 +0300)
committerOded Gabbay <oded.gabbay@gmail.com>
Thu, 16 Jul 2015 13:13:35 +0000 (16:13 +0300)
Copied impl. from sse2 file and edited to use vmx functions

It was benchmarked against commid id 2be523b from pixman/master

POWER8, 16 cores, 3.4GHz, ppc64le :

reference memcpy speed = 27036.4MB/s (6759.1MP/s for 32bpp fills)

                Before           After           Change
              ---------------------------------------------
L1              248.76          3284.48         +1220.34%
L2              264.09          2826.47         +970.27%
M               261.24          2405.06         +820.63%
HT              217.27          857.3           +294.58%
VT              213.78          980.09          +358.46%
R               176.61          442.95          +150.81%
RT              107.54          150.08          +39.56%
Kops/s          917             1125            +22.68%

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Acked-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
pixman/pixman-vmx.c

index e49e8aa..641c487 100644 (file)
@@ -2765,6 +2765,31 @@ vmx_composite_add_8_8 (pixman_implementation_t *imp,
     }
 }
 
+static void
+vmx_composite_add_8888_8888 (pixman_implementation_t *imp,
+                              pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    int dst_stride, src_stride;
+
+    PIXMAN_IMAGE_GET_LINE (
+       src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+       dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+
+    while (height--)
+    {
+       dst = dst_line;
+       dst_line += dst_stride;
+       src = src_line;
+       src_line += src_stride;
+
+       vmx_combine_add_u (imp, op, dst, src, NULL, width);
+    }
+}
+
 static const pixman_fast_path_t vmx_fast_paths[] =
 {
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888),
@@ -2774,6 +2799,8 @@ static const pixman_fast_path_t vmx_fast_paths[] =
 
     /* PIXMAN_OP_ADD */
     PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, vmx_composite_add_8_8),
+    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, vmx_composite_add_8888_8888),
+    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, vmx_composite_add_8888_8888),
 
     {   PIXMAN_OP_NONE },
 };