ARM: added 'neon_composite_over_8888_8_0565' fast path
author Siarhei Siamashka <siarhei.siamashka@nokia.com>
Mon, 23 Aug 2010 15:24:32 +0000 (18:24 +0300)
committer Siarhei Siamashka <ssvb@i7.(none)>
Mon, 6 Sep 2010 20:56:05 +0000 (23:56 +0300)
pixman/pixman-arm-neon-asm.S
pixman/pixman-arm-neon.c

index 3a71a0e7271dd41b5ade5adb476a98d0b0d41796..325f6e7b7d2a1fddbff890f1b1db6d82d026dd6c 100644 (file)
@@ -881,6 +881,42 @@ generate_composite_function \
 
 /******************************************************************************/
 
+/* TODO: expand macros and improve instruction scheduling. Runs the over_n_8_0565 tail, then its head, with dst/src/mask loads and the d28-d29 store placed around them. */
+.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head
+    vld1.16     {d4, d5}, [DST_R, :128]!  /* load 16 bytes of dst from DST_R into d4-d5 (128-bit aligned), post-increment */
+    pixman_composite_over_n_8_0565_process_pixblock_tail  /* tail stage reused from over_n_8_0565 */
+    vld4.8      {d8, d9, d10, d11}, [SRC]!  /* load 32 bytes from SRC, de-interleaved bytewise into d8-d11, post-increment */
+    cache_preload 8, 8  /* helper defined elsewhere; presumably issues prefetches - TODO confirm */
+    vld1.8      {d24}, [MASK]!  /* load 8 mask bytes from MASK into d24, post-increment */
+    pixman_composite_over_n_8_0565_process_pixblock_head  /* head stage reused from over_n_8_0565 */
+    vst1.16     {d28, d29}, [DST_W, :128]!  /* store 16 bytes from d28-d29 to DST_W (128-bit aligned), post-increment */
+.endm
+
+.macro pixman_composite_over_8888_8_0565_init
+    vpush       {d8-d15}  /* save d8-d15 (callee-saved under the AAPCS); the tail_head macro loads source pixels into d8-d11 */
+.endm
+
+.macro pixman_composite_over_8888_8_0565_cleanup
+    vpop        {d8-d15}  /* restore the d8-d15 values pushed by pixman_composite_over_8888_8_0565_init */
+.endm
+
+generate_composite_function \
+    pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, /* function name; 32, 8, 16 presumably src/mask/dst bits per pixel - TODO confirm against the generator macro */ \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, /* flags */ \
+    8, /* number of pixels, processed in a single block */ \
+    5, /* prefetch distance */ \
+    pixman_composite_over_8888_8_0565_init, /* init macro: pushes d8-d15 */ \
+    pixman_composite_over_8888_8_0565_cleanup, /* cleanup macro: pops d8-d15 */ \
+    pixman_composite_over_n_8_0565_process_pixblock_head, /* head stage shared with over_n_8_0565 */ \
+    pixman_composite_over_n_8_0565_process_pixblock_tail, /* tail stage shared with over_n_8_0565 */ \
+    pixman_composite_over_8888_8_0565_process_pixblock_tail_head, /* combined tail+head macro specific to this function */ \
+    28, /* dst_w_basereg */ \
+    4,  /* dst_r_basereg */ \
+    8,  /* src_basereg   */ \
+    24  /* mask_basereg  */
+
+/******************************************************************************/
+
 .macro pixman_composite_src_0565_0565_process_pixblock_head
 .endm
 
index 1f2430c1e80ec24d9a0b24233733b90dcabdb687..dc88f5018230c9447a15826575febe27ac058c52 100644 (file)
@@ -88,6 +88,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
                                         uint32_t, 1, uint8_t, 1, uint32_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
                                         uint32_t, 1, uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
+                                        uint32_t, 1, uint8_t, 1, uint16_t, 1) /* NOTE(review): binding macro is defined elsewhere; the type arguments presumably describe src, mask and dst in that order - confirm */
 
 void
 pixman_composite_src_n_8_asm_neon (int32_t   w,
@@ -237,6 +239,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       x8r8g8b8, neon_composite_over_8888_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       a8b8g8r8, neon_composite_over_8888_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       x8b8g8r8, neon_composite_over_8888_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       r5g6b5,   neon_composite_over_8888_8_0565),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       b5g6r5,   neon_composite_over_8888_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     r5g6b5,   neon_composite_over_8888_0565),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     b5g6r5,   neon_composite_over_8888_0565),