From 37e3368e20cee42f1e1039bb112ed9a09d21156f Mon Sep 17 00:00:00 2001 From: Nemanja Lukic Date: Fri, 14 Sep 2012 09:31:23 +0200 Subject: [PATCH] MIPS: DSPr2: Added fast-paths for OVER operation: - over_8888_n_8888 - over_8888_8_8888 Performance numbers before/after on MIPS-74kc @ 1GHz: lowlevel-blt-bench results Referent (before): over_8888_n_8888 = L1: 9.92 L2: 11.27 M: 8.50 ( 45.23%) HT: 4.70 VT: 4.45 R: 4.49 RT: 1.85 ( 20Kops/s) over_8888_8_8888 = L1: 12.54 L2: 10.86 M: 8.18 ( 54.36%) HT: 6.53 VT: 6.45 R: 6.41 RT: 3.83 ( 33Kops/s) Optimized: over_8888_n_8888 = L1: 28.02 L2: 24.92 M: 14.72 ( 78.15%) HT: 13.03 VT: 12.65 R: 12.00 RT: 7.49 ( 49Kops/s) over_8888_8_8888 = L1: 26.92 L2: 23.93 M: 13.65 ( 90.58%) HT: 11.68 VT: 11.29 R: 10.56 RT: 6.37 ( 45Kops/s) --- pixman/pixman-mips-dspr2-asm.S | 102 +++++++++++++++++++++++++++++++++++++++++ pixman/pixman-mips-dspr2.c | 13 ++++++ pixman/pixman-mips-dspr2.h | 83 +++++++++++++++++++++++++++++++++ 3 files changed, 198 insertions(+) diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S index a8fccd5..165f177 100644 --- a/pixman/pixman-mips-dspr2-asm.S +++ b/pixman/pixman-mips-dspr2-asm.S @@ -749,6 +749,108 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips) END(pixman_composite_over_n_8_0565_asm_mips) +LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - mask (32bit constant) + * a3 - w + */ + + SAVE_REGS_ON_STACK 0, s0 + li t4, 0x00ff00ff + beqz a3, 3f + nop + addiu t1, a3, -1 + srl a2, a2, 24 + beqz t1, 2f + nop + +1: + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ + /* a2 = mask (32bit constant) */ + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */ + addiu a1, a1, 8 + + OVER_2x8888_2x8_2x8888 t0, t1, a2, a2, t2, t3, \ + t5, t6, t4, t7, t8, t9, t0, t1, s0 + + sw t5, 0(a0) + sw t6, 4(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a3, 3f + nop + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + /* a2 = mask (32bit constant) */ + lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */ + + OVER_8888_8_8888 t0, a2, t1, t3, t4, t5, t6, t7, t8 + + sw t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0 + j ra + nop + +END(pixman_composite_over_8888_n_8888_asm_mips) + +LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm_mips) +/* + * a0 - dst (a8r8g8b8) + * a1 - src (a8r8g8b8) + * a2 - mask (a8) + * a3 - w + */ + + SAVE_REGS_ON_STACK 0, s0, s1 + li t4, 0x00ff00ff + beqz a3, 3f + nop + addiu t1, a3, -1 + beqz t1, 2f + nop +1: + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lw t1, 4(a1) /* t1 = source (a8r8g8b8) */ + lbu t2, 0(a2) /* t2 = mask (a8) */ + lbu t3, 1(a2) /* t3 = mask (a8) */ + lw t5, 0(a0) /* t5 = destination (a8r8g8b8) */ + lw t6, 4(a0) /* t6 = destination (a8r8g8b8) */ + addiu a1, a1, 8 + addiu a2, a2, 2 + + OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, \ + t7, t8, t4, t9, s0, s1, t0, t1, t2 + + sw t7, 0(a0) + sw t8, 4(a0) + addiu a3, a3, -2 + addiu t1, a3, -1 + bgtz t1, 1b + addiu a0, a0, 8 +2: + beqz a3, 3f + nop + lw t0, 0(a1) /* t0 = source (a8r8g8b8) */ + lbu t1, 0(a2) /* t1 = mask (a8) */ + lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */ + + OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8 + + sw t3, 0(a0) +3: + RESTORE_REGS_FROM_STACK 0, s0, s1 + j ra + nop + +END(pixman_composite_over_8888_8_8888_asm_mips) + LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips) /* * a0 - *dst diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c index 1a9e610..a0ed50c 100644 --- a/pixman/pixman-mips-dspr2.c +++ b/pixman/pixman-mips-dspr2.c @@ -58,6 +58,12 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888, PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565, uint8_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) + +PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_8888, uint32_t, 1, + uint8_t, 1, uint32_t, 1) + PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC, uint32_t, uint32_t) PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC, @@ -234,6 +240,13 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mips_composite_over_n_8_0565), PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mips_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, mips_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, mips_composite_over_8888_n_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, mips_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, mips_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, mips_composite_over_8888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, mips_composite_over_8888_8_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888), SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888), SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8888), diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h index a3d774f..bddcfd8 100644 --- a/pixman/pixman-mips-dspr2.h +++ b/pixman/pixman-mips-dspr2.h @@ -127,6 +127,89 @@ mips_composite_##name (pixman_implementation_t *imp, \ } \ } +/*******************************************************************/ + +#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST(flags, name, \ + src_type, src_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_mips (dst_type *dst, \ + src_type *src, \ + uint32_t mask, \ + int32_t w); \ + \ +static void \ +mips_composite_##name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line, *dst; \ + src_type *src_line, *src; \ + int32_t dst_stride, src_stride; \ + uint32_t mask; \ + \ + mask = _pixman_image_get_solid ( \ + imp, mask_image, dest_image->bits.format); \ + \ + if ((flags & SKIP_ZERO_MASK) && mask == 0) \ + return; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + \ + while (height--) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + src = src_line; \ + src_line += src_stride; \ + \ + pixman_composite_##name##_asm_mips (dst, src, mask, width); \ + } \ +} + +/************************************************************************/ + +#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST(name, src_type, src_cnt, \ + mask_type, mask_cnt, \ + dst_type, dst_cnt) \ +void \ +pixman_composite_##name##_asm_mips (dst_type *dst, \ + src_type *src, \ + mask_type *mask, \ + int32_t w); \ + \ +static void \ +mips_composite_##name (pixman_implementation_t *imp, \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line, *dst; \ + src_type *src_line, *src; \ + mask_type *mask_line, *mask; \ + int32_t dst_stride, src_stride, mask_stride; \ + \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ + dst_stride, dst_line, dst_cnt); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ + src_stride, src_line, src_cnt); \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ + mask_stride, mask_line, mask_cnt); \ + \ + while (height--) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + mask = mask_line; \ + mask_line += mask_stride; \ + src = src_line; \ + src_line += src_stride; \ + pixman_composite_##name##_asm_mips (dst, src, mask, width); \ + } \ +} + /****************************************************************************/ #define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op, \ -- 2.7.4