From: Siarhei Siamashka Date: Wed, 9 Dec 2009 09:29:13 +0000 (+0200) Subject: ARM: added 'neon_composite_over_n_8888' fast path X-Git-Tag: pixman-0.17.4~24 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=78a60047ac0f85423e0474ef54930e1f537f646b;p=platform%2Fupstream%2Fpixman.git ARM: added 'neon_composite_over_n_8888' fast path --- diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S index 57680bb..691a194 100644 --- a/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman-arm-neon-asm.S @@ -630,6 +630,36 @@ generate_composite_function \ /******************************************************************************/ +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_8888_process_pixblock_tail_head + pixman_composite_over_8888_8888_process_pixblock_tail + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + pixman_composite_over_8888_8888_process_pixblock_head +.endm + +.macro pixman_composite_over_n_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] +.endm + +generate_composite_function \ + pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8888_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_n_8888_process_pixblock_tail_head + +/******************************************************************************/ + .macro pixman_composite_over_n_8_0565_process_pixblock_head /* in */ vmull.u8 q0, d24, d8 diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c index 8ae79ae..fef98a1 100644 --- a/pixman/pixman-arm-neon.c +++ b/pixman/pixman-arm-neon.c @@ -259,6 +259,7 @@ BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1) BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1) BIND_N_NULL_DST(over_n_0565, uint16_t, 1) +BIND_N_NULL_DST(over_n_8888, uint32_t, 1) BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1) BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1) @@ -403,6 +404,8 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] = { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, neon_composite_over_n_8_8888 }, { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888 }, { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_over_n_0565 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, neon_composite_over_n_8888 }, + { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, neon_composite_over_n_8888 }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888 }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888 }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, neon_composite_over_8888_8_8888 },