From 062da411d81c7d970a302dd2c283ef5327b867da Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Tue, 8 Dec 2009 15:04:41 +0200 Subject: [PATCH] ARM: added 'neon_composite_add_8888_8888_8888' fast path --- pixman/pixman-arm-neon-asm.S | 47 ++++++++++++++++++++++++++++++++++++++++++++ pixman/pixman-arm-neon.c | 2 ++ 2 files changed, 49 insertions(+) diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S index 81f0b92..d35704d 100644 --- a/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman-arm-neon-asm.S @@ -983,6 +983,53 @@ generate_composite_function \ /******************************************************************************/ +.macro pixman_composite_add_8888_8888_8888_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ + /* mask in {d24, d25, d26, d27} */ + vmull.u8 q8, d27, d0 + vmull.u8 q9, d27, d1 + vmull.u8 q10, d27, d2 + vmull.u8 q11, d27, d3 + vrshr.u16 q0, q8, #8 + vrshr.u16 q1, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q9 + vraddhn.u16 d2, q12, q10 + vraddhn.u16 d3, q13, q11 + vqadd.u8 q14, q0, q2 + vqadd.u8 q15, q1, q3 +.endm + +.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head + pixman_composite_add_8888_8888_8888_process_pixblock_tail + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vld4.8 {d24, d25, d26, d27}, [MASK]! + vld4.8 {d0, d1, d2, d3}, [SRC]! + cache_preload 8, 8 + pixman_composite_add_8888_8888_8888_process_pixblock_head +.endm + +generate_composite_function \ + pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + .macro pixman_composite_over_8888_n_8888_process_pixblock_head /* expecting source data in {d0, d1, d2, d3} */ /* destination data in {d4, d5, d6, d7} */ diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c index fe444f2..c7f0870 100644 --- a/pixman/pixman-arm-neon.c +++ b/pixman/pixman-arm-neon.c @@ -227,6 +227,7 @@ BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1) BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1) BIND_SRC_MASK_DST(add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1) +BIND_SRC_MASK_DST(add_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1) BIND_SRC_MASK_DST(over_8888_8_8888, uint32_t, 1, uint8_t, 1, uint32_t, 1) BIND_SRC_MASK_DST(over_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1) @@ -369,6 +370,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] = { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, neon_composite_over_8888_8888 }, { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, neon_composite_add_n_8_8 }, { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_a8, PIXMAN_a8, neon_composite_add_8_8_8 }, + { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, neon_composite_add_8888_8888_8888 }, { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, neon_composite_add_8000_8000 }, { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, neon_composite_add_8888_8888 }, { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, neon_composite_add_8888_8888 }, -- 2.7.4