From a732d3baeb0697b91a713fd6b51b68ee7ca68e03 Mon Sep 17 00:00:00 2001
From: Siarhei Siamashka
Date: Thu, 5 Nov 2009 20:27:38 +0200
Subject: [PATCH] ARM: added 'neon_composite_src_0888_0565_rev' fast path

This is ARM NEON optimized conversion of native RGB format used by GTK/GDK into
r5g6b5 format.
---
 pixman/pixman-arm-neon-asm.S | 38 ++++++++++++++++++++++++++++++++++++++
 pixman/pixman-arm-neon.c     |  2 ++
 2 files changed, 40 insertions(+)

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index bb68be6..8010e80 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -1370,3 +1370,41 @@ generate_composite_function \
     0, /* dst_r_basereg */ \
     0, /* src_basereg */ \
     0  /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_head /* first half of the conversion: pre-widen source planes d1 and d2; NOTE(review): d0-d2 are presumably filled with de-interleaved source bytes before this runs - confirm against generate_composite_function */
+    vshll.u8    q8, d1, #8 /* q8 = each byte of plane d1 widened to 16 bits and moved into the high byte */
+    vshll.u8    q9, d2, #8 /* q9 = same for plane d2 */
+.endm
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail /* second half: pack d0 and the q8/q9 values prepared by the head macro into eight 16-bit pixels in q14 */
+    vshll.u8    q14, d0, #8 /* q14 = plane d0 << 8; its top 5 bits end up as bits 15-11 (the red field of the r5g6b5 destination) */
+    vsri.u16    q14, q8, #5 /* keep bits 15-11 of q14, insert q8 >> 5 below them: top 6 bits of plane d1 land in bits 10-5 */
+    vsri.u16    q14, q9, #11 /* keep bits 15-5 of q14, insert q9 >> 11 below them: top 5 bits of plane d2 land in bits 4-0 */
+.endm
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head /* tail for the current block merged with the load and head of the next one, plus the store of the current result */
+    vshll.u8    q14, d0, #8 /* must read d0 of the current block before the load below overwrites it */
+    vld3.8      {d0, d1, d2}, [SRC]! /* load the next 8 pixels (24 bytes), de-interleaving byte 0/1/2 of each pixel into d0/d1/d2; SRC is post-incremented */
+    vsri.u16    q14, q8, #5 /* q8 still holds plane d1 of the current block (computed before the load) */
+    vsri.u16    q14, q9, #11 /* q9 still holds plane d2 of the current block; q14 is now fully packed */
+    vshll.u8    q8, d1, #8 /* head work for the next block: q8 may be overwritten now that it has been consumed */
+    vst1.16     {d28, d29}, [DST_W, :128]! /* store q14 (d28:d29), i.e. 8 packed 16-bit pixels, with a 128-bit alignment hint; DST_W is post-incremented */
+    vshll.u8    q9, d2, #8 /* head work for the next block, q9 was consumed by the vsri above */
+.endm
+
+generate_composite_function \
+    pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \
+    FLAG_DST_WRITEONLY, \
+    8, /* number of pixels, processed in a single block */ \
+    10, /* prefetch distance */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_src_0888_0565_rev_process_pixblock_head, \
+    pixman_composite_src_0888_0565_rev_process_pixblock_tail, \
+    pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    0, /* dst_r_basereg */ \
+    0, /* src_basereg */ \
+    0  /* mask_basereg */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index ac0c558..21ed436 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -256,6 +256,7 @@ BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
 BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
 BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1)
 BIND_SRC_NULL_DST(src_0888_8888_rev, uint8_t, 3, uint32_t, 1)
+BIND_SRC_NULL_DST(src_0888_0565_rev, uint8_t, 3, uint16_t, 1)
 BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
 BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
 
@@ -399,6 +400,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
     { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, neon_composite_src_8888_8888 },
     { PIXMAN_OP_SRC, PIXMAN_r8g8b8, PIXMAN_null, PIXMAN_r8g8b8, neon_composite_src_0888_0888 },
     { PIXMAN_OP_SRC, PIXMAN_b8g8r8, PIXMAN_null, PIXMAN_x8r8g8b8, neon_composite_src_0888_8888_rev },
+    { PIXMAN_OP_SRC, PIXMAN_b8g8r8, PIXMAN_null, PIXMAN_r5g6b5, neon_composite_src_0888_0565_rev },
     { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, neon_composite_over_n_8_0565 },
     { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, neon_composite_over_n_8_0565 },
     { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, neon_composite_over_n_8_8888 },
-- 
2.7.4