From 304f57644ac6a991c6e538675de935356252c0a5 Mon Sep 17 00:00:00 2001 From: Nemanja Lukic Date: Wed, 29 Feb 2012 12:04:33 +0100 Subject: [PATCH] MIPS: DSPr2: Added mips_dspr2_blt and mips_dspr2_fill routines. Performance numbers before/after on MIPS-74kc @ 1GHz Referent (before): lowlevel-blt-bench: src_n_0565 = L1: 238.14 L2: 233.15 M: 57.88 ( 77.23%) HT: 53.22 VT: 49.99 R: 47.73 RT: 24.79 ( 91Kops/s) src_n_8888 = L1: 190.19 L2: 187.57 M: 28.94 ( 77.23%) HT: 27.91 VT: 27.33 R: 26.64 RT: 14.68 ( 77Kops/s) cairo-perf-trace: [ # ] backend test min(s) median(s) stddev. count [ # ] image: pixman 0.25.1 [ 0] image gnome-system-monitor 268.460 269.712 0.22% 6/6 Optimized: lowlevel-blt-bench: src_n_0565 = L1:1081.39 L2: 258.22 M:189.59 (252.91%) HT: 60.23 VT: 55.01 R: 53.44 RT: 23.68 ( 89Kops/s) src_n_8888 = L1: 653.46 L2: 113.55 M:135.26 (360.86%) HT: 38.99 VT: 37.38 R: 34.95 RT: 18.67 ( 84Kops/s) cairo-perf-trace: [ # ] backend test min(s) median(s) stddev. count [ # ] image: pixman 0.25.1 [ 0] image gnome-system-monitor 246.565 246.706 0.04% 6/6 --- pixman/pixman-mips-dspr2-asm.S | 105 ++++++++++++++++++++++++++ pixman/pixman-mips-dspr2.c | 163 +++++++++++++++++++++++++++++++++++++++++ pixman/pixman-mips-dspr2.h | 4 + 3 files changed, 272 insertions(+) diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S index 0a4c87e..f1087a7 100644 --- a/pixman/pixman-mips-dspr2-asm.S +++ b/pixman/pixman-mips-dspr2-asm.S @@ -31,6 +31,111 @@ #include "pixman-mips-dspr2-asm.h" +LEAF_MIPS_DSPR2(pixman_fill_buff16_mips) +/* + * a0 - *dest + * a1 - count (bytes) + * a2 - value to fill buffer with + */ + + beqz a1, 3f + andi t1, a0, 0x0002 + beqz t1, 0f /* check if address is 4-byte aligned */ + nop + sh a2, 0(a0) + addiu a0, a0, 2 + addiu a1, a1, -2 +0: + srl t1, a1, 5 /* t1 how many multiples of 32 bytes */ + replv.ph a2, a2 /* replicate fill value (16bit) in a2 */ + beqz t1, 2f + nop +1: + addiu t1, t1, -1 + beqz t1, 11f + addiu a1, a1, -32 + pref 30, 32(a0) + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + b 1b + addiu a0, a0, 32 +11: + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + addiu a0, a0, 32 +2: + blez a1, 3f + addiu a1, a1, -2 + sh a2, 0(a0) + b 2b + addiu a0, a0, 2 +3: + jr ra + nop + +END(pixman_fill_buff16_mips) + +LEAF_MIPS32R2(pixman_fill_buff32_mips) +/* + * a0 - *dest + * a1 - count (bytes) + * a2 - value to fill buffer with + */ + + beqz a1, 3f + nop + srl t1, a1, 5 /* t1 how many multiples of 32 bytes */ + beqz t1, 2f + nop +1: + addiu t1, t1, -1 + beqz t1, 11f + addiu a1, a1, -32 + pref 30, 32(a0) + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + b 1b + addiu a0, a0, 32 +11: + sw a2, 0(a0) + sw a2, 4(a0) + sw a2, 8(a0) + sw a2, 12(a0) + sw a2, 16(a0) + sw a2, 20(a0) + sw a2, 24(a0) + sw a2, 28(a0) + addiu a0, a0, 32 +2: + blez a1, 3f + addiu a1, a1, -4 + sw a2, 0(a0) + b 2b + addiu a0, a0, 4 +3: + jr ra + nop + +END(pixman_fill_buff32_mips) + LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips) /* * a0 - dst (r5g6b5) diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c index e331853..2beada3 100644 --- a/pixman/pixman-mips-dspr2.c +++ b/pixman/pixman-mips-dspr2.c @@ -49,6 +49,119 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888, PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888, uint8_t, 3, uint8_t, 3) +static pixman_bool_t +pixman_fill_mips (uint32_t *bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t _xor) +{ + uint8_t *byte_line; + uint32_t byte_width; + switch (bpp) + { + case 16: + stride = stride * (int) sizeof (uint32_t) / 2; + byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); + byte_width = width * 2; + stride *= 2; + + while (height--) + { + uint8_t *dst = byte_line; + byte_line += stride; + pixman_fill_buff16_mips (dst, byte_width, _xor & 0xffff); + } + return TRUE; + case 32: + stride = stride * (int) sizeof (uint32_t) / 4; + byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); + byte_width = width * 4; + stride *= 4; + + while (height--) + { + uint8_t *dst = byte_line; + byte_line += stride; + pixman_fill_buff32_mips (dst, byte_width, _xor); + } + return TRUE; + default: + return FALSE; + } +} + +static pixman_bool_t +pixman_blt_mips (uint32_t *src_bits, + uint32_t *dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height) +{ + if (src_bpp != dst_bpp) + return FALSE; + + uint8_t *src_bytes; + uint8_t *dst_bytes; + uint32_t byte_width; + + switch (src_bpp) + { + case 16: + src_stride = src_stride * (int) sizeof (uint32_t) / 2; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; + src_bytes =(uint8_t *)(((uint16_t *)src_bits) + + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + + dst_stride * (dest_y) + (dest_x)); + byte_width = width * 2; + src_stride *= 2; + dst_stride *= 2; + + while (height--) + { + uint8_t *src = src_bytes; + uint8_t *dst = dst_bytes; + src_bytes += src_stride; + dst_bytes += dst_stride; + pixman_mips_fast_memcpy (dst, src, byte_width); + } + return TRUE; + case 32: + src_stride = src_stride * (int) sizeof (uint32_t) / 4; + dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; + src_bytes = (uint8_t *)(((uint32_t *)src_bits) + + src_stride * (src_y) + (src_x)); + dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + + dst_stride * (dest_y) + (dest_x)); + byte_width = width * 4; + src_stride *= 4; + dst_stride *= 4; + + while (height--) + { + uint8_t *src = src_bytes; + uint8_t *dst = dst_bytes; + src_bytes += src_stride; + dst_bytes += dst_stride; + pixman_mips_fast_memcpy (dst, src, byte_width); + } + return TRUE; + default: + return FALSE; + } +} + static const pixman_fast_path_t mips_dspr2_fast_paths[] = { PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, mips_composite_src_0565_0565), @@ -74,11 +187,61 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = { PIXMAN_OP_NONE }, }; +static pixman_bool_t +mips_dspr2_blt (pixman_implementation_t *imp, + uint32_t * src_bits, + uint32_t * dst_bits, + int src_stride, + int dst_stride, + int src_bpp, + int dst_bpp, + int src_x, + int src_y, + int dest_x, + int dest_y, + int width, + int height) +{ + if (!pixman_blt_mips ( + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, + src_x, src_y, dest_x, dest_y, width, height)) + + { + return _pixman_implementation_blt ( + imp->delegate, + src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, + src_x, src_y, dest_x, dest_y, width, height); + } + + return TRUE; +} + +static pixman_bool_t +mips_dspr2_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + if (pixman_fill_mips (bits, stride, bpp, x, y, width, height, xor)) + return TRUE; + + return _pixman_implementation_fill ( + imp->delegate, bits, stride, bpp, x, y, width, height, xor); +} + pixman_implementation_t * _pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback) { pixman_implementation_t *imp = _pixman_implementation_create (fallback, mips_dspr2_fast_paths); + imp->blt = mips_dspr2_blt; + imp->fill = mips_dspr2_fill; + return imp; } diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h index 449c42a..a40e7c8 100644 --- a/pixman/pixman-mips-dspr2.h +++ b/pixman/pixman-mips-dspr2.h @@ -41,6 +41,10 @@ void pixman_mips_fast_memcpy (void *dst, void *src, uint32_t n_bytes); +void +pixman_fill_buff16_mips (void *dst, uint32_t n_bytes, uint16_t value); +void +pixman_fill_buff32_mips (void *dst, uint32_t n_bytes, uint32_t value); /****************************************************************/ -- 2.7.4