From 86ad09b548b45a5a5074d9d83970d5e7e7f89d31 Mon Sep 17 00:00:00 2001 From: Nemanja Lukic Date: Mon, 2 Jul 2012 20:54:20 +0200 Subject: [PATCH] MIPS: DSPr2: Added more bilinear fast paths (without mask) Performance numbers before/after on MIPS-74kc @ 1GHz: lowlevel-blt-bench -b Referent (before): src_8888_8888 = L1: 8.18 L2: 7.79 M: 6.32 ( 33.51%) HT: 5.78 VT: 5.70 R: 5.61 RT: 3.79 ( 29Kops/s) src_8888_0565 = L1: 6.90 L2: 7.14 M: 6.47 ( 25.75%) HT: 5.54 VT: 5.51 R: 5.46 RT: 3.53 ( 28Kops/s) src_0565_x888 = L1: 3.76 L2: 3.71 M: 3.37 ( 13.41%) HT: 3.26 VT: 3.22 R: 3.20 RT: 2.58 ( 23Kops/s) src_0565_0565 = L1: 3.59 L2: 3.56 M: 3.47 ( 9.19%) HT: 3.19 VT: 3.18 R: 3.16 RT: 2.46 ( 22Kops/s) over_8888_8888 = L1: 5.99 L2: 5.66 M: 4.95 ( 26.28%) HT: 4.40 VT: 4.38 R: 4.31 RT: 3.02 ( 26Kops/s) add_8888_8888 = L1: 6.84 L2: 6.39 M: 5.48 ( 29.09%) HT: 4.80 VT: 4.79 R: 4.70 RT: 3.20 ( 27Kops/s) Optimized: src_8888_8888 = L1: 18.27 L2: 16.69 M: 12.87 ( 68.25%) HT: 11.80 VT: 11.61 R: 10.60 RT: 7.05 ( 41Kops/s) src_8888_0565 = L1: 15.18 L2: 14.10 M: 11.75 ( 46.71%) HT: 10.64 VT: 10.50 R: 10.03 RT: 7.15 ( 41Kops/s) src_0565_x888 = L1: 10.45 L2: 9.96 M: 9.23 ( 36.72%) HT: 8.39 VT: 8.29 R: 8.02 RT: 5.75 ( 37Kops/s) src_0565_0565 = L1: 9.37 L2: 8.98 M: 8.50 ( 22.53%) HT: 7.71 VT: 7.66 R: 7.52 RT: 5.59 ( 37Kops/s) over_8888_8888 = L1: 12.21 L2: 11.01 M: 8.56 ( 45.36%) HT: 7.71 VT: 7.64 R: 7.43 RT: 5.51 ( 36Kops/s) add_8888_8888 = L1: 17.72 L2: 15.16 M: 10.78 ( 57.13%) HT: 9.46 VT: 9.30 R: 9.00 RT: 6.03 ( 38Kops/s) --- pixman/pixman-mips-dspr2-asm.S | 367 +++++++++++++++++++++++++++++++++++++++++ pixman/pixman-mips-dspr2-asm.h | 21 +++ pixman/pixman-mips-dspr2.c | 29 ++++ pixman/pixman-mips-dspr2.h | 49 ++++++ 4 files changed, 466 insertions(+) diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S index ac56746..a8fccd5 100644 --- a/pixman/pixman-mips-dspr2-asm.S +++ b/pixman/pixman-mips-dspr2-asm.S @@ -749,6 +749,373 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips) END(pixman_composite_over_n_8_0565_asm_mips) +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 + + lw s0, 36(sp) /* s0 = wt */ + lw s1, 40(sp) /* s1 = wb */ + lw s2, 44(sp) /* s2 = vx */ + lw s3, 48(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a1) /* t0 = tl */ + lwx t1, t8(a1) /* t1 = tr */ + addiu a3, a3, -1 + lwx t2, t9(a2) /* t2 = bl */ + lwx t3, t8(a2) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t0, 0(a0) + bnez a3, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 + + lw s0, 36(sp) /* s0 = wt */ + lw s1, 40(sp) /* s1 = wb */ + lw s2, 44(sp) /* s2 = vx */ + lw s3, 48(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a1) /* t0 = tl */ + lwx t1, t8(a1) /* t1 = tr */ + addiu a3, a3, -1 + lwx t2, t9(a2) /* t2 = bl */ + lwx t3, t8(a2) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 + + addu s2, s2, s3 /* vx += unit_x; */ + sh t1, 0(a0) + bnez a3, 0b + addiu a0, a0, 2 + + RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw s0, 44(sp) /* s0 = wt */ + lw s1, 48(sp) /* s1 = wb */ + lw s2, 52(sp) /* s2 = vx */ + lw s3, 56(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li v1, 0x07e007e0 + li s8, 0x001f001f + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 1 + addiu t8, t9, 2 + lhx t0, t9(a1) /* t0 = tl */ + lhx t1, t8(a1) /* t1 = tr */ + andi t1, t1, 0xffff + addiu a3, a3, -1 + lhx t2, t9(a2) /* t2 = bl */ + lhx t3, t8(a2) /* t3 = br */ + andi t3, t3, 0xffff + + CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t0, 0(a0) + bnez a3, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw s0, 44(sp) /* s0 = wt */ + lw s1, 48(sp) /* s1 = wb */ + lw s2, 52(sp) /* s2 = vx */ + lw s3, 56(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li v1, 0x07e007e0 + li s8, 0x001f001f + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 1 + addiu t8, t9, 2 + lhx t0, t9(a1) /* t0 = tl */ + lhx t1, t8(a1) /* t1 = tr */ + andi t1, t1, 0xffff + addiu a3, a3, -1 + lhx t2, t9(a2) /* t2 = bl */ + lhx t3, t8(a2) /* t3 = br */ + andi t3, t3, 0xffff + + CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7 + CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7 + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3 + + addu s2, s2, s3 /* vx += unit_x; */ + sh t1, 0(a0) + bnez a3, 0b + addiu a0, a0, 2 + + RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8 + + lw s0, 40(sp) /* s0 = wt */ + lw s1, 44(sp) /* s1 = wb */ + lw s2, 48(sp) /* s2 = vx */ + lw s3, 52(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + li s8, 0x00ff00ff + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a1) /* t0 = tl */ + lwx t1, t8(a1) /* t1 = tr */ + addiu a3, a3, -1 + lwx t2, t9(a2) /* t2 = bl */ + lwx t3, t8(a2) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lw t1, 0(a0) /* t1 = dest */ + OVER_8888_8888 t0, t1, t2, s8, t3, t4, t5, t6 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t2, 0(a0) + bnez a3, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips) + +LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips) +/* + * a0 - *dst + * a1 - *src_top + * a2 - *src_bottom + * a3 - w + * 16(sp) - wt + * 20(sp) - wb + * 24(sp) - vx + * 28(sp) - unit_x + */ + + beqz a3, 1f + nop + + SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 + + lw s0, 36(sp) /* s0 = wt */ + lw s1, 40(sp) /* s1 = wb */ + lw s2, 44(sp) /* s2 = vx */ + lw s3, 48(sp) /* s3 = unit_x */ + li v0, BILINEAR_INTERPOLATION_RANGE + + sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) + sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) +0: + andi t4, s2, 0xffff /* t4 = (short)vx */ + srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */ + subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */ + + mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */ + mul s5, s0, t4 /* s5 = wt*(vx>>8) */ + mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */ + mul s7, s1, t4 /* s7 = wb*(vx>>8) */ + + sra t9, s2, 16 + sll t9, t9, 2 + addiu t8, t9, 4 + lwx t0, t9(a1) /* t0 = tl */ + lwx t1, t8(a1) /* t1 = tr */ + addiu a3, a3, -1 + lwx t2, t9(a2) /* t2 = bl */ + lwx t3, t8(a2) /* t3 = br */ + + BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 + lw t1, 0(a0) + addu_s.qb t2, t0, t1 + + addu s2, s2, s3 /* vx += unit_x; */ + sw t2, 0(a0) + bnez a3, 0b + addiu a0, a0, 4 + + RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7 +1: + j ra + nop + +END(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips) + LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips) /* * a0 - *dst diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h index 24b049e..7327dc6 100644 --- a/pixman/pixman-mips-dspr2-asm.h +++ b/pixman/pixman-mips-dspr2-asm.h @@ -566,6 +566,27 @@ LEAF_MIPS32R2(symbol) \ addu_s.qb \out2_8888, \d2_8888, \scratch2 .endm +/* + * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8 + * destination pixel (d_8888). It also requires maskLSR needed for rounding + * process. maskLSR must have following value: + * li maskLSR, 0x00ff00ff + */ +.macro OVER_8888_8888 s_8888, \ + d_8888, \ + out_8888, \ + maskLSR, \ + scratch1, scratch2, scratch3, scratch4 + not \scratch1, \s_8888 + srl \scratch1, \scratch1, 24 + + MIPS_UN8x4_MUL_UN8 \d_8888, \scratch1, \ + \out_8888, \maskLSR, \ + \scratch2, \scratch3, \scratch4 + + addu_s.qb \out_8888, \out_8888, \s_8888 +.endm + .macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888, \ m_8, \ d_8888, \ diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c index 66c0e5d..63a0225 100644 --- a/pixman/pixman-mips-dspr2.c +++ b/pixman/pixman-mips-dspr2.c @@ -58,6 +58,19 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888, PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565, uint8_t, 1, uint16_t, 1) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC, + uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC, + uint32_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_8888, SRC, + uint16_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_0565, SRC, + uint16_t, uint16_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, OVER, + uint32_t, uint32_t) +PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, ADD, + uint32_t, uint32_t) + PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_8888, SRC, uint32_t, uint32_t) PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_0565, SRC, @@ -219,6 +232,22 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] = PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mips_composite_over_n_8_0565), PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mips_composite_over_n_8_0565), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8888), + + SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, mips_8888_0565), + SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, mips_8888_0565), + + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888), + SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, mips_0565_0565), + + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888), + + SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, mips_8888_8888), + SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, mips_8888_8888), + SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8_8888), SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8_8888), SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8_8888), diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h index 5036938..a3d774f 100644 --- a/pixman/pixman-mips-dspr2.h +++ b/pixman/pixman-mips-dspr2.h @@ -127,6 +127,55 @@ mips_composite_##name (pixman_implementation_t *imp, \ } \ } +/****************************************************************************/ + +#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips( \ + dst_type * dst, \ + const src_type * src_top, \ + const src_type * src_bottom, \ + int32_t w, \ + int wt, \ + int wb, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x); \ +static force_inline void \ +scaled_bilinear_scanline_mips_##name##_##op (dst_type * dst, \ + const uint32_t * mask, \ + const src_type * src_top, \ + const src_type * src_bottom, \ + int32_t w, \ + int wt, \ + int wb, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (dst, src_top, \ + src_bottom, w, \ + wt, wb, \ + vx, unit_x); \ +} \ + \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint32_t, dst_type, COVER, FLAG_NONE) \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint32_t, dst_type, NONE, FLAG_NONE) \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint32_t, dst_type, PAD, FLAG_NONE) \ +FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op, \ + scaled_bilinear_scanline_mips_##name##_##op, \ + src_type, uint32_t, dst_type, NORMAL, \ + FLAG_NONE) + /*****************************************************************************/ #define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, name, op, \ -- 2.7.4