From 5122f2b28bfbf08479cc88ff9ddfeb371f92264f Mon Sep 17 00:00:00 2001 From: =?utf8?q?S=C3=B8ren=20Sandmann=20Pedersen?= Date: Sat, 16 Jun 2007 14:56:25 -0400 Subject: [PATCH] Unroll loop in fbComposite_x888x8x8888() --- pixman/pixman-mmx.c | 75 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 13 deletions(-) diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c index 9ec0429..c738822 100644 --- a/pixman/pixman-mmx.c +++ b/pixman/pixman-mmx.c @@ -1324,23 +1324,72 @@ fbCompositeSrc_x888x8x8888mmx (pixman_op_t op, src++; } - while (w >= 2) + while (w >= 16) { + __m64 vd0 = *(__m64 *)(dst + 0); + __m64 vd1 = *(__m64 *)(dst + 2); + __m64 vd2 = *(__m64 *)(dst + 4); + __m64 vd3 = *(__m64 *)(dst + 6); + __m64 vd4 = *(__m64 *)(dst + 8); + __m64 vd5 = *(__m64 *)(dst + 10); + __m64 vd6 = *(__m64 *)(dst + 12); + __m64 vd7 = *(__m64 *)(dst + 14); + + __m64 vs0 = *(__m64 *)(src + 0); + __m64 vs1 = *(__m64 *)(src + 2); + __m64 vs2 = *(__m64 *)(src + 4); + __m64 vs3 = *(__m64 *)(src + 6); + __m64 vs4 = *(__m64 *)(src + 8); + __m64 vs5 = *(__m64 *)(src + 10); + __m64 vs6 = *(__m64 *)(src + 12); + __m64 vs7 = *(__m64 *)(src + 14); - __m64 vd0 = *(__m64 *)(dst); - __m64 vs0 = *(__m64 *)(src); - vd0 = pack8888 ( - in_over_full_src_alpha (expand8888 (vs0, 0), vmask, expand8888 (vd0, 0)), - in_over_full_src_alpha (expand8888 (vs0, 1), vmask, expand8888 (vd0, 1))); - - *(__m64 *)(dst) = vd0; - - w -= 2; - dst += 2; - src += 2; + in_over (expand8888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)), + in_over (expand8888 (vs0, 1), srca, vmask, expand8888 (vd0, 1))); + + vd1 = pack8888 ( + in_over (expand8888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)), + in_over (expand8888 (vs1, 1), srca, vmask, expand8888 (vd1, 1))); + + vd2 = pack8888 ( + in_over (expand8888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)), + in_over (expand8888 (vs2, 1), srca, vmask, expand8888 (vd2, 1))); + + vd3 = pack8888 ( + in_over (expand8888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)), + in_over (expand8888 (vs3, 1), srca, vmask, expand8888 (vd3, 1))); + + vd4 = pack8888 ( + in_over (expand8888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)), + in_over (expand8888 (vs4, 1), srca, vmask, expand8888 (vd4, 1))); + + vd5 = pack8888 ( + in_over (expand8888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)), + in_over (expand8888 (vs5, 1), srca, vmask, expand8888 (vd5, 1))); + + vd6 = pack8888 ( + in_over (expand8888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)), + in_over (expand8888 (vs6, 1), srca, vmask, expand8888 (vd6, 1))); + + vd7 = pack8888 ( + in_over (expand8888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)), + in_over (expand8888 (vs7, 1), srca, vmask, expand8888 (vd7, 1))); + + *(__m64 *)(dst + 0) = vd0; + *(__m64 *)(dst + 2) = vd1; + *(__m64 *)(dst + 4) = vd2; + *(__m64 *)(dst + 6) = vd3; + *(__m64 *)(dst + 8) = vd4; + *(__m64 *)(dst + 10) = vd5; + *(__m64 *)(dst + 12) = vd6; + *(__m64 *)(dst + 14) = vd7; + + w -= 16; + dst += 16; + src += 16; } - + while (w) { __m64 s = load8888 (*src | 0xff000000); -- 2.7.4