From 3d684556dbdb087fa6d0631f06ccde38bb02dea5 Mon Sep 17 00:00:00 2001 From: =?utf8?q?S=C3=B8ren=20Sandmann=20Pedersen?= Date: Fri, 1 May 2009 03:02:58 -0400 Subject: [PATCH] Implement the mmx combiners with masks (disabled) --- pixman/pixman-mmx.c | 85 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 63 insertions(+), 22 deletions(-) diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c index 8262cb1..98726d7 100644 --- a/pixman/pixman-mmx.c +++ b/pixman/pixman-mmx.c @@ -444,14 +444,32 @@ mmxCombineMaskU (uint32_t *src, const uint32_t *mask, int width) _mm_empty(); } +static force_inline uint32_t +combine (const uint32_t *src, const uint32_t *mask) +{ + uint32_t ssrc = *src; + + if (mask) + { + __m64 m = load8888 (*mask); + __m64 s = load8888 (ssrc); + + m = expand_alpha (m); + s = pix_multiply (s, m); + + ssrc = store8888 (s); + } + + return ssrc; +} static FASTCALL void -mmxCombineOverU (uint32_t *dest, const uint32_t *src, int width) +mmxCombineOverU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) { const uint32_t *end = dest + width; while (dest < end) { - uint32_t ssrc = *src; + uint32_t ssrc = combine (src, mask); uint32_t a = ssrc >> 24; if (a == 0xff) { *dest = ssrc; @@ -463,70 +481,79 @@ mmxCombineOverU (uint32_t *dest, const uint32_t *src, int width) } ++dest; ++src; + if (mask) + ++mask; } _mm_empty(); } static FASTCALL void -mmxCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width) +mmxCombineOverReverseU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) { const uint32_t *end = dest + width; while (dest < end) { __m64 d, da; + uint32_t s = combine (src, mask); d = load8888(*dest); da = expand_alpha(d); - *dest = store8888(over (d, da, load8888(*src))); + *dest = store8888(over (d, da, load8888(s))); ++dest; ++src; + if (mask) + mask++; } _mm_empty(); } static FASTCALL void -mmxCombineInU (uint32_t *dest, const uint32_t *src, int width) +mmxCombineInU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) { const uint32_t *end = dest + width; while (dest < end) { __m64 x, a; - x = load8888(*src); + x = load8888 (combine (src, mask)); a = load8888(*dest); a = expand_alpha(a); x = pix_multiply(x, a); *dest = store8888(x); ++dest; ++src; + if (mask) + mask++; } _mm_empty(); } static FASTCALL void -mmxCombineInReverseU (uint32_t *dest, const uint32_t *src, int width) +mmxCombineInReverseU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) { const uint32_t *end = dest + width; while (dest < end) { __m64 x, a; x = load8888(*dest); - a = load8888(*src); + a = load8888(combine (src, mask)); a = expand_alpha(a); x = pix_multiply(x, a); *dest = store8888(x); ++dest; ++src; + if (mask) + mask++; } _mm_empty(); } static FASTCALL void -mmxCombineOutU (uint32_t *dest, const uint32_t *src, int width) +mmxCombineOutU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) { const uint32_t *end = dest + width; while (dest < end) { __m64 x, a; - x = load8888(*src); + x = load8888(combine (src, mask)); a = load8888(*dest); a = expand_alpha(a); a = negate(a); @@ -534,37 +561,41 @@ mmxCombineOutU (uint32_t *dest, const uint32_t *src, int width) *dest = store8888(x); ++dest; ++src; + if (mask) + mask++; } _mm_empty(); } static FASTCALL void -mmxCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width) +mmxCombineOutReverseU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) { const uint32_t *end = dest + width; while (dest < end) { __m64 x, a; x = load8888(*dest); - a = load8888(*src); + a = load8888(combine (src, mask)); a = expand_alpha(a); a = negate(a); x = pix_multiply(x, a); *dest = store8888(x); ++dest; ++src; + if (mask) + mask++; } _mm_empty(); } static FASTCALL void -mmxCombineAtopU (uint32_t *dest, const uint32_t *src, int width) +mmxCombineAtopU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) { const uint32_t *end = dest + width; while (dest < end) { __m64 s, da, d, sia; - s = load8888(*src); + s = load8888(combine (src, mask)); d = load8888(*dest); sia = expand_alpha(s); sia = negate(sia); @@ -573,12 +604,14 @@ mmxCombineAtopU (uint32_t *dest, const uint32_t *src, int width) *dest = store8888(s); ++dest; ++src; + if (mask) + mask++; } _mm_empty(); } static FASTCALL void -mmxCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width) +mmxCombineAtopReverseU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) { const uint32_t *end; @@ -586,7 +619,7 @@ mmxCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width) while (dest < end) { __m64 s, dia, d, sa; - s = load8888(*src); + s = load8888(combine(src, mask)); d = load8888(*dest); sa = expand_alpha(s); dia = expand_alpha(d); @@ -595,18 +628,20 @@ mmxCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width) *dest = store8888(s); ++dest; ++src; + if (mask) + mask++; } _mm_empty(); } static FASTCALL void -mmxCombineXorU (uint32_t *dest, const uint32_t *src, int width) +mmxCombineXorU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) { const uint32_t *end = dest + width; while (dest < end) { __m64 s, dia, d, sia; - s = load8888(*src); + s = load8888(combine(src, mask)); d = load8888(*dest); sia = expand_alpha(s); dia = expand_alpha(d); @@ -616,32 +651,36 @@ mmxCombineXorU (uint32_t *dest, const uint32_t *src, int width) *dest = store8888(s); ++dest; ++src; + if (mask) + mask++; } _mm_empty(); } static FASTCALL void -mmxCombineAddU (uint32_t *dest, const uint32_t *src, int width) +mmxCombineAddU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) { const uint32_t *end = dest + width; while (dest < end) { __m64 s, d; - s = load8888(*src); + s = load8888(combine(src,mask)); d = load8888(*dest); s = pix_add(s, d); *dest = store8888(s); ++dest; ++src; + if (mask) + mask++; } _mm_empty(); } static FASTCALL void -mmxCombineSaturateU (uint32_t *dest, const uint32_t *src, int width) +mmxCombineSaturateU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width) { const uint32_t *end = dest + width; while (dest < end) { - uint32_t s = *src; + uint32_t s = combine(src,mask); uint32_t d = *dest; __m64 ms = load8888(s); __m64 md = load8888(d); @@ -657,6 +696,8 @@ mmxCombineSaturateU (uint32_t *dest, const uint32_t *src, int width) *dest = store8888(md); ++src; ++dest; + if (mask) + mask++; } _mm_empty(); } -- 2.7.4