From eba402092082bf48072671e04e224589af872acd Mon Sep 17 00:00:00 2001 From: =?utf8?q?S=C3=B8ren=20Sandmann=20Pedersen?= Date: Sun, 14 Sep 2008 14:58:00 -0400 Subject: [PATCH] [sse2] Fix rounding bug in conversion from 565 to 8888 When converting from 565 to 8888, replicate the topmost bits instead of appending zeros. --- pixman/pixman-sse2.c | 52 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c index cc08189..0c671ed 100644 --- a/pixman/pixman-sse2.c +++ b/pixman/pixman-sse2.c @@ -1,3 +1,4 @@ +#include /* * Copyright © 2008 Rodrigo Kumpera * Copyright © 2008 André Tupinambá @@ -73,6 +74,9 @@ static __m128i MaskRed; static __m128i MaskGreen; static __m128i MaskBlue; +static __m128i Mask565FixRB; +static __m128i Mask565FixG; + /* ------------------------------------------------------------------------------------------------- * SSE2 Inlines */ @@ -89,26 +93,37 @@ unpack_128_2x128 (__m128i data, __m128i* dataLo, __m128i* dataHi) *dataHi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ()); } +static inline __m128i +unpack565to8888 (__m128i lo) +{ + __m128i r, g, b, rb, t; + + r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), MaskRed); + g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), MaskGreen); + b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), MaskBlue); + + rb = _mm_or_si128 (r, b); + t = _mm_and_si128 (rb, Mask565FixRB); + t = _mm_srli_epi32 (t, 5); + rb = _mm_or_si128 (rb, t); + + t = _mm_and_si128 (g, Mask565FixG); + t = _mm_srli_epi32 (t, 6); + g = _mm_or_si128 (g, t); + + return _mm_or_si128 (rb, g); +} + static inline void unpack565_128_4x128 (__m128i data, __m128i* data0, __m128i* data1, __m128i* data2, __m128i* data3) { __m128i lo, hi; - __m128i r, g, b; lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ()); hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ()); - r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), MaskRed); - g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), MaskGreen); - b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), MaskBlue); - - lo = _mm_or_si128 (_mm_or_si128 (r, g), b); - - r = _mm_and_si128 (_mm_slli_epi32 (hi, 8), MaskRed); - g = _mm_and_si128 (_mm_slli_epi32 (hi, 5), MaskGreen); - b = _mm_and_si128 (_mm_slli_epi32 (hi, 3), MaskBlue); - - hi = _mm_or_si128 (_mm_or_si128 (r, g), b); + lo = unpack565to8888 (lo); + hi = unpack565to8888 (hi); unpack_128_2x128 (lo, data0, data1); unpack_128_2x128 (hi, data2, data3); @@ -2297,7 +2312,8 @@ fbComposeSetupSSE2(void) MaskRed = createMask_2x32_128 (0x00f80000, 0x00f80000); MaskGreen = createMask_2x32_128 (0x0000fc00, 0x0000fc00); MaskBlue = createMask_2x32_128 (0x000000f8, 0x000000f8); - + Mask565FixRB = createMask_2x32_128 (0x00e000e0, 0x00e000e0); + Mask565FixG = createMask_2x32_128 (0x0000c000, 0x0000c000); Mask0080 = createMask_16_128 (0x0080); Mask00ff = createMask_16_128 (0x00ff); Mask0101 = createMask_16_128 (0x0101); @@ -2484,6 +2500,7 @@ fbCompositeSolid_nx0565sse2 (pixman_op_t op, while (w && (unsigned long)dst & 15) { d = *dst; + *dst++ = pack565_32_16 (pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmmSrc), _mm_movepi64_pi64 (xmmAlpha), expand565_16_1x64 (d)))); @@ -2498,15 +2515,14 @@ fbCompositeSolid_nx0565sse2 (pixman_op_t op, /* fill cache line with next memory */ cachePrefetchNext ((__m128i*)dst); - xmmDst = load128Aligned ((__m128i*)dst); - - unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3); - + xmmDst = load128Aligned ((__m128i*)dst); + + unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3); + over_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmDst0, &xmmDst1); over_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmDst2, &xmmDst3); xmmDst = pack565_4x128_128 (&xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3); - save128Aligned ((__m128i*)dst, xmmDst); dst += 8; -- 2.7.4