From 8cd5ae79c6aaa20188ac6f34318c2f358d87e103 Mon Sep 17 00:00:00 2001 From: "bungeman@google.com" Date: Mon, 9 Jul 2012 17:44:57 +0000 Subject: [PATCH] Fix SkBlendLCD16_SSE2 for non ARGB platforms. http://codereview.appspot.com/6356062/ git-svn-id: http://skia.googlecode.com/svn/trunk@4481 2bbb7eff-a529-9590-31e7-b0007b416f81 --- src/opts/SkBlitRow_opts_SSE2.cpp | 66 ++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp index 5bb0c8c..74e44af 100644 --- a/src/opts/SkBlitRow_opts_SSE2.cpp +++ b/src/opts/SkBlitRow_opts_SSE2.cpp @@ -513,20 +513,48 @@ void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr, } while (--height != 0); } +// The following (left) shifts cause the top 5 bits of the mask components to +// line up with the corresponding components in an SkPMColor. +// Note that the mask's RGB16 order may differ from the SkPMColor order. +#define SK_R16x5_R32x5_SHIFT (SK_R32_SHIFT - SK_R16_SHIFT - SK_R16_BITS + 5) +#define SK_G16x5_G32x5_SHIFT (SK_G32_SHIFT - SK_G16_SHIFT - SK_G16_BITS + 5) +#define SK_B16x5_B32x5_SHIFT (SK_B32_SHIFT - SK_B16_SHIFT - SK_B16_BITS + 5) + +#if SK_R16x5_R32x5_SHIFT == 0 + #define SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (x) +#elif SK_R16x5_R32x5_SHIFT > 0 + #define SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (_mm_slli_epi32(x, SK_R16x5_R32x5_SHIFT)) +#else + #define SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (_mm_srli_epi32(x, -SK_R16x5_R32x5_SHIFT)) +#endif + +#if SK_G16x5_G32x5_SHIFT == 0 + #define SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (x) +#elif SK_G16x5_G32x5_SHIFT > 0 + #define SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (_mm_slli_epi32(x, SK_G16x5_G32x5_SHIFT)) +#else + #define SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (_mm_srli_epi32(x, -SK_G16x5_G32x5_SHIFT)) +#endif + +#if SK_B16x5_B32x5_SHIFT == 0 + #define SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (x) +#elif SK_B16x5_B32x5_SHIFT > 0 + #define SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (_mm_slli_epi32(x, SK_B16x5_B32x5_SHIFT)) +#else + #define SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (_mm_srli_epi32(x, -SK_B16x5_B32x5_SHIFT)) +#endif + static __m128i SkBlendLCD16_SSE2(__m128i &srci, __m128i &dst, __m128i &mask, __m128i &scale) { // Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits. - __m128i r = _mm_and_si128(_mm_slli_epi32(mask, - 16-SK_R16_SHIFT-(SK_R16_BITS-5)), - _mm_set1_epi32(0x001F0000)); - - __m128i g = _mm_and_si128(_mm_slli_epi32(mask, - 8-SK_G16_SHIFT-(SK_G16_BITS-5)), - _mm_set1_epi32(0x00001F00)); + __m128i r = _mm_and_si128(SkPackedR16x5ToUnmaskedR32x5_SSE2(mask), + _mm_set1_epi32(0x1F << SK_R32_SHIFT)); - __m128i b = _mm_and_si128(_mm_slli_epi32(mask, - SK_B16_BITS-5), - _mm_set1_epi32(0x0000001F)); + __m128i g = _mm_and_si128(SkPackedG16x5ToUnmaskedG32x5_SSE2(mask), + _mm_set1_epi32(0x1F << SK_G32_SHIFT)); + + __m128i b = _mm_and_si128(SkPackedB16x5ToUnmaskedB32x5_SSE2(mask), + _mm_set1_epi32(0x1F << SK_B32_SHIFT)); // Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3) mask = _mm_or_si128(_mm_or_si128(r, g), b); @@ -564,20 +592,18 @@ static __m128i SkBlendLCD16_SSE2(__m128i &srci, __m128i &dst, return _mm_packus_epi16(resultLo, resultHi); } -static __m128i SkBlendLCD16Opaque_SSE2(__m128i &srci, __m128i &dst, +static __m128i SkBlendLCD16Opaque_SSE2(__m128i &srci, __m128i &dst, __m128i &mask) { // Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits. - __m128i r = _mm_and_si128(_mm_slli_epi32(mask, - 16-SK_R16_SHIFT-(SK_R16_BITS-5)), - _mm_set1_epi32(0x001F0000)); + __m128i r = _mm_and_si128(SkPackedR16x5ToUnmaskedR32x5_SSE2(mask), + _mm_set1_epi32(0x1F << SK_R32_SHIFT)); - __m128i g = _mm_and_si128(_mm_slli_epi32(mask, - 8-SK_G16_SHIFT-(SK_G16_BITS-5)), - _mm_set1_epi32(0x00001F00)); + __m128i g = _mm_and_si128(SkPackedG16x5ToUnmaskedG32x5_SSE2(mask), + _mm_set1_epi32(0x1F << SK_G32_SHIFT)); + + __m128i b = _mm_and_si128(SkPackedB16x5ToUnmaskedB32x5_SSE2(mask), + _mm_set1_epi32(0x1F << SK_B32_SHIFT)); - __m128i b = _mm_and_si128(_mm_slli_epi32(mask, SK_B16_BITS-5), - _mm_set1_epi32(0x0000001F)); - // Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3) mask = _mm_or_si128(_mm_or_si128(r, g), b); -- 2.7.4