return _mm_packus_epi16 (pack565_2x128_128 (*xmm0, *xmm1), pack565_2x128_128 (*xmm2, *xmm3));
}
-static force_inline uint32_t
-packAlpha (__m128i x)
+static force_inline int
+isOpaque (__m128i x) /* Nonzero when the top byte of every 32-bit lane of x is 0xff.
+ * The 0x8888 mask keeps movemask bits 3, 7, 11 and 15, i.e. the same byte
+ * the removed packAlpha extracted with a 24-bit right shift (presumably the
+ * alpha channel, going by the function names). */
+{
+ __m128i ffs = _mm_cmpeq_epi8 (x, x); /* every byte equals itself, so all bits are set */
+ return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888;
+}
+
+static force_inline int
+isZero (__m128i x) /* Nonzero when all 16 bytes of x are zero: movemask yields
+ * one bit per byte, so 0xffff means every byte compared equal to 0. */
{
- return _mm_cvtsi128_si32 (_mm_packus_epi16 (_mm_packus_epi16 (_mm_srli_epi32 (x, 24),
- _mm_setzero_si128 ()),
- _mm_setzero_si128 ()));
+ return _mm_movemask_epi8 (_mm_cmpeq_epi8 (x, _mm_setzero_si128())) == 0xffff;
+}
+
+static force_inline int
+isTransparent (__m128i x) /* Nonzero when the top byte of every 32-bit lane of x
+ * is zero; the remaining bytes are ignored by the 0x8888 mask. */
+{
+ return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, _mm_setzero_si128())) & 0x8888) == 0x8888;
}
static force_inline __m128i
{
return src;
}
- else if (a)
+ else if (src)
{
ms = unpack_32_1x64 (src);
return pack_1x64_32 (over_1x64 (ms, expandAlpha_1x64 (ms), unpack_32_1x64 (dst)));
{
xmmMskLo = load128Unaligned (pm);
- if (!packAlpha (xmmMskLo))
+ if (isTransparent (xmmMskLo))
return _mm_setzero_si128 ();
}
static force_inline void
coreCombineOverUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
{
- uint32_t pa;
uint32_t s, d;
__m128i xmmDstLo, xmmDstHi;
/* I'm loading unaligned because I'm not sure about the address alignment. */
xmmSrcHi = combine4 ((__m128i*)ps, (__m128i*)pm);
- /* Check the alpha channel */
- pa = packAlpha (xmmSrcHi);
-
- if (pa == 0xffffffff)
+ if (isOpaque (xmmSrcHi))
{
save128Aligned ((__m128i*)pd, xmmSrcHi);
}
- else if (pa)
+ else if (!isZero (xmmSrcHi))
{
xmmDstHi = load128Aligned ((__m128i*) pd);
fbComposeGetSolid(pSrc, src, pDst->bits.format);
- if (src >> 24 == 0)
+ if (src == 0)
return;
fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
fbComposeGetSolid(pSrc, src, pDst->bits.format);
- if (src >> 24 == 0)
+ if (src == 0)
return;
fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
int32_t width,
int32_t height)
{
- uint32_t src, srca;
+ uint32_t src;
uint32_t *dstLine, d;
uint32_t *maskLine, m;
uint32_t packCmp;
fbComposeGetSolid(pSrc, src, pDst->bits.format);
- srca = src >> 24;
- if (srca == 0)
+ if (src == 0)
return;
fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
fbComposeGetSolid(pSrc, src, pDst->bits.format);
srca = src >> 24;
- if (srca == 0)
+ if (src == 0)
return;
fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
fbComposeGetSolid(pSrc, src, pDst->bits.format);
srca = src >> 24;
- if (srca == 0)
+ if (src == 0)
{
pixmanFillsse2 (pDst->bits.bits, pDst->bits.rowstride,
PIXMAN_FORMAT_BPP (pDst->bits.format),
fbComposeGetSolid(pSrc, src, pDst->bits.format);
srca = src >> 24;
- if (srca == 0)
+ if (src == 0)
return;
fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
{
uint16_t *dstLine, *dst, d;
uint32_t *srcLine, *src, s;
- int dstStride, srcStride;
+ int dstStride, srcStride;
uint16_t w;
- uint32_t packCmp;
+ uint32_t opaque, zero;
__m64 ms;
__m128i xmmSrc, xmmSrcLo, xmmSrcHi;
xmmSrc = load128Unaligned((__m128i*)src);
xmmDst = load128Aligned ((__m128i*)dst);
- packCmp = packAlpha (xmmSrc);
+ opaque = isOpaque (xmmSrc);
+ zero = isZero (xmmSrc);
- unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
+ unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
unpack_128_2x128 (xmmSrc, &xmmSrcLo, &xmmSrcHi);
/* preload next round*/
xmmSrc = load128Unaligned((__m128i*)(src+4));
- /* preload next round*/
-
- if (packCmp == 0xffffffff)
+
+ if (opaque)
{
invertColors_2x128 (xmmSrcLo, xmmSrcHi, &xmmDst0, &xmmDst1);
}
- else if (packCmp)
+ else if (!zero)
{
overRevNonPre_2x128 (xmmSrcLo, xmmSrcHi, &xmmDst0, &xmmDst1);
}
/* Second round */
- packCmp = packAlpha (xmmSrc);
+ opaque = isOpaque (xmmSrc);
+ zero = isZero (xmmSrc);
unpack_128_2x128 (xmmSrc, &xmmSrcLo, &xmmSrcHi);
- if (packCmp == 0xffffffff)
+ if (opaque)
{
invertColors_2x128 (xmmSrcLo, xmmSrcHi, &xmmDst2, &xmmDst3);
}
- else if (packCmp)
+ else if (!zero) /* blend unless all four source pixels are zero, matching the first round */
{
overRevNonPre_2x128 (xmmSrcLo, xmmSrcHi, &xmmDst2, &xmmDst3);
}
uint32_t *srcLine, *src, s;
int dstStride, srcStride;
uint16_t w;
- uint32_t packCmp;
+ uint32_t opaque, zero;
__m128i xmmSrcLo, xmmSrcHi;
__m128i xmmDstLo, xmmDstHi;
xmmSrcHi = load128Unaligned((__m128i*)src);
- packCmp = packAlpha (xmmSrcHi);
+ opaque = isOpaque (xmmSrcHi);
+ zero = isZero (xmmSrcHi);
unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi);
- if (packCmp == 0xffffffff)
+ if (opaque)
{
invertColors_2x128( xmmSrcLo, xmmSrcHi, &xmmDstLo, &xmmDstHi);
save128Aligned ((__m128i*)dst, pack_2x128_128 (xmmDstLo, xmmDstHi));
}
- else if (packCmp)
+ else if (!zero)
{
xmmDstHi = load128Aligned ((__m128i*)dst);
int32_t width,
int32_t height)
{
- uint32_t src, srca;
+ uint32_t src;
uint16_t *dstLine, *dst, d;
uint32_t *maskLine, *mask, m;
int dstStride, maskStride;
fbComposeGetSolid(pSrc, src, pDst->bits.format);
- srca = src >> 24;
- if (srca == 0)
+ if (src == 0)
return;
fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);