#include <config.h>
#endif
-#include <mmintrin.h>
#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
#include <emmintrin.h> /* for SSE2 intrinsics */
#include "pixman-private.h"
#include "pixman-combine32.h"
-#include "pixman-fast-path.h"
-
-#if defined(_MSC_VER) && defined(_M_AMD64)
-/* Windows 64 doesn't allow MMX to be used, so
- * the pixman-x64-mmx-emulation.h file contains
- * implementations of those MMX intrinsics that
- * are used in the SSE2 implementation.
- */
-# include "pixman-x64-mmx-emulation.h"
-#endif
-
-#ifdef USE_SSE2
-
-/* --------------------------------------------------------------------
- * Locals
- */
-
-static __m64 mask_x0080;
-static __m64 mask_x00ff;
-static __m64 mask_x0101;
-static __m64 mask_x_alpha;
-
-static __m64 mask_x565_rgb;
-static __m64 mask_x565_unpack;
+#include "pixman-inlines.h"
static __m128i mask_0080;
static __m128i mask_00ff;
static __m128i mask_565_fix_rb;
static __m128i mask_565_fix_g;
-/* ----------------------------------------------------------------------
- * SSE2 Inlines
- */
static force_inline __m128i
unpack_32_1x128 (uint32_t data)
{
_mm_storeu_si128 (dst, data);
}
-/* ------------------------------------------------------------------
- * MMX inlines
- */
-
-static force_inline __m64
-load_32_1x64 (uint32_t data)
-{
- return _mm_cvtsi32_si64 (data);
-}
-
static force_inline __m128i
load_32_1x128 (uint32_t data)
{
return _mm_cvtsi32_si128 (data);
}
-static force_inline __m64
-unpack_32_1x64 (uint32_t data)
-{
- return _mm_unpacklo_pi8 (load_32_1x64 (data), _mm_setzero_si64 ());
-}
-
-static force_inline __m64
-expand_alpha_1x64 (__m64 data)
-{
- return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 3, 3, 3));
-}
-
-static force_inline __m64
-expand_alpha_rev_1x64 (__m64 data)
-{
- return _mm_shuffle_pi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
-}
-
static force_inline __m128i
expand_alpha_rev_1x128 (__m128i data)
{
return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
}
-static force_inline __m64
-expand_pixel_8_1x64 (uint8_t data)
-{
- return _mm_shuffle_pi16 (
- unpack_32_1x64 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
-}
-
static force_inline __m128i
expand_pixel_8_1x128 (uint8_t data)
{
unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
}
-static force_inline __m64
-pix_multiply_1x64 (__m64 data,
- __m64 alpha)
-{
- return _mm_mulhi_pu16 (_mm_adds_pu16 (_mm_mullo_pi16 (data, alpha),
- mask_x0080),
- mask_x0101);
-}
-
static force_inline __m128i
pix_multiply_1x128 (__m128i data,
__m128i alpha)
mask_0101);
}
-static force_inline __m64
-pix_add_multiply_1x64 (__m64* src,
- __m64* alpha_dst,
- __m64* dst,
- __m64* alpha_src)
-{
- __m64 t1 = pix_multiply_1x64 (*src, *alpha_dst);
- __m64 t2 = pix_multiply_1x64 (*dst, *alpha_src);
-
- return _mm_adds_pu8 (t1, t2);
-}
-
static force_inline __m128i
pix_add_multiply_1x128 (__m128i* src,
__m128i* alpha_dst,
return _mm_adds_epu8 (t1, t2);
}
-static force_inline __m64
-negate_1x64 (__m64 data)
-{
- return _mm_xor_si64 (data, mask_x00ff);
-}
-
static force_inline __m128i
negate_1x128 (__m128i data)
{
return _mm_xor_si128 (data, mask_00ff);
}
-static force_inline __m64
-invert_colors_1x64 (__m64 data)
-{
- return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
-}
-
static force_inline __m128i
invert_colors_1x128 (__m128i data)
{
return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
}
-static force_inline __m64
-over_1x64 (__m64 src, __m64 alpha, __m64 dst)
-{
- return _mm_adds_pu8 (src, pix_multiply_1x64 (dst, negate_1x64 (alpha)));
-}
-
static force_inline __m128i
over_1x128 (__m128i src, __m128i alpha, __m128i dst)
{
return _mm_adds_epu8 (src, pix_multiply_1x128 (dst, negate_1x128 (alpha)));
}
-static force_inline __m64
-in_over_1x64 (__m64* src, __m64* alpha, __m64* mask, __m64* dst)
-{
- return over_1x64 (pix_multiply_1x64 (*src, *mask),
- pix_multiply_1x64 (*alpha, *mask),
- *dst);
-}
-
static force_inline __m128i
in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst)
{
*dst);
}
-static force_inline __m64
-over_rev_non_pre_1x64 (__m64 src, __m64 dst)
-{
- __m64 alpha = expand_alpha_1x64 (src);
-
- return over_1x64 (pix_multiply_1x64 (invert_colors_1x64 (src),
- _mm_or_si64 (alpha, mask_x_alpha)),
- alpha,
- dst);
-}
-
static force_inline __m128i
over_rev_non_pre_1x128 (__m128i src, __m128i dst)
{
}
static force_inline uint32_t
-pack_1x64_32 (__m64 data)
-{
- return _mm_cvtsi64_si32 (_mm_packs_pu16 (data, _mm_setzero_si64 ()));
-}
-
-static force_inline uint32_t
pack_1x128_32 (__m128i data)
{
return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ()));
}
-/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
- *
- * 00RR00GG00BB
- *
- * --- Expanding 565 in the low word ---
- *
- * m = (m << (32 - 3)) | (m << (16 - 5)) | m;
- * m = m & (01f0003f001f);
- * m = m * (008404100840);
- * m = m >> 8;
- *
- * Note the trick here - the top word is shifted by another nibble to
- * avoid it bumping into the middle word
- */
-static force_inline __m64
-expand565_16_1x64 (uint16_t pixel)
-{
- __m64 p;
- __m64 t1, t2;
-
- p = _mm_cvtsi32_si64 ((uint32_t) pixel);
-
- t1 = _mm_slli_si64 (p, 36 - 11);
- t2 = _mm_slli_si64 (p, 16 - 5);
-
- p = _mm_or_si64 (t1, p);
- p = _mm_or_si64 (t2, p);
- p = _mm_and_si64 (p, mask_x565_rgb);
- p = _mm_mullo_pi16 (p, mask_x565_unpack);
-
- return _mm_srli_pi16 (p, 8);
-}
-
static force_inline __m128i
expand565_16_1x128 (uint16_t pixel)
{
return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ());
}
-/* ----------------------------------------------------------------------------
- * Compose Core transformations
- */
static force_inline uint32_t
core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
{
}
static force_inline void
-core_combine_over_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t* pm,
- int w)
+sse2_combine_over_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
if (pm)
core_combine_over_u_sse2_mask (pd, ps, pm, w);
core_combine_over_u_sse2_no_mask (pd, ps, w);
}
-static force_inline void
-core_combine_over_reverse_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t* pm,
- int w)
+static void
+sse2_combine_over_reverse_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, d;
return dst;
}
-static force_inline void
-core_combine_in_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t* pm,
- int w)
+static void
+sse2_combine_in_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, d;
}
}
-static force_inline void
-core_combine_reverse_in_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_in_reverse_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, d;
}
}
-static force_inline void
-core_combine_reverse_out_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t* pm,
- int w)
+static void
+sse2_combine_out_reverse_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
while (w && ((unsigned long) pd & 15))
{
}
}
-static force_inline void
-core_combine_out_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t* pm,
- int w)
+static void
+sse2_combine_out_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
while (w && ((unsigned long) pd & 15))
{
return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
}
-static force_inline void
-core_combine_atop_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t* pm,
- int w)
+static void
+sse2_combine_atop_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, d;
return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
}
-static force_inline void
-core_combine_reverse_atop_u_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t* pm,
- int w)
+static void
+sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, d;
return pack_1x128_32 (pix_add_multiply_1x128 (&s, &neg_d, &d, &neg_s));
}
-static force_inline void
-core_combine_xor_u_sse2 (uint32_t* dst,
- const uint32_t* src,
- const uint32_t *mask,
- int width)
+static void
+sse2_combine_xor_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * dst,
+ const uint32_t * src,
+ const uint32_t * mask,
+ int width)
{
int w = width;
uint32_t s, d;
}
static force_inline void
-core_combine_add_u_sse2 (uint32_t* dst,
- const uint32_t* src,
- const uint32_t* mask,
- int width)
+sse2_combine_add_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * dst,
+ const uint32_t * src,
+ const uint32_t * mask,
+ int width)
{
int w = width;
uint32_t s, d;
return pack_1x128_32 (_mm_adds_epu16 (md, ms));
}
-static force_inline void
-core_combine_saturate_u_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_saturate_u (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, d;
}
}
-static force_inline void
-core_combine_src_ca_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_src_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m;
return pack_1x128_32 (in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst));
}
-static force_inline void
-core_combine_over_ca_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_over_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
unpack_32_1x128 (mask))));
}
-static force_inline void
-core_combine_over_reverse_ca_sse2 (uint32_t* pd,
- const uint32_t* ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
}
}
-static force_inline void
-core_combine_in_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_in_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
}
}
-static force_inline void
-core_combine_in_reverse_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
}
}
-static force_inline void
-core_combine_out_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_out_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
}
}
-static force_inline void
-core_combine_out_reverse_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
}
-static force_inline void
-core_combine_atop_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_atop_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
}
-static force_inline void
-core_combine_reverse_atop_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
&alpha_src));
}
-static force_inline void
-core_combine_xor_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_xor_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
}
}
-static force_inline void
-core_combine_add_ca_sse2 (uint32_t * pd,
- const uint32_t *ps,
- const uint32_t *pm,
- int w)
+static void
+sse2_combine_add_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ uint32_t * pd,
+ const uint32_t * ps,
+ const uint32_t * pm,
+ int w)
{
uint32_t s, m, d;
&xmm_mask_lo, &xmm_mask_hi,
&xmm_src_lo, &xmm_src_hi);
- save_128_aligned (
- (__m128i*)pd, pack_2x128_128 (
- _mm_adds_epu8 (xmm_src_lo, xmm_dst_lo),
- _mm_adds_epu8 (xmm_src_hi, xmm_dst_hi)));
-
- ps += 4;
- pd += 4;
- pm += 4;
- w -= 4;
- }
-
- while (w)
- {
- s = *ps++;
- m = *pm++;
- d = *pd;
-
- *pd++ = pack_1x128_32 (
- _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
- unpack_32_1x128 (m)),
- unpack_32_1x128 (d)));
- w--;
- }
-}
-
-/* ---------------------------------------------------
- * fb_compose_setup_sSE2
- */
-static force_inline __m64
-create_mask_16_64 (uint16_t mask)
-{
- return _mm_set1_pi16 (mask);
-}
-
-static force_inline __m128i
-create_mask_16_128 (uint16_t mask)
-{
- return _mm_set1_epi16 (mask);
-}
-
-static force_inline __m64
-create_mask_2x32_64 (uint32_t mask0,
- uint32_t mask1)
-{
- return _mm_set_pi32 (mask0, mask1);
-}
-
-/* Work around a code generation bug in Sun Studio 12. */
-#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
-# define create_mask_2x32_128(mask0, mask1) \
- (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1)))
-#else
-static force_inline __m128i
-create_mask_2x32_128 (uint32_t mask0,
- uint32_t mask1)
-{
- return _mm_set_epi32 (mask0, mask1, mask0, mask1);
-}
-#endif
-
-/* SSE2 code patch for fbcompose.c */
-
-static void
-sse2_combine_over_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_over_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_over_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_over_reverse_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_in_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_in_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_in_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_reverse_in_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_out_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_out_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_out_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_reverse_out_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_atop_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_atop_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_reverse_atop_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_xor_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_xor_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_add_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_add_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_saturate_u (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_saturate_u_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_src_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_src_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_over_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_over_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_over_reverse_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_in_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_in_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_in_reverse_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
-
-static void
-sse2_combine_out_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_out_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
+ save_128_aligned (
+ (__m128i*)pd, pack_2x128_128 (
+ _mm_adds_epu8 (xmm_src_lo, xmm_dst_lo),
+ _mm_adds_epu8 (xmm_src_hi, xmm_dst_hi)));
-static void
-sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_out_reverse_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
+ ps += 4;
+ pd += 4;
+ pm += 4;
+ w -= 4;
+ }
-static void
-sse2_combine_atop_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_atop_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
-}
+ while (w)
+ {
+ s = *ps++;
+ m = *pm++;
+ d = *pd;
-static void
-sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
-{
- core_combine_reverse_atop_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
+ *pd++ = pack_1x128_32 (
+ _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
+ unpack_32_1x128 (m)),
+ unpack_32_1x128 (d)));
+ w--;
+ }
}
-static void
-sse2_combine_xor_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
+static force_inline __m128i
+create_mask_16_128 (uint16_t mask)
{
- core_combine_xor_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
+ return _mm_set1_epi16 (mask);
}
-static void
-sse2_combine_add_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- uint32_t * dst,
- const uint32_t * src,
- const uint32_t * mask,
- int width)
+/* Work around a code generation bug in Sun Studio 12. */
+#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
+# define create_mask_2x32_128(mask0, mask1) \
+ (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1)))
+#else
+static force_inline __m128i
+create_mask_2x32_128 (uint32_t mask0,
+ uint32_t mask1)
{
- core_combine_add_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
+ return _mm_set_epi32 (mask0, mask1, mask0, mask1);
}
-
-/* -------------------------------------------------------------------
- * composite_over_n_8888
- */
+#endif
static void
sse2_composite_over_n_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, *dst, d;
int32_t w;
__m128i xmm_src, xmm_alpha;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
if (src == 0)
return;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
xmm_src = expand_pixel_32_1x128 (src);
xmm_alpha = expand_alpha_1x128 (xmm_src);
}
}
- _mm_empty ();
}
-/* ---------------------------------------------------------------------
- * composite_over_n_0565
- */
static void
sse2_composite_over_n_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint16_t *dst_line, *dst, d;
int32_t w;
__m128i xmm_src, xmm_alpha;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
if (src == 0)
return;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
xmm_src = expand_pixel_32_1x128 (src);
xmm_alpha = expand_alpha_1x128 (xmm_src);
}
}
- _mm_empty ();
}
-/* ------------------------------
- * composite_add_n_8888_8888_ca
- */
static void
sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
- uint32_t src, srca;
+ PIXMAN_COMPOSITE_ARGS (info);
+ uint32_t src;
uint32_t *dst_line, d;
uint32_t *mask_line, m;
uint32_t pack_cmp;
int dst_stride, mask_stride;
- __m128i xmm_src, xmm_alpha;
+ __m128i xmm_src;
__m128i xmm_dst;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
- __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+ __m128i mmx_src, mmx_mask, mmx_dest;
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
- srca = src >> 24;
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
if (src == 0)
return;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
xmm_src = _mm_unpacklo_epi8 (
create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
- xmm_alpha = expand_alpha_1x128 (xmm_src);
mmx_src = xmm_src;
- mmx_alpha = xmm_alpha;
while (height--)
{
mmx_dest = unpack_32_1x128 (d);
*pd = pack_1x128_32 (
- _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), mmx_dest));
+ _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
+ mmx_dest));
}
pd++;
mmx_dest = unpack_32_1x128 (d);
*pd = pack_1x128_32 (
- _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), mmx_dest));
+ _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
+ mmx_dest));
}
pd++;
}
}
- _mm_empty ();
}
-/* ---------------------------------------------------------------------------
- * composite_over_n_8888_8888_ca
- */
-
static void
sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, d;
uint32_t *mask_line, m;
__m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
if (src == 0)
return;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
}
}
- _mm_empty ();
}
-/*---------------------------------------------------------------------
- * composite_over_8888_n_8888
- */
-
static void
sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t mask;
__m128i xmm_alpha_lo, xmm_alpha_hi;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
}
}
- _mm_empty ();
}
-/*---------------------------------------------------------------------
- * composite_over_8888_n_8888
- */
-
static void
sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int32_t w;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
}
}
- _mm_empty ();
}
-/* ---------------------------------------------------------------------
- * composite_over_x888_n_8888
- */
static void
sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t mask;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
}
}
- _mm_empty ();
}
-/* --------------------------------------------------------------------
- * composite_over_8888_8888
- */
static void
sse2_composite_over_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
int dst_stride, src_stride;
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
while (height--)
{
- core_combine_over_u_sse2 (dst, src, NULL, width);
+ sse2_combine_over_u (imp, op, dst, src, NULL, width);
dst += dst_stride;
src += src_stride;
}
- _mm_empty ();
}
-/* ------------------------------------------------------------------
- * composite_over_8888_0565
- */
static force_inline uint16_t
composite_over_8888_0565pixel (uint32_t src, uint16_t dst)
{
static void
sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst, d;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-#if 0
- /* FIXME
- *
- * I copy the code from MMX one and keep the fixme.
- * If it's a problem there, probably is a problem here.
- */
- assert (src_image->drawable == mask_image->drawable);
-#endif
-
while (height--)
{
dst = dst_line;
}
}
- _mm_empty ();
}
-/* -----------------------------------------------------------------
- * composite_over_n_8_8888
- */
-
static void
sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
__m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
srca = src >> 24;
if (src == 0)
return;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
}
}
- _mm_empty ();
}
-/* ----------------------------------------------------------------
- * composite_over_n_8_8888
- */
-
-pixman_bool_t
+static pixman_bool_t
pixman_fill_sse2 (uint32_t *bits,
int stride,
int bpp,
byte_line += stride;
w = byte_width;
- while (w >= 1 && ((unsigned long)d & 1))
+ if (w >= 1 && ((unsigned long)d & 1))
{
*(uint8_t *)d = data;
w -= 1;
}
}
- _mm_empty ();
return TRUE;
}
static void
sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
__m128i xmm_src, xmm_def;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
srca = src >> 24;
if (src == 0)
{
- pixman_fill_sse2 (dst_image->bits.bits, dst_image->bits.rowstride,
- PIXMAN_FORMAT_BPP (dst_image->bits.format),
+ pixman_fill_sse2 (dest_image->bits.bits, dest_image->bits.rowstride,
+ PIXMAN_FORMAT_BPP (dest_image->bits.format),
dest_x, dest_y, width, height, 0);
return;
}
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
}
}
- _mm_empty ();
}
-/*-----------------------------------------------------------------------
- * composite_over_n_8_0565
- */
-
static void
sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
- uint32_t src, srca;
+ PIXMAN_COMPOSITE_ARGS (info);
+ uint32_t src;
uint16_t *dst_line, *dst, d;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
- srca = src >> 24;
if (src == 0)
return;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
}
}
- _mm_empty ();
}
-/* -----------------------------------------------------------------------
- * composite_over_pixbuf_0565
- */
-
static void
sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst, d;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
__m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-#if 0
- /* FIXME
- *
- * I copy the code from MMX one and keep the fixme.
- * If it's a problem there, probably is a problem here.
- */
- assert (src_image->drawable == mask_image->drawable);
-#endif
-
while (height--)
{
dst = dst_line;
}
}
- _mm_empty ();
}
-/* -------------------------------------------------------------------------
- * composite_over_pixbuf_8888
- */
-
static void
sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst, d;
uint32_t *src_line, *src, s;
int dst_stride, src_stride;
__m128i xmm_dst_lo, xmm_dst_hi;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-#if 0
- /* FIXME
- *
- * I copy the code from MMX one and keep the fixme.
- * If it's a problem there, probably is a problem here.
- */
- assert (src_image->drawable == mask_image->drawable);
-#endif
-
while (height--)
{
dst = dst_line;
}
}
- _mm_empty ();
}
-/* -------------------------------------------------------------------------------------------------
- * composite_over_n_8888_0565_ca
- */
-
static void
sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint16_t *dst_line, *dst, d;
uint32_t *mask_line, *mask, m;
__m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
if (src == 0)
return;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
}
}
- _mm_empty ();
}
-/* -----------------------------------------------------------------------
- * composite_in_n_8_8
- */
-
static void
sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
uint32_t d, m;
uint32_t src;
- uint8_t sa;
int32_t w;
__m128i xmm_alpha;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- sa = src >> 24;
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
}
}
- _mm_empty ();
}
-/* -----------------------------------------------------------------------
- * composite_in_n_8
- */
-
static void
sse2_composite_in_n_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
int dst_stride;
uint32_t d;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
if (src == 0x00)
{
- pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
+ pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
8, dest_x, dest_y, width, height, src);
return;
}
}
- _mm_empty ();
}
-/* ---------------------------------------------------------------------------
- * composite_in_8_8
- */
-
static void
sse2_composite_in_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int src_stride, dst_stride;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
}
}
- _mm_empty ();
}
-/* -------------------------------------------------------------------------
- * composite_add_n_8_8
- */
-
static void
sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
int32_t w;
uint32_t src;
- uint8_t sa;
uint32_t m, d;
__m128i xmm_alpha;
__m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- sa = src >> 24;
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
}
}
- _mm_empty ();
}
-/* -------------------------------------------------------------------------
- * composite_add_n_8_8
- */
-
static void
sse2_composite_add_n_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
int dst_stride;
int32_t w;
__m128i xmm_src;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
src >>= 24;
if (src == 0xff)
{
- pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
+ pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
8, dest_x, dest_y, width, height, 0xff);
return;
}
}
- _mm_empty ();
}
-/* ----------------------------------------------------------------------
- * composite_add_8_8
- */
-
static void
sse2_composite_add_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int dst_stride, src_stride;
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
while (height--)
{
w--;
}
- core_combine_add_u_sse2 ((uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
+ sse2_combine_add_u (imp, op,
+ (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
/* Small tail */
dst += w & 0xfffc;
}
}
- _mm_empty ();
}
-/* ---------------------------------------------------------------------
- * composite_add_8888_8888
- */
static void
sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
while (height--)
{
src = src_line;
src_line += src_stride;
- core_combine_add_u_sse2 (dst, src, NULL, width);
+ sse2_combine_add_u (imp, op, dst, src, NULL, width);
}
- _mm_empty ();
}
-/* -------------------------------------------------------------------------------------------------
- * sse2_composite_copy_area
- */
-
static pixman_bool_t
pixman_blt_sse2 (uint32_t *src_bits,
uint32_t *dst_bits,
int dst_bpp,
int src_x,
int src_y,
- int dst_x,
- int dst_y,
+ int dest_x,
+ int dest_y,
int width,
int height)
{
src_stride = src_stride * (int) sizeof (uint32_t) / 2;
dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
- dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+ dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
byte_width = 2 * width;
src_stride *= 2;
dst_stride *= 2;
src_stride = src_stride * (int) sizeof (uint32_t) / 4;
dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
- dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+ dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
byte_width = 4 * width;
src_stride *= 4;
dst_stride *= 4;
}
}
- _mm_empty ();
return TRUE;
}
static void
sse2_composite_copy_area (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
pixman_blt_sse2 (src_image->bits.bits,
- dst_image->bits.bits,
+ dest_image->bits.bits,
src_image->bits.rowstride,
- dst_image->bits.rowstride,
+ dest_image->bits.rowstride,
PIXMAN_FORMAT_BPP (src_image->bits.format),
- PIXMAN_FORMAT_BPP (dst_image->bits.format),
+ PIXMAN_FORMAT_BPP (dest_image->bits.format),
src_x, src_y, dest_x, dest_y, width, height);
}
static void
sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t *src, *src_line, s;
uint32_t *dst, *dst_line, d;
uint8_t *mask, *mask_line;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
PIXMAN_IMAGE_GET_LINE (
while (w >= 4)
{
m = *(uint32_t*) mask;
- xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), mask_ff000000);
+ xmm_src = _mm_or_si128 (
+ load_128_unaligned ((__m128i*)src), mask_ff000000);
if (m == 0xffffffff)
{
unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
- expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+ expand_alpha_rev_2x128 (
+ xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
- in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
}
}
}
- _mm_empty ();
}
static void
sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t *src, *src_line, s;
uint32_t *dst, *dst_line, d;
uint8_t *mask, *mask_line;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
PIXMAN_IMAGE_GET_LINE (
}
}
- _mm_empty ();
}
static void
sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, *dst;
__m128i xmm_src;
int dst_stride;
int32_t w;
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
if (src == 0)
return;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
xmm_src = expand_pixel_32_1x128 (src);
}
- _mm_empty ();
}
static void
sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t *src, *src_line, s;
uint32_t *dst, *dst_line, d;
uint32_t *mask, *mask_line;
__m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
PIXMAN_IMAGE_GET_LINE (
- dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (
mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
PIXMAN_IMAGE_GET_LINE (
}
}
- _mm_empty ();
}
-/* A variant of 'core_combine_over_u_sse2' with minor tweaks */
+/* A variant of 'sse2_combine_over_u' with minor tweaks */
static force_inline void
scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
const uint32_t* ps,
w--;
}
- _mm_empty ();
}
FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
w--;
}
- _mm_empty ();
}
FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
scaled_nearest_scanline_sse2_8888_n_8888_OVER,
uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
+#define BILINEAR_DECLARE_VARIABLES \
+ const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \
+ const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \
+ const __m128i xmm_xorc = _mm_set_epi16 (0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff);\
+ const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1); \
+ const __m128i xmm_ux = _mm_set_epi16 (unit_x, unit_x, unit_x, unit_x, \
+ unit_x, unit_x, unit_x, unit_x); \
+ const __m128i xmm_zero = _mm_setzero_si128 (); \
+ __m128i xmm_x = _mm_set_epi16 (vx, vx, vx, vx, vx, vx, vx, vx)
+
+#define BILINEAR_INTERPOLATE_ONE_PIXEL(pix) \
+do { \
+ __m128i xmm_wh, xmm_lo, xmm_hi, a; \
+ /* fetch 2x2 pixel block into sse2 register */ \
+ uint32_t tl = src_top [pixman_fixed_to_int (vx)]; \
+ uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1]; \
+ uint32_t bl = src_bottom [pixman_fixed_to_int (vx)]; \
+ uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1]; \
+ a = _mm_set_epi32 (tr, tl, br, bl); \
+ vx += unit_x; \
+ /* vertical interpolation */ \
+ a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero), \
+ xmm_wt), \
+ _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero), \
+ xmm_wb)); \
+ /* calculate horizontal weights */ \
+ xmm_wh = _mm_add_epi16 (xmm_addc, \
+ _mm_xor_si128 (xmm_xorc, \
+ _mm_srli_epi16 (xmm_x, 8))); \
+ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
+ /* horizontal interpolation */ \
+ xmm_lo = _mm_mullo_epi16 (a, xmm_wh); \
+ xmm_hi = _mm_mulhi_epu16 (a, xmm_wh); \
+ a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi), \
+ _mm_unpackhi_epi16 (xmm_lo, xmm_hi)); \
+ /* shift and pack the result */ \
+ a = _mm_srli_epi32 (a, 16); \
+ a = _mm_packs_epi32 (a, a); \
+ a = _mm_packus_epi16 (a, a); \
+ pix = _mm_cvtsi128_si32 (a); \
+} while (0)
+
+#define BILINEAR_SKIP_ONE_PIXEL() \
+do { \
+ vx += unit_x; \
+ xmm_x = _mm_add_epi16 (xmm_x, xmm_ux); \
+} while(0)
+
+static force_inline void
+scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst,
+ const uint32_t * mask,
+ const uint32_t * src_top,
+ const uint32_t * src_bottom,
+ int32_t w,
+ int wt,
+ int wb,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t zero_src)
+{
+ BILINEAR_DECLARE_VARIABLES;
+ uint32_t pix1, pix2, pix3, pix4;
+
+ while ((w -= 4) >= 0)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
+ *dst++ = pix1;
+ *dst++ = pix2;
+ *dst++ = pix3;
+ *dst++ = pix4;
+ }
+
+ if (w & 2)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+ *dst++ = pix1;
+ *dst++ = pix2;
+ }
+
+ if (w & 1)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ *dst = pix1;
+ }
+
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
+ scaled_bilinear_scanline_sse2_8888_8888_SRC,
+ uint32_t, uint32_t, uint32_t,
+ COVER, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
+ scaled_bilinear_scanline_sse2_8888_8888_SRC,
+ uint32_t, uint32_t, uint32_t,
+ PAD, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,
+ scaled_bilinear_scanline_sse2_8888_8888_SRC,
+ uint32_t, uint32_t, uint32_t,
+ NONE, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,
+ scaled_bilinear_scanline_sse2_8888_8888_SRC,
+ uint32_t, uint32_t, uint32_t,
+ NORMAL, FLAG_NONE)
+
+static force_inline void
+scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst,
+ const uint32_t * mask,
+ const uint32_t * src_top,
+ const uint32_t * src_bottom,
+ int32_t w,
+ int wt,
+ int wb,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t zero_src)
+{
+ BILINEAR_DECLARE_VARIABLES;
+ uint32_t pix1, pix2, pix3, pix4;
+
+ while (w && ((unsigned long)dst & 15))
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+
+ if (pix1)
+ {
+ pix2 = *dst;
+ *dst = core_combine_over_u_pixel_sse2 (pix1, pix2);
+ }
+
+ w--;
+ dst++;
+ }
+
+ while (w >= 4)
+ {
+ __m128i xmm_src;
+ __m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo;
+ __m128i xmm_alpha_hi, xmm_alpha_lo;
+
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
+
+ xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
+
+ if (!is_zero (xmm_src))
+ {
+ if (is_opaque (xmm_src))
+ {
+ save_128_aligned ((__m128i *)dst, xmm_src);
+ }
+ else
+ {
+ __m128i xmm_dst = load_128_aligned ((__m128i *)dst);
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
+ over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned ((__m128i *)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+ }
+
+ w -= 4;
+ dst += 4;
+ }
+
+ while (w)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+
+ if (pix1)
+ {
+ pix2 = *dst;
+ *dst = core_combine_over_u_pixel_sse2 (pix1, pix2);
+ }
+
+ w--;
+ dst++;
+ }
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ COVER, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ PAD, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ NONE, FLAG_NONE)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER,
+ scaled_bilinear_scanline_sse2_8888_8888_OVER,
+ uint32_t, uint32_t, uint32_t,
+ NORMAL, FLAG_NONE)
+
+static force_inline void
+scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst,
+ const uint8_t * mask,
+ const uint32_t * src_top,
+ const uint32_t * src_bottom,
+ int32_t w,
+ int wt,
+ int wb,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t zero_src)
+{
+ BILINEAR_DECLARE_VARIABLES;
+ uint32_t pix1, pix2, pix3, pix4;
+ uint32_t m;
+
+ while (w && ((unsigned long)dst & 15))
+ {
+ uint32_t sa;
+
+ m = (uint32_t) *mask++;
+
+ if (m)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ sa = pix1 >> 24;
+
+ if (sa == 0xff && m == 0xff)
+ {
+ *dst = pix1;
+ }
+ else
+ {
+ __m128i ms, md, ma, msa;
+
+ pix2 = *dst;
+ ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+ ms = unpack_32_1x128 (pix1);
+ md = unpack_32_1x128 (pix2);
+
+ msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
+
+ *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
+ }
+ }
+ else
+ {
+ BILINEAR_SKIP_ONE_PIXEL ();
+ }
+
+ w--;
+ dst++;
+ }
+
+ while (w >= 4)
+ {
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
+
+ m = *(uint32_t*)mask;
+
+ if (m)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix2);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix3);
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix4);
+
+ xmm_src = _mm_set_epi32 (pix4, pix3, pix2, pix1);
+
+ if (m == 0xffffffff && is_opaque (xmm_src))
+ {
+ save_128_aligned ((__m128i *)dst, xmm_src);
+ }
+ else
+ {
+ xmm_dst = load_128_aligned ((__m128i *)dst);
+
+ xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
+ expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
+ &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+ }
+ else
+ {
+ BILINEAR_SKIP_ONE_PIXEL ();
+ BILINEAR_SKIP_ONE_PIXEL ();
+ BILINEAR_SKIP_ONE_PIXEL ();
+ BILINEAR_SKIP_ONE_PIXEL ();
+ }
+
+ w -= 4;
+ dst += 4;
+ mask += 4;
+ }
+
+ while (w)
+ {
+ uint32_t sa;
+
+ m = (uint32_t) *mask++;
+
+ if (m)
+ {
+ BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
+ sa = pix1 >> 24;
+
+ if (sa == 0xff && m == 0xff)
+ {
+ *dst = pix1;
+ }
+ else
+ {
+ __m128i ms, md, ma, msa;
+
+ pix2 = *dst;
+ ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+ ms = unpack_32_1x128 (pix1);
+ md = unpack_32_1x128 (pix2);
+
+ msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
+
+ *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
+ }
+ }
+ else
+ {
+ BILINEAR_SKIP_ONE_PIXEL ();
+ }
+
+ w--;
+ dst++;
+ }
+}
+
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER,
+ scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
+ uint32_t, uint8_t, uint32_t,
+ COVER, FLAG_HAVE_NON_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER,
+ scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
+ uint32_t, uint8_t, uint32_t,
+ PAD, FLAG_HAVE_NON_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER,
+ scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
+ uint32_t, uint8_t, uint32_t,
+ NONE, FLAG_HAVE_NON_SOLID_MASK)
+FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,
+ scaled_bilinear_scanline_sse2_8888_8_8888_OVER,
+ uint32_t, uint8_t, uint32_t,
+ NORMAL, FLAG_HAVE_NON_SOLID_MASK)
+
static const pixman_fast_path_t sse2_fast_paths[] =
{
/* PIXMAN_OP_OVER */
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888),
+
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888),
+
{ PIXMAN_OP_NONE },
};
int dst_bpp,
int src_x,
int src_y,
- int dst_x,
- int dst_y,
+ int dest_x,
+ int dest_y,
int width,
int height)
{
if (!pixman_blt_sse2 (
src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
- src_x, src_y, dst_x, dst_y, width, height))
+ src_x, src_y, dest_x, dest_y, width, height))
{
return _pixman_implementation_blt (
imp->delegate,
src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
- src_x, src_y, dst_x, dst_y, width, height);
+ src_x, src_y, dest_x, dest_y, width, height);
}
return TRUE;
};
static void
-sse2_src_iter_init (pixman_implementation_t *imp,
- pixman_iter_t *iter,
- pixman_image_t *image,
- int x, int y, int width, int height,
- uint8_t *buffer, iter_flags_t flags)
+sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
+ pixman_image_t *image = iter->image;
+ int x = iter->x;
+ int y = iter->y;
+ int width = iter->width;
+ int height = iter->height;
+
#define FLAGS \
- (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
+ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE)
- if ((flags & ITER_NARROW) &&
- (image->common.flags & FLAGS) == FLAGS &&
- x >= 0 && y >= 0 &&
- x + width <= image->bits.width &&
+ if ((iter->flags & ITER_NARROW) &&
+ (image->common.flags & FLAGS) == FLAGS &&
+ x >= 0 && y >= 0 &&
+ x + width <= image->bits.width &&
y + height <= image->bits.height)
{
const fetcher_info_t *f;
uint8_t *b = (uint8_t *)image->bits.bits;
int s = image->bits.rowstride * 4;
- iter->bits = b + s * y + x * PIXMAN_FORMAT_BPP (f->format) / 8;
+ iter->bits = b + s * iter->y + x * PIXMAN_FORMAT_BPP (f->format) / 8;
iter->stride = s;
- iter->width = width;
- iter->buffer = (uint32_t *)buffer;
iter->get_scanline = f->get_scanline;
return;
}
}
- _pixman_implementation_src_iter_init (
- imp->delegate, iter, image, x, y, width, height, buffer, flags);
+ imp->delegate->src_iter_init (imp->delegate, iter);
}
#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000);
mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000);
- /* MMX constants */
- mask_x565_rgb = create_mask_2x32_64 (0x000001f0, 0x003f001f);
- mask_x565_unpack = create_mask_2x32_64 (0x00000084, 0x04100840);
-
- mask_x0080 = create_mask_16_64 (0x0080);
- mask_x00ff = create_mask_16_64 (0x00ff);
- mask_x0101 = create_mask_16_64 (0x0101);
- mask_x_alpha = create_mask_2x32_64 (0x00ff0000, 0x00000000);
-
- _mm_empty ();
-
/* Set up function pointers */
-
- /* SSE code patch for fbcompose.c */
imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u;
imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u;
imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u;
return imp;
}
-
-#endif /* USE_SSE2 */