* Locals
*/
-static __m64 mask_x0080;
-static __m64 mask_x00ff;
-static __m64 mask_x0101;
-static __m64 mask_x_alpha;
-
-static __m64 mask_x565_rgb;
-static __m64 mask_x565_unpack;
-
static __m128i mask_0080;
static __m128i mask_00ff;
static __m128i mask_0101;
* MMX inlines
*/
-static force_inline __m64
-load_32_1x64 (uint32_t data)
-{
- return _mm_cvtsi32_si64 (data);
-}
-
/* Move a 32-bit value into the lowest 32 bits of a 128-bit register.
 * The upper 96 bits of the result are zero.
 */
static force_inline __m128i
load_32_1x128 (uint32_t data)
{
    __m128i low_lane = _mm_cvtsi32_si128 (data);

    return low_lane;
}
-static force_inline __m64
-unpack_32_1x64 (uint32_t data)
-{
- return _mm_unpacklo_pi8 (load_32_1x64 (data), _mm_setzero_si64 ());
-}
-
-static force_inline __m64
-expand_alpha_1x64 (__m64 data)
-{
- return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 3, 3, 3));
-}
-
-static force_inline __m64
-expand_alpha_rev_1x64 (__m64 data)
-{
- return _mm_shuffle_pi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
-}
-
/* Broadcast 16-bit word 0 of @data into the four low 16-bit words.
 * The upper 64 bits of @data are passed through unchanged.
 *
 * NOTE(review): presumably used for pixels whose alpha sits in the
 * lowest channel after unpacking -- confirm against callers.
 */
static force_inline __m128i
expand_alpha_rev_1x128 (__m128i data)
{
    __m128i broadcast;

    broadcast = _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0));

    return broadcast;
}
-static force_inline __m64
-expand_pixel_8_1x64 (uint8_t data)
-{
- return _mm_shuffle_pi16 (
- unpack_32_1x64 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
-}
-
static force_inline __m128i
expand_pixel_8_1x128 (uint8_t data)
{
unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
}
-static force_inline __m64
-pix_multiply_1x64 (__m64 data,
- __m64 alpha)
-{
- return _mm_mulhi_pu16 (_mm_adds_pu16 (_mm_mullo_pi16 (data, alpha),
- mask_x0080),
- mask_x0101);
-}
-
static force_inline __m128i
pix_multiply_1x128 (__m128i data,
__m128i alpha)
mask_0101);
}
-static force_inline __m64
-pix_add_multiply_1x64 (__m64* src,
- __m64* alpha_dst,
- __m64* dst,
- __m64* alpha_src)
-{
- __m64 t1 = pix_multiply_1x64 (*src, *alpha_dst);
- __m64 t2 = pix_multiply_1x64 (*dst, *alpha_src);
-
- return _mm_adds_pu8 (t1, t2);
-}
-
static force_inline __m128i
pix_add_multiply_1x128 (__m128i* src,
__m128i* alpha_dst,
return _mm_adds_epu8 (t1, t2);
}
-static force_inline __m64
-negate_1x64 (__m64 data)
-{
- return _mm_xor_si64 (data, mask_x00ff);
-}
-
/* XOR @data with the file-level constant mask_00ff.
 *
 * NOTE(review): mask_00ff presumably holds 0x00ff in every 16-bit lane,
 * which would make this (255 - x) for each unpacked 8-bit channel --
 * confirm where the mask is initialised.
 */
static force_inline __m128i
negate_1x128 (__m128i data)
{
    __m128i flipped = _mm_xor_si128 (mask_00ff, data);

    return flipped;
}
-static force_inline __m64
-invert_colors_1x64 (__m64 data)
-{
- return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
-}
-
/* Swap 16-bit words 0 and 2 of the low 64 bits of @data.
 * Words 1 and 3 and the upper 64 bits are left as they are.
 */
static force_inline __m128i
invert_colors_1x128 (__m128i data)
{
    /* Selector (3, 0, 1, 2): out[3]=in[3], out[2]=in[0],
     * out[1]=in[1], out[0]=in[2].
     */
    __m128i swapped = _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2));

    return swapped;
}
-static force_inline __m64
-over_1x64 (__m64 src, __m64 alpha, __m64 dst)
-{
- return _mm_adds_pu8 (src, pix_multiply_1x64 (dst, negate_1x64 (alpha)));
-}
-
/* Compute src + dst * negate (alpha), where the product comes from
 * pix_multiply_1x128 () and the sum is an unsigned saturating 8-bit add.
 */
static force_inline __m128i
over_1x128 (__m128i src, __m128i alpha, __m128i dst)
{
    __m128i inverse_alpha = negate_1x128 (alpha);
    __m128i scaled_dst = pix_multiply_1x128 (dst, inverse_alpha);

    return _mm_adds_epu8 (src, scaled_dst);
}
-static force_inline __m64
-in_over_1x64 (__m64* src, __m64* alpha, __m64* mask, __m64* dst)
-{
- return over_1x64 (pix_multiply_1x64 (*src, *mask),
- pix_multiply_1x64 (*alpha, *mask),
- *dst);
-}
-
static force_inline __m128i
in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst)
{
*dst);
}
-static force_inline __m64
-over_rev_non_pre_1x64 (__m64 src, __m64 dst)
-{
- __m64 alpha = expand_alpha_1x64 (src);
-
- return over_1x64 (pix_multiply_1x64 (invert_colors_1x64 (src),
- _mm_or_si64 (alpha, mask_x_alpha)),
- alpha,
- dst);
-}
-
static force_inline __m128i
over_rev_non_pre_1x128 (__m128i src, __m128i dst)
{
}
static force_inline uint32_t
-pack_1x64_32 (__m64 data)
-{
- return _mm_cvtsi64_si32 (_mm_packs_pu16 (data, _mm_setzero_si64 ()));
-}
-
-static force_inline uint32_t
pack_1x128_32 (__m128i data)
{
/* Narrow the 16-bit words of data to 8 bits with unsigned saturation
 * (the second pack operand is all zeros), then return the low 32 bits
 * of the packed register, i.e. the four bytes that came from the four
 * lowest words of data.
 */
return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ()));
}
-/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
- *
- * 00RR00GG00BB
- *
- * --- Expanding 565 in the low word ---
- *
- * m = (m << (32 - 3)) | (m << (16 - 5)) | m;
- * m = m & (01f0003f001f);
- * m = m * (008404100840);
- * m = m >> 8;
- *
- * Note the trick here - the top word is shifted by another nibble to
- * avoid it bumping into the middle word
- */
-static force_inline __m64
-expand565_16_1x64 (uint16_t pixel)
-{
- __m64 p;
- __m64 t1, t2;
-
- p = _mm_cvtsi32_si64 ((uint32_t) pixel);
-
- t1 = _mm_slli_si64 (p, 36 - 11);
- t2 = _mm_slli_si64 (p, 16 - 5);
-
- p = _mm_or_si64 (t1, p);
- p = _mm_or_si64 (t2, p);
- p = _mm_and_si64 (p, mask_x565_rgb);
- p = _mm_mullo_pi16 (p, mask_x565_unpack);
-
- return _mm_srli_pi16 (p, 8);
-}
-
static force_inline __m128i
expand565_16_1x128 (uint16_t pixel)
{
/* ---------------------------------------------------
* fb_compose_setup_sSE2
*/
-static force_inline __m64
-create_mask_16_64 (uint16_t mask)
-{
- return _mm_set1_pi16 (mask);
-}
-
/* Build a 128-bit constant with @mask replicated into all eight
 * 16-bit lanes.
 */
static force_inline __m128i
create_mask_16_128 (uint16_t mask)
{
    __m128i replicated = _mm_set1_epi16 (mask);

    return replicated;
}
-static force_inline __m64
-create_mask_2x32_64 (uint32_t mask0,
- uint32_t mask1)
-{
- return _mm_set_pi32 (mask0, mask1);
-}
-
/* Work around a code generation bug in Sun Studio 12. */
#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
# define create_mask_2x32_128(mask0, mask1) \
int width)
{
core_combine_over_u_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_over_reverse_u_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_in_u_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_reverse_in_u_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_out_u_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_reverse_out_u_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_atop_u_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_reverse_atop_u_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_xor_u_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_add_u_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_saturate_u_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_src_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_over_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_over_reverse_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_in_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_in_reverse_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_out_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_out_reverse_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_atop_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_reverse_atop_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_xor_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
}
static void
int width)
{
core_combine_add_ca_sse2 (dst, src, mask, width);
- _mm_empty ();
}
/* -------------------------------------------------------------------
}
}
- _mm_empty ();
}
/* ---------------------------------------------------------------------
}
}
- _mm_empty ();
}
/* ------------------------------
}
}
- _mm_empty ();
}
/* ---------------------------------------------------------------------------
}
}
- _mm_empty ();
}
/*---------------------------------------------------------------------
}
}
- _mm_empty ();
}
/*---------------------------------------------------------------------
}
}
- _mm_empty ();
}
/* ---------------------------------------------------------------------
}
}
- _mm_empty ();
}
/* --------------------------------------------------------------------
dst += dst_stride;
src += src_stride;
}
- _mm_empty ();
}
/* ------------------------------------------------------------------
}
}
- _mm_empty ();
}
/* -----------------------------------------------------------------
}
}
- _mm_empty ();
}
/* ----------------------------------------------------------------
}
}
- _mm_empty ();
return TRUE;
}
}
}
- _mm_empty ();
}
/*-----------------------------------------------------------------------
}
}
- _mm_empty ();
}
/* -----------------------------------------------------------------------
}
}
- _mm_empty ();
}
/* -------------------------------------------------------------------------
}
}
- _mm_empty ();
}
/* -------------------------------------------------------------------------------------------------
}
}
- _mm_empty ();
}
/* -----------------------------------------------------------------------
}
}
- _mm_empty ();
}
/* -----------------------------------------------------------------------
}
}
- _mm_empty ();
}
/* ---------------------------------------------------------------------------
}
}
- _mm_empty ();
}
/* -------------------------------------------------------------------------
}
}
- _mm_empty ();
}
/* -------------------------------------------------------------------------
}
}
- _mm_empty ();
}
/* ----------------------------------------------------------------------
}
}
- _mm_empty ();
}
/* ---------------------------------------------------------------------
core_combine_add_u_sse2 (dst, src, NULL, width);
}
- _mm_empty ();
}
/* -------------------------------------------------------------------------------------------------
}
}
- _mm_empty ();
return TRUE;
}
}
}
- _mm_empty ();
}
static void
}
}
- _mm_empty ();
}
static void
}
- _mm_empty ();
}
static void
}
}
- _mm_empty ();
}
/* A variant of 'core_combine_over_u_sse2' with minor tweaks */
w--;
}
- _mm_empty ();
}
FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
w--;
}
- _mm_empty ();
}
FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000);
mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000);
- /* MMX constants */
- mask_x565_rgb = create_mask_2x32_64 (0x000001f0, 0x003f001f);
- mask_x565_unpack = create_mask_2x32_64 (0x00000084, 0x04100840);
-
- mask_x0080 = create_mask_16_64 (0x0080);
- mask_x00ff = create_mask_16_64 (0x00ff);
- mask_x0101 = create_mask_16_64 (0x0101);
- mask_x_alpha = create_mask_2x32_64 (0x00ff0000, 0x00000000);
-
- _mm_empty ();
/* Set up function pointers */