#include <config.h>
#endif
-#ifdef USE_MMX
+#if defined USE_X86_MMX || defined USE_ARM_IWMMXT
#include <mmintrin.h>
#include "pixman-private.h"
#define CHECKPOINT()
#endif
+#ifdef USE_ARM_IWMMXT
+/* Empty the multimedia state. For some reason, ARM's mmintrin.h doesn't provide this. */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_empty (void)
+{
+
+}
+#endif
+
+#ifdef USE_X86_MMX
+# if (defined(__SUNPRO_C) || defined(_MSC_VER))
+# include <xmmintrin.h>
+# else
+/* We have to compile with -msse to use xmmintrin.h, but that causes SSE
+ * instructions to be generated that we don't want. Just duplicate the
+ * functions we want to use. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pu16 (__m64 __A, __m64 __B)
+{
+ asm ("pmulhuw %1, %0\n\t"
+ : "+y" (__A)
+ : "y" (__B)
+ );
+ return __A;
+}
+
+# ifdef __OPTIMIZE__
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pi16 (__m64 __A, int8_t const __N)
+{
+ __m64 ret;
+
+ asm ("pshufw %2, %1, %0\n\t"
+ : "=y" (ret)
+ : "y" (__A), "K" (__N)
+ );
+
+ return ret;
+}
+# else
+# define _mm_shuffle_pi16(A, N) \
+ ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N)))
+# endif
+# endif
+#endif
+
+#ifndef _MSC_VER
+#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
+ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
+#endif
+
/* Notes about writing mmx code
*
* give memory operands as the second operand. If you give it as the
/* --------------- MMX primitives ------------------------------------- */
-#ifdef __GNUC__
+/* If __m64 is defined as a struct or union, then define M64_MEMBER to be
+ * the name of the member used to access the data.
+ * If __m64 requires using mm_cvt* intrinsics functions to convert between
+ * uint64_t and __m64 values, then define USE_CVT_INTRINSICS.
+ * If __m64 and uint64_t values can just be cast to each other directly,
+ * then define USE_M64_CASTS.
+ */
+#ifdef _MSC_VER
+# define M64_MEMBER m64_u64
+#elif defined(__ICC)
+# define USE_CVT_INTRINSICS
+#elif defined(__GNUC__)
+# define USE_M64_CASTS
+#elif defined(__SUNPRO_C)
+# if (__SUNPRO_C >= 0x5120) && !defined(__NOVECTORSIZE__)
+/* Solaris Studio 12.3 (Sun C 5.12) introduces __attribute__(__vector_size__)
+ * support, and defaults to using it to define __m64, unless __NOVECTORSIZE__
+ * is defined. If it is used, then the mm_cvt* intrinsics must be used.
+ */
+# define USE_CVT_INTRINSICS
+# else
+/* For Studio 12.2 or older, or when __attribute__(__vector_size__) is
+ * disabled, __m64 is defined as a struct containing "unsigned long long l_".
+ */
+# define M64_MEMBER l_
+# endif
+#endif
+
+#if defined(USE_M64_CASTS) || defined(USE_CVT_INTRINSICS)
typedef uint64_t mmxdatafield;
#else
typedef __m64 mmxdatafield;
-/* If __m64 is defined as a struct or union, define M64_MEMBER to be the
- name of the member used to access the data */
-# ifdef _MSC_VER
-# define M64_MEMBER m64_u64
-# elif defined(__SUNPRO_C)
-# define M64_MEMBER l_
-# endif
#endif
typedef struct
mmxdatafield mmx_mask_2;
mmxdatafield mmx_mask_3;
mmxdatafield mmx_full_alpha;
- mmxdatafield mmx_ffff0000ffff0000;
- mmxdatafield mmx_0000ffff00000000;
- mmxdatafield mmx_000000000000ffff;
+ mmxdatafield mmx_4x0101;
} mmx_data_t;
#if defined(_MSC_VER)
# define MMXDATA_INIT(field, val) { val ## UI64 }
#elif defined(M64_MEMBER) /* __m64 is a struct, not an integral type */
# define MMXDATA_INIT(field, val) field = { val ## ULL }
-#else /* __m64 is an integral type */
+#else /* mmxdatafield is an integral type */
# define MMXDATA_INIT(field, val) field = val ## ULL
#endif
MMXDATA_INIT (.mmx_mask_2, 0xffff0000ffffffff),
MMXDATA_INIT (.mmx_mask_3, 0x0000ffffffffffff),
MMXDATA_INIT (.mmx_full_alpha, 0x00ff000000000000),
- MMXDATA_INIT (.mmx_ffff0000ffff0000, 0xffff0000ffff0000),
- MMXDATA_INIT (.mmx_0000ffff00000000, 0x0000ffff00000000),
- MMXDATA_INIT (.mmx_000000000000ffff, 0x000000000000ffff),
+ MMXDATA_INIT (.mmx_4x0101, 0x0101010101010101),
};
-#ifdef __GNUC__
-# ifdef __ICC
-# define MC(x) to_m64 (c.mmx_ ## x)
-# else
-# define MC(x) ((__m64)c.mmx_ ## x)
-# endif
+#ifdef USE_CVT_INTRINSICS
+# define MC(x) to_m64 (c.mmx_ ## x)
+#elif defined(USE_M64_CASTS)
+# define MC(x) ((__m64)c.mmx_ ## x)
#else
# define MC(x) c.mmx_ ## x
#endif
static force_inline __m64
to_m64 (uint64_t x)
{
-#ifdef __ICC
+#ifdef USE_CVT_INTRINSICS
return _mm_cvtsi64_m64 (x);
#elif defined M64_MEMBER /* __m64 is a struct, not an integral type */
__m64 res;
res.M64_MEMBER = x;
return res;
-#else /* __m64 is an integral type */
+#else /* USE_M64_CASTS */
return (__m64)x;
#endif
}
static force_inline uint64_t
to_uint64 (__m64 x)
{
-#ifdef __ICC
+#ifdef USE_CVT_INTRINSICS
return _mm_cvtm64_si64 (x);
#elif defined M64_MEMBER /* __m64 is a struct, not an integral type */
uint64_t res = x.M64_MEMBER;
return res;
-#else /* __m64 is an integral type */
+#else /* USE_M64_CASTS */
return (uint64_t)x;
#endif
}
res = _mm_mullo_pi16 (a, b);
res = _mm_adds_pu16 (res, MC (4x0080));
- res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8));
- res = _mm_srli_pi16 (res, 8);
+ res = _mm_mulhi_pu16 (res, MC (4x0101));
return res;
}
static force_inline __m64
expand_alpha (__m64 pixel)
{
- __m64 t1, t2;
-
- t1 = shift (pixel, -48);
- t2 = shift (t1, 16);
- t1 = _mm_or_si64 (t1, t2);
- t2 = shift (t1, 32);
- t1 = _mm_or_si64 (t1, t2);
-
- return t1;
+ return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 3, 3, 3));
}
static force_inline __m64
expand_alpha_rev (__m64 pixel)
{
- __m64 t1, t2;
-
- /* move alpha to low 16 bits and zero the rest */
- t1 = shift (pixel, 48);
- t1 = shift (t1, -48);
-
- t2 = shift (t1, 16);
- t1 = _mm_or_si64 (t1, t2);
- t2 = shift (t1, 32);
- t1 = _mm_or_si64 (t1, t2);
-
- return t1;
+ return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (0, 0, 0, 0));
}
static force_inline __m64
invert_colors (__m64 pixel)
{
- __m64 x, y, z;
-
- x = y = z = pixel;
-
- x = _mm_and_si64 (x, MC (ffff0000ffff0000));
- y = _mm_and_si64 (y, MC (000000000000ffff));
- z = _mm_and_si64 (z, MC (0000ffff00000000));
-
- y = shift (y, 32);
- z = shift (z, -32);
-
- x = _mm_or_si64 (x, y);
- x = _mm_or_si64 (x, z);
-
- return x;
+ return _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 0, 1, 2));
}
static force_inline __m64
return pix_multiply (src, mask);
}
-static force_inline __m64
-in_over_full_src_alpha (__m64 src, __m64 mask, __m64 dest)
-{
- src = _mm_or_si64 (src, MC (full_alpha));
-
- return over (in (src, mask), mask, dest);
-}
-
#ifndef _MSC_VER
static force_inline __m64
in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest)
#endif
+/* Elemental unaligned loads */
+
+static force_inline __m64 ldq_u(uint64_t *p)
+{
+#ifdef USE_X86_MMX
+ /* x86's alignment restrictions are very relaxed. */
+ return *(__m64 *)p;
+#elif defined USE_ARM_IWMMXT
+ int align = (uintptr_t)p & 7;
+ __m64 *aligned_p;
+ if (align == 0)
+ return *p;
+ aligned_p = (__m64 *)((uintptr_t)p & ~7);
+ return (__m64) _mm_align_si64 (aligned_p[0], aligned_p[1], align);
+#else
+ struct __una_u64 { uint64_t x __attribute__((packed)); };
+ const struct __una_u64 *ptr = (const struct __una_u64 *) p;
+ return (__m64) ptr->x;
+#endif
+}
+
+static force_inline uint32_t ldl_u(const uint32_t *p)
+{
+#ifdef USE_X86_MMX
+ /* x86's alignment restrictions are very relaxed. */
+ return *p;
+#else
+ struct __una_u32 { uint32_t x __attribute__((packed)); };
+ const struct __una_u32 *ptr = (const struct __una_u32 *) p;
+ return ptr->x;
+#endif
+}
+
+static force_inline __m64
+load8888 (const uint32_t *v)
+{
+ return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (*v), _mm_setzero_si64 ());
+}
+
static force_inline __m64
-load8888 (uint32_t v)
+load8888u (const uint32_t *v)
{
- return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64 ());
+ uint32_t l = ldl_u(v);
+ return load8888(&l);
}
static force_inline __m64
return _mm_packs_pu16 (lo, hi);
}
-static force_inline uint32_t
-store8888 (__m64 v)
+static force_inline void
+store (uint32_t *dest, __m64 v)
+{
+ *dest = _mm_cvtsi64_si32 (v);
+}
+
+static force_inline void
+store8888 (uint32_t *dest, __m64 v)
{
- return _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ()));
+ v = pack8888 (v, _mm_setzero_si64());
+ store (dest, v);
}
/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
#define pix_add_mul(x, a, y, b) \
( x = pix_multiply (x, a), \
- y = pix_multiply (y, a), \
+ y = pix_multiply (y, b), \
pix_add (x, y) )
#endif
if (mask)
{
- __m64 m = load8888 (*mask);
- __m64 s = load8888 (ssrc);
+ __m64 m = load8888 (mask);
+ __m64 s = load8888 (&ssrc);
m = expand_alpha (m);
s = pix_multiply (s, m);
- ssrc = store8888 (s);
+ store8888 (&ssrc, s);
}
return ssrc;
else if (ssrc)
{
__m64 s, sa;
- s = load8888 (ssrc);
+ s = load8888 (&ssrc);
sa = expand_alpha (s);
- *dest = store8888 (over (s, sa, load8888 (*dest)));
+ store8888 (dest, over (s, sa, load8888 (dest)));
}
++dest;
__m64 d, da;
uint32_t s = combine (src, mask);
- d = load8888 (*dest);
+ d = load8888 (dest);
da = expand_alpha (d);
- *dest = store8888 (over (d, da, load8888 (s)));
+ store8888 (dest, over (d, da, load8888 (&s)));
++dest;
++src;
while (dest < end)
{
__m64 x, a;
+ uint32_t ssrc = combine (src, mask);
- x = load8888 (combine (src, mask));
- a = load8888 (*dest);
+ x = load8888 (&ssrc);
+ a = load8888 (dest);
a = expand_alpha (a);
x = pix_multiply (x, a);
- *dest = store8888 (x);
+ store8888 (dest, x);
++dest;
++src;
while (dest < end)
{
__m64 x, a;
+ uint32_t ssrc = combine (src, mask);
- x = load8888 (*dest);
- a = load8888 (combine (src, mask));
+ x = load8888 (dest);
+ a = load8888 (&ssrc);
a = expand_alpha (a);
x = pix_multiply (x, a);
- *dest = store8888 (x);
+ store8888 (dest, x);
++dest;
++src;
while (dest < end)
{
__m64 x, a;
+ uint32_t ssrc = combine (src, mask);
- x = load8888 (combine (src, mask));
- a = load8888 (*dest);
+ x = load8888 (&ssrc);
+ a = load8888 (dest);
a = expand_alpha (a);
a = negate (a);
x = pix_multiply (x, a);
- *dest = store8888 (x);
+ store8888 (dest, x);
++dest;
++src;
while (dest < end)
{
__m64 x, a;
+ uint32_t ssrc = combine (src, mask);
- x = load8888 (*dest);
- a = load8888 (combine (src, mask));
+ x = load8888 (dest);
+ a = load8888 (&ssrc);
a = expand_alpha (a);
a = negate (a);
x = pix_multiply (x, a);
- *dest = store8888 (x);
+ store8888 (dest, x);
++dest;
++src;
while (dest < end)
{
__m64 s, da, d, sia;
+ uint32_t ssrc = combine (src, mask);
- s = load8888 (combine (src, mask));
- d = load8888 (*dest);
+ s = load8888 (&ssrc);
+ d = load8888 (dest);
sia = expand_alpha (s);
sia = negate (sia);
da = expand_alpha (d);
s = pix_add_mul (s, da, d, sia);
- *dest = store8888 (s);
+ store8888 (dest, s);
++dest;
++src;
while (dest < end)
{
__m64 s, dia, d, sa;
+ uint32_t ssrc = combine (src, mask);
- s = load8888 (combine (src, mask));
- d = load8888 (*dest);
+ s = load8888 (&ssrc);
+ d = load8888 (dest);
sa = expand_alpha (s);
dia = expand_alpha (d);
dia = negate (dia);
s = pix_add_mul (s, dia, d, sa);
- *dest = store8888 (s);
+ store8888 (dest, s);
++dest;
++src;
while (dest < end)
{
__m64 s, dia, d, sia;
+ uint32_t ssrc = combine (src, mask);
- s = load8888 (combine (src, mask));
- d = load8888 (*dest);
+ s = load8888 (&ssrc);
+ d = load8888 (dest);
sia = expand_alpha (s);
dia = expand_alpha (d);
sia = negate (sia);
dia = negate (dia);
s = pix_add_mul (s, dia, d, sia);
- *dest = store8888 (s);
+ store8888 (dest, s);
++dest;
++src;
while (dest < end)
{
__m64 s, d;
+ uint32_t ssrc = combine (src, mask);
- s = load8888 (combine (src, mask));
- d = load8888 (*dest);
+ s = load8888 (&ssrc);
+ d = load8888 (dest);
s = pix_add (s, d);
- *dest = store8888 (s);
+ store8888 (dest, s);
++dest;
++src;
{
uint32_t s = combine (src, mask);
uint32_t d = *dest;
- __m64 ms = load8888 (s);
- __m64 md = load8888 (d);
+ __m64 ms = load8888 (&s);
+ __m64 md = load8888 (&d);
uint32_t sa = s >> 24;
uint32_t da = ~d >> 24;
if (sa > da)
{
- __m64 msa = load8888 (DIV_UN8 (da, sa) << 24);
+ uint32_t quot = DIV_UN8 (da, sa) << 24;
+ __m64 msa = load8888 (");
msa = expand_alpha (msa);
ms = pix_multiply (ms, msa);
}
md = pix_add (md, ms);
- *dest = store8888 (md);
+ store8888 (dest, md);
++src;
++dest;
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
s = pix_multiply (s, a);
- *dest = store8888 (s);
+ store8888 (dest, s);
++src;
++mask;
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 sa = expand_alpha (s);
- *dest = store8888 (in_over (s, sa, a, d));
+ store8888 (dest, in_over (s, sa, a, d));
++src;
++dest;
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
- *dest = store8888 (over (d, da, in (s, a)));
+ store8888 (dest, over (d, da, in (s, a)));
++src;
++dest;
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
s = pix_multiply (s, a);
s = pix_multiply (s, da);
- *dest = store8888 (s);
+ store8888 (dest, s);
++src;
++dest;
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 sa = expand_alpha (s);
a = pix_multiply (a, sa);
d = pix_multiply (d, a);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
da = negate (da);
s = pix_multiply (s, a);
s = pix_multiply (s, da);
- *dest = store8888 (s);
+ store8888 (dest, s);
++src;
++dest;
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 sa = expand_alpha (s);
a = pix_multiply (a, sa);
a = negate (a);
d = pix_multiply (d, a);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
__m64 sa = expand_alpha (s);
a = pix_multiply (a, sa);
a = negate (a);
d = pix_add_mul (d, a, s, da);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
__m64 sa = expand_alpha (s);
a = pix_multiply (a, sa);
da = negate (da);
d = pix_add_mul (d, a, s, da);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
__m64 da = expand_alpha (d);
__m64 sa = expand_alpha (s);
da = negate (da);
a = negate (a);
d = pix_add_mul (d, a, s, da);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
while (src < end)
{
- __m64 a = load8888 (*mask);
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dest);
+ __m64 a = load8888 (mask);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dest);
s = pix_multiply (s, a);
d = pix_add (s, d);
- *dest = store8888 (d);
+ store8888 (dest, d);
++src;
++dest;
static void
mmx_composite_over_n_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line, *dst;
int32_t w;
CHECKPOINT ();
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
if (src == 0)
return;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
while (height--)
while (w && (unsigned long)dst & 7)
{
- *dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
+ store8888 (dst, over (vsrc, vsrca, load8888 (dst)));
w--;
dst++;
CHECKPOINT ();
- while (w)
+ if (w)
{
- *dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
-
- w--;
- dst++;
+ store8888 (dst, over (vsrc, vsrca, load8888 (dst)));
}
}
static void
mmx_composite_over_n_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint16_t *dst_line, *dst;
int32_t w;
CHECKPOINT ();
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
if (src == 0)
return;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
while (height--)
static void
mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint32_t *dst_line;
uint32_t *mask_line;
CHECKPOINT ();
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
if (src == 0)
return;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
while (height--)
if (m)
{
- __m64 vdest = load8888 (*q);
- vdest = in_over (vsrc, vsrca, load8888 (m), vdest);
- *q = store8888 (vdest);
+ __m64 vdest = load8888 (q);
+ vdest = in_over (vsrc, vsrca, load8888 (&m), vdest);
+ store8888 (q, vdest);
}
twidth--;
__m64 dest0, dest1;
__m64 vdest = *(__m64 *)q;
- dest0 = in_over (vsrc, vsrca, load8888 (m0),
+ dest0 = in_over (vsrc, vsrca, load8888 (&m0),
expand8888 (vdest, 0));
- dest1 = in_over (vsrc, vsrca, load8888 (m1),
+ dest1 = in_over (vsrc, vsrca, load8888 (&m1),
expand8888 (vdest, 1));
*(__m64 *)q = pack8888 (dest0, dest1);
twidth -= 2;
}
- while (twidth)
+ if (twidth)
{
uint32_t m = *(uint32_t *)p;
if (m)
{
- __m64 vdest = load8888 (*q);
- vdest = in_over (vsrc, vsrca, load8888 (m), vdest);
- *q = store8888 (vdest);
+ __m64 vdest = load8888 (q);
+ vdest = in_over (vsrc, vsrca, load8888 (&m), vdest);
+ store8888 (q, vdest);
}
twidth--;
static void
mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t mask;
CHECKPOINT ();
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format);
+ mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
mask &= 0xff000000;
mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
- vmask = load8888 (mask);
+ vmask = load8888 (&mask);
while (height--)
{
while (w && (unsigned long)dst & 7)
{
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dst);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dst);
- *dst = store8888 (in_over (s, expand_alpha (s), vmask, d));
+ store8888 (dst, in_over (s, expand_alpha (s), vmask, d));
w--;
dst++;
while (w >= 2)
{
- __m64 vs = *(__m64 *)src;
+ __m64 vs = ldq_u((uint64_t *)src);
__m64 vd = *(__m64 *)dst;
__m64 vsrc0 = expand8888 (vs, 0);
__m64 vsrc1 = expand8888 (vs, 1);
src += 2;
}
- while (w)
+ if (w)
{
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dst);
-
- *dst = store8888 (in_over (s, expand_alpha (s), vmask, d));
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dst);
- w--;
- dst++;
- src++;
+ store8888 (dst, in_over (s, expand_alpha (s), vmask, d));
}
}
static void
mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t mask;
CHECKPOINT ();
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format);
+ mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
mask &= 0xff000000;
mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
- vmask = load8888 (mask);
+ vmask = load8888 (&mask);
srca = MC (4x00ff);
while (height--)
while (w && (unsigned long)dst & 7)
{
- __m64 s = load8888 (*src | 0xff000000);
- __m64 d = load8888 (*dst);
+ uint32_t ssrc = *src | 0xff000000;
+ __m64 s = load8888 (&ssrc);
+ __m64 d = load8888 (dst);
- *dst = store8888 (in_over (s, srca, vmask, d));
+ store8888 (dst, in_over (s, srca, vmask, d));
w--;
dst++;
__m64 vd6 = *(__m64 *)(dst + 12);
__m64 vd7 = *(__m64 *)(dst + 14);
- __m64 vs0 = *(__m64 *)(src + 0);
- __m64 vs1 = *(__m64 *)(src + 2);
- __m64 vs2 = *(__m64 *)(src + 4);
- __m64 vs3 = *(__m64 *)(src + 6);
- __m64 vs4 = *(__m64 *)(src + 8);
- __m64 vs5 = *(__m64 *)(src + 10);
- __m64 vs6 = *(__m64 *)(src + 12);
- __m64 vs7 = *(__m64 *)(src + 14);
+ __m64 vs0 = ldq_u((uint64_t *)(src + 0));
+ __m64 vs1 = ldq_u((uint64_t *)(src + 2));
+ __m64 vs2 = ldq_u((uint64_t *)(src + 4));
+ __m64 vs3 = ldq_u((uint64_t *)(src + 6));
+ __m64 vs4 = ldq_u((uint64_t *)(src + 8));
+ __m64 vs5 = ldq_u((uint64_t *)(src + 10));
+ __m64 vs6 = ldq_u((uint64_t *)(src + 12));
+ __m64 vs7 = ldq_u((uint64_t *)(src + 14));
vd0 = pack8888 (
in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)),
while (w)
{
- __m64 s = load8888 (*src | 0xff000000);
- __m64 d = load8888 (*dst);
+ uint32_t ssrc = *src | 0xff000000;
+ __m64 s = load8888 (&ssrc);
+ __m64 d = load8888 (dst);
- *dst = store8888 (in_over (s, srca, vmask, d));
+ store8888 (dst, in_over (s, srca, vmask, d));
w--;
dst++;
static void
mmx_composite_over_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
uint32_t s;
CHECKPOINT ();
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
while (height--)
else if (s)
{
__m64 ms, sa;
- ms = load8888 (s);
+ ms = load8888 (&s);
sa = expand_alpha (ms);
- *dst = store8888 (over (ms, sa, load8888 (*dst)));
+ store8888 (dst, over (ms, sa, load8888 (dst)));
}
dst++;
static void
mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
CHECKPOINT ();
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
#if 0
while (w && (unsigned long)dst & 7)
{
- __m64 vsrc = load8888 (*src);
+ __m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
__m64 vsrc0, vsrc1, vsrc2, vsrc3;
__m64 vdest;
- vsrc0 = load8888 (*(src + 0));
- vsrc1 = load8888 (*(src + 1));
- vsrc2 = load8888 (*(src + 2));
- vsrc3 = load8888 (*(src + 3));
+ vsrc0 = load8888 ((src + 0));
+ vsrc1 = load8888 ((src + 1));
+ vsrc2 = load8888 ((src + 2));
+ vsrc3 = load8888 ((src + 3));
vdest = *(__m64 *)dst;
while (w)
{
- __m64 vsrc = load8888 (*src);
+ __m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
static void
mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
CHECKPOINT ();
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
srca = src >> 24;
if (src == 0)
srcsrc = (uint64_t)src << 32 | src;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
while (height--)
{
__m64 vdest = in_over (vsrc, vsrca,
expand_alpha_rev (to_m64 (m)),
- load8888 (*dst));
+ load8888 (dst));
- *dst = store8888 (vdest);
+ store8888 (dst, vdest);
}
w--;
CHECKPOINT ();
- while (w)
+ if (w)
{
uint64_t m = *mask;
if (m)
{
- __m64 vdest = load8888 (*dst);
+ __m64 vdest = load8888 (dst);
vdest = in_over (
vsrc, vsrca, expand_alpha_rev (to_m64 (m)), vdest);
- *dst = store8888 (vdest);
+ store8888 (dst, vdest);
}
-
- w--;
- mask++;
- dst++;
}
}
uint32_t byte_width;
uint8_t *byte_line;
-#ifdef __GNUC__
+#if defined __GNUC__ && defined USE_X86_MMX
__m64 v1, v2, v3, v4, v5, v6, v7;
#endif
fill = ((uint64_t)xor << 32) | xor;
vfill = to_m64 (fill);
-#ifdef __GNUC__
+#if defined __GNUC__ && defined USE_X86_MMX
__asm__ (
"movq %7, %0\n"
"movq %7, %1\n"
byte_line += stride;
w = byte_width;
- while (w >= 1 && ((unsigned long)d & 1))
+ if (w >= 1 && ((unsigned long)d & 1))
{
*(uint8_t *)d = (xor & 0xff);
w--;
d++;
}
- while (w >= 2 && ((unsigned long)d & 3))
+ if (w >= 2 && ((unsigned long)d & 3))
{
*(uint16_t *)d = xor;
w -= 2;
while (w >= 64)
{
-#ifdef __GNUC__
+#if defined __GNUC__ && defined USE_X86_MMX
__asm__ (
"movq %1, (%0)\n"
"movq %2, 8(%0)\n"
w -= 4;
d += 4;
}
- while (w >= 2)
+ if (w >= 2)
{
*(uint16_t *)d = xor;
w -= 2;
d += 2;
}
- while (w >= 1)
+ if (w >= 1)
{
*(uint8_t *)d = (xor & 0xff);
w--;
static void
mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint32_t *dst_line, *dst;
uint8_t *mask_line, *mask;
CHECKPOINT ();
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
srca = src >> 24;
if (src == 0)
{
- pixman_fill_mmx (dst_image->bits.bits, dst_image->bits.rowstride,
- PIXMAN_FORMAT_BPP (dst_image->bits.format),
+ pixman_fill_mmx (dest_image->bits.bits, dest_image->bits.rowstride,
+ PIXMAN_FORMAT_BPP (dest_image->bits.format),
dest_x, dest_y, width, height, 0);
return;
}
srcsrc = (uint64_t)src << 32 | src;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
while (height--)
{
{
__m64 vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
- *dst = store8888 (vdest);
+ store8888 (dst, vdest);
}
else
{
CHECKPOINT ();
- while (w)
+ if (w)
{
uint64_t m = *mask;
if (m)
{
- __m64 vdest = load8888 (*dst);
+ __m64 vdest = load8888 (dst);
vdest = in (vsrc, expand_alpha_rev (to_m64 (m)));
- *dst = store8888 (vdest);
+ store8888 (dst, vdest);
}
else
{
*dst = 0;
}
-
- w--;
- mask++;
- dst++;
}
}
static void
mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t src, srca;
uint16_t *dst_line, *dst;
uint8_t *mask_line, *mask;
CHECKPOINT ();
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
srca = src >> 24;
if (src == 0)
return;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
tmp = pack_565 (vsrc, _mm_setzero_si64 (), 0);
static void
mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint16_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
CHECKPOINT ();
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
#if 0
while (w && (unsigned long)dst & 7)
{
- __m64 vsrc = load8888 (*src);
+ __m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
if ((a0 & a1 & a2 & a3) == 0xFF)
{
__m64 vdest;
- vdest = pack_565 (invert_colors (load8888 (s0)), _mm_setzero_si64 (), 0);
- vdest = pack_565 (invert_colors (load8888 (s1)), vdest, 1);
- vdest = pack_565 (invert_colors (load8888 (s2)), vdest, 2);
- vdest = pack_565 (invert_colors (load8888 (s3)), vdest, 3);
+ vdest = pack_565 (invert_colors (load8888 (&s0)), _mm_setzero_si64 (), 0);
+ vdest = pack_565 (invert_colors (load8888 (&s1)), vdest, 1);
+ vdest = pack_565 (invert_colors (load8888 (&s2)), vdest, 2);
+ vdest = pack_565 (invert_colors (load8888 (&s3)), vdest, 3);
*(__m64 *)dst = vdest;
}
{
__m64 vdest = *(__m64 *)dst;
- vdest = pack_565 (over_rev_non_pre (load8888 (s0), expand565 (vdest, 0)), vdest, 0);
- vdest = pack_565 (over_rev_non_pre (load8888 (s1), expand565 (vdest, 1)), vdest, 1);
- vdest = pack_565 (over_rev_non_pre (load8888 (s2), expand565 (vdest, 2)), vdest, 2);
- vdest = pack_565 (over_rev_non_pre (load8888 (s3), expand565 (vdest, 3)), vdest, 3);
+ vdest = pack_565 (over_rev_non_pre (load8888 (&s0), expand565 (vdest, 0)), vdest, 0);
+ vdest = pack_565 (over_rev_non_pre (load8888 (&s1), expand565 (vdest, 1)), vdest, 1);
+ vdest = pack_565 (over_rev_non_pre (load8888 (&s2), expand565 (vdest, 2)), vdest, 2);
+ vdest = pack_565 (over_rev_non_pre (load8888 (&s3), expand565 (vdest, 3)), vdest, 3);
*(__m64 *)dst = vdest;
}
while (w)
{
- __m64 vsrc = load8888 (*src);
+ __m64 vsrc = load8888 (src);
uint64_t d = *dst;
__m64 vdest = expand565 (to_m64 (d), 0);
static void
mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
CHECKPOINT ();
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
#if 0
while (w && (unsigned long)dst & 7)
{
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dst);
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dst);
- *dst = store8888 (over_rev_non_pre (s, d));
+ store8888 (dst, over_rev_non_pre (s, d));
w--;
dst++;
while (w >= 2)
{
- uint64_t s0, s1;
+ uint32_t s0, s1;
unsigned char a0, a1;
__m64 d0, d1;
if ((a0 & a1) == 0xFF)
{
- d0 = invert_colors (load8888 (s0));
- d1 = invert_colors (load8888 (s1));
+ d0 = invert_colors (load8888 (&s0));
+ d1 = invert_colors (load8888 (&s1));
*(__m64 *)dst = pack8888 (d0, d1);
}
{
__m64 vdest = *(__m64 *)dst;
- d0 = over_rev_non_pre (load8888 (s0), expand8888 (vdest, 0));
- d1 = over_rev_non_pre (load8888 (s1), expand8888 (vdest, 1));
+ d0 = over_rev_non_pre (load8888 (&s0), expand8888 (vdest, 0));
+ d1 = over_rev_non_pre (load8888 (&s1), expand8888 (vdest, 1));
*(__m64 *)dst = pack8888 (d0, d1);
}
src += 2;
}
- while (w)
+ if (w)
{
- __m64 s = load8888 (*src);
- __m64 d = load8888 (*dst);
-
- *dst = store8888 (over_rev_non_pre (s, d));
+ __m64 s = load8888 (src);
+ __m64 d = load8888 (dst);
- w--;
- dst++;
- src++;
+ store8888 (dst, over_rev_non_pre (s, d));
}
}
static void
mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t src;
uint16_t *dst_line;
uint32_t *mask_line;
CHECKPOINT ();
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
if (src == 0)
return;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
while (height--)
{
uint64_t d = *q;
__m64 vdest = expand565 (to_m64 (d), 0);
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
+ vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0);
*q = to_uint64 (vdest);
}
{
__m64 vdest = *(__m64 *)q;
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m0), expand565 (vdest, 0)), vdest, 0);
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m1), expand565 (vdest, 1)), vdest, 1);
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m2), expand565 (vdest, 2)), vdest, 2);
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m3), expand565 (vdest, 3)), vdest, 3);
+ vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m0), expand565 (vdest, 0)), vdest, 0);
+ vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m1), expand565 (vdest, 1)), vdest, 1);
+ vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m2), expand565 (vdest, 2)), vdest, 2);
+ vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m3), expand565 (vdest, 3)), vdest, 3);
*(__m64 *)q = vdest;
}
{
uint64_t d = *q;
__m64 vdest = expand565 (to_m64 (d), 0);
- vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
+ vdest = pack_565 (in_over (vsrc, vsrca, load8888 (&m), vdest), vdest, 0);
*q = to_uint64 (vdest);
}
static void
mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
uint8_t sa;
__m64 vsrc, vsrca;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
sa = src >> 24;
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
while (height--)
mask_line += mask_stride;
w = width;
- if ((((unsigned long)dst_image & 3) == 0) &&
- (((unsigned long)src_image & 3) == 0))
+ while (w && (unsigned long)dst & 7)
{
- while (w >= 4)
- {
- __m64 vmask;
- __m64 vdest;
+ uint16_t tmp;
+ uint8_t a;
+ uint32_t m, d;
- vmask = load8888 (*(uint32_t *)mask);
- vdest = load8888 (*(uint32_t *)dst);
+ a = *mask++;
+ d = *dst;
- *(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest));
+ m = MUL_UN8 (sa, a, tmp);
+ d = MUL_UN8 (m, d, tmp);
- dst += 4;
- mask += 4;
- w -= 4;
- }
+ *dst++ = d;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ __m64 vmask;
+ __m64 vdest;
+
+ vmask = load8888u ((uint32_t *)mask);
+ vdest = load8888 ((uint32_t *)dst);
+
+ store8888 ((uint32_t *)dst, in (in (vsrca, vmask), vdest));
+
+ dst += 4;
+ mask += 4;
+ w -= 4;
}
while (w--)
static void
mmx_composite_in_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int src_stride, dst_stride;
int32_t w;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
while (height--)
src_line += src_stride;
w = width;
- if ((((unsigned long)dst_image & 3) == 0) &&
- (((unsigned long)src_image & 3) == 0))
+ while (w && (unsigned long)dst & 3)
{
- while (w >= 4)
- {
- uint32_t *s = (uint32_t *)src;
- uint32_t *d = (uint32_t *)dst;
+ uint8_t s, d;
+ uint16_t tmp;
- *d = store8888 (in (load8888 (*s), load8888 (*d)));
+ s = *src;
+ d = *dst;
- w -= 4;
- dst += 4;
- src += 4;
- }
+ *dst = MUL_UN8 (s, d, tmp);
+
+ src++;
+ dst++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ uint32_t *s = (uint32_t *)src;
+ uint32_t *d = (uint32_t *)dst;
+
+ store8888 (d, in (load8888u (s), load8888 (d)));
+
+ w -= 4;
+ dst += 4;
+ src += 4;
}
while (w--)
static void
mmx_composite_add_n_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *mask_line, *mask;
int dst_stride, mask_stride;
uint8_t sa;
__m64 vsrc, vsrca;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
sa = src >> 24;
if (src == 0)
return;
- vsrc = load8888 (src);
+ vsrc = load8888 (&src);
vsrca = expand_alpha (vsrc);
while (height--)
mask_line += mask_stride;
w = width;
- if ((((unsigned long)mask_image & 3) == 0) &&
- (((unsigned long)dst_image & 3) == 0))
+ while (w && (unsigned long)dst & 3)
{
- while (w >= 4)
- {
- __m64 vmask = load8888 (*(uint32_t *)mask);
- __m64 vdest = load8888 (*(uint32_t *)dst);
+ uint16_t tmp;
+ uint16_t a;
+ uint32_t m, d;
+ uint32_t r;
- *(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest));
+ a = *mask++;
+ d = *dst;
- w -= 4;
- dst += 4;
- mask += 4;
- }
+ m = MUL_UN8 (sa, a, tmp);
+ r = ADD_UN8 (m, d, tmp);
+
+ *dst++ = r;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ __m64 vmask;
+ __m64 vdest;
+
+ vmask = load8888u ((uint32_t *)mask);
+ vdest = load8888 ((uint32_t *)dst);
+
+ store8888 ((uint32_t *)dst, _mm_adds_pu8 (in (vsrca, vmask), vdest));
+
+ dst += 4;
+ mask += 4;
+ w -= 4;
}
while (w--)
static void
mmx_composite_add_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint8_t *dst_line, *dst;
uint8_t *src_line, *src;
int dst_stride, src_stride;
CHECKPOINT ();
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
while (height--)
{
while (w >= 8)
{
- *(__m64*)dst = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst);
+ *(__m64*)dst = _mm_adds_pu8 (ldq_u((uint64_t *)src), *(__m64*)dst);
dst += 8;
src += 8;
w -= 8;
static void
mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
__m64 dst64;
uint32_t *dst_line, *dst;
uint32_t *src_line, *src;
CHECKPOINT ();
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
while (height--)
{
while (w && (unsigned long)dst & 7)
{
- *dst = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (*src),
- _mm_cvtsi32_si64 (*dst)));
+ store (dst, _mm_adds_pu8 (_mm_cvtsi32_si64 (*src),
+ _mm_cvtsi32_si64 (*dst)));
dst++;
src++;
w--;
while (w >= 2)
{
- dst64 = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst);
+ dst64 = _mm_adds_pu8 (ldq_u((uint64_t *)src), *(__m64*)dst);
*(uint64_t*)dst = to_uint64 (dst64);
dst += 2;
src += 2;
if (w)
{
- *dst = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (*src),
- _mm_cvtsi32_si64 (*dst)));
+ store (dst, _mm_adds_pu8 (_mm_cvtsi32_si64 (*src),
+ _mm_cvtsi32_si64 (*dst)));
}
}
int dst_bpp,
int src_x,
int src_y,
- int dst_x,
- int dst_y,
+ int dest_x,
+ int dest_y,
int width,
int height)
{
src_stride = src_stride * (int) sizeof (uint32_t) / 2;
dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
- dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+ dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
byte_width = 2 * width;
src_stride *= 2;
dst_stride *= 2;
src_stride = src_stride * (int) sizeof (uint32_t) / 4;
dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
- dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+ dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x));
byte_width = 4 * width;
src_stride *= 4;
dst_stride *= 4;
dst_bytes += dst_stride;
w = byte_width;
- while (w >= 2 && ((unsigned long)d & 3))
+ if (w >= 1 && ((unsigned long)d & 1))
+ {
+ *(uint8_t *)d = *(uint8_t *)s;
+ w -= 1;
+ s += 1;
+ d += 1;
+ }
+
+ if (w >= 2 && ((unsigned long)d & 3))
{
*(uint16_t *)d = *(uint16_t *)s;
w -= 2;
while (w >= 4 && ((unsigned long)d & 7))
{
- *(uint32_t *)d = *(uint32_t *)s;
+ *(uint32_t *)d = ldl_u((uint32_t *)s);
w -= 4;
s += 4;
while (w >= 64)
{
-#if defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
+#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX
__asm__ (
"movq (%1), %%mm0\n"
"movq 8(%1), %%mm1\n"
"%mm0", "%mm1", "%mm2", "%mm3",
"%mm4", "%mm5", "%mm6", "%mm7");
#else
- __m64 v0 = *(__m64 *)(s + 0);
- __m64 v1 = *(__m64 *)(s + 8);
- __m64 v2 = *(__m64 *)(s + 16);
- __m64 v3 = *(__m64 *)(s + 24);
- __m64 v4 = *(__m64 *)(s + 32);
- __m64 v5 = *(__m64 *)(s + 40);
- __m64 v6 = *(__m64 *)(s + 48);
- __m64 v7 = *(__m64 *)(s + 56);
+ __m64 v0 = ldq_u((uint64_t *)(s + 0));
+ __m64 v1 = ldq_u((uint64_t *)(s + 8));
+ __m64 v2 = ldq_u((uint64_t *)(s + 16));
+ __m64 v3 = ldq_u((uint64_t *)(s + 24));
+ __m64 v4 = ldq_u((uint64_t *)(s + 32));
+ __m64 v5 = ldq_u((uint64_t *)(s + 40));
+ __m64 v6 = ldq_u((uint64_t *)(s + 48));
+ __m64 v7 = ldq_u((uint64_t *)(s + 56));
*(__m64 *)(d + 0) = v0;
*(__m64 *)(d + 8) = v1;
*(__m64 *)(d + 16) = v2;
}
while (w >= 4)
{
- *(uint32_t *)d = *(uint32_t *)s;
+ *(uint32_t *)d = ldl_u((uint32_t *)s);
w -= 4;
s += 4;
static void
mmx_composite_copy_area (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
+
pixman_blt_mmx (src_image->bits.bits,
- dst_image->bits.bits,
+ dest_image->bits.bits,
src_image->bits.rowstride,
- dst_image->bits.rowstride,
+ dest_image->bits.rowstride,
PIXMAN_FORMAT_BPP (src_image->bits.format),
- PIXMAN_FORMAT_BPP (dst_image->bits.format),
+ PIXMAN_FORMAT_BPP (dest_image->bits.format),
src_x, src_y, dest_x, dest_y, width, height);
}
-#if 0
static void
mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+ pixman_composite_info_t *info)
{
+ PIXMAN_COMPOSITE_ARGS (info);
uint32_t *src, *src_line;
uint32_t *dst, *dst_line;
uint8_t *mask, *mask_line;
int src_stride, mask_stride, dst_stride;
int32_t w;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
if (m)
{
- __m64 s = load8888 (*src | 0xff000000);
+ uint32_t ssrc = *src | 0xff000000;
+ __m64 s = load8888 (&ssrc);
if (m == 0xff)
{
- *dst = store8888 (s);
+ store8888 (dst, s);
}
else
{
__m64 sa = expand_alpha (s);
__m64 vm = expand_alpha_rev (to_m64 (m));
- __m64 vdest = in_over (s, sa, vm, load8888 (*dst));
+ __m64 vdest = in_over (s, sa, vm, load8888 (dst));
- *dst = store8888 (vdest);
+ store8888 (dst, vdest);
}
}
_mm_empty ();
}
-#endif
static const pixman_fast_path_t mmx_fast_paths[] =
{
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, mmx_composite_over_8888_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, mmx_composite_over_8888_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, mmx_composite_over_8888_n_8888 ),
-#if 0
- /* FIXME: This code is commented out since it's apparently
- * not actually faster than the generic code.
- */
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, mmx_composite_over_x888_8_8888 ),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, mmx_composite_over_x888_8_8888 ),
- PIXMAN_STD_FAST_PATH (OVER, x8b8r8g8, a8, x8b8g8r8, mmx_composite_over_x888_8_8888 ),
- PIXMAN_STD_FAST_PATH (OVER, x8b8r8g8, a8, a8r8g8b8, mmx_composite_over_x888_8_8888 ),
-#endif
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, mmx_composite_over_x888_8_8888 ),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, mmx_composite_over_x888_8_8888 ),
PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, mmx_composite_over_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, mmx_composite_over_n_8888 ),
PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, mmx_composite_over_n_0565 ),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, mmx_composite_over_n_0565 ),
PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, mmx_composite_copy_area ),
PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, mmx_composite_copy_area ),
int dst_bpp,
int src_x,
int src_y,
- int dst_x,
- int dst_y,
+ int dest_x,
+ int dest_y,
int width,
int height)
{
if (!pixman_blt_mmx (
src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
- src_x, src_y, dst_x, dst_y, width, height))
+ src_x, src_y, dest_x, dest_y, width, height))
{
return _pixman_implementation_blt (
imp->delegate,
src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
- src_x, src_y, dst_x, dst_y, width, height);
+ src_x, src_y, dest_x, dest_y, width, height);
}
return TRUE;
return imp;
}
-#endif /* USE_MMX */
+#endif /* USE_X86_MMX || USE_ARM_IWMMXT */