/*
 * Copyright © 2004, 2005 Red Hat, Inc.
 * Copyright © 2004 Nicholas Miell
 * Copyright © 2005 Trolltech AS
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Red Hat not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission.  Red Hat makes no representations about the
 * suitability of this software for any purpose.  It is provided "as is"
 * without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Author:  Søren Sandmann (sandmann@redhat.com)
 * Minor Improvements: Nicholas Miell (nmiell@gmail.com)
 * MMX code paths for fbcompose.c by Lars Knoll (lars@trolltech.com)
 *
 * Based on work by Owen Taylor
 */
#include <mmintrin.h>

#include "pixman-private.h"

#ifdef VERBOSE
#define CHECKPOINT() ErrorF ("at %s %d\n", __FUNCTION__, __LINE__)
#else
#define CHECKPOINT()
#endif
/* Notes about writing mmx code
 *
 * give memory operands as the second operand. If you give it as the
 * first, gcc will first load it into a register, then use that
 * register
 *
 *   ie. use
 *
 *       _mm_mullo_pi16 (x, mmx_constant);
 *
 *   not
 *
 *       _mm_mullo_pi16 (mmx_constant, x);
 *
 * Also try to minimize dependencies. i.e. when you need a value, try
 * to calculate it from a value that was calculated as early as
 * possible.
 */

/* --------------- MMX primitives ------------------------------------- */
#ifdef __GNUC__
typedef uint64_t mmxdatafield;
#else
typedef __m64 mmxdatafield;
/* If __m64 is defined as a struct or union, define M64_MEMBER to be the
   name of the member used to access the data */
# ifdef _MSC_VER
#  define M64_MEMBER m64_u64
# elif defined(__SUNPRO_C)
#  define M64_MEMBER l_
# endif
#endif
typedef struct
{
    mmxdatafield mmx_4x00ff;
    mmxdatafield mmx_4x0080;
    mmxdatafield mmx_565_rgb;
    mmxdatafield mmx_565_unpack_multiplier;
    mmxdatafield mmx_565_r;
    mmxdatafield mmx_565_g;
    mmxdatafield mmx_565_b;
    mmxdatafield mmx_mask_0;
    mmxdatafield mmx_mask_1;
    mmxdatafield mmx_mask_2;
    mmxdatafield mmx_mask_3;
    mmxdatafield mmx_full_alpha;
    mmxdatafield mmx_ffff0000ffff0000;
    mmxdatafield mmx_0000ffff00000000;
    mmxdatafield mmx_000000000000ffff;
} MMXData;
#if defined(_MSC_VER)
# define MMXDATA_INIT(field, val) { val##UI64 }
#elif defined(M64_MEMBER)       /* __m64 is a struct, not an integral type */
# define MMXDATA_INIT(field, val) field = { val##ULL }
#else                           /* __m64 is an integral type */
# define MMXDATA_INIT(field, val) field = val##ULL
#endif
static const MMXData c =
{
    MMXDATA_INIT(.mmx_4x00ff,                   0x00ff00ff00ff00ff),
    MMXDATA_INIT(.mmx_4x0080,                   0x0080008000800080),
    MMXDATA_INIT(.mmx_565_rgb,                  0x000001f0003f001f),
    MMXDATA_INIT(.mmx_565_unpack_multiplier,    0x0000008404100840),
    MMXDATA_INIT(.mmx_565_r,                    0x000000f800000000),
    MMXDATA_INIT(.mmx_565_g,                    0x0000000000fc0000),
    MMXDATA_INIT(.mmx_565_b,                    0x00000000000000f8),
    MMXDATA_INIT(.mmx_mask_0,                   0xffffffffffff0000),
    MMXDATA_INIT(.mmx_mask_1,                   0xffffffff0000ffff),
    MMXDATA_INIT(.mmx_mask_2,                   0xffff0000ffffffff),
    MMXDATA_INIT(.mmx_mask_3,                   0x0000ffffffffffff),
    MMXDATA_INIT(.mmx_full_alpha,               0x00ff000000000000),
    MMXDATA_INIT(.mmx_ffff0000ffff0000,         0xffff0000ffff0000),
    MMXDATA_INIT(.mmx_0000ffff00000000,         0x0000ffff00000000),
    MMXDATA_INIT(.mmx_000000000000ffff,         0x000000000000ffff),
};
#ifdef __GNUC__
# ifdef __ICC
#  define MC(x) M64(c.mmx_##x)
# else
#  define MC(x) ((__m64)c.mmx_##x)
# endif
#else
# define MC(x) c.mmx_##x
#endif
static force_inline __m64
M64 (uint64_t x)
{
#ifdef __ICC
    return _mm_cvtsi64_m64 (x);
#elif defined M64_MEMBER        /* __m64 is a struct, not an integral type */
    __m64 res;

    res.M64_MEMBER = x;
    return res;
#else                           /* __m64 is an integral type */
    return (__m64)x;
#endif
}
static force_inline uint64_t
UINT64 (__m64 x)
{
#ifdef __ICC
    return _mm_cvtm64_si64 (x);
#elif defined M64_MEMBER        /* __m64 is a struct, not an integral type */
    uint64_t res = x.M64_MEMBER;
    return res;
#else                           /* __m64 is an integral type */
    return (uint64_t)x;
#endif
}
static force_inline __m64
shift (__m64 v, int s)
{
    if (s > 0)
        return _mm_slli_si64 (v, s);
    else if (s < 0)
        return _mm_srli_si64 (v, -s);
    else
        return v;
}
static force_inline __m64
negate (__m64 mask)
{
    return _mm_xor_si64 (mask, MC(4x00ff));
}
static force_inline __m64
pix_multiply (__m64 a, __m64 b)
{
    __m64 res;

    res = _mm_mullo_pi16 (a, b);
    res = _mm_adds_pu16 (res, MC(4x0080));
    res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8));
    res = _mm_srli_pi16 (res, 8);

    return res;
}
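/*
 * Reference for the arithmetic above: pix_multiply computes four 8-bit
 * products (a * b) / 255 in parallel, using the exact bias-and-fold
 * trick. A scalar sketch of the same computation (hypothetical helper,
 * not part of this file):
 *
 *     static inline uint8_t
 *     mul_un8 (uint8_t a, uint8_t b)
 *     {
 *         uint16_t t = (uint16_t)a * b + 0x80;     bias so the folds round
 *         return (uint8_t)((t + (t >> 8)) >> 8);   exact a*b/255 for all inputs
 *     }
 */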
static force_inline __m64
pix_add (__m64 a, __m64 b)
{
    return _mm_adds_pu8 (a, b);
}
static force_inline __m64
expand_alpha (__m64 pixel)
{
    __m64 t1, t2;

    t1 = shift (pixel, -48);
    t2 = shift (t1, 16);
    t1 = _mm_or_si64 (t1, t2);
    t2 = shift (t1, 32);
    t1 = _mm_or_si64 (t1, t2);

    return t1;
}
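/*
 * Example: for an unpacked pixel 0x00AA00RR00GG00BB, expand_alpha
 * returns 0x00AA00AA00AA00AA - the alpha word broadcast into all four
 * 16-bit channel slots, ready to be fed to pix_multiply.
 */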
static force_inline __m64
expand_alpha_rev (__m64 pixel)
{
    __m64 t1, t2;

    /* move alpha to low 16 bits and zero the rest */
    t1 = shift (pixel, 48);
    t1 = shift (t1, -48);

    t2 = shift (t1, 16);
    t1 = _mm_or_si64 (t1, t2);
    t2 = shift (t1, 32);
    t1 = _mm_or_si64 (t1, t2);

    return t1;
}
static force_inline __m64
invert_colors (__m64 pixel)
{
    __m64 x, y, z;

    x = y = z = pixel;

    x = _mm_and_si64 (x, MC(ffff0000ffff0000));
    y = _mm_and_si64 (y, MC(000000000000ffff));
    z = _mm_and_si64 (z, MC(0000ffff00000000));

    y = shift (y, 32);
    z = shift (z, -32);

    x = _mm_or_si64 (x, y);
    x = _mm_or_si64 (x, z);

    return x;
}
static force_inline __m64
over (__m64 src, __m64 srca, __m64 dest)
{
    return _mm_adds_pu8 (src, pix_multiply(dest, negate(srca)));
}
static force_inline __m64
over_rev_non_pre (__m64 src, __m64 dest)
{
    __m64 srca = expand_alpha (src);
    __m64 srcfaaa = _mm_or_si64 (srca, MC(full_alpha));

    return over(pix_multiply(invert_colors(src), srcfaaa), srca, dest);
}
static force_inline __m64
in (__m64 src, __m64 mask)
{
    return pix_multiply (src, mask);
}
static force_inline __m64
in_over_full_src_alpha (__m64 src, __m64 mask, __m64 dest)
{
    src = _mm_or_si64 (src, MC(full_alpha));

    return over(in (src, mask), mask, dest);
}
#ifndef _MSC_VER
static force_inline __m64
in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest)
{
    return over(in(src, mask), pix_multiply(srca, mask), dest);
}
#else
#define in_over(src, srca, mask, dest) over(in(src, mask), pix_multiply(srca, mask), dest)
#endif
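/*
 * in_over fuses the Porter-Duff IN and OVER steps. Per channel, with
 * values in [0, 255] and "x" denoting the pix_multiply product:
 *
 *     in_over (src, srca, mask, dest) = src x mask + dest x (255 - srca x mask)
 */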
static force_inline __m64
load8888 (uint32_t v)
{
    return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64());
}
static force_inline __m64
pack8888 (__m64 lo, __m64 hi)
{
    return _mm_packs_pu16 (lo, hi);
}
static force_inline uint32_t
store8888 (__m64 v)
{
    return _mm_cvtsi64_si32(pack8888(v, _mm_setzero_si64()));
}
/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
 * 00RR00GG00BB
 *
 * --- Expanding 565 in the low word ---
 *
 * m = (m << (32 - 3)) | (m << (16 - 5)) | m;
 * m = m & (01f0003f001f);
 * m = m * (008404100840);
 * m = m >> 8;
 *
 * Note the trick here - the top word is shifted by another nibble to
 * avoid it bumping into the middle word
 */
static force_inline __m64
expand565 (__m64 pixel, int pos)
{
    __m64 p = pixel;
    __m64 t1, t2;

    /* move pixel to low 16 bit and zero the rest */
    p = shift (shift (p, (3 - pos) * 16), -48);

    t1 = shift (p, 36 - 11);
    t2 = shift (p, 16 - 5);

    p = _mm_or_si64 (t1, p);
    p = _mm_or_si64 (t2, p);
    p = _mm_and_si64 (p, MC(565_rgb));

    pixel = _mm_mullo_pi16 (p, MC(565_unpack_multiplier));
    return _mm_srli_pi16 (pixel, 8);
}
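/*
 * Scalar equivalent of the expansion above for one 565 pixel, where
 * r5/g6/b5 are the raw channel values (reference only, not part of the
 * file):
 *
 *     r8 = (r5 << 3) | (r5 >> 2);
 *     g8 = (g6 << 2) | (g6 >> 4);
 *     b8 = (b5 << 3) | (b5 >> 2);
 *
 * The multiply by mmx_565_unpack_multiplier followed by the shift by 8
 * performs all three bit replications in a single instruction.
 */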
static force_inline __m64
expand8888 (__m64 in, int pos)
{
    if (pos == 0)
        return _mm_unpacklo_pi8 (in, _mm_setzero_si64());
    else
        return _mm_unpackhi_pi8 (in, _mm_setzero_si64());
}
static force_inline __m64
expandx888 (__m64 in, int pos)
{
    return _mm_or_si64 (expand8888 (in, pos), MC(full_alpha));
}
static force_inline __m64
pack565 (__m64 pixel, __m64 target, int pos)
{
    __m64 p = pixel;
    __m64 t = target;
    __m64 r, g, b;

    r = _mm_and_si64 (p, MC(565_r));
    g = _mm_and_si64 (p, MC(565_g));
    b = _mm_and_si64 (p, MC(565_b));

    r = shift (r, - (32 - 8) + pos * 16);
    g = shift (g, - (16 - 3) + pos * 16);
    b = shift (b, - (0 + 3) + pos * 16);

    if (pos == 0)
        t = _mm_and_si64 (t, MC(mask_0));
    else if (pos == 1)
        t = _mm_and_si64 (t, MC(mask_1));
    else if (pos == 2)
        t = _mm_and_si64 (t, MC(mask_2));
    else if (pos == 3)
        t = _mm_and_si64 (t, MC(mask_3));

    p = _mm_or_si64 (r, t);
    p = _mm_or_si64 (g, p);

    return _mm_or_si64 (b, p);
}
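/*
 * Scalar equivalent of pack565 for a single pixel (reference only):
 *
 *     p = ((r8 & 0xf8) << 8) | ((g8 & 0xfc) << 3) | (b8 >> 3);
 *
 * followed by merging p into 16-bit word @pos of @target, which is what
 * the mmx_mask_0..mmx_mask_3 constants select.
 */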
#ifndef _MSC_VER
static force_inline __m64
pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b)
{
    x = _mm_mullo_pi16 (x, a);
    y = _mm_mullo_pi16 (y, b);
    x = _mm_adds_pu16 (x, MC(4x0080));
    x = _mm_adds_pu16 (x, y);
    x = _mm_adds_pu16 (x, _mm_srli_pi16 (x, 8));
    x = _mm_srli_pi16 (x, 8);

    return x;
}
#else
#define pix_add_mul(x, a, y, b) \
    ( x = _mm_mullo_pi16 (x, a), \
      y = _mm_mullo_pi16 (y, b), \
      x = _mm_adds_pu16 (x, MC(4x0080)), \
      x = _mm_adds_pu16 (x, y), \
      x = _mm_adds_pu16 (x, _mm_srli_pi16 (x, 8)), \
      _mm_srli_pi16 (x, 8) )
#endif
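/*
 * pix_add_mul computes (x * a + y * b) / 255 per channel with the same
 * bias-and-fold rounding as pix_multiply; the two intermediate products
 * are summed with a saturating 16-bit add before the final fold.
 */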
/* --------------- MMX code paths for fbcompose.c --------------------- */
static force_inline uint32_t
combine (const uint32_t *src, const uint32_t *mask)
{
    uint32_t ssrc = *src;

    if (mask)
    {
        __m64 m = load8888 (*mask);
        __m64 s = load8888 (ssrc);

        m = expand_alpha (m);
        s = pix_multiply (s, m);

        ssrc = store8888 (s);
    }

    return ssrc;
}
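/*
 * combine() is the common prologue of the *U combiners below: it fetches
 * the next source pixel and, when a mask is present, multiplies it by
 * the mask's expanded alpha.
 */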
static void
mmxCombineOverU (pixman_implementation_t *imp, pixman_op_t op,
                 uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = dest + width;

    while (dest < end)
    {
        uint32_t ssrc = combine (src, mask);
        uint32_t a = ssrc >> 24;

        if (a == 0xff)
        {
            *dest = ssrc;
        }
        else if (ssrc)
        {
            __m64 s, sa;
            s = load8888(ssrc);
            sa = expand_alpha(s);
            *dest = store8888(over(s, sa, load8888(*dest)));
        }

        ++dest;
        ++src;
        if (mask)
            ++mask;
    }
    _mm_empty();
}
static void
mmxCombineOverReverseU (pixman_implementation_t *imp, pixman_op_t op,
                        uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = dest + width;

    while (dest < end)
    {
        __m64 d, da;
        uint32_t s = combine (src, mask);

        d = load8888(*dest);
        da = expand_alpha(d);
        *dest = store8888(over (d, da, load8888(s)));

        ++dest;
        ++src;
        if (mask)
            ++mask;
    }
    _mm_empty();
}
static void
mmxCombineInU (pixman_implementation_t *imp, pixman_op_t op,
               uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = dest + width;

    while (dest < end)
    {
        __m64 x, a;

        x = load8888 (combine (src, mask));
        a = load8888(*dest);
        a = expand_alpha(a);
        x = pix_multiply(x, a);
        *dest = store8888(x);

        ++dest;
        ++src;
        if (mask)
            ++mask;
    }
    _mm_empty();
}
static void
mmxCombineInReverseU (pixman_implementation_t *imp, pixman_op_t op,
                      uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = dest + width;

    while (dest < end)
    {
        __m64 x, a;

        x = load8888(*dest);
        a = load8888(combine (src, mask));
        a = expand_alpha(a);
        x = pix_multiply(x, a);
        *dest = store8888(x);

        ++dest;
        ++src;
        if (mask)
            ++mask;
    }
    _mm_empty();
}
static void
mmxCombineOutU (pixman_implementation_t *imp, pixman_op_t op,
                uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = dest + width;

    while (dest < end)
    {
        __m64 x, a;

        x = load8888(combine (src, mask));
        a = load8888(*dest);
        a = expand_alpha(a);
        a = negate(a);
        x = pix_multiply(x, a);
        *dest = store8888(x);

        ++dest;
        ++src;
        if (mask)
            ++mask;
    }
    _mm_empty();
}
static void
mmxCombineOutReverseU (pixman_implementation_t *imp, pixman_op_t op,
                       uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = dest + width;

    while (dest < end)
    {
        __m64 x, a;

        x = load8888(*dest);
        a = load8888(combine (src, mask));
        a = expand_alpha(a);
        a = negate(a);
        x = pix_multiply(x, a);
        *dest = store8888(x);

        ++dest;
        ++src;
        if (mask)
            ++mask;
    }
    _mm_empty();
}
static void
mmxCombineAtopU (pixman_implementation_t *imp, pixman_op_t op,
                 uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = dest + width;

    while (dest < end)
    {
        __m64 s, da, d, sia;

        s = load8888(combine (src, mask));
        d = load8888(*dest);
        sia = expand_alpha(s);
        sia = negate(sia);
        da = expand_alpha(d);
        s = pix_add_mul (s, da, d, sia);
        *dest = store8888(s);

        ++dest;
        ++src;
        if (mask)
            ++mask;
    }
    _mm_empty();
}
static void
mmxCombineAtopReverseU (pixman_implementation_t *imp, pixman_op_t op,
                        uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = dest + width;

    while (dest < end)
    {
        __m64 s, dia, d, sa;

        s = load8888(combine(src, mask));
        d = load8888(*dest);
        sa = expand_alpha(s);
        dia = expand_alpha(d);
        dia = negate(dia);
        s = pix_add_mul (s, dia, d, sa);
        *dest = store8888(s);

        ++dest;
        ++src;
        if (mask)
            ++mask;
    }
    _mm_empty();
}
static void
mmxCombineXorU (pixman_implementation_t *imp, pixman_op_t op,
                uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = dest + width;

    while (dest < end)
    {
        __m64 s, dia, d, sia;

        s = load8888(combine(src, mask));
        d = load8888(*dest);
        sia = expand_alpha(s);
        dia = expand_alpha(d);
        sia = negate(sia);
        dia = negate(dia);
        s = pix_add_mul (s, dia, d, sia);
        *dest = store8888(s);

        ++dest;
        ++src;
        if (mask)
            ++mask;
    }
    _mm_empty();
}
static void
mmxCombineAddU (pixman_implementation_t *imp, pixman_op_t op,
                uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = dest + width;

    while (dest < end)
    {
        __m64 s, d;

        s = load8888(combine(src,mask));
        d = load8888(*dest);
        s = pix_add(s, d);
        *dest = store8888(s);

        ++dest;
        ++src;
        if (mask)
            ++mask;
    }
    _mm_empty();
}
static void
mmxCombineSaturateU (pixman_implementation_t *imp, pixman_op_t op,
                     uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = dest + width;

    while (dest < end)
    {
        uint32_t s = combine(src,mask);
        uint32_t d = *dest;
        __m64 ms = load8888(s);
        __m64 md = load8888(d);
        uint32_t sa = s >> 24;
        uint32_t da = ~d >> 24;

        if (sa > da)
        {
            __m64 msa = load8888(FbIntDiv(da, sa) << 24);
            msa = expand_alpha(msa);
            ms = pix_multiply(ms, msa);
        }

        md = pix_add(md, ms);
        *dest = store8888(md);

        ++src;
        ++dest;
        if (mask)
            ++mask;
    }
    _mm_empty();
}
static void
mmxCombineSrcC (pixman_implementation_t *imp, pixman_op_t op,
                uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = src + width;

    while (src < end)
    {
        __m64 a = load8888(*mask);
        __m64 s = load8888(*src);

        s = pix_multiply(s, a);
        *dest = store8888(s);

        ++src;
        ++mask;
        ++dest;
    }
    _mm_empty();
}
static void
mmxCombineOverC (pixman_implementation_t *imp, pixman_op_t op,
                 uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = src + width;

    while (src < end)
    {
        __m64 a = load8888(*mask);
        __m64 s = load8888(*src);
        __m64 d = load8888(*dest);
        __m64 sa = expand_alpha(s);

        *dest = store8888(in_over (s, sa, a, d));

        ++src;
        ++dest;
        ++mask;
    }
    _mm_empty();
}
static void
mmxCombineOverReverseC (pixman_implementation_t *imp, pixman_op_t op,
                        uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = src + width;

    while (src < end)
    {
        __m64 a = load8888(*mask);
        __m64 s = load8888(*src);
        __m64 d = load8888(*dest);
        __m64 da = expand_alpha(d);

        *dest = store8888(over (d, da, in (s, a)));

        ++src;
        ++dest;
        ++mask;
    }
    _mm_empty();
}
static void
mmxCombineInC (pixman_implementation_t *imp, pixman_op_t op,
               uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = src + width;

    while (src < end)
    {
        __m64 a = load8888(*mask);
        __m64 s = load8888(*src);
        __m64 d = load8888(*dest);
        __m64 da = expand_alpha(d);

        s = pix_multiply(s, a);
        s = pix_multiply(s, da);
        *dest = store8888(s);

        ++src;
        ++dest;
        ++mask;
    }
    _mm_empty();
}
static void
mmxCombineInReverseC (pixman_implementation_t *imp, pixman_op_t op,
                      uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = src + width;

    while (src < end)
    {
        __m64 a = load8888(*mask);
        __m64 s = load8888(*src);
        __m64 d = load8888(*dest);
        __m64 sa = expand_alpha(s);

        a = pix_multiply(a, sa);
        d = pix_multiply(d, a);
        *dest = store8888(d);

        ++src;
        ++dest;
        ++mask;
    }
    _mm_empty();
}
static void
mmxCombineOutC (pixman_implementation_t *imp, pixman_op_t op,
                uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = src + width;

    while (src < end)
    {
        __m64 a = load8888(*mask);
        __m64 s = load8888(*src);
        __m64 d = load8888(*dest);
        __m64 da = expand_alpha(d);

        da = negate(da);
        s = pix_multiply(s, a);
        s = pix_multiply(s, da);
        *dest = store8888(s);

        ++src;
        ++dest;
        ++mask;
    }
    _mm_empty();
}
static void
mmxCombineOutReverseC (pixman_implementation_t *imp, pixman_op_t op,
                       uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = src + width;

    while (src < end)
    {
        __m64 a = load8888(*mask);
        __m64 s = load8888(*src);
        __m64 d = load8888(*dest);
        __m64 sa = expand_alpha(s);

        a = pix_multiply(a, sa);
        a = negate(a);
        d = pix_multiply(d, a);
        *dest = store8888(d);

        ++src;
        ++dest;
        ++mask;
    }
    _mm_empty();
}
static void
mmxCombineAtopC (pixman_implementation_t *imp, pixman_op_t op,
                 uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = src + width;

    while (src < end)
    {
        __m64 a = load8888(*mask);
        __m64 s = load8888(*src);
        __m64 d = load8888(*dest);
        __m64 da = expand_alpha(d);
        __m64 sa = expand_alpha(s);

        s = pix_multiply(s, a);
        a = pix_multiply(a, sa);
        a = negate(a);
        d = pix_add_mul (d, a, s, da);
        *dest = store8888(d);

        ++src;
        ++dest;
        ++mask;
    }
    _mm_empty();
}
static void
mmxCombineAtopReverseC (pixman_implementation_t *imp, pixman_op_t op,
                        uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = src + width;

    while (src < end)
    {
        __m64 a = load8888(*mask);
        __m64 s = load8888(*src);
        __m64 d = load8888(*dest);
        __m64 da = expand_alpha(d);
        __m64 sa = expand_alpha(s);

        s = pix_multiply(s, a);
        a = pix_multiply(a, sa);
        da = negate(da);
        d = pix_add_mul (d, a, s, da);
        *dest = store8888(d);

        ++src;
        ++dest;
        ++mask;
    }
    _mm_empty();
}
static void
mmxCombineXorC (pixman_implementation_t *imp, pixman_op_t op,
                uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = src + width;

    while (src < end)
    {
        __m64 a = load8888(*mask);
        __m64 s = load8888(*src);
        __m64 d = load8888(*dest);
        __m64 da = expand_alpha(d);
        __m64 sa = expand_alpha(s);

        s = pix_multiply(s, a);
        a = pix_multiply(a, sa);
        da = negate(da);
        a = negate(a);
        d = pix_add_mul (d, a, s, da);
        *dest = store8888(d);

        ++src;
        ++dest;
        ++mask;
    }
    _mm_empty();
}
static void
mmxCombineAddC (pixman_implementation_t *imp, pixman_op_t op,
                uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
{
    const uint32_t *end = src + width;

    while (src < end)
    {
        __m64 a = load8888(*mask);
        __m64 s = load8888(*src);
        __m64 d = load8888(*dest);

        s = pix_multiply(s, a);
        d = pix_add(s, d);
        *dest = store8888(d);

        ++src;
        ++dest;
        ++mask;
    }
    _mm_empty();
}
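/*
 * Note on the *C combiners above: with component alpha the mask supplies
 * an independent alpha per channel, so e.g. mmxCombineOverC computes,
 * per channel, src * mask + dest * (255 - srca * mask) instead of using
 * a single scalar mask alpha.
 */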
/* ------------------ MMX code paths called from fbpict.c ----------------------- */

fbCompositeSolid_nx8888mmx (pixman_implementation_t *imp,
                            pixman_image_t * pSrc,
                            pixman_image_t * pMask,
                            pixman_image_t * pDst,

    uint32_t *dstLine, *dst;

    src = pixman_image_get_solid(pSrc, pDst->bits.format);

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);

    vsrc = load8888 (src);
    vsrca = expand_alpha (vsrc);

    dstLine += dstStride;

    while (w && (unsigned long)dst & 7)
        *dst = store8888(over(vsrc, vsrca, load8888(*dst)));

    vdest = *(__m64 *)dst;

    dest0 = over(vsrc, vsrca, expand8888(vdest, 0));
    dest1 = over(vsrc, vsrca, expand8888(vdest, 1));

    *(__m64 *)dst = pack8888(dest0, dest1);

    *dst = store8888(over(vsrc, vsrca, load8888(*dst)));
fbCompositeSolid_nx0565mmx (pixman_implementation_t *imp,
                            pixman_image_t * pSrc,
                            pixman_image_t * pMask,
                            pixman_image_t * pDst,

    uint16_t *dstLine, *dst;

    src = pixman_image_get_solid(pSrc, pDst->bits.format);

    fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);

    vsrc = load8888 (src);
    vsrca = expand_alpha (vsrc);

    dstLine += dstStride;

    while (w && (unsigned long)dst & 7)

        __m64 vdest = expand565 (M64(d), 0);
        vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0);
        *dst = UINT64(vdest);

    vdest = *(__m64 *)dst;

    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 0)), vdest, 0);
    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 1)), vdest, 1);
    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 2)), vdest, 2);
    vdest = pack565 (over(vsrc, vsrca, expand565(vdest, 3)), vdest, 3);

    *(__m64 *)dst = vdest;

    __m64 vdest = expand565 (M64(d), 0);
    vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0);
    *dst = UINT64(vdest);
fbCompositeSolidMask_nx8888x8888Cmmx (pixman_implementation_t *imp,
                                      pixman_image_t * pSrc,
                                      pixman_image_t * pMask,
                                      pixman_image_t * pDst,

    int dstStride, maskStride;

    src = pixman_image_get_solid(pSrc, pDst->bits.format);

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
    fbComposeGetStart (pMask, xMask, yMask, uint32_t, maskStride, maskLine, 1);

    vsrc = load8888(src);
    vsrca = expand_alpha(vsrc);

    uint32_t *p = (uint32_t *)maskLine;
    uint32_t *q = (uint32_t *)dstLine;

    while (twidth && (unsigned long)q & 7)

        uint32_t m = *(uint32_t *)p;

        __m64 vdest = load8888(*q);
        vdest = in_over(vsrc, vsrca, load8888(m), vdest);
        *q = store8888(vdest);

    __m64 vdest = *(__m64 *)q;

    dest0 = in_over(vsrc, vsrca, load8888(m0),
                    expand8888 (vdest, 0));
    dest1 = in_over(vsrc, vsrca, load8888(m1),
                    expand8888 (vdest, 1));

    *(__m64 *)q = pack8888(dest0, dest1);

    uint32_t m = *(uint32_t *)p;

    __m64 vdest = load8888(*q);
    vdest = in_over(vsrc, vsrca, load8888(m), vdest);
    *q = store8888(vdest);

    dstLine += dstStride;
    maskLine += maskStride;
fbCompositeSrc_8888x8x8888mmx (pixman_implementation_t *imp,
                               pixman_image_t * pSrc,
                               pixman_image_t * pMask,
                               pixman_image_t * pDst,

    uint32_t *dstLine, *dst;
    uint32_t *srcLine, *src;

    int dstStride, srcStride;

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);

    mask = pixman_image_get_solid (pMask, pDst->bits.format);
    mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
    vmask = load8888 (mask);

    dstLine += dstStride;

    srcLine += srcStride;

    while (w && (unsigned long)dst & 7)

        __m64 s = load8888 (*src);
        __m64 d = load8888 (*dst);

        *dst = store8888 (in_over (s, expand_alpha (s), vmask, d));

    __m64 vs = *(__m64 *)src;
    __m64 vd = *(__m64 *)dst;
    __m64 vsrc0 = expand8888 (vs, 0);
    __m64 vsrc1 = expand8888 (vs, 1);

    *(__m64 *)dst = pack8888 (
        in_over (vsrc0, expand_alpha (vsrc0), vmask, expand8888 (vd, 0)),
        in_over (vsrc1, expand_alpha (vsrc1), vmask, expand8888 (vd, 1)));

    __m64 s = load8888 (*src);
    __m64 d = load8888 (*dst);

    *dst = store8888 (in_over (s, expand_alpha (s), vmask, d));
fbCompositeSrc_x888xnx8888mmx (pixman_implementation_t *imp,
                               pixman_image_t * pSrc,
                               pixman_image_t * pMask,
                               pixman_image_t * pDst,

    uint32_t *dstLine, *dst;
    uint32_t *srcLine, *src;

    int dstStride, srcStride;

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
    mask = pixman_image_get_solid (pMask, pDst->bits.format);

    mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
    vmask = load8888 (mask);

    dstLine += dstStride;

    srcLine += srcStride;

    while (w && (unsigned long)dst & 7)

        __m64 s = load8888 (*src | 0xff000000);
        __m64 d = load8888 (*dst);

        *dst = store8888 (in_over (s, srca, vmask, d));

    __m64 vd0 = *(__m64 *)(dst + 0);
    __m64 vd1 = *(__m64 *)(dst + 2);
    __m64 vd2 = *(__m64 *)(dst + 4);
    __m64 vd3 = *(__m64 *)(dst + 6);
    __m64 vd4 = *(__m64 *)(dst + 8);
    __m64 vd5 = *(__m64 *)(dst + 10);
    __m64 vd6 = *(__m64 *)(dst + 12);
    __m64 vd7 = *(__m64 *)(dst + 14);

    __m64 vs0 = *(__m64 *)(src + 0);
    __m64 vs1 = *(__m64 *)(src + 2);
    __m64 vs2 = *(__m64 *)(src + 4);
    __m64 vs3 = *(__m64 *)(src + 6);
    __m64 vs4 = *(__m64 *)(src + 8);
    __m64 vs5 = *(__m64 *)(src + 10);
    __m64 vs6 = *(__m64 *)(src + 12);
    __m64 vs7 = *(__m64 *)(src + 14);

    vd0 = pack8888 (
        in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)),
        in_over (expandx888 (vs0, 1), srca, vmask, expand8888 (vd0, 1)));

    vd1 = pack8888 (
        in_over (expandx888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)),
        in_over (expandx888 (vs1, 1), srca, vmask, expand8888 (vd1, 1)));

    vd2 = pack8888 (
        in_over (expandx888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)),
        in_over (expandx888 (vs2, 1), srca, vmask, expand8888 (vd2, 1)));

    vd3 = pack8888 (
        in_over (expandx888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)),
        in_over (expandx888 (vs3, 1), srca, vmask, expand8888 (vd3, 1)));

    vd4 = pack8888 (
        in_over (expandx888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)),
        in_over (expandx888 (vs4, 1), srca, vmask, expand8888 (vd4, 1)));

    vd5 = pack8888 (
        in_over (expandx888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)),
        in_over (expandx888 (vs5, 1), srca, vmask, expand8888 (vd5, 1)));

    vd6 = pack8888 (
        in_over (expandx888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)),
        in_over (expandx888 (vs6, 1), srca, vmask, expand8888 (vd6, 1)));

    vd7 = pack8888 (
        in_over (expandx888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)),
        in_over (expandx888 (vs7, 1), srca, vmask, expand8888 (vd7, 1)));

    *(__m64 *)(dst + 0) = vd0;
    *(__m64 *)(dst + 2) = vd1;
    *(__m64 *)(dst + 4) = vd2;
    *(__m64 *)(dst + 6) = vd3;
    *(__m64 *)(dst + 8) = vd4;
    *(__m64 *)(dst + 10) = vd5;
    *(__m64 *)(dst + 12) = vd6;
    *(__m64 *)(dst + 14) = vd7;

    __m64 s = load8888 (*src | 0xff000000);
    __m64 d = load8888 (*dst);

    *dst = store8888 (in_over (s, srca, vmask, d));
fbCompositeSrc_8888x8888mmx (pixman_implementation_t *imp,
                             pixman_image_t * pSrc,
                             pixman_image_t * pMask,
                             pixman_image_t * pDst,

    uint32_t *dstLine, *dst;
    uint32_t *srcLine, *src;

    int dstStride, srcStride;

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);

    dstLine += dstStride;

    srcLine += srcStride;

    sa = expand_alpha(ms);
    *dst = store8888(over(ms, sa, load8888(*dst)));
fbCompositeSrc_8888x0565mmx (pixman_implementation_t *imp,
                             pixman_image_t * pSrc,
                             pixman_image_t * pMask,
                             pixman_image_t * pDst,

    uint16_t *dstLine, *dst;
    uint32_t *srcLine, *src;
    int dstStride, srcStride;

    fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);

    assert (pSrc->pDrawable == pMask->pDrawable);

    dstLine += dstStride;

    srcLine += srcStride;

    while (w && (unsigned long)dst & 7)

        __m64 vsrc = load8888 (*src);

        __m64 vdest = expand565 (M64(d), 0);

        vdest = pack565(over(vsrc, expand_alpha(vsrc), vdest), vdest, 0);

        *dst = UINT64(vdest);

    __m64 vsrc0, vsrc1, vsrc2, vsrc3;

    vsrc0 = load8888(*(src + 0));
    vsrc1 = load8888(*(src + 1));
    vsrc2 = load8888(*(src + 2));
    vsrc3 = load8888(*(src + 3));

    vdest = *(__m64 *)dst;

    vdest = pack565(over(vsrc0, expand_alpha(vsrc0), expand565(vdest, 0)), vdest, 0);
    vdest = pack565(over(vsrc1, expand_alpha(vsrc1), expand565(vdest, 1)), vdest, 1);
    vdest = pack565(over(vsrc2, expand_alpha(vsrc2), expand565(vdest, 2)), vdest, 2);
    vdest = pack565(over(vsrc3, expand_alpha(vsrc3), expand565(vdest, 3)), vdest, 3);

    *(__m64 *)dst = vdest;

    __m64 vsrc = load8888 (*src);

    __m64 vdest = expand565 (M64(d), 0);

    vdest = pack565(over(vsrc, expand_alpha(vsrc), vdest), vdest, 0);

    *dst = UINT64(vdest);
fbCompositeSolidMask_nx8x8888mmx (pixman_implementation_t *imp,
                                  pixman_image_t * pSrc,
                                  pixman_image_t * pMask,
                                  pixman_image_t * pDst,

    uint32_t *dstLine, *dst;
    uint8_t *maskLine, *mask;
    int dstStride, maskStride;

    src = pixman_image_get_solid(pSrc, pDst->bits.format);

    srcsrc = (uint64_t)src << 32 | src;

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);

    vsrc = load8888 (src);
    vsrca = expand_alpha (vsrc);

    dstLine += dstStride;

    maskLine += maskStride;

    while (w && (unsigned long)dst & 7)

        __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev (M64(m)), load8888(*dst));
        *dst = store8888(vdest);

    if (srca == 0xff && (m0 & m1) == 0xff)
        *(uint64_t *)dst = srcsrc;

    vdest = *(__m64 *)dst;

    dest0 = in_over(vsrc, vsrca, expand_alpha_rev (M64(m0)), expand8888(vdest, 0));
    dest1 = in_over(vsrc, vsrca, expand_alpha_rev (M64(m1)), expand8888(vdest, 1));

    *(__m64 *)dst = pack8888(dest0, dest1);

    __m64 vdest = load8888(*dst);
    vdest = in_over(vsrc, vsrca, expand_alpha_rev (M64(m)), vdest);
    *dst = store8888(vdest);
pixman_fill_mmx (uint32_t *bits,

    uint32_t byte_width;

    __m64 v1, v2, v3, v4, v5, v6, v7;

    if (bpp != 16 && bpp != 32 && bpp != 8)
        return FALSE;

    if (bpp == 16 && (xor >> 16 != (xor & 0xffff)))
        return FALSE;

    if (bpp == 8 &&
        ((xor >> 16 != (xor & 0xffff)) ||
         (xor >> 24 != ((xor >> 16) & 0x00ff))))
        return FALSE;

    stride = stride * (int) sizeof (uint32_t) / 1;
    byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);

    stride = stride * (int) sizeof (uint32_t) / 2;
    byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
    byte_width = 2 * width;

    stride = stride * (int) sizeof (uint32_t) / 4;
    byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
    byte_width = 4 * width;

    fill = ((uint64_t)xor << 32) | xor;

    : "=y" (v1), "=y" (v2), "=y" (v3),
      "=y" (v4), "=y" (v5), "=y" (v6), "=y" (v7)

    uint8_t *d = byte_line;
    byte_line += stride;

    while (w >= 1 && ((unsigned long)d & 1))
        *(uint8_t *)d = (xor & 0xff);

    while (w >= 2 && ((unsigned long)d & 3))
        *(uint16_t *)d = xor;

    while (w >= 4 && ((unsigned long)d & 7))
        *(uint32_t *)d = xor;

    : "y" (vfill), "y" (v1), "y" (v2), "y" (v3),
      "y" (v4), "y" (v5), "y" (v6), "y" (v7)

    *(__m64*) (d + 0) = vfill;
    *(__m64*) (d + 8) = vfill;
    *(__m64*) (d + 16) = vfill;
    *(__m64*) (d + 24) = vfill;
    *(__m64*) (d + 32) = vfill;
    *(__m64*) (d + 40) = vfill;
    *(__m64*) (d + 48) = vfill;
    *(__m64*) (d + 56) = vfill;

    *(uint32_t *)d = xor;

    *(uint16_t *)d = xor;

    *(uint8_t *)d = (xor & 0xff);
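/*
 * Hypothetical usage sketch (values invented for illustration): fill a
 * 64x64 a8r8g8b8 buffer with opaque green. As in pixman_fill(), the
 * stride is measured in uint32_t units.
 *
 *     uint32_t buf[64 * 64];
 *     pixman_fill_mmx (buf, 64, 32, 0, 0, 64, 64, 0xff00ff00);
 */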
fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_implementation_t *imp,
                                     pixman_image_t * pSrc,
                                     pixman_image_t * pMask,
                                     pixman_image_t * pDst,

    uint32_t *dstLine, *dst;
    uint8_t *maskLine, *mask;
    int dstStride, maskStride;

    src = pixman_image_get_solid(pSrc, pDst->bits.format);

    pixman_fill_mmx (pDst->bits.bits, pDst->bits.rowstride, PIXMAN_FORMAT_BPP (pDst->bits.format),
                     xDst, yDst, width, height, 0);

    srcsrc = (uint64_t)src << 32 | src;

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);

    vsrc = load8888 (src);
    vsrca = expand_alpha (vsrc);

    dstLine += dstStride;

    maskLine += maskStride;

    while (w && (unsigned long)dst & 7)

        __m64 vdest = in(vsrc, expand_alpha_rev (M64(m)));
        *dst = store8888(vdest);

    if (srca == 0xff && (m0 & m1) == 0xff)
        *(uint64_t *)dst = srcsrc;

    vdest = *(__m64 *)dst;

    dest0 = in(vsrc, expand_alpha_rev (M64(m0)));
    dest1 = in(vsrc, expand_alpha_rev (M64(m1)));

    *(__m64 *)dst = pack8888(dest0, dest1);

    *(uint64_t *)dst = 0;

    __m64 vdest = in(vsrc, expand_alpha_rev (M64(m)));
    *dst = store8888(vdest);
fbCompositeSolidMask_nx8x0565mmx (pixman_implementation_t *imp,
                                  pixman_image_t * pSrc,
                                  pixman_image_t * pMask,
                                  pixman_image_t * pDst,

    uint16_t *dstLine, *dst;
    uint8_t *maskLine, *mask;
    int dstStride, maskStride;

    __m64 vsrc, vsrca, tmp;
    uint64_t srcsrcsrcsrc, src16;

    src = pixman_image_get_solid(pSrc, pDst->bits.format);

    fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);

    vsrc = load8888 (src);
    vsrca = expand_alpha (vsrc);

    tmp = pack565(vsrc, _mm_setzero_si64(), 0);
    src16 = UINT64(tmp);

    srcsrcsrcsrc = (uint64_t)src16 << 48 | (uint64_t)src16 << 32 |
        (uint64_t)src16 << 16 | (uint64_t)src16;

    dstLine += dstStride;

    maskLine += maskStride;

    while (w && (unsigned long)dst & 7)

        __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev (M64 (m)), expand565(vd, 0));
        vd = pack565(vdest, _mm_setzero_si64(), 0);

    uint64_t m0, m1, m2, m3;

    if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff)
    {
        *(uint64_t *)dst = srcsrcsrcsrc;
    }
    else if (m0 | m1 | m2 | m3)
    {
        __m64 vdest;
        __m64 vm0, vm1, vm2, vm3;

        vdest = *(__m64 *)dst;

        vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm0), expand565(vdest, 0)), vdest, 0);
        vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm1), expand565(vdest, 1)), vdest, 1);
        vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm2), expand565(vdest, 2)), vdest, 2);
        vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm3), expand565(vdest, 3)), vdest, 3);

        *(__m64 *)dst = vdest;
    }

    __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev (M64(m)), expand565(vd, 0));
    vd = pack565(vdest, _mm_setzero_si64(), 0);
fbCompositeSrc_8888RevNPx0565mmx (pixman_implementation_t *imp,
                                  pixman_image_t * pSrc,
                                  pixman_image_t * pMask,
                                  pixman_image_t * pDst,

    uint16_t *dstLine, *dst;
    uint32_t *srcLine, *src;
    int dstStride, srcStride;

    fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);

    assert (pSrc->pDrawable == pMask->pDrawable);

    dstLine += dstStride;

    srcLine += srcStride;

    while (w && (unsigned long)dst & 7)

        __m64 vsrc = load8888 (*src);

        __m64 vdest = expand565 (M64(d), 0);

        vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0);

        *dst = UINT64(vdest);

    uint32_t s0, s1, s2, s3;
    unsigned char a0, a1, a2, a3;

    if ((a0 & a1 & a2 & a3) == 0xFF)
    {
        __m64 vdest;

        vdest = pack565(invert_colors(load8888(s0)), _mm_setzero_si64(), 0);
        vdest = pack565(invert_colors(load8888(s1)), vdest, 1);
        vdest = pack565(invert_colors(load8888(s2)), vdest, 2);
        vdest = pack565(invert_colors(load8888(s3)), vdest, 3);

        *(__m64 *)dst = vdest;
    }
    else if (s0 | s1 | s2 | s3)
    {
        __m64 vdest = *(__m64 *)dst;

        vdest = pack565(over_rev_non_pre(load8888(s0), expand565(vdest, 0)), vdest, 0);
        vdest = pack565(over_rev_non_pre(load8888(s1), expand565(vdest, 1)), vdest, 1);
        vdest = pack565(over_rev_non_pre(load8888(s2), expand565(vdest, 2)), vdest, 2);
        vdest = pack565(over_rev_non_pre(load8888(s3), expand565(vdest, 3)), vdest, 3);

        *(__m64 *)dst = vdest;
    }

    __m64 vsrc = load8888 (*src);

    __m64 vdest = expand565 (M64(d), 0);

    vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0);

    *dst = UINT64(vdest);
/* "8888RevNP" is GdkPixbuf's format: ABGR, non premultiplied */

fbCompositeSrc_8888RevNPx8888mmx (pixman_implementation_t *imp,
                                  pixman_image_t * pSrc,
                                  pixman_image_t * pMask,
                                  pixman_image_t * pDst,

    uint32_t *dstLine, *dst;
    uint32_t *srcLine, *src;
    int dstStride, srcStride;

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);

    assert (pSrc->pDrawable == pMask->pDrawable);

    dstLine += dstStride;

    srcLine += srcStride;

    while (w && (unsigned long)dst & 7)

        __m64 s = load8888 (*src);
        __m64 d = load8888 (*dst);

        *dst = store8888 (over_rev_non_pre (s, d));

    unsigned char a0, a1;

    if ((a0 & a1) == 0xFF)
    {
        d0 = invert_colors(load8888(s0));
        d1 = invert_colors(load8888(s1));

        *(__m64 *)dst = pack8888 (d0, d1);
    }
    else if (s0 | s1)
    {
        __m64 vdest = *(__m64 *)dst;

        d0 = over_rev_non_pre (load8888(s0), expand8888 (vdest, 0));
        d1 = over_rev_non_pre (load8888(s1), expand8888 (vdest, 1));

        *(__m64 *)dst = pack8888 (d0, d1);
    }

    __m64 s = load8888 (*src);
    __m64 d = load8888 (*dst);

    *dst = store8888 (over_rev_non_pre (s, d));
fbCompositeSolidMask_nx8888x0565Cmmx (pixman_implementation_t *imp,
                                      pixman_image_t * pSrc,
                                      pixman_image_t * pMask,
                                      pixman_image_t * pDst,

    int dstStride, maskStride;

    src = pixman_image_get_solid(pSrc, pDst->bits.format);

    fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
    fbComposeGetStart (pMask, xMask, yMask, uint32_t, maskStride, maskLine, 1);

    vsrc = load8888 (src);
    vsrca = expand_alpha (vsrc);

    uint32_t *p = (uint32_t *)maskLine;
    uint16_t *q = (uint16_t *)dstLine;

    while (twidth && ((unsigned long)q & 7))

        uint32_t m = *(uint32_t *)p;

        __m64 vdest = expand565 (M64(d), 0);
        vdest = pack565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);

    uint32_t m0, m1, m2, m3;

    if ((m0 | m1 | m2 | m3))
    {
        __m64 vdest = *(__m64 *)q;

        vdest = pack565(in_over(vsrc, vsrca, load8888(m0), expand565(vdest, 0)), vdest, 0);
        vdest = pack565(in_over(vsrc, vsrca, load8888(m1), expand565(vdest, 1)), vdest, 1);
        vdest = pack565(in_over(vsrc, vsrca, load8888(m2), expand565(vdest, 2)), vdest, 2);
        vdest = pack565(in_over(vsrc, vsrca, load8888(m3), expand565(vdest, 3)), vdest, 3);

        *(__m64 *)q = vdest;
    }

    uint32_t m = *(uint32_t *)p;

    __m64 vdest = expand565(M64(d), 0);
    vdest = pack565 (in_over(vsrc, vsrca, load8888(m), vdest), vdest, 0);

    maskLine += maskStride;
    dstLine += dstStride;
fbCompositeIn_nx8x8mmx (pixman_implementation_t *imp,
                        pixman_image_t * pSrc,
                        pixman_image_t * pMask,
                        pixman_image_t * pDst,

    uint8_t *dstLine, *dst;
    uint8_t *maskLine, *mask;
    int dstStride, maskStride;

    fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);

    src = pixman_image_get_solid(pSrc, pDst->bits.format);

    vsrc = load8888(src);
    vsrca = expand_alpha(vsrc);

    dstLine += dstStride;

    maskLine += maskStride;

    if ((((unsigned long)pDst & 3) == 0) &&
        (((unsigned long)pSrc & 3) == 0))

        vmask = load8888 (*(uint32_t *)mask);
        vdest = load8888 (*(uint32_t *)dst);

        *(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest));

    m = FbInU (sa, 0, a, tmp);
    r = FbInU (m, 0, d, tmp);
fbCompositeIn_8x8mmx (pixman_implementation_t *imp,
                      pixman_image_t * pSrc,
                      pixman_image_t * pMask,
                      pixman_image_t * pDst,

    uint8_t *dstLine, *dst;
    uint8_t *srcLine, *src;
    int srcStride, dstStride;

    fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
    fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);

    dstLine += dstStride;

    srcLine += srcStride;

    if ((((unsigned long)pDst & 3) == 0) &&
        (((unsigned long)pSrc & 3) == 0))

        uint32_t *s = (uint32_t *)src;
        uint32_t *d = (uint32_t *)dst;

        *d = store8888 (in (load8888 (*s), load8888 (*d)));

    *dst = FbInU (s, 0, d, tmp);
fbCompositeSrcAdd_8888x8x8mmx (pixman_implementation_t *imp,
                               pixman_image_t * pSrc,
                               pixman_image_t * pMask,
                               pixman_image_t * pDst,

    uint8_t *dstLine, *dst;
    uint8_t *maskLine, *mask;
    int dstStride, maskStride;

    fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);

    src = pixman_image_get_solid(pSrc, pDst->bits.format);

    vsrc = load8888(src);
    vsrca = expand_alpha(vsrc);

    dstLine += dstStride;

    maskLine += maskStride;

    if ((((unsigned long)pMask & 3) == 0) &&
        (((unsigned long)pDst & 3) == 0))

        __m64 vmask = load8888 (*(uint32_t *)mask);
        __m64 vdest = load8888 (*(uint32_t *)dst);

        *(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest));

    m = FbInU (sa, 0, a, tmp);
    r = FbAdd (m, d, 0, tmp);
fbCompositeSrcAdd_8000x8000mmx (pixman_implementation_t *imp,
                                pixman_image_t * pSrc,
                                pixman_image_t * pMask,
                                pixman_image_t * pDst,

    uint8_t *dstLine, *dst;
    uint8_t *srcLine, *src;
    int dstStride, srcStride;

    fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);
    fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);

    dstLine += dstStride;

    srcLine += srcStride;

    while (w && (unsigned long)dst & 7)

        s = t | (0 - (t >> 8));
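        /* The line above is a scalar saturating add: t holds the 9-bit
         * sum, so (t >> 8) is 1 exactly on overflow, and 0 - 1 sets every
         * bit, forcing the stored byte to 0xff. */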
    *(__m64*)dst = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst);

    s = t | (0 - (t >> 8));
fbCompositeSrcAdd_8888x8888mmx (pixman_implementation_t *imp,
                                pixman_image_t * pSrc,
                                pixman_image_t * pMask,
                                pixman_image_t * pDst,

    uint32_t *dstLine, *dst;
    uint32_t *srcLine, *src;
    int dstStride, srcStride;

    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);

    dstLine += dstStride;

    srcLine += srcStride;

    while (w && (unsigned long)dst & 7)
        *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src),
                                             _mm_cvtsi32_si64(*dst)));

    dst64 = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst);
    *(uint64_t*)dst = UINT64(dst64);

    *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src),
                                         _mm_cvtsi32_si64(*dst)));
static pixman_bool_t
pixman_blt_mmx (uint32_t *src_bits,

                int src_x, int src_y,
                int dst_x, int dst_y,
                int width, int height)

    uint8_t * src_bytes;
    uint8_t * dst_bytes;

    if (src_bpp != dst_bpp)
        return FALSE;

    if (src_bpp == 16) {
        src_stride = src_stride * (int) sizeof (uint32_t) / 2;
        dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
        src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
        dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
        byte_width = 2 * width;
    } else if (src_bpp == 32) {
        src_stride = src_stride * (int) sizeof (uint32_t) / 4;
        dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
        src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
        dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
        byte_width = 4 * width;
    }

    uint8_t *s = src_bytes;
    uint8_t *d = dst_bytes;
    src_bytes += src_stride;
    dst_bytes += dst_stride;

    while (w >= 2 && ((unsigned long)d & 3))
        *(uint16_t *)d = *(uint16_t *)s;

    while (w >= 4 && ((unsigned long)d & 7))
        *(uint32_t *)d = *(uint32_t *)s;
#if defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
    __asm__ (
        "movq (%1), %%mm0\n"
        "movq 8(%1), %%mm1\n"
        "movq 16(%1), %%mm2\n"
        "movq 24(%1), %%mm3\n"
        "movq 32(%1), %%mm4\n"
        "movq 40(%1), %%mm5\n"
        "movq 48(%1), %%mm6\n"
        "movq 56(%1), %%mm7\n"

        "movq %%mm0, (%0)\n"
        "movq %%mm1, 8(%0)\n"
        "movq %%mm2, 16(%0)\n"
        "movq %%mm3, 24(%0)\n"
        "movq %%mm4, 32(%0)\n"
        "movq %%mm5, 40(%0)\n"
        "movq %%mm6, 48(%0)\n"
        "movq %%mm7, 56(%0)\n"
        :
        : "r" (d), "r" (s)
        : "memory",
          "%mm0", "%mm1", "%mm2", "%mm3",
          "%mm4", "%mm5", "%mm6", "%mm7");
#else
    __m64 v0 = *(__m64 *)(s + 0);
    __m64 v1 = *(__m64 *)(s + 8);
    __m64 v2 = *(__m64 *)(s + 16);
    __m64 v3 = *(__m64 *)(s + 24);
    __m64 v4 = *(__m64 *)(s + 32);
    __m64 v5 = *(__m64 *)(s + 40);
    __m64 v6 = *(__m64 *)(s + 48);
    __m64 v7 = *(__m64 *)(s + 56);
    *(__m64 *)(d + 0) = v0;
    *(__m64 *)(d + 8) = v1;
    *(__m64 *)(d + 16) = v2;
    *(__m64 *)(d + 24) = v3;
    *(__m64 *)(d + 32) = v4;
    *(__m64 *)(d + 40) = v5;
    *(__m64 *)(d + 48) = v6;
    *(__m64 *)(d + 56) = v7;
#endif

    *(uint32_t *)d = *(uint32_t *)s;

    *(uint16_t *)d = *(uint16_t *)s;
fbCompositeCopyAreammx (pixman_implementation_t *imp,
                        pixman_image_t * pSrc,
                        pixman_image_t * pMask,
                        pixman_image_t * pDst,

    pixman_blt_mmx (pSrc->bits.bits,
                    pDst->bits.bits,
                    pSrc->bits.rowstride,
                    pDst->bits.rowstride,
                    PIXMAN_FORMAT_BPP (pSrc->bits.format),
                    PIXMAN_FORMAT_BPP (pDst->bits.format),
                    xSrc, ySrc, xDst, yDst, width, height);
fbCompositeOver_x888x8x8888mmx (pixman_implementation_t *imp,
                                pixman_image_t * pSrc,
                                pixman_image_t * pMask,
                                pixman_image_t * pDst,

    uint32_t *src, *srcLine;
    uint32_t *dst, *dstLine;
    uint8_t *mask, *maskLine;
    int srcStride, maskStride, dstStride;

    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);

    srcLine += srcStride;

    dstLine += dstStride;

    maskLine += maskStride;

    __m64 s = load8888 (*src | 0xff000000);

    *dst = store8888 (s);

    __m64 sa = expand_alpha (s);
    __m64 vm = expand_alpha_rev (M64(m));
    __m64 vdest = in_over(s, sa, vm, load8888 (*dst));

    *dst = store8888 (vdest);
static const pixman_fast_path_t mmx_fast_paths[] =
{
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, fbCompositeSolidMask_nx8x0565mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, fbCompositeSolidMask_nx8x0565mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8888x8888Cmmx, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8888x8888Cmmx, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, fbCompositeSolidMask_nx8888x0565Cmmx, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8888x8888Cmmx, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8888x8888Cmmx, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, fbCompositeSolidMask_nx8888x0565Cmmx, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, fbCompositeSrc_8888RevNPx0565mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_r5g6b5, fbCompositeSrc_8888RevNPx0565mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8b8g8r8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_x8b8g8r8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_b5g6r5, fbCompositeSrc_8888RevNPx0565mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, fbCompositeSrc_8888RevNPx0565mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSrc_x888xnx8888mmx, NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSrc_x888xnx8888mmx, NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSrc_x888xnx8888mmx, NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSrc_x888xnx8888mmx, NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888mmx, NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888mmx, NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8x8888mmx, NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8x8888mmx, NEED_SOLID_MASK },
#if 0
    /* FIXME: This code is commented out since it's apparently not actually faster than the generic code. */
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeOver_x888x8x8888mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeOver_x888x8x8888mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeOver_x888x8x8888mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeOver_x888x8x8888mmx, 0 },
#endif
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSolid_nx8888mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSolid_nx8888mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSolid_nx0565mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeCopyAreammx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeCopyAreammx, 0 },

    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_8888x0565mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888mmx, 0 },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_8888x0565mmx, 0 },

    { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrcAdd_8888x8888mmx, 0 },
    { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrcAdd_8888x8888mmx, 0 },
    { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcAdd_8000x8000mmx, 0 },
    { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeSrcAdd_8888x8x8mmx, 0 },

    { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeCopyAreammx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeCopyAreammx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeCopyAreammx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeCopyAreammx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeCopyAreammx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeCopyAreammx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeCopyAreammx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeCopyAreammx, 0 },

    { PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeIn_8x8mmx, 0 },
    { PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeIn_nx8x8mmx, 0 },

    { PIXMAN_OP_NONE },
};
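/*
 * mmx_composite() below first tries to match the request against this
 * table via _pixman_run_fast_path() and, failing that, hands the
 * operation to the delegate implementation.
 */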
static void
mmx_composite (pixman_implementation_t *imp,
               pixman_op_t op,
               pixman_image_t *src,
               pixman_image_t *mask,
               pixman_image_t *dest,
               int32_t src_x,
               int32_t src_y,
               int32_t mask_x,
               int32_t mask_y,
               int32_t dest_x,
               int32_t dest_y,
               int32_t width,
               int32_t height)
{
    if (_pixman_run_fast_path (mmx_fast_paths, imp,
                               op, src, mask, dest,
                               src_x, src_y,
                               mask_x, mask_y,
                               dest_x, dest_y,
                               width, height))
    {
        return;
    }

    _pixman_implementation_composite (imp->delegate,
                                      op, src, mask, dest, src_x, src_y,
                                      mask_x, mask_y, dest_x, dest_y,
                                      width, height);
}
static pixman_bool_t
mmx_blt (pixman_implementation_t *imp,
         uint32_t *src_bits,
         uint32_t *dst_bits,
         int src_stride,
         int dst_stride,
         int src_bpp,
         int dst_bpp,
         int src_x, int src_y,
         int dst_x, int dst_y,
         int width, int height)
{
    if (!pixman_blt_mmx (
            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
            src_x, src_y, dst_x, dst_y, width, height))
    {
        return _pixman_implementation_blt (
            imp->delegate,
            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
            src_x, src_y, dst_x, dst_y, width, height);
    }

    return TRUE;
}
static pixman_bool_t
mmx_fill (pixman_implementation_t *imp,
          uint32_t *bits,
          int stride,
          int bpp,
          int x,
          int y,
          int width,
          int height,
          uint32_t xor)
{
    if (!pixman_fill_mmx (bits, stride, bpp, x, y, width, height, xor))
    {
        return _pixman_implementation_fill (
            imp->delegate, bits, stride, bpp, x, y, width, height, xor);
    }

    return TRUE;
}
pixman_implementation_t *
_pixman_implementation_create_mmx (void)
{
    pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
    pixman_implementation_t *imp = _pixman_implementation_create (general);
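    /* Anything not overridden here falls through to the delegate chain
     * built above: the fast-path implementation wrapping the general
     * implementation. */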
    imp->combine_32[PIXMAN_OP_OVER] = mmxCombineOverU;
    imp->combine_32[PIXMAN_OP_OVER_REVERSE] = mmxCombineOverReverseU;
    imp->combine_32[PIXMAN_OP_IN] = mmxCombineInU;
    imp->combine_32[PIXMAN_OP_IN_REVERSE] = mmxCombineInReverseU;
    imp->combine_32[PIXMAN_OP_OUT] = mmxCombineOutU;
    imp->combine_32[PIXMAN_OP_OUT_REVERSE] = mmxCombineOutReverseU;
    imp->combine_32[PIXMAN_OP_ATOP] = mmxCombineAtopU;
    imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = mmxCombineAtopReverseU;
    imp->combine_32[PIXMAN_OP_XOR] = mmxCombineXorU;
    imp->combine_32[PIXMAN_OP_ADD] = mmxCombineAddU;
    imp->combine_32[PIXMAN_OP_SATURATE] = mmxCombineSaturateU;

    imp->combine_32_ca[PIXMAN_OP_SRC] = mmxCombineSrcC;
    imp->combine_32_ca[PIXMAN_OP_OVER] = mmxCombineOverC;
    imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = mmxCombineOverReverseC;
    imp->combine_32_ca[PIXMAN_OP_IN] = mmxCombineInC;
    imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = mmxCombineInReverseC;
    imp->combine_32_ca[PIXMAN_OP_OUT] = mmxCombineOutC;
    imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = mmxCombineOutReverseC;
    imp->combine_32_ca[PIXMAN_OP_ATOP] = mmxCombineAtopC;
    imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = mmxCombineAtopReverseC;
    imp->combine_32_ca[PIXMAN_OP_XOR] = mmxCombineXorC;
    imp->combine_32_ca[PIXMAN_OP_ADD] = mmxCombineAddC;
    imp->composite = mmx_composite;
    imp->blt = mmx_blt;
    imp->fill = mmx_fill;

    return imp;
}

#endif /* USE_MMX */