7c88a65f3c197d997f61505a345c4129bd141480
[profile/ivi/pixman.git] / pixman / pixman-pict.c
1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3  * Copyright © 2000 SuSE, Inc.
4  * Copyright © 2007 Red Hat, Inc.
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of SuSE not be used in advertising or
11  * publicity pertaining to distribution of the software without specific,
12  * written prior permission.  SuSE makes no representations about the
13  * suitability of this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  *
16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  * Author:  Keith Packard, SuSE, Inc.
24  */
25
26 #ifdef HAVE_CONFIG_H
27 #include <config.h>
28 #endif
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include "pixman-private.h"
34 #include "pixman-mmx.h"
35 #include "pixman-vmx.h"
36 #include "pixman-sse.h"
37
/*
 * FbFullMask(n): a uint32_t with the low n bits set.
 *
 * n == 32 is handled separately because shifting a 32-bit value by 32 is
 * undefined in C.  The argument is parenthesized at every use so that
 * expression arguments (for example "a & b" or "c ? x : y") keep their
 * intended grouping next to the shift operator.
 */
#define FbFullMask(n)   ((n) == 32 ? (uint32_t)-1 : ((((uint32_t) 1) << (n)) - 1))
39
/*
 * Pixel accessors used by the routines in this file.  Any earlier
 * definitions are discarded; the image argument is accepted but ignored,
 * so every access is a plain dereference of the given pointer.
 */
#undef READ
#undef WRITE
#define READ(image, addr)           (*(addr))
#define WRITE(image, addr, value)   ((*(addr)) = (value))
44
45 typedef void (* CompositeFunc) (pixman_op_t,
46                                 pixman_image_t *, pixman_image_t *, pixman_image_t *,
47                                 int16_t, int16_t, int16_t, int16_t, int16_t, int16_t,
48                                 uint16_t, uint16_t);
49
/*
 * Blend pixel x with pixel y, one 8-bit channel at a time, through the
 * FbOverU macro (bit offsets 0, 8, 16 and 24).  The weight handed to the
 * macro is the complement of the top byte of x.
 *
 * NOTE(review): FbOverU is defined outside this file; presumably it yields
 * x + y * weight / 255 for one channel, saturated -- confirm in
 * pixman-private.h.
 */
uint32_t
fbOver (uint32_t x, uint32_t y)
{
    uint16_t  weight = ~x >> 24;
    uint16_t  scratch;      /* temporary required by the FbOverU macro */
    uint32_t  pixel;

    /* One statement per channel: all four uses share the same scratch. */
    pixel  = FbOverU(x,y,0,weight,scratch);
    pixel |= FbOverU(x,y,8,weight,scratch);
    pixel |= FbOverU(x,y,16,weight,scratch);
    pixel |= FbOverU(x,y,24,weight,scratch);
    return pixel;
}
63
/*
 * Same per-channel blend as fbOver, but only for the three low channels
 * (bit offsets 0, 8 and 16); bits 24-31 of the result are zero.  The
 * weight is still the complement of the top byte of x.
 */
uint32_t
fbOver24 (uint32_t x, uint32_t y)
{
    uint16_t  weight = ~x >> 24;
    uint16_t  scratch;      /* temporary required by the FbOverU macro */
    uint32_t  pixel;

    /* One statement per channel: all three uses share the same scratch. */
    pixel  = FbOverU(x,y,0,weight,scratch);
    pixel |= FbOverU(x,y,8,weight,scratch);
    pixel |= FbOverU(x,y,16,weight,scratch);
    return pixel;
}
76
/*
 * Apply the 8-bit value y to each of the four 8-bit channels of x through
 * the FbInU macro and return the recombined pixel.
 *
 * NOTE(review): FbInU is defined outside this file; presumably it scales
 * one channel by y / 255 -- confirm in pixman-private.h.
 */
uint32_t
fbIn (uint32_t x, uint8_t y)
{
    uint16_t  factor = y;
    uint16_t  scratch;      /* temporary required by the FbInU macro */
    uint32_t  pixel;

    /* One statement per channel: all four uses share the same scratch. */
    pixel  = FbInU(x,0,factor,scratch);
    pixel |= FbInU(x,8,factor,scratch);
    pixel |= FbInU(x,16,factor,scratch);
    pixel |= FbInU(x,24,factor,scratch);
    return pixel;
}
90
91 /*
92  * Naming convention:
93  *
94  *  opSRCxMASKxDST
95  */
96
97 static void
98 fbCompositeOver_x888x8x8888 (pixman_op_t      op,
99                              pixman_image_t * pSrc,
100                              pixman_image_t * pMask,
101                              pixman_image_t * pDst,
102                              int16_t      xSrc,
103                              int16_t      ySrc,
104                              int16_t      xMask,
105                              int16_t      yMask,
106                              int16_t      xDst,
107                              int16_t      yDst,
108                              uint16_t     width,
109                              uint16_t     height)
110 {
111     uint32_t    *src, *srcLine;
112     uint32_t    *dst, *dstLine;
113     uint8_t     *mask, *maskLine;
114     int          srcStride, maskStride, dstStride;
115     uint8_t m;
116     uint32_t s, d;
117     uint16_t w;
118
119     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
120     fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
121     fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
122
123     while (height--)
124     {
125         src = srcLine;
126         srcLine += srcStride;
127         dst = dstLine;
128         dstLine += dstStride;
129         mask = maskLine;
130         maskLine += maskStride;
131
132         w = width;
133         while (w--)
134         {
135             m = READ(pMask, mask++);
136             if (m)
137             {
138                 s = READ(pSrc, src) | 0xff000000;
139
140                 if (m == 0xff)
141                     WRITE(pDst, dst, s);
142                 else
143                 {
144                     d = fbIn (s, m);
145                     WRITE(pDst, dst, fbOver (d, READ(pDst, dst)));
146                 }
147             }
148             src++;
149             dst++;
150         }
151     }
152 }
153
154 static void
155 fbCompositeSolidMaskIn_nx8x8 (pixman_op_t      op,
156                               pixman_image_t    *iSrc,
157                               pixman_image_t    *iMask,
158                               pixman_image_t    *iDst,
159                               int16_t      xSrc,
160                               int16_t      ySrc,
161                               int16_t      xMask,
162                               int16_t      yMask,
163                               int16_t      xDst,
164                               int16_t      yDst,
165                               uint16_t     width,
166                               uint16_t     height)
167 {
168     uint32_t    src, srca;
169     uint8_t     *dstLine, *dst, dstMask;
170     uint8_t     *maskLine, *mask, m;
171     int dstStride, maskStride;
172     uint16_t    w;
173     uint16_t    t;
174
175     fbComposeGetSolid(iSrc, src, iDst->bits.format);
176
177     dstMask = FbFullMask (PIXMAN_FORMAT_DEPTH (iDst->bits.format));
178     srca = src >> 24;
179
180     fbComposeGetStart (iDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
181     fbComposeGetStart (iMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
182
183     if (srca == 0xff) {
184         while (height--)
185         {
186             dst = dstLine;
187             dstLine += dstStride;
188             mask = maskLine;
189             maskLine += maskStride;
190             w = width;
191
192             while (w--)
193             {
194                 m = *mask++;
195                 if (m == 0)
196                 {
197                     *dst = 0;
198                 }
199                 else if (m != 0xff)
200                 {
201                     *dst = FbIntMult(m, *dst, t);
202                 }
203                 dst++;
204             }
205         }
206     }
207     else
208     {
209         while (height--)
210         {
211             dst = dstLine;
212             dstLine += dstStride;
213             mask = maskLine;
214             maskLine += maskStride;
215             w = width;
216
217             while (w--)
218             {
219                 m = *mask++;
220                 m = FbIntMult(m, srca, t);
221                 if (m == 0)
222                 {
223                     *dst = 0;
224                 }
225                 else if (m != 0xff)
226                 {
227                     *dst = FbIntMult(m, *dst, t);
228                 }
229                 dst++;
230             }
231         }
232     }
233 }
234
235
236 static void
237 fbCompositeSrcIn_8x8 (pixman_op_t      op,
238                       pixman_image_t  *iSrc,
239                       pixman_image_t  *iMask,
240                       pixman_image_t  *iDst,
241                       int16_t          xSrc,
242                       int16_t          ySrc,
243                       int16_t          xMask,
244                       int16_t          yMask,
245                       int16_t          xDst,
246                       int16_t          yDst,
247                       uint16_t         width,
248                       uint16_t         height)
249 {
250     uint8_t     *dstLine, *dst;
251     uint8_t     *srcLine, *src;
252     int dstStride, srcStride;
253     uint16_t    w;
254     uint8_t     s;
255     uint16_t    t;
256
257     fbComposeGetStart (iSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);
258     fbComposeGetStart (iDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
259
260     while (height--)
261     {
262         dst = dstLine;
263         dstLine += dstStride;
264         src = srcLine;
265         srcLine += srcStride;
266         w = width;
267
268         while (w--)
269         {
270             s = *src++;
271             if (s == 0)
272             {
273                 *dst = 0;
274             }
275             else if (s != 0xff)
276             {
277                 *dst = FbIntMult(s, *dst, t);
278             }
279             dst++;
280         }
281     }
282 }
283
284 void
285 fbCompositeSolidMask_nx8x8888 (pixman_op_t      op,
286                                pixman_image_t * pSrc,
287                                pixman_image_t * pMask,
288                                pixman_image_t * pDst,
289                                int16_t      xSrc,
290                                int16_t      ySrc,
291                                int16_t      xMask,
292                                int16_t      yMask,
293                                int16_t      xDst,
294                                int16_t      yDst,
295                                uint16_t     width,
296                                uint16_t     height)
297 {
298     uint32_t     src, srca;
299     uint32_t    *dstLine, *dst, d, dstMask;
300     uint8_t     *maskLine, *mask, m;
301     int          dstStride, maskStride;
302     uint16_t     w;
303
304     fbComposeGetSolid(pSrc, src, pDst->bits.format);
305
306     dstMask = FbFullMask (PIXMAN_FORMAT_DEPTH (pDst->bits.format));
307     srca = src >> 24;
308     if (src == 0)
309         return;
310
311     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
312     fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
313
314     while (height--)
315     {
316         dst = dstLine;
317         dstLine += dstStride;
318         mask = maskLine;
319         maskLine += maskStride;
320         w = width;
321
322         while (w--)
323         {
324             m = READ(pMask, mask++);
325             if (m == 0xff)
326             {
327                 if (srca == 0xff)
328                     WRITE(pDst, dst, src & dstMask);
329                 else
330                     WRITE(pDst, dst, fbOver (src, READ(pDst, dst)) & dstMask);
331             }
332             else if (m)
333             {
334                 d = fbIn (src, m);
335                 WRITE(pDst, dst, fbOver (d, READ(pDst, dst)) & dstMask);
336             }
337             dst++;
338         }
339     }
340 }
341
342 void
343 fbCompositeSolidMask_nx8888x8888C (pixman_op_t op,
344                                    pixman_image_t * pSrc,
345                                    pixman_image_t * pMask,
346                                    pixman_image_t * pDst,
347                                    int16_t      xSrc,
348                                    int16_t      ySrc,
349                                    int16_t      xMask,
350                                    int16_t      yMask,
351                                    int16_t      xDst,
352                                    int16_t      yDst,
353                                    uint16_t     width,
354                                    uint16_t     height)
355 {
356     uint32_t    src, srca;
357     uint32_t    *dstLine, *dst, d, dstMask;
358     uint32_t    *maskLine, *mask, ma;
359     int dstStride, maskStride;
360     uint16_t    w;
361     uint32_t    m, n, o, p;
362
363     fbComposeGetSolid(pSrc, src, pDst->bits.format);
364
365     dstMask = FbFullMask (PIXMAN_FORMAT_DEPTH (pDst->bits.format));
366     srca = src >> 24;
367     if (src == 0)
368         return;
369
370     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
371     fbComposeGetStart (pMask, xMask, yMask, uint32_t, maskStride, maskLine, 1);
372
373     while (height--)
374     {
375         dst = dstLine;
376         dstLine += dstStride;
377         mask = maskLine;
378         maskLine += maskStride;
379         w = width;
380
381         while (w--)
382         {
383             ma = READ(pMask, mask++);
384             if (ma == 0xffffffff)
385             {
386                 if (srca == 0xff)
387                     WRITE(pDst, dst, src & dstMask);
388                 else
389                     WRITE(pDst, dst, fbOver (src, READ(pDst, dst)) & dstMask);
390             }
391             else if (ma)
392             {
393                 d = READ(pDst, dst);
394 #define FbInOverC(src,srca,msk,dst,i,result) { \
395     uint16_t  __a = FbGet8(msk,i); \
396     uint32_t  __t, __ta; \
397     uint32_t  __i; \
398     __t = FbIntMult (FbGet8(src,i), __a,__i); \
399     __ta = (uint8_t) ~FbIntMult (srca, __a,__i); \
400     __t = __t + FbIntMult(FbGet8(dst,i),__ta,__i); \
401     __t = (uint32_t) (uint8_t) (__t | (-(__t >> 8))); \
402     result = __t << (i); \
403 }
404                 FbInOverC (src, srca, ma, d, 0, m);
405                 FbInOverC (src, srca, ma, d, 8, n);
406                 FbInOverC (src, srca, ma, d, 16, o);
407                 FbInOverC (src, srca, ma, d, 24, p);
408                 WRITE(pDst, dst, m|n|o|p);
409             }
410             dst++;
411         }
412     }
413 }
414
415 void
416 fbCompositeSolidMask_nx8x0888 (pixman_op_t op,
417                                pixman_image_t * pSrc,
418                                pixman_image_t * pMask,
419                                pixman_image_t * pDst,
420                                int16_t      xSrc,
421                                int16_t      ySrc,
422                                int16_t      xMask,
423                                int16_t      yMask,
424                                int16_t      xDst,
425                                int16_t      yDst,
426                                uint16_t     width,
427                                uint16_t     height)
428 {
429     uint32_t    src, srca;
430     uint8_t     *dstLine, *dst;
431     uint32_t    d;
432     uint8_t     *maskLine, *mask, m;
433     int dstStride, maskStride;
434     uint16_t    w;
435
436     fbComposeGetSolid(pSrc, src, pDst->bits.format);
437
438     srca = src >> 24;
439     if (src == 0)
440         return;
441
442     fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 3);
443     fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
444
445     while (height--)
446     {
447         dst = dstLine;
448         dstLine += dstStride;
449         mask = maskLine;
450         maskLine += maskStride;
451         w = width;
452
453         while (w--)
454         {
455             m = READ(pMask, mask++);
456             if (m == 0xff)
457             {
458                 if (srca == 0xff)
459                     d = src;
460                 else
461                 {
462                     d = Fetch24(pDst, dst);
463                     d = fbOver24 (src, d);
464                 }
465                 Store24(pDst, dst,d);
466             }
467             else if (m)
468             {
469                 d = fbOver24 (fbIn(src,m), Fetch24(pDst, dst));
470                 Store24(pDst, dst, d);
471             }
472             dst += 3;
473         }
474     }
475 }
476
477 void
478 fbCompositeSolidMask_nx8x0565 (pixman_op_t op,
479                                   pixman_image_t * pSrc,
480                                   pixman_image_t * pMask,
481                                   pixman_image_t * pDst,
482                                   int16_t      xSrc,
483                                   int16_t      ySrc,
484                                   int16_t      xMask,
485                                   int16_t      yMask,
486                                   int16_t      xDst,
487                                   int16_t      yDst,
488                                   uint16_t     width,
489                                   uint16_t     height)
490 {
491     uint32_t    src, srca;
492     uint16_t    *dstLine, *dst;
493     uint32_t    d;
494     uint8_t     *maskLine, *mask, m;
495     int dstStride, maskStride;
496     uint16_t    w;
497
498     fbComposeGetSolid(pSrc, src, pDst->bits.format);
499
500     srca = src >> 24;
501     if (src == 0)
502         return;
503
504     fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
505     fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
506
507     while (height--)
508     {
509         dst = dstLine;
510         dstLine += dstStride;
511         mask = maskLine;
512         maskLine += maskStride;
513         w = width;
514
515         while (w--)
516         {
517             m = READ(pMask, mask++);
518             if (m == 0xff)
519             {
520                 if (srca == 0xff)
521                     d = src;
522                 else
523                 {
524                     d = READ(pDst, dst);
525                     d = fbOver24 (src, cvt0565to0888(d));
526                 }
527                 WRITE(pDst, dst, cvt8888to0565(d));
528             }
529             else if (m)
530             {
531                 d = READ(pDst, dst);
532                 d = fbOver24 (fbIn(src,m), cvt0565to0888(d));
533                 WRITE(pDst, dst, cvt8888to0565(d));
534             }
535             dst++;
536         }
537     }
538 }
539
540 void
541 fbCompositeSolidMask_nx8888x0565C (pixman_op_t op,
542                                    pixman_image_t * pSrc,
543                                    pixman_image_t * pMask,
544                                    pixman_image_t * pDst,
545                                    int16_t      xSrc,
546                                    int16_t      ySrc,
547                                    int16_t      xMask,
548                                    int16_t      yMask,
549                                    int16_t      xDst,
550                                    int16_t      yDst,
551                                    uint16_t     width,
552                                    uint16_t     height)
553 {
554     uint32_t    src, srca;
555     uint16_t    src16;
556     uint16_t    *dstLine, *dst;
557     uint32_t    d;
558     uint32_t    *maskLine, *mask, ma;
559     int dstStride, maskStride;
560     uint16_t    w;
561     uint32_t    m, n, o;
562
563     fbComposeGetSolid(pSrc, src, pDst->bits.format);
564
565     srca = src >> 24;
566     if (src == 0)
567         return;
568
569     src16 = cvt8888to0565(src);
570
571     fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
572     fbComposeGetStart (pMask, xMask, yMask, uint32_t, maskStride, maskLine, 1);
573
574     while (height--)
575     {
576         dst = dstLine;
577         dstLine += dstStride;
578         mask = maskLine;
579         maskLine += maskStride;
580         w = width;
581
582         while (w--)
583         {
584             ma = READ(pMask, mask++);
585             if (ma == 0xffffffff)
586             {
587                 if (srca == 0xff)
588                 {
589                     WRITE(pDst, dst, src16);
590                 }
591                 else
592                 {
593                     d = READ(pDst, dst);
594                     d = fbOver24 (src, cvt0565to0888(d));
595                     WRITE(pDst, dst, cvt8888to0565(d));
596                 }
597             }
598             else if (ma)
599             {
600                 d = READ(pDst, dst);
601                 d = cvt0565to0888(d);
602                 FbInOverC (src, srca, ma, d, 0, m);
603                 FbInOverC (src, srca, ma, d, 8, n);
604                 FbInOverC (src, srca, ma, d, 16, o);
605                 d = m|n|o;
606                 WRITE(pDst, dst, cvt8888to0565(d));
607             }
608             dst++;
609         }
610     }
611 }
612
613 void
614 fbCompositeSrc_8888x8888 (pixman_op_t op,
615                          pixman_image_t * pSrc,
616                          pixman_image_t * pMask,
617                          pixman_image_t * pDst,
618                          int16_t      xSrc,
619                          int16_t      ySrc,
620                          int16_t      xMask,
621                          int16_t      yMask,
622                          int16_t      xDst,
623                          int16_t      yDst,
624                          uint16_t     width,
625                          uint16_t     height)
626 {
627     uint32_t    *dstLine, *dst, dstMask;
628     uint32_t    *srcLine, *src, s;
629     int dstStride, srcStride;
630     uint8_t     a;
631     uint16_t    w;
632
633     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
634     fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
635
636     dstMask = FbFullMask (PIXMAN_FORMAT_DEPTH (pDst->bits.format));
637
638     while (height--)
639     {
640         dst = dstLine;
641         dstLine += dstStride;
642         src = srcLine;
643         srcLine += srcStride;
644         w = width;
645
646         while (w--)
647         {
648             s = READ(pSrc, src++);
649             a = s >> 24;
650             if (a == 0xff)
651                 WRITE(pDst, dst, s & dstMask);
652             else if (a)
653                 WRITE(pDst, dst, fbOver (s, READ(pDst, dst)) & dstMask);
654             dst++;
655         }
656     }
657 }
658
659 void
660 fbCompositeSrc_8888x0888 (pixman_op_t op,
661                          pixman_image_t * pSrc,
662                          pixman_image_t * pMask,
663                          pixman_image_t * pDst,
664                          int16_t      xSrc,
665                          int16_t      ySrc,
666                          int16_t      xMask,
667                          int16_t      yMask,
668                          int16_t      xDst,
669                          int16_t      yDst,
670                          uint16_t     width,
671                          uint16_t     height)
672 {
673     uint8_t     *dstLine, *dst;
674     uint32_t    d;
675     uint32_t    *srcLine, *src, s;
676     uint8_t     a;
677     int dstStride, srcStride;
678     uint16_t    w;
679
680     fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 3);
681     fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
682
683     while (height--)
684     {
685         dst = dstLine;
686         dstLine += dstStride;
687         src = srcLine;
688         srcLine += srcStride;
689         w = width;
690
691         while (w--)
692         {
693             s = READ(pSrc, src++);
694             a = s >> 24;
695             if (a)
696             {
697                 if (a == 0xff)
698                     d = s;
699                 else
700                     d = fbOver24 (s, Fetch24(pDst, dst));
701                 Store24(pDst, dst, d);
702             }
703             dst += 3;
704         }
705     }
706 }
707
708 void
709 fbCompositeSrc_8888x0565 (pixman_op_t op,
710                          pixman_image_t * pSrc,
711                          pixman_image_t * pMask,
712                          pixman_image_t * pDst,
713                          int16_t      xSrc,
714                          int16_t      ySrc,
715                          int16_t      xMask,
716                          int16_t      yMask,
717                          int16_t      xDst,
718                          int16_t      yDst,
719                          uint16_t     width,
720                          uint16_t     height)
721 {
722     uint16_t    *dstLine, *dst;
723     uint32_t    d;
724     uint32_t    *srcLine, *src, s;
725     uint8_t     a;
726     int dstStride, srcStride;
727     uint16_t    w;
728
729     fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
730     fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
731
732     while (height--)
733     {
734         dst = dstLine;
735         dstLine += dstStride;
736         src = srcLine;
737         srcLine += srcStride;
738         w = width;
739
740         while (w--)
741         {
742             s = READ(pSrc, src++);
743             a = s >> 24;
744             if (a)
745             {
746                 if (a == 0xff)
747                     d = s;
748                 else
749                 {
750                     d = READ(pDst, dst);
751                     d = fbOver24 (s, cvt0565to0888(d));
752                 }
753                 WRITE(pDst, dst, cvt8888to0565(d));
754             }
755             dst++;
756         }
757     }
758 }
759
760 void
761 fbCompositeSrcAdd_8000x8000 (pixman_op_t        op,
762                              pixman_image_t * pSrc,
763                              pixman_image_t * pMask,
764                              pixman_image_t * pDst,
765                              int16_t      xSrc,
766                              int16_t      ySrc,
767                              int16_t      xMask,
768                              int16_t      yMask,
769                              int16_t      xDst,
770                              int16_t      yDst,
771                              uint16_t     width,
772                              uint16_t     height)
773 {
774     uint8_t     *dstLine, *dst;
775     uint8_t     *srcLine, *src;
776     int dstStride, srcStride;
777     uint16_t    w;
778     uint8_t     s, d;
779     uint16_t    t;
780
781     fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);
782     fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
783
784     while (height--)
785     {
786         dst = dstLine;
787         dstLine += dstStride;
788         src = srcLine;
789         srcLine += srcStride;
790         w = width;
791
792         while (w--)
793         {
794             s = READ(pSrc, src++);
795             if (s)
796             {
797                 if (s != 0xff)
798                 {
799                     d = READ(pDst, dst);
800                     t = d + s;
801                     s = t | (0 - (t >> 8));
802                 }
803                 WRITE(pDst, dst, s);
804             }
805             dst++;
806         }
807     }
808 }
809
810 void
811 fbCompositeSrcAdd_8888x8888 (pixman_op_t        op,
812                              pixman_image_t * pSrc,
813                              pixman_image_t * pMask,
814                              pixman_image_t * pDst,
815                              int16_t      xSrc,
816                              int16_t      ySrc,
817                              int16_t      xMask,
818                              int16_t      yMask,
819                              int16_t      xDst,
820                              int16_t      yDst,
821                              uint16_t     width,
822                              uint16_t     height)
823 {
824     uint32_t    *dstLine, *dst;
825     uint32_t    *srcLine, *src;
826     int dstStride, srcStride;
827     uint16_t    w;
828     uint32_t    s, d;
829     uint16_t    t;
830     uint32_t    m,n,o,p;
831
832     fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
833     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
834
835     while (height--)
836     {
837         dst = dstLine;
838         dstLine += dstStride;
839         src = srcLine;
840         srcLine += srcStride;
841         w = width;
842
843         while (w--)
844         {
845             s = READ(pSrc, src++);
846             if (s)
847             {
848                 if (s != 0xffffffff)
849                 {
850                     d = READ(pDst, dst);
851                     if (d)
852                     {
853                         m = FbAdd(s,d,0,t);
854                         n = FbAdd(s,d,8,t);
855                         o = FbAdd(s,d,16,t);
856                         p = FbAdd(s,d,24,t);
857                         s = m|n|o|p;
858                     }
859                 }
860                 WRITE(pDst, dst, s);
861             }
862             dst++;
863         }
864     }
865 }
866
867 static void
868 fbCompositeSrcAdd_8888x8x8 (pixman_op_t op,
869                             pixman_image_t * pSrc,
870                             pixman_image_t * pMask,
871                             pixman_image_t * pDst,
872                             int16_t      xSrc,
873                             int16_t      ySrc,
874                             int16_t      xMask,
875                             int16_t      yMask,
876                             int16_t      xDst,
877                             int16_t      yDst,
878                             uint16_t     width,
879                             uint16_t     height)
880 {
881     uint8_t     *dstLine, *dst;
882     uint8_t     *maskLine, *mask;
883     int dstStride, maskStride;
884     uint16_t    w;
885     uint32_t    src;
886     uint8_t     sa;
887
888     fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
889     fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
890     fbComposeGetSolid (pSrc, src, pDst->bits.format);
891     sa = (src >> 24);
892
893     while (height--)
894     {
895         dst = dstLine;
896         dstLine += dstStride;
897         mask = maskLine;
898         maskLine += maskStride;
899         w = width;
900
901         while (w--)
902         {
903             uint16_t    tmp;
904             uint16_t    a;
905             uint32_t    m, d;
906             uint32_t    r;
907
908             a = READ(pMask, mask++);
909             d = READ(pDst, dst);
910
911             m = FbInU (sa, 0, a, tmp);
912             r = FbAdd (m, d, 0, tmp);
913
914             WRITE(pDst, dst++, r);
915         }
916     }
917 }
918
919 void
920 fbCompositeSrcAdd_1000x1000 (pixman_op_t        op,
921                              pixman_image_t * pSrc,
922                              pixman_image_t * pMask,
923                              pixman_image_t * pDst,
924                              int16_t      xSrc,
925                              int16_t      ySrc,
926                              int16_t      xMask,
927                              int16_t      yMask,
928                              int16_t      xDst,
929                              int16_t      yDst,
930                              uint16_t     width,
931                              uint16_t     height)
932 {
933     /* FIXME */
934 #if 0
935
936     uint32_t    *dstBits, *srcBits;
937     int dstStride, srcStride;
938     int         dstBpp, srcBpp;
939     int         dstXoff, dstYoff;
940     int         srcXoff, srcYoff;
941
942     fbGetDrawable(pSrc->pDrawable, srcBits, srcStride, srcBpp, srcXoff, srcYoff);
943
944     fbGetDrawable(pDst->pDrawable, dstBits, dstStride, dstBpp, dstXoff, dstYoff);
945
946     fbBlt (srcBits + srcStride * (ySrc + srcYoff),
947            srcStride,
948            xSrc + srcXoff,
949
950            dstBits + dstStride * (yDst + dstYoff),
951            dstStride,
952            xDst + dstXoff,
953
954            width,
955            height,
956
957            GXor,
958            FB_ALLONES,
959            srcBpp,
960
961            FALSE,
962            FALSE);
963
964 #endif
965 }
966
967 void
968 fbCompositeSolidMask_nx1xn (pixman_op_t op,
969                             pixman_image_t * pSrc,
970                             pixman_image_t * pMask,
971                             pixman_image_t * pDst,
972                             int16_t      xSrc,
973                             int16_t      ySrc,
974                             int16_t      xMask,
975                             int16_t      yMask,
976                             int16_t      xDst,
977                             int16_t      yDst,
978                             uint16_t     width,
979                             uint16_t     height)
980 {
981     /* FIXME */
982 #if 0
983     uint32_t    *dstBits;
984     uint32_t    *maskBits;
985     int dstStride, maskStride;
986     int         dstBpp, maskBpp;
987     int         dstXoff, dstYoff;
988     int         maskXoff, maskYoff;
989     uint32_t    src;
990
991     fbComposeGetSolid(pSrc, src, pDst->bits.format);
992     fbGetStipDrawable (pMask->pDrawable, maskBits, maskStride, maskBpp, maskXoff, maskYoff);
993     fbGetDrawable (pDst->pDrawable, dstBits, dstStride, dstBpp, dstXoff, dstYoff);
994
995     switch (dstBpp) {
996     case 32:
997         break;
998     case 24:
999         break;
1000     case 16:
1001         src = cvt8888to0565(src);
1002         break;
1003     }
1004
1005     src = fbReplicatePixel (src, dstBpp);
1006
1007     fbBltOne (maskBits + maskStride * (yMask + maskYoff),
1008               maskStride,
1009               xMask + maskXoff,
1010
1011               dstBits + dstStride * (yDst + dstYoff),
1012               dstStride,
1013               (xDst + dstXoff) * dstBpp,
1014               dstBpp,
1015
1016               width * dstBpp,
1017               height,
1018
1019               0x0,
1020               src,
1021               FB_ALLONES,
1022               0x0);
1023
1024 #endif
1025 }
1026
1027 /*
1028  * Apply a constant alpha value in an over computation
1029  */
1030 static void
1031 fbCompositeSrcSrc_nxn  (pixman_op_t        op,
1032                         pixman_image_t * pSrc,
1033                         pixman_image_t * pMask,
1034                         pixman_image_t * pDst,
1035                         int16_t      xSrc,
1036                         int16_t      ySrc,
1037                         int16_t      xMask,
1038                         int16_t      yMask,
1039                         int16_t      xDst,
1040                         int16_t      yDst,
1041                         uint16_t     width,
1042                         uint16_t     height);
1043
1044 /*
1045  * Simple bitblt
1046  */
1047
1048 static void
1049 fbCompositeSrcSrc_nxn  (pixman_op_t        op,
1050                         pixman_image_t * pSrc,
1051                         pixman_image_t * pMask,
1052                         pixman_image_t * pDst,
1053                         int16_t      xSrc,
1054                         int16_t      ySrc,
1055                         int16_t      xMask,
1056                         int16_t      yMask,
1057                         int16_t      xDst,
1058                         int16_t      yDst,
1059                         uint16_t     width,
1060                         uint16_t     height)
1061 {
1062     /* FIXME */
1063 #if 0
1064     uint32_t    *dst;
1065     uint32_t    *src;
1066     int dstStride, srcStride;
1067     int         srcXoff, srcYoff;
1068     int         dstXoff, dstYoff;
1069     int         srcBpp;
1070     int         dstBpp;
1071     pixman_bool_t       reverse = FALSE;
1072     pixman_bool_t       upsidedown = FALSE;
1073
1074     fbGetDrawable(pSrc->pDrawable,src,srcStride,srcBpp,srcXoff,srcYoff);
1075     fbGetDrawable(pDst->pDrawable,dst,dstStride,dstBpp,dstXoff,dstYoff);
1076
1077     fbBlt (src + (ySrc + srcYoff) * srcStride,
1078            srcStride,
1079            (xSrc + srcXoff) * srcBpp,
1080
1081            dst + (yDst + dstYoff) * dstStride,
1082            dstStride,
1083            (xDst + dstXoff) * dstBpp,
1084
1085            (width) * dstBpp,
1086            (height),
1087
1088            GXcopy,
1089            FB_ALLONES,
1090            dstBpp,
1091
1092            reverse,
1093            upsidedown);
1094 #endif
1095 }
1096
1097 static void
1098 pixman_image_composite_rect  (pixman_op_t                   op,
1099                               pixman_image_t               *src,
1100                               pixman_image_t               *mask,
1101                               pixman_image_t               *dest,
1102                               int16_t                       src_x,
1103                               int16_t                       src_y,
1104                               int16_t                       mask_x,
1105                               int16_t                       mask_y,
1106                               int16_t                       dest_x,
1107                               int16_t                       dest_y,
1108                               uint16_t                      width,
1109                               uint16_t                      height);
1110 void
1111 fbCompositeSolidFill (pixman_op_t op,
1112                       pixman_image_t * pSrc,
1113                       pixman_image_t * pMask,
1114                       pixman_image_t * pDst,
1115                       int16_t      xSrc,
1116                       int16_t      ySrc,
1117                       int16_t      xMask,
1118                       int16_t      yMask,
1119                       int16_t      xDst,
1120                       int16_t      yDst,
1121                       uint16_t     width,
1122                       uint16_t     height)
1123 {
1124     uint32_t    src;
1125
1126     fbComposeGetSolid(pSrc, src, pDst->bits.format);
1127
1128     if (pDst->bits.format == PIXMAN_a8)
1129         src = src >> 24;
1130     else if (pDst->bits.format == PIXMAN_r5g6b5 ||
1131              pDst->bits.format == PIXMAN_b5g6r5)
1132         src = cvt8888to0565 (src);
1133
1134     pixman_fill (pDst->bits.bits, pDst->bits.rowstride,
1135                  PIXMAN_FORMAT_BPP (pDst->bits.format),
1136                  xDst, yDst,
1137                  width, height,
1138                  src);
1139 }
1140
1141 static void
1142 fbCompositeSrc_8888xx888 (pixman_op_t op,
1143                           pixman_image_t * pSrc,
1144                           pixman_image_t * pMask,
1145                           pixman_image_t * pDst,
1146                           int16_t      xSrc,
1147                           int16_t      ySrc,
1148                           int16_t      xMask,
1149                           int16_t      yMask,
1150                           int16_t      xDst,
1151                           int16_t      yDst,
1152                           uint16_t     width,
1153                           uint16_t     height)
1154 {
1155     uint32_t    *dst;
1156     uint32_t    *src;
1157     int          dstStride, srcStride;
1158     uint32_t     n_bytes = width * sizeof (uint32_t);
1159
1160     fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, src, 1);
1161     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dst, 1);
1162
1163     while (height--)
1164     {
1165         memcpy (dst, src, n_bytes);
1166
1167         dst += dstStride;
1168         src += srcStride;
1169     }
1170 }
1171
1172 static void
1173 pixman_walk_composite_region (pixman_op_t op,
1174                               pixman_image_t * pSrc,
1175                               pixman_image_t * pMask,
1176                               pixman_image_t * pDst,
1177                               int16_t xSrc,
1178                               int16_t ySrc,
1179                               int16_t xMask,
1180                               int16_t yMask,
1181                               int16_t xDst,
1182                               int16_t yDst,
1183                               uint16_t width,
1184                               uint16_t height,
1185                               pixman_bool_t srcRepeat,
1186                               pixman_bool_t maskRepeat,
1187                               CompositeFunc compositeRect)
1188 {
1189     int             n;
1190     const pixman_box32_t *pbox;
1191     int             w, h, w_this, h_this;
1192     int             x_msk, y_msk, x_src, y_src, x_dst, y_dst;
1193     pixman_region32_t reg;
1194     pixman_region32_t *region;
1195
1196     pixman_region32_init (&reg);
1197     if (!pixman_compute_composite_region32 (&reg, pSrc, pMask, pDst,
1198                                             xSrc, ySrc, xMask, yMask, xDst, yDst, width, height))
1199     {
1200         return;
1201     }
1202
1203     region = &reg;
1204
1205     pbox = pixman_region32_rectangles (region, &n);
1206     while (n--)
1207     {
1208         h = pbox->y2 - pbox->y1;
1209         y_src = pbox->y1 - yDst + ySrc;
1210         y_msk = pbox->y1 - yDst + yMask;
1211         y_dst = pbox->y1;
1212         while (h)
1213         {
1214             h_this = h;
1215             w = pbox->x2 - pbox->x1;
1216             x_src = pbox->x1 - xDst + xSrc;
1217             x_msk = pbox->x1 - xDst + xMask;
1218             x_dst = pbox->x1;
1219             if (maskRepeat)
1220             {
1221                 y_msk = MOD (y_msk, pMask->bits.height);
1222                 if (h_this > pMask->bits.height - y_msk)
1223                     h_this = pMask->bits.height - y_msk;
1224             }
1225             if (srcRepeat)
1226             {
1227                 y_src = MOD (y_src, pSrc->bits.height);
1228                 if (h_this > pSrc->bits.height - y_src)
1229                     h_this = pSrc->bits.height - y_src;
1230             }
1231             while (w)
1232             {
1233                 w_this = w;
1234                 if (maskRepeat)
1235                 {
1236                     x_msk = MOD (x_msk, pMask->bits.width);
1237                     if (w_this > pMask->bits.width - x_msk)
1238                         w_this = pMask->bits.width - x_msk;
1239                 }
1240                 if (srcRepeat)
1241                 {
1242                     x_src = MOD (x_src, pSrc->bits.width);
1243                     if (w_this > pSrc->bits.width - x_src)
1244                         w_this = pSrc->bits.width - x_src;
1245                 }
1246                 (*compositeRect) (op, pSrc, pMask, pDst,
1247                                   x_src, y_src, x_msk, y_msk, x_dst, y_dst,
1248                                   w_this, h_this);
1249                 w -= w_this;
1250                 x_src += w_this;
1251                 x_msk += w_this;
1252                 x_dst += w_this;
1253             }
1254             h -= h_this;
1255             y_src += h_this;
1256             y_msk += h_this;
1257             y_dst += h_this;
1258         }
1259         pbox++;
1260     }
1261     pixman_region32_fini (&reg);
1262 }
1263
1264 static void
1265 pixman_image_composite_rect  (pixman_op_t                   op,
1266                               pixman_image_t               *src,
1267                               pixman_image_t               *mask,
1268                               pixman_image_t               *dest,
1269                               int16_t                       src_x,
1270                               int16_t                       src_y,
1271                               int16_t                       mask_x,
1272                               int16_t                       mask_y,
1273                               int16_t                       dest_x,
1274                               int16_t                       dest_y,
1275                               uint16_t                      width,
1276                               uint16_t                      height)
1277 {
1278     FbComposeData compose_data;
1279
1280     return_if_fail (src != NULL);
1281     return_if_fail (dest != NULL);
1282
1283     compose_data.op = op;
1284     compose_data.src = src;
1285     compose_data.mask = mask;
1286     compose_data.dest = dest;
1287     compose_data.xSrc = src_x;
1288     compose_data.ySrc = src_y;
1289     compose_data.xMask = mask_x;
1290     compose_data.yMask = mask_y;
1291     compose_data.xDest = dest_x;
1292     compose_data.yDest = dest_y;
1293     compose_data.width = width;
1294     compose_data.height = height;
1295
1296     pixman_composite_rect_general (&compose_data);
1297 }
1298
/*
 * Pseudo-formats for the fast-path tables.  Both have depth 0, so
 * neither can collide with a real pixel format.
 *
 * NOTE(review): in the tables PIXMAN_null appears only in the mask column
 * and PIXMAN_solid only in the source column; presumably they stand for
 * "no mask" and "solid colour source" -- confirm against the table lookup.
 */
#define PIXMAN_null             PIXMAN_FORMAT (0, 0, 0, 0, 0, 0)
#define PIXMAN_solid            PIXMAN_FORMAT (0, 1, 0, 0, 0, 0)

/* Bit flags stored in FastPathInfo.flags. */
#define NEED_COMPONENT_ALPHA    0x1
#define NEED_PIXBUF             0x2
#define NEED_SOLID_MASK         0x4
1308
1309 typedef struct
1310 {
1311     pixman_op_t                 op;
1312     pixman_format_code_t        src_format;
1313     pixman_format_code_t        mask_format;
1314     pixman_format_code_t        dest_format;
1315     CompositeFunc               func;
1316     uint32_t                    flags;
1317 } FastPathInfo;
1318
#ifdef USE_MMX
/*
 * Fast paths implemented with MMX.  Each row is
 *   { operator, source format, mask format, destination format,
 *     function, flags }
 * and the table ends with a PIXMAN_OP_NONE row.
 *
 * The rows are kept in their original order.
 */
static const FastPathInfo mmx_fast_paths[] =
{
    /* OVER: solid source through an a8 mask */
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r5g6b5,   fbCompositeSolidMask_nx8x0565mmx,     0 },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b5g6r5,   fbCompositeSolidMask_nx8x0565mmx,     0 },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888mmx,     0 },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888mmx,     0 },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888mmx,     0 },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888mmx,     0 },

    /* OVER: solid source through a 32-bit mask (NEED_COMPONENT_ALPHA) */
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8888x8888Cmmx, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8888x8888Cmmx, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8r8g8b8, PIXMAN_r5g6b5,   fbCompositeSolidMask_nx8888x0565Cmmx, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8888x8888Cmmx, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8888x8888Cmmx, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8b8g8r8, PIXMAN_b5g6r5,   fbCompositeSolidMask_nx8888x0565Cmmx, NEED_COMPONENT_ALPHA },

    /* OVER: rows flagged NEED_PIXBUF (source and destination have opposite channel order) */
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5,   fbCompositeSrc_8888RevNPx0565mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_r5g6b5,   fbCompositeSrc_8888RevNPx0565mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8b8g8r8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_x8b8g8r8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, fbCompositeSrc_8888RevNPx8888mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_b5g6r5,   fbCompositeSrc_8888RevNPx0565mmx, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5,   fbCompositeSrc_8888RevNPx0565mmx, NEED_PIXBUF },

    /* OVER: 32-bit source with an a8 mask, rows flagged NEED_SOLID_MASK */
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeSrc_x888xnx8888mmx,    NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, fbCompositeSrc_x888xnx8888mmx,    NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_a8b8g8r8, fbCompositeSrc_x888xnx8888mmx,    NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_x8b8g8r8, fbCompositeSrc_x888xnx8888mmx,    NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888mmx,    NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888mmx,    NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8,       PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8x8888mmx,    NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8,       PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8x8888mmx,    NEED_SOLID_MASK },
#if 0
    /* FIXME: This code is commented out since it's apparently not actually faster than the generic code. */
    /* The last two rows used to name a non-existent format, PIXMAN_x8b8r8g8;
     * corrected to PIXMAN_x8b8g8r8 so the block compiles if it is re-enabled.
     * NOTE(review): the last row pairs a BGR source with an a8r8g8b8
     * destination; PIXMAN_a8b8g8r8 was probably intended -- confirm before
     * enabling.
     */
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, fbCompositeOver_x888x8x8888mmx,   0 },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeOver_x888x8x8888mmx,   0 },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_x8b8g8r8, fbCompositeOver_x888x8x8888mmx,   0 },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeOver_x888x8x8888mmx,   0 },
#endif

    /* OVER: no mask */
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_a8r8g8b8, fbCompositeSolid_nx8888mmx,       0 },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_x8r8g8b8, fbCompositeSolid_nx8888mmx,       0 },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeSolid_nx0565mmx,       0 },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, fbCompositeCopyAreammx,           0 },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, fbCompositeCopyAreammx,           0 },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888mmx,      0 },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888mmx,      0 },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeSrc_8888x0565mmx,      0 },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888mmx,      0 },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888mmx,      0 },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   fbCompositeSrc_8888x0565mmx,      0 },

    /* ADD */
    { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_a8r8g8b8, fbCompositeSrcAdd_8888x8888mmx,   0 },
    { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_a8b8g8r8, fbCompositeSrcAdd_8888x8888mmx,   0 },
    { PIXMAN_OP_ADD, PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       fbCompositeSrcAdd_8000x8000mmx,   0 },
    { PIXMAN_OP_ADD, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8,       fbCompositeSrcAdd_8888x8x8mmx,    0 },

    /* SRC: solid source through an a8 mask */
    { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_x8r8g8b8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_x8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 },

    /* SRC: no mask, handled by the area copy routine */
    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_a8r8g8b8, fbCompositeCopyAreammx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_a8b8g8r8, fbCompositeCopyAreammx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_x8r8g8b8, fbCompositeCopyAreammx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_x8b8g8r8, fbCompositeCopyAreammx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8,  PIXMAN_null,     PIXMAN_x8r8g8b8, fbCompositeCopyAreammx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8,  PIXMAN_null,     PIXMAN_x8b8g8r8, fbCompositeCopyAreammx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_r5g6b5,    PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeCopyAreammx, 0 },
    { PIXMAN_OP_SRC, PIXMAN_b5g6r5,    PIXMAN_null,     PIXMAN_b5g6r5,   fbCompositeCopyAreammx, 0 },

    /* IN */
    { PIXMAN_OP_IN,  PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       fbCompositeIn_8x8mmx,   0 },
    { PIXMAN_OP_IN,  PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8,       fbCompositeIn_nx8x8mmx, 0 },

    /* End-of-table marker */
    { PIXMAN_OP_NONE },
};
#endif
1395
#ifdef USE_SSE2
/*
 * Fast paths implemented with SSE2.  Each row is
 *   { operator, source format, mask format, destination format,
 *     function, flags }
 * and the table ends with a PIXMAN_OP_NONE row.
 *
 * The rows are kept in their original order.
 */
static const FastPathInfo sse_fast_paths[] =
{
    /* OVER: solid source, a8 mask, 16-bit destination */
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, fbCompositeSolidMask_nx8x0565sse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, fbCompositeSolidMask_nx8x0565sse2, 0 },

    /* OVER: no mask */
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSolid_nx8888sse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSolid_nx8888sse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSolid_nx0565sse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888sse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888sse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888sse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888sse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_8888x0565sse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_8888x0565sse2, 0 },

    /* OVER: solid source, a8 mask, 32-bit destination */
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888sse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888sse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888sse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888sse2, 0 },
#if 0
    /* FIXME: This code is buggy in the MMX version, and the bug was carried over to the SSE2 version. */
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeOver_x888x8x8888sse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeOver_x888x8x8888sse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeOver_x888x8x8888sse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeOver_x888x8x8888sse2, 0 },
#endif

    /* OVER: 32-bit source with an a8 mask, rows flagged NEED_SOLID_MASK */
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSrc_x888xnx8888sse2, NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSrc_x888xnx8888sse2, NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSrc_x888xnx8888sse2, NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSrc_x888xnx8888sse2, NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888sse2, NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888sse2, NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8x8888sse2, NEED_SOLID_MASK },
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8x8888sse2, NEED_SOLID_MASK },

    /* OVER: solid source through a 32-bit mask (NEED_COMPONENT_ALPHA) */
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8888x8888Csse2, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8888x8888Csse2, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8888x8888Csse2, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8888x8888Csse2, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, fbCompositeSolidMask_nx8888x0565Csse2, NEED_COMPONENT_ALPHA },
    { PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, fbCompositeSolidMask_nx8888x0565Csse2, NEED_COMPONENT_ALPHA },

    /* OVER: rows flagged NEED_PIXBUF (source and destination have opposite channel order) */
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8b8g8r8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_x8b8g8r8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, fbCompositeSrc_8888RevNPx8888sse2, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5, fbCompositeSrc_8888RevNPx0565sse2, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_r5g6b5, fbCompositeSrc_8888RevNPx0565sse2, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_b5g6r5, fbCompositeSrc_8888RevNPx0565sse2, NEED_PIXBUF },
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, fbCompositeSrc_8888RevNPx0565sse2, NEED_PIXBUF },
#if 0
    /* FIXME: disabled because it is apparently not actually faster than the generic code. */
    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeCopyAreasse2, 0 },
    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeCopyAreasse2, 0 },
#endif

    /* ADD */
    { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcAdd_8000x8000sse2, 0 },
    { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrcAdd_8888x8888sse2, 0 },
    { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrcAdd_8888x8888sse2, 0 },
    { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeSrcAdd_8888x8x8sse2, 0 },

    /* SRC: solid source through an a8 mask */
    { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMaskSrc_nx8x8888sse2, 0 },
    { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMaskSrc_nx8x8888sse2, 0 },
    { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888sse2, 0 },
    { PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888sse2, 0 },

#if 0
    /* FIXME: disabled because it is apparently not actually faster than the generic code. */
    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeCopyAreasse2, 0 },
    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeCopyAreasse2, 0 },
    { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeCopyAreasse2, 0 },
    { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeCopyAreasse2, 0 },
    { PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeCopyAreasse2, 0 },
    { PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeCopyAreasse2, 0 },
#endif

    /* IN */
    { PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeIn_8x8sse2, 0 },
    { PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeIn_nx8x8sse2, 0 },

    /* End-of-table marker */
    { PIXMAN_OP_NONE },
};
#endif
1479
#ifdef USE_VMX
/*
 * Fast paths implemented with VMX.  No routines are registered here:
 * the table holds only the PIXMAN_OP_NONE end-of-table marker.
 */
static const FastPathInfo vmx_fast_paths[] =
{
    { PIXMAN_OP_NONE },     /* End-of-table marker */
};
#endif
1486
1487
1488 static const FastPathInfo c_fast_paths[] =
1489 {
1490     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r5g6b5,   fbCompositeSolidMask_nx8x0565, 0 },
1491     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b5g6r5,   fbCompositeSolidMask_nx8x0565, 0 },
1492     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r8g8b8,   fbCompositeSolidMask_nx8x0888, 0 },
1493     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b8g8r8,   fbCompositeSolidMask_nx8x0888, 0 },
1494     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888, 0 },
1495     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888, 0 },
1496     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888, 0 },
1497     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888, 0 },
1498     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8888x8888C, NEED_COMPONENT_ALPHA },
1499     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8888x8888C, NEED_COMPONENT_ALPHA },
1500     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8r8g8b8, PIXMAN_r5g6b5,   fbCompositeSolidMask_nx8888x0565C, NEED_COMPONENT_ALPHA },
1501     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8888x8888C, NEED_COMPONENT_ALPHA },
1502     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8888x8888C, NEED_COMPONENT_ALPHA },
1503     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8b8g8r8, PIXMAN_b5g6r5,   fbCompositeSolidMask_nx8888x0565C, NEED_COMPONENT_ALPHA },
1504 #if 0
1505     /* FIXME: This code is commented out since it's apparently not actually faster than the generic code */
1506     { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, fbCompositeOver_x888x8x8888,       0 },
1507     { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeOver_x888x8x8888,       0 },
1508     { PIXMAN_OP_OVER, PIXMAN_x8b8r8g8, PIXMAN_a8,       PIXMAN_x8b8g8r8, fbCompositeOver_x888x8x8888,       0 },
1509     { PIXMAN_OP_OVER, PIXMAN_x8b8r8g8, PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeOver_x888x8x8888,       0 },
1510 #endif
1511     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888,         0 },
1512     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888,         0 },
1513     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeSrc_8888x0565,         0 },
1514     { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888,         0 },
1515     { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888,         0 },
1516     { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   fbCompositeSrc_8888x0565,         0 },
1517 #if 0
1518     /* FIXME */
1519     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a1,       PIXMAN_r5g6b5,   fbCompositeSolidMask_nx1xn,       0 },
1520     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a1,       PIXMAN_b5g6r5,   fbCompositeSolidMask_nx1xn,       0 },
1521     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a1,       PIXMAN_r8g8b8,   fbCompositeSolidMask_nx1xn,       0 },
1522     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a1,       PIXMAN_b8g8r8,   fbCompositeSolidMask_nx1xn,       0 },
1523     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a1,       PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx1xn,       0 },
1524     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a1,       PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx1xn,       0 },
1525     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a1,       PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx1xn,       0 },
1526     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a1,       PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx1xn,       0 },
1527 #endif
1528     { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_a8r8g8b8, fbCompositeSrcAdd_8888x8888,   0 },
1529     { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_a8b8g8r8, fbCompositeSrcAdd_8888x8888,   0 },
1530     { PIXMAN_OP_ADD, PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       fbCompositeSrcAdd_8000x8000,   0 },
1531 #if 0
1532     /* FIXME */
1533     { PIXMAN_OP_ADD, PIXMAN_a1,        PIXMAN_null,     PIXMAN_a1,       fbCompositeSrcAdd_1000x1000,   0 },
1534 #endif
1535     { PIXMAN_OP_ADD, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8,       fbCompositeSrcAdd_8888x8x8,    0 },
1536     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_null,     PIXMAN_a8r8g8b8, fbCompositeSolidFill, 0 },
1537     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_null,     PIXMAN_x8r8g8b8, fbCompositeSolidFill, 0 },
1538     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_null,     PIXMAN_a8b8g8r8, fbCompositeSolidFill, 0 },
1539     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_null,     PIXMAN_x8b8g8r8, fbCompositeSolidFill, 0 },
1540     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_null,     PIXMAN_a8,       fbCompositeSolidFill, 0 },
1541     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeSolidFill, 0 },
1542     { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_x8r8g8b8, fbCompositeSrc_8888xx888, 0 },
1543     { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8,  PIXMAN_null,     PIXMAN_x8r8g8b8, fbCompositeSrc_8888xx888, 0 },
1544     { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_x8b8g8r8, fbCompositeSrc_8888xx888, 0 },
1545     { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8,  PIXMAN_null,     PIXMAN_x8b8g8r8, fbCompositeSrc_8888xx888, 0 },
1546 #if 0
1547     /* FIXME */
1548     { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_a8r8g8b8, fbCompositeSrcSrc_nxn, 0 },
1549     { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_a8b8g8r8, fbCompositeSrcSrc_nxn, 0 },
1550     { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8,  PIXMAN_null,     PIXMAN_x8r8g8b8, fbCompositeSrcSrc_nxn, 0 },
1551     { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8,  PIXMAN_null,     PIXMAN_x8b8g8r8, fbCompositeSrcSrc_nxn, 0 },
1552     { PIXMAN_OP_SRC, PIXMAN_r5g6b5,    PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeSrcSrc_nxn, 0 },
1553     { PIXMAN_OP_SRC, PIXMAN_b5g6r5,    PIXMAN_null,     PIXMAN_b5g6r5,   fbCompositeSrcSrc_nxn, 0 },
1554 #endif
1555     { PIXMAN_OP_IN,  PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       fbCompositeSrcIn_8x8,   0 },
1556     { PIXMAN_OP_IN,  PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8,       fbCompositeSolidMaskIn_nx8x8, 0 },
1557     { PIXMAN_OP_NONE },
1558 };
1559
1560 static pixman_bool_t
1561 mask_is_solid (pixman_image_t *mask)
1562 {
1563     if (mask->type == SOLID)
1564         return TRUE;
1565
1566     if (mask->type == BITS &&
1567         mask->common.repeat == PIXMAN_REPEAT_NORMAL &&
1568         mask->bits.width == 1 &&
1569         mask->bits.height == 1)
1570     {
1571         return TRUE;
1572     }
1573
1574     return FALSE;
1575 }
1576
1577 static const FastPathInfo *
1578 get_fast_path (const FastPathInfo *fast_paths,
1579                pixman_op_t         op,
1580                pixman_image_t     *pSrc,
1581                pixman_image_t     *pMask,
1582                pixman_image_t     *pDst,
1583                pixman_bool_t       is_pixbuf)
1584 {
1585     const FastPathInfo *info;
1586
1587     for (info = fast_paths; info->op != PIXMAN_OP_NONE; info++)
1588     {
1589         pixman_bool_t valid_src         = FALSE;
1590         pixman_bool_t valid_mask        = FALSE;
1591
1592         if (info->op != op)
1593             continue;
1594
1595         if ((info->src_format == PIXMAN_solid && pixman_image_can_get_solid (pSrc))             ||
1596             (pSrc->type == BITS && info->src_format == pSrc->bits.format))
1597         {
1598             valid_src = TRUE;
1599         }
1600
1601         if (!valid_src)
1602             continue;
1603
1604         if ((info->mask_format == PIXMAN_null && !pMask)                        ||
1605             (pMask && pMask->type == BITS && info->mask_format == pMask->bits.format))
1606         {
1607             valid_mask = TRUE;
1608
1609             if (info->flags & NEED_SOLID_MASK)
1610             {
1611                 if (!pMask || !mask_is_solid (pMask))
1612                     valid_mask = FALSE;
1613             }
1614
1615             if (info->flags & NEED_COMPONENT_ALPHA)
1616             {
1617                 if (!pMask || !pMask->common.component_alpha)
1618                     valid_mask = FALSE;
1619             }
1620         }
1621
1622         if (!valid_mask)
1623             continue;
1624         
1625         if (info->dest_format != pDst->bits.format)
1626             continue;
1627
1628         if ((info->flags & NEED_PIXBUF) && !is_pixbuf)
1629             continue;
1630
1631         return info;
1632     }
1633
1634     return NULL;
1635 }
1636
1637 /*
1638  * Operator optimizations based on source or destination opacity
1639  */
/* One row of the operator simplification table: an operator together with
 * the replacement operators to use depending on which images are opaque.
 */
typedef struct
{
    pixman_op_t                 op;             /* operator as requested */
    pixman_op_t                 opSrcDstOpaque; /* replacement when source and destination are both opaque */
    pixman_op_t                 opSrcOpaque;    /* replacement when only the source is opaque */
    pixman_op_t                 opDstOpaque;    /* replacement when only the destination is opaque */
} OptimizedOperatorInfo;
1647
/* Operator simplification table.  Each row lists, for one input operator,
 * the operator to run instead when both images are opaque, when only the
 * source is opaque, and when only the destination is opaque.  A column
 * that repeats the input operator means no simplification for that case.
 * The table is terminated by a PIXMAN_OP_NONE entry.
 */
static const OptimizedOperatorInfo optimized_operators[] =
{
    /* Input Operator           SRC&DST Opaque          SRC Opaque              DST Opaque      */
    { PIXMAN_OP_OVER,           PIXMAN_OP_SRC,          PIXMAN_OP_SRC,          PIXMAN_OP_OVER },
    { PIXMAN_OP_OVER_REVERSE,   PIXMAN_OP_DST,          PIXMAN_OP_OVER_REVERSE, PIXMAN_OP_DST },
    { PIXMAN_OP_IN,             PIXMAN_OP_SRC,          PIXMAN_OP_IN,           PIXMAN_OP_SRC },
    { PIXMAN_OP_IN_REVERSE,     PIXMAN_OP_DST,          PIXMAN_OP_DST,          PIXMAN_OP_IN_REVERSE },
    { PIXMAN_OP_OUT,            PIXMAN_OP_CLEAR,        PIXMAN_OP_OUT,          PIXMAN_OP_CLEAR },
    { PIXMAN_OP_OUT_REVERSE,    PIXMAN_OP_CLEAR,        PIXMAN_OP_CLEAR,        PIXMAN_OP_OUT_REVERSE },
    { PIXMAN_OP_ATOP,           PIXMAN_OP_SRC,          PIXMAN_OP_IN,           PIXMAN_OP_OVER },
    { PIXMAN_OP_ATOP_REVERSE,   PIXMAN_OP_DST,          PIXMAN_OP_OVER_REVERSE, PIXMAN_OP_IN_REVERSE },
    { PIXMAN_OP_XOR,            PIXMAN_OP_CLEAR,        PIXMAN_OP_OUT,          PIXMAN_OP_OUT_REVERSE },
    { PIXMAN_OP_SATURATE,       PIXMAN_OP_DST,          PIXMAN_OP_OVER_REVERSE, PIXMAN_OP_DST },
    { PIXMAN_OP_NONE }
};
1663
1664 /*
1665  * Check if the current operator could be optimized
1666  */
1667 static const OptimizedOperatorInfo*
1668 pixman_operator_can_be_optimized(pixman_op_t op)
1669 {
1670     const OptimizedOperatorInfo *info;
1671
1672     for (info = optimized_operators; info->op != PIXMAN_OP_NONE; info++)
1673     {
1674         if(info->op == op)
1675             return info;
1676     }
1677     return NULL;
1678 }
1679
1680 /*
1681  * Optimize the current operator based on opacity of source or destination
1682  * The output operator should be mathematically equivalent to the source.
1683  */
1684 static pixman_op_t
1685 pixman_optimize_operator(pixman_op_t op, pixman_image_t *pSrc, pixman_image_t *pMask, pixman_image_t *pDst )
1686 {
1687     pixman_bool_t is_source_opaque;
1688     pixman_bool_t is_dest_opaque;
1689     const OptimizedOperatorInfo *info = pixman_operator_can_be_optimized(op);
1690
1691     if(!info || pMask)
1692         return op;
1693
1694     is_source_opaque = pixman_image_is_opaque(pSrc);
1695     is_dest_opaque = pixman_image_is_opaque(pDst);
1696
1697     if(is_source_opaque == FALSE && is_dest_opaque == FALSE)
1698         return op;
1699
1700     if(is_source_opaque && is_dest_opaque)
1701         return info->opSrcDstOpaque;
1702     else if(is_source_opaque)
1703         return info->opSrcOpaque;
1704     else if(is_dest_opaque)
1705         return info->opDstOpaque;
1706
1707     return op;
1708
1709 }
1710
/*
 * pixman_image_composite:
 *
 * Public entry point for compositing.  It picks the function that will do
 * the actual work -- a specialised fast path when one matches, otherwise
 * pixman_image_composite_rect -- and hands it, together with the operator,
 * the images and the coordinates, to pixman_walk_composite_region().
 *
 * op               compositing operator; it may be replaced by a simpler
 *                  one by pixman_optimize_operator(), and the function
 *                  returns without doing anything if that yields
 *                  PIXMAN_OP_DST
 * pSrc             source image (dereferenced unconditionally)
 * pMask            mask image, may be NULL
 * pDst             destination image (dereferenced unconditionally)
 * xSrc, ySrc,
 * xMask, yMask,
 * xDst, yDst,
 * width, height    passed through to pixman_walk_composite_region();
 *                  presumably the origin in each image and the size of
 *                  the rectangle to composite -- confirm against that
 *                  function
 */
#if defined(USE_SSE2) && defined (__GNUC__)

/*
 * Work around GCC bug causing crashes in Mozilla with SSE2
 *
 * When using SSE2 intrinsics, gcc assumes that the stack is 16 byte
 * aligned. Unfortunately some code, such as Mozilla and Mono contain
 * code that aligns the stack to 4 bytes.
 *
 * The __force_align_arg_pointer__ makes gcc generate a prologue that
 * realigns the stack pointer to 16 bytes.
 *
 * See https://bugs.freedesktop.org/show_bug.cgi?id=15693
 */

__attribute__((__force_align_arg_pointer__))
#endif
PIXMAN_EXPORT void
pixman_image_composite (pixman_op_t      op,
                        pixman_image_t * pSrc,
                        pixman_image_t * pMask,
                        pixman_image_t * pDst,
                        int16_t      xSrc,
                        int16_t      ySrc,
                        int16_t      xMask,
                        int16_t      yMask,
                        int16_t      xDst,
                        int16_t      yDst,
                        uint16_t     width,
                        uint16_t     height)
{
    /* The mask related flags stay FALSE unless a BITS mask is supplied */
    pixman_bool_t srcRepeat = pSrc->type == BITS && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL;
    pixman_bool_t maskRepeat = FALSE;
    pixman_bool_t srcTransform = pSrc->common.transform != NULL;
    pixman_bool_t maskTransform = FALSE;
    pixman_bool_t srcAlphaMap = pSrc->common.alpha_map != NULL;
    pixman_bool_t maskAlphaMap = FALSE;
    pixman_bool_t dstAlphaMap = pDst->common.alpha_map != NULL;
    CompositeFunc func = NULL;

#ifdef USE_MMX
    fbComposeSetupMMX();
#endif

#ifdef USE_VMX
    fbComposeSetupVMX();
#endif

#ifdef USE_SSE2
    fbComposeSetupSSE();
#endif

    /* A transform on a repeating 1x1 source is ignored */
    if (srcRepeat && srcTransform &&
        pSrc->bits.width == 1 &&
        pSrc->bits.height == 1)
    {
        srcTransform = FALSE;
    }

    if (pMask && pMask->type == BITS)
    {
        maskRepeat = pMask->common.repeat == PIXMAN_REPEAT_NORMAL;

        /* A convolution filter on the mask is treated like a transform */
        maskTransform = pMask->common.transform != 0;
        if (pMask->common.filter == PIXMAN_FILTER_CONVOLUTION)
            maskTransform = TRUE;

        maskAlphaMap = pMask->common.alpha_map != 0;

        /* Same 1x1 special case as for the source above */
        if (maskRepeat && maskTransform &&
            pMask->bits.width == 1 &&
            pMask->bits.height == 1)
        {
            maskTransform = FALSE;
        }
    }

    /*
     * Check if we can replace our operator by a simpler one if the src or dest are opaque.
     * The replacement operator should be mathematically equivalent to the original one.
     * PIXMAN_OP_DST leaves the destination untouched, so there is nothing to do.
     */
    op = pixman_optimize_operator(op, pSrc, pMask, pDst);
    if(op == PIXMAN_OP_DST)
        return;

    /* Fast paths are only looked up when nothing calls for the general
     * code: no transforms, no alpha maps, no convolution filter, no PAD
     * repeat and no custom read/write accessors on any of the images.
     */
    if ((pSrc->type == BITS || pixman_image_can_get_solid (pSrc)) && (!pMask || pMask->type == BITS)
        && !srcTransform && !maskTransform
        && !maskAlphaMap && !srcAlphaMap && !dstAlphaMap
        && (pSrc->common.filter != PIXMAN_FILTER_CONVOLUTION)
        && (pSrc->common.repeat != PIXMAN_REPEAT_PAD)
        && (!pMask || (pMask->common.filter != PIXMAN_FILTER_CONVOLUTION && pMask->common.repeat != PIXMAN_REPEAT_PAD))
        && !pSrc->common.read_func && !pSrc->common.write_func
        && !(pMask && pMask->common.read_func) && !(pMask && pMask->common.write_func)
        && !pDst->common.read_func && !pDst->common.write_func)
    {
        const FastPathInfo *info;
        pixman_bool_t pixbuf;

        /* "pixbuf": source and mask are BITS images sharing the same pixel
         * storage and the same origin, and the mask neither uses component
         * alpha nor repeats.  Needed by fast paths flagged NEED_PIXBUF.
         */
        pixbuf =
            pSrc && pSrc->type == BITS          &&
            pMask && pMask->type == BITS        &&
            pSrc->bits.bits == pMask->bits.bits &&
            xSrc == xMask                       &&
            ySrc == yMask                       &&
            !pMask->common.component_alpha      &&
            !maskRepeat;
        info = NULL;

        /* Tables are tried in this order: SSE2, MMX, VMX, plain C.  The
         * first table that yields a match wins.
         */
#ifdef USE_SSE2
        if (pixman_have_sse ())
            info = get_fast_path (sse_fast_paths, op, pSrc, pMask, pDst, pixbuf);
#endif

#ifdef USE_MMX
        if (!info && pixman_have_mmx())
            info = get_fast_path (mmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
#endif

#ifdef USE_VMX

        if (!info && pixman_have_vmx())
            info = get_fast_path (vmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
#endif
        if (!info)
            info = get_fast_path (c_fast_paths, op, pSrc, pMask, pDst, pixbuf);

        if (info)
        {
            func = info->func;

            /* A fast path taking a solid source or mask deals with the
             * repetition itself, so the region walker must not tile.
             */
            if (info->src_format == PIXMAN_solid)
                srcRepeat = FALSE;

            if (info->mask_format == PIXMAN_solid       ||
                info->flags & NEED_SOLID_MASK)
            {
                maskRepeat = FALSE;
            }
        }
    }

    if ((srcRepeat                      &&
         pSrc->bits.width == 1          &&
         pSrc->bits.height == 1)        ||
        (maskRepeat                     &&
         pMask->bits.width == 1         &&
         pMask->bits.height == 1))
    {
        /* If src or mask are repeating 1x1 images and srcRepeat or
         * maskRepeat are still TRUE, it means the fast path we
         * selected does not actually handle repeating images.
         *
         * So rather than call the "fast path" with a zillion
         * 1x1 requests, we just use the general code (which does
         * do something sensible with 1x1 repeating images).
         */
        func = NULL;
    }

    /* No usable fast path: fall back to the general compositing code */
    if (!func)
    {
        func = pixman_image_composite_rect;

        /* CompositeGeneral optimizes 1x1 repeating images itself */
        if (pSrc->type == BITS &&
            pSrc->bits.width == 1 && pSrc->bits.height == 1)
        {
            srcRepeat = FALSE;
        }

        if (pMask && pMask->type == BITS &&
            pMask->bits.width == 1 && pMask->bits.height == 1)
        {
            maskRepeat = FALSE;
        }

        /* if we are transforming, repeats are handled in fbFetchTransformed */
        if (srcTransform)
            srcRepeat = FALSE;

        if (maskTransform)
            maskRepeat = FALSE;
    }

    pixman_walk_composite_region (op, pSrc, pMask, pDst, xSrc, ySrc,
                                  xMask, yMask, xDst, yDst, width, height,
                                  srcRepeat, maskRepeat, func);
}
1899
1900
1901 #ifdef USE_VMX
/* The CPU detection code needs to be in a file not compiled with
 * "-maltivec -mabi=altivec", as gcc would try to save vector registers
 * across function calls, causing SIGILL on CPUs without AltiVec/VMX.
 */

/* TRUE once the VMX probe has run; later calls reuse the cached answer */
static pixman_bool_t initialized = FALSE;

/* Cached probe result.  It starts out TRUE and is set to FALSE by the
 * detection code below when VMX turns out to be unavailable.
 */
static volatile pixman_bool_t have_vmx = TRUE;
1908
1909 #ifdef __APPLE__
1910 #include <sys/sysctl.h>
1911
1912 pixman_bool_t pixman_have_vmx (void) {
1913     if(!initialized) {
1914         size_t length = sizeof(have_vmx);
1915         int error =
1916             sysctlbyname("hw.optional.altivec", &have_vmx, &length, NULL, 0);
1917         if(error) have_vmx = FALSE;
1918         initialized = TRUE;
1919     }
1920     return have_vmx;
1921 }
1922
1923 #else
1924 #include <signal.h>
1925
1926 static void vmx_test(int sig, siginfo_t *si, void *unused) {
1927     have_vmx = FALSE;
1928 }
1929
1930 pixman_bool_t pixman_have_vmx (void) {
1931     struct sigaction sa, osa;
1932     if (!initialized) {
1933         sa.sa_flags = SA_SIGINFO;
1934         sigemptyset(&sa.sa_mask);
1935         sa.sa_sigaction = vmx_test;
1936         sigaction(SIGILL, &sa, &osa);
1937         asm volatile ( "vor 0, 0, 0" );
1938         sigaction(SIGILL, &osa, NULL);
1939         initialized = TRUE;
1940     }
1941     return have_vmx;
1942 }
1943 #endif /* __APPLE__ */
1944 #endif /* USE_VMX */
1945
1946 #ifdef USE_MMX
/* The CPU detection code needs to be in a file not compiled with
 * "-mmmx -msse", as gcc would otherwise generate CMOV instructions,
 * which raise SIGILL on old CPUs that do not support them.
 */
1952 #if !defined(__amd64__) && !defined(__x86_64__)
1953
1954 #ifdef HAVE_GETISAX
1955 #include <sys/auxv.h>
1956 #endif
1957
/*
 * Bit flags making up the value returned by detectCPUFeatures().
 *
 * Note that SSE (0x6) overlaps the MMX_Extensions bit (0x2): OR-ing SSE
 * into a feature mask sets the MMX_Extensions bit as well.
 */
enum CPUFeatures {
    NoFeatures = 0,
    MMX = 0x1,
    MMX_Extensions = 0x2,
    SSE = 0x6,                  /* 0x4 | MMX_Extensions */
    SSE2 = 0x8,
    CMOV = 0x10
};
1966
/*
 * Determine which of the instruction set extensions listed in
 * enum CPUFeatures the CPU provides.
 *
 * With HAVE_GETISAX the information comes from getisax(); otherwise the
 * CPUID instruction is executed through inline assembly (GCC and MSVC
 * flavours; any other compiler is rejected at compile time).
 *
 * Returns a bitwise OR of CPUFeatures flags, 0 if nothing was detected.
 */
static unsigned int detectCPUFeatures(void) {
    unsigned int features = 0;
    unsigned int result = 0;

#ifdef HAVE_GETISAX
    /* Translate the AV_386_* bits reported by getisax() one by one */
    if (getisax(&result, 1)) {
        if (result & AV_386_CMOV)
            features |= CMOV;
        if (result & AV_386_MMX)
            features |= MMX;
        if (result & AV_386_AMD_MMX)
            features |= MMX_Extensions;
        if (result & AV_386_SSE)
            features |= SSE;
        if (result & AV_386_SSE2)
            features |= SSE2;
    }
#else
    /* 12 character CPUID vendor string plus terminating NUL */
    char vendor[13];
#ifdef _MSC_VER
    int vendor0 = 0, vendor1, vendor2;
#endif
    vendor[0] = 0;
    vendor[12] = 0;

    /* Both assembly variants below do the same thing:
     *
     *  1. Try to toggle bit 0x00200000 of the flags register.  If the bit
     *     cannot be changed CPUID is unavailable ("nocpuid" in the MSVC
     *     version) and the result is 0.
     *  2. CPUID with eax = 0: store ebx, edx, ecx as the vendor string.
     *  3. CPUID with eax = 1: edx holds the feature bits, which end up in
     *     'result'.
     */
#ifdef __GNUC__
    /* see p. 118 of amd64 instruction set manual Vol3 */
    /* We need to be careful about the handling of %ebx and
     * %esp here. We can't declare either one as clobbered
     * since they are special registers (%ebx is the "PIC
     * register" holding an offset to global data, %esp the
     * stack pointer), so we need to make sure they have their
     * original values when we access the output operands.
     */
    __asm__ ("pushf\n"
             "pop %%eax\n"
             "mov %%eax, %%ecx\n"
             "xor $0x00200000, %%eax\n"
             "push %%eax\n"
             "popf\n"
             "pushf\n"
             "pop %%eax\n"
             "mov $0x0, %%edx\n"
             "xor %%ecx, %%eax\n"
             "jz 1f\n"

             "mov $0x00000000, %%eax\n"
             "push %%ebx\n"
             "cpuid\n"
             "mov %%ebx, %%eax\n"
             "pop %%ebx\n"
             "mov %%eax, %1\n"
             "mov %%edx, %2\n"
             "mov %%ecx, %3\n"
             "mov $0x00000001, %%eax\n"
             "push %%ebx\n"
             "cpuid\n"
             "pop %%ebx\n"
             "1:\n"
             "mov %%edx, %0\n"
             : "=r" (result),
               "=m" (vendor[0]),
               "=m" (vendor[4]),
               "=m" (vendor[8])
             :
             : "%eax", "%ecx", "%edx"
        );

#elif defined (_MSC_VER)

    _asm {
      pushfd
      pop eax
      mov ecx, eax
      xor eax, 00200000h
      push eax
      popfd
      pushfd
      pop eax
      mov edx, 0
      xor eax, ecx
      jz nocpuid

      mov eax, 0
      push ebx
      cpuid
      mov eax, ebx
      pop ebx
      mov vendor0, eax
      mov vendor1, edx
      mov vendor2, ecx
      mov eax, 1
      push ebx
      cpuid
      pop ebx
    nocpuid:
      mov result, edx
    }
    /* Assemble the vendor string from the three registers saved above */
    memmove (vendor+0, &vendor0, 4);
    memmove (vendor+4, &vendor1, 4);
    memmove (vendor+8, &vendor2, 4);

#else
#   error unsupported compiler
#endif

    features = 0;
    if (result) {
        /* result now contains the standard feature bits */
        if (result & (1 << 15))
            features |= CMOV;
        if (result & (1 << 23))
            features |= MMX;
        if (result & (1 << 25))
            features |= SSE;
        if (result & (1 << 26))
            features |= SSE2;
        /* Only for MMX capable CPUs without SSE whose vendor string is
         * "AuthenticAMD" or "Geode by NSC": query the extended CPUID
         * function 0x80000001 for the MMX extensions bit.
         */
        if ((features & MMX) && !(features & SSE) &&
            (strcmp(vendor, "AuthenticAMD") == 0 ||
             strcmp(vendor, "Geode by NSC") == 0)) {
            /* check for AMD MMX extensions */
            /* CPUID 0x80000000 is issued first; the 0x80000001 query is
             * skipped, leaving edx = 0, when the value it returns in eax
             * compares (signed) greater than or equal to 1.
             */
#ifdef __GNUC__
            __asm__("push %%ebx\n"
                    "mov $0x80000000, %%eax\n"
                    "cpuid\n"
                    "xor %%edx, %%edx\n"
                    "cmp $0x1, %%eax\n"
                    "jge 2f\n"
                    "mov $0x80000001, %%eax\n"
                    "cpuid\n"
                    "2:\n"
                    "pop %%ebx\n"
                    "mov %%edx, %0\n"
                    : "=r" (result)
                    :
                    : "%eax", "%ecx", "%edx"
                );
#elif defined _MSC_VER
            _asm {
              push ebx
              mov eax, 80000000h
              cpuid
              xor edx, edx
              cmp eax, 1
              jge notamd
              mov eax, 80000001h
              cpuid
            notamd:
              pop ebx
              mov result, edx
            }
#endif
            /* Bit 22 of the extended feature word: MMX extensions */
            if (result & (1<<22))
                features |= MMX_Extensions;
        }
    }
#endif /* HAVE_GETISAX */

    return features;
}
2127
2128 pixman_bool_t
2129 pixman_have_mmx (void)
2130 {
2131     static pixman_bool_t initialized = FALSE;
2132     static pixman_bool_t mmx_present;
2133
2134     if (!initialized)
2135     {
2136         unsigned int features = detectCPUFeatures();
2137         mmx_present = (features & (MMX|MMX_Extensions)) == (MMX|MMX_Extensions);
2138         initialized = TRUE;
2139     }
2140
2141     return mmx_present;
2142 }
2143
2144 #ifdef USE_SSE2
2145 pixman_bool_t
2146 pixman_have_sse (void)
2147 {
2148     static pixman_bool_t initialized = FALSE;
2149     static pixman_bool_t sse_present;
2150
2151     if (!initialized)
2152     {
2153         unsigned int features = detectCPUFeatures();
2154         sse_present = (features & (MMX|MMX_Extensions|SSE|SSE2)) == (MMX|MMX_Extensions|SSE|SSE2);
2155         initialized = TRUE;
2156     }
2157
2158     return sse_present;
2159 }
2160 #endif
2161
2162 #endif /* __amd64__ */
2163 #endif