6ed1580324e7af5cb3ddd792662dec2908fb8c6f
[profile/ivi/pixman.git] / pixman / pixman-fast-path.c
1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3  * Copyright © 2000 SuSE, Inc.
4  * Copyright © 2007 Red Hat, Inc.
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of SuSE not be used in advertising or
11  * publicity pertaining to distribution of the software without specific,
12  * written prior permission.  SuSE makes no representations about the
13  * suitability of this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  *
16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  * Author:  Keith Packard, SuSE, Inc.
24  */
25
26 #ifdef HAVE_CONFIG_H
27 #include <config.h>
28 #endif
29 #include <string.h>
30 #include <stdlib.h>
31 #include "pixman-private.h"
32 #include "pixman-combine32.h"
33
34 static force_inline uint32_t
35 fetch_24 (uint8_t *a)
36 {
37     if (((unsigned long)a) & 1)
38     {
39 #ifdef WORDS_BIGENDIAN
40         return (*a << 16) | (*(uint16_t *)(a + 1));
41 #else
42         return *a | (*(uint16_t *)(a + 1) << 8);
43 #endif
44     }
45     else
46     {
47 #ifdef WORDS_BIGENDIAN
48         return (*(uint16_t *)a << 8) | *(a + 2);
49 #else
50         return *(uint16_t *)a | (*(a + 2) << 16);
51 #endif
52     }
53 }
54
55 static force_inline void
56 store_24 (uint8_t *a,
57           uint32_t v)
58 {
59     if (((unsigned long)a) & 1)
60     {
61 #ifdef WORDS_BIGENDIAN
62         *a = (uint8_t) (v >> 16);
63         *(uint16_t *)(a + 1) = (uint16_t) (v);
64 #else
65         *a = (uint8_t) (v);
66         *(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
67 #endif
68     }
69     else
70     {
71 #ifdef WORDS_BIGENDIAN
72         *(uint16_t *)a = (uint16_t)(v >> 8);
73         *(a + 2) = (uint8_t)v;
74 #else
75         *(uint16_t *)a = (uint16_t)v;
76         *(a + 2) = (uint8_t)(v >> 16);
77 #endif
78     }
79 }
80
81 static force_inline uint32_t
82 over (uint32_t src,
83       uint32_t dest)
84 {
85     uint32_t a = ~src >> 24;
86
87     UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
88
89     return dest;
90 }
91
/*
 * Scale the 32-bit pixel x by the 8-bit value y and return the result.
 *
 * UN8x4_MUL_UN8 is defined outside this file; going by its name it
 * multiplies each of the four 8-bit channels of x by the factor.
 */
static uint32_t
in (uint32_t x,
    uint8_t  y)
{
    uint16_t factor = y;

    UN8x4_MUL_UN8 (x, factor);
    return x;
}
102
103 /*
104  * Naming convention:
105  *
106  *  op_src_mask_dest
107  */
108 static void
109 fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
110                                  pixman_op_t              op,
111                                  pixman_image_t *         src_image,
112                                  pixman_image_t *         mask_image,
113                                  pixman_image_t *         dst_image,
114                                  int32_t                  src_x,
115                                  int32_t                  src_y,
116                                  int32_t                  mask_x,
117                                  int32_t                  mask_y,
118                                  int32_t                  dest_x,
119                                  int32_t                  dest_y,
120                                  int32_t                  width,
121                                  int32_t                  height)
122 {
123     uint32_t    *src, *src_line;
124     uint32_t    *dst, *dst_line;
125     uint8_t     *mask, *mask_line;
126     int src_stride, mask_stride, dst_stride;
127     uint8_t m;
128     uint32_t s, d;
129     int32_t w;
130
131     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
132     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
133     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
134
135     while (height--)
136     {
137         src = src_line;
138         src_line += src_stride;
139         dst = dst_line;
140         dst_line += dst_stride;
141         mask = mask_line;
142         mask_line += mask_stride;
143
144         w = width;
145         while (w--)
146         {
147             m = *mask++;
148             if (m)
149             {
150                 s = *src | 0xff000000;
151
152                 if (m == 0xff)
153                 {
154                     *dst = s;
155                 }
156                 else
157                 {
158                     d = in (s, m);
159                     *dst = over (d, *dst);
160                 }
161             }
162             src++;
163             dst++;
164         }
165     }
166 }
167
168 static void
169 fast_composite_in_n_8_8 (pixman_implementation_t *imp,
170                          pixman_op_t              op,
171                          pixman_image_t *         src_image,
172                          pixman_image_t *         mask_image,
173                          pixman_image_t *         dest_image,
174                          int32_t                  src_x,
175                          int32_t                  src_y,
176                          int32_t                  mask_x,
177                          int32_t                  mask_y,
178                          int32_t                  dest_x,
179                          int32_t                  dest_y,
180                          int32_t                  width,
181                          int32_t                  height)
182 {
183     uint32_t src, srca;
184     uint8_t     *dst_line, *dst;
185     uint8_t     *mask_line, *mask, m;
186     int dst_stride, mask_stride;
187     int32_t w;
188     uint16_t t;
189
190     src = _pixman_image_get_solid (src_image, dest_image->bits.format);
191
192     srca = src >> 24;
193
194     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
195     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
196
197     if (srca == 0xff)
198     {
199         while (height--)
200         {
201             dst = dst_line;
202             dst_line += dst_stride;
203             mask = mask_line;
204             mask_line += mask_stride;
205             w = width;
206
207             while (w--)
208             {
209                 m = *mask++;
210
211                 if (m == 0)
212                     *dst = 0;
213                 else if (m != 0xff)
214                     *dst = MUL_UN8 (m, *dst, t);
215
216                 dst++;
217             }
218         }
219     }
220     else
221     {
222         while (height--)
223         {
224             dst = dst_line;
225             dst_line += dst_stride;
226             mask = mask_line;
227             mask_line += mask_stride;
228             w = width;
229
230             while (w--)
231             {
232                 m = *mask++;
233                 m = MUL_UN8 (m, srca, t);
234
235                 if (m == 0)
236                     *dst = 0;
237                 else if (m != 0xff)
238                     *dst = MUL_UN8 (m, *dst, t);
239
240                 dst++;
241             }
242         }
243     }
244 }
245
246 static void
247 fast_composite_in_8_8 (pixman_implementation_t *imp,
248                        pixman_op_t              op,
249                        pixman_image_t *         src_image,
250                        pixman_image_t *         mask_image,
251                        pixman_image_t *         dest_image,
252                        int32_t                  src_x,
253                        int32_t                  src_y,
254                        int32_t                  mask_x,
255                        int32_t                  mask_y,
256                        int32_t                  dest_x,
257                        int32_t                  dest_y,
258                        int32_t                  width,
259                        int32_t                  height)
260 {
261     uint8_t     *dst_line, *dst;
262     uint8_t     *src_line, *src;
263     int dst_stride, src_stride;
264     int32_t w;
265     uint8_t s;
266     uint16_t t;
267
268     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
269     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
270
271     while (height--)
272     {
273         dst = dst_line;
274         dst_line += dst_stride;
275         src = src_line;
276         src_line += src_stride;
277         w = width;
278
279         while (w--)
280         {
281             s = *src++;
282
283             if (s == 0)
284                 *dst = 0;
285             else if (s != 0xff)
286                 *dst = MUL_UN8 (s, *dst, t);
287
288             dst++;
289         }
290     }
291 }
292
293 static void
294 fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
295                               pixman_op_t              op,
296                               pixman_image_t *         src_image,
297                               pixman_image_t *         mask_image,
298                               pixman_image_t *         dst_image,
299                               int32_t                  src_x,
300                               int32_t                  src_y,
301                               int32_t                  mask_x,
302                               int32_t                  mask_y,
303                               int32_t                  dest_x,
304                               int32_t                  dest_y,
305                               int32_t                  width,
306                               int32_t                  height)
307 {
308     uint32_t src, srca;
309     uint32_t    *dst_line, *dst, d;
310     uint8_t     *mask_line, *mask, m;
311     int dst_stride, mask_stride;
312     int32_t w;
313
314     src = _pixman_image_get_solid (src_image, dst_image->bits.format);
315
316     srca = src >> 24;
317     if (src == 0)
318         return;
319
320     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
321     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
322
323     while (height--)
324     {
325         dst = dst_line;
326         dst_line += dst_stride;
327         mask = mask_line;
328         mask_line += mask_stride;
329         w = width;
330
331         while (w--)
332         {
333             m = *mask++;
334             if (m == 0xff)
335             {
336                 if (srca == 0xff)
337                     *dst = src;
338                 else
339                     *dst = over (src, *dst);
340             }
341             else if (m)
342             {
343                 d = in (src, m);
344                 *dst = over (d, *dst);
345             }
346             dst++;
347         }
348     }
349 }
350
351 static void
352 fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
353                                    pixman_op_t              op,
354                                    pixman_image_t *         src_image,
355                                    pixman_image_t *         mask_image,
356                                    pixman_image_t *         dst_image,
357                                    int32_t                  src_x,
358                                    int32_t                  src_y,
359                                    int32_t                  mask_x,
360                                    int32_t                  mask_y,
361                                    int32_t                  dest_x,
362                                    int32_t                  dest_y,
363                                    int32_t                  width,
364                                    int32_t                  height)
365 {
366     uint32_t src, srca, s;
367     uint32_t    *dst_line, *dst, d;
368     uint32_t    *mask_line, *mask, ma;
369     int dst_stride, mask_stride;
370     int32_t w;
371
372     src = _pixman_image_get_solid (src_image, dst_image->bits.format);
373
374     srca = src >> 24;
375     if (src == 0)
376         return;
377
378     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
379     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
380
381     while (height--)
382     {
383         dst = dst_line;
384         dst_line += dst_stride;
385         mask = mask_line;
386         mask_line += mask_stride;
387         w = width;
388
389         while (w--)
390         {
391             ma = *mask++;
392
393             if (ma)
394             {
395                 d = *dst;
396                 s = src;
397
398                 UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
399
400                 *dst = s;
401             }
402
403             dst++;
404         }
405     }
406 }
407
408 static void
409 fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
410                                     pixman_op_t              op,
411                                     pixman_image_t *         src_image,
412                                     pixman_image_t *         mask_image,
413                                     pixman_image_t *         dst_image,
414                                     int32_t                  src_x,
415                                     int32_t                  src_y,
416                                     int32_t                  mask_x,
417                                     int32_t                  mask_y,
418                                     int32_t                  dest_x,
419                                     int32_t                  dest_y,
420                                     int32_t                  width,
421                                     int32_t                  height)
422 {
423     uint32_t src, srca, s;
424     uint32_t    *dst_line, *dst, d;
425     uint32_t    *mask_line, *mask, ma;
426     int dst_stride, mask_stride;
427     int32_t w;
428
429     src = _pixman_image_get_solid (src_image, dst_image->bits.format);
430
431     srca = src >> 24;
432     if (src == 0)
433         return;
434
435     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
436     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
437
438     while (height--)
439     {
440         dst = dst_line;
441         dst_line += dst_stride;
442         mask = mask_line;
443         mask_line += mask_stride;
444         w = width;
445
446         while (w--)
447         {
448             ma = *mask++;
449             if (ma == 0xffffffff)
450             {
451                 if (srca == 0xff)
452                     *dst = src;
453                 else
454                     *dst = over (src, *dst);
455             }
456             else if (ma)
457             {
458                 d = *dst;
459                 s = src;
460
461                 UN8x4_MUL_UN8x4 (s, ma);
462                 UN8x4_MUL_UN8 (ma, srca);
463                 ma = ~ma;
464                 UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
465
466                 *dst = d;
467             }
468
469             dst++;
470         }
471     }
472 }
473
474 static void
475 fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
476                               pixman_op_t              op,
477                               pixman_image_t *         src_image,
478                               pixman_image_t *         mask_image,
479                               pixman_image_t *         dst_image,
480                               int32_t                  src_x,
481                               int32_t                  src_y,
482                               int32_t                  mask_x,
483                               int32_t                  mask_y,
484                               int32_t                  dest_x,
485                               int32_t                  dest_y,
486                               int32_t                  width,
487                               int32_t                  height)
488 {
489     uint32_t src, srca;
490     uint8_t     *dst_line, *dst;
491     uint32_t d;
492     uint8_t     *mask_line, *mask, m;
493     int dst_stride, mask_stride;
494     int32_t w;
495
496     src = _pixman_image_get_solid (src_image, dst_image->bits.format);
497
498     srca = src >> 24;
499     if (src == 0)
500         return;
501
502     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
503     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
504
505     while (height--)
506     {
507         dst = dst_line;
508         dst_line += dst_stride;
509         mask = mask_line;
510         mask_line += mask_stride;
511         w = width;
512
513         while (w--)
514         {
515             m = *mask++;
516             if (m == 0xff)
517             {
518                 if (srca == 0xff)
519                 {
520                     d = src;
521                 }
522                 else
523                 {
524                     d = fetch_24 (dst);
525                     d = over (src, d);
526                 }
527                 store_24 (dst, d);
528             }
529             else if (m)
530             {
531                 d = over (in (src, m), fetch_24 (dst));
532                 store_24 (dst, d);
533             }
534             dst += 3;
535         }
536     }
537 }
538
539 static void
540 fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
541                               pixman_op_t              op,
542                               pixman_image_t *         src_image,
543                               pixman_image_t *         mask_image,
544                               pixman_image_t *         dst_image,
545                               int32_t                  src_x,
546                               int32_t                  src_y,
547                               int32_t                  mask_x,
548                               int32_t                  mask_y,
549                               int32_t                  dest_x,
550                               int32_t                  dest_y,
551                               int32_t                  width,
552                               int32_t                  height)
553 {
554     uint32_t src, srca;
555     uint16_t    *dst_line, *dst;
556     uint32_t d;
557     uint8_t     *mask_line, *mask, m;
558     int dst_stride, mask_stride;
559     int32_t w;
560
561     src = _pixman_image_get_solid (src_image, dst_image->bits.format);
562
563     srca = src >> 24;
564     if (src == 0)
565         return;
566
567     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
568     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
569
570     while (height--)
571     {
572         dst = dst_line;
573         dst_line += dst_stride;
574         mask = mask_line;
575         mask_line += mask_stride;
576         w = width;
577
578         while (w--)
579         {
580             m = *mask++;
581             if (m == 0xff)
582             {
583                 if (srca == 0xff)
584                 {
585                     d = src;
586                 }
587                 else
588                 {
589                     d = *dst;
590                     d = over (src, CONVERT_0565_TO_0888 (d));
591                 }
592                 *dst = CONVERT_8888_TO_0565 (d);
593             }
594             else if (m)
595             {
596                 d = *dst;
597                 d = over (in (src, m), CONVERT_0565_TO_0888 (d));
598                 *dst = CONVERT_8888_TO_0565 (d);
599             }
600             dst++;
601         }
602     }
603 }
604
605 static void
606 fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
607                                     pixman_op_t              op,
608                                     pixman_image_t *         src_image,
609                                     pixman_image_t *         mask_image,
610                                     pixman_image_t *         dst_image,
611                                     int32_t                  src_x,
612                                     int32_t                  src_y,
613                                     int32_t                  mask_x,
614                                     int32_t                  mask_y,
615                                     int32_t                  dest_x,
616                                     int32_t                  dest_y,
617                                     int32_t                  width,
618                                     int32_t                  height)
619 {
620     uint32_t  src, srca, s;
621     uint16_t  src16;
622     uint16_t *dst_line, *dst;
623     uint32_t  d;
624     uint32_t *mask_line, *mask, ma;
625     int dst_stride, mask_stride;
626     int32_t w;
627
628     src = _pixman_image_get_solid (src_image, dst_image->bits.format);
629
630     srca = src >> 24;
631     if (src == 0)
632         return;
633
634     src16 = CONVERT_8888_TO_0565 (src);
635
636     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
637     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
638
639     while (height--)
640     {
641         dst = dst_line;
642         dst_line += dst_stride;
643         mask = mask_line;
644         mask_line += mask_stride;
645         w = width;
646
647         while (w--)
648         {
649             ma = *mask++;
650             if (ma == 0xffffffff)
651             {
652                 if (srca == 0xff)
653                 {
654                     *dst = src16;
655                 }
656                 else
657                 {
658                     d = *dst;
659                     d = over (src, CONVERT_0565_TO_0888 (d));
660                     *dst = CONVERT_8888_TO_0565 (d);
661                 }
662             }
663             else if (ma)
664             {
665                 d = *dst;
666                 d = CONVERT_0565_TO_0888 (d);
667
668                 s = src;
669
670                 UN8x4_MUL_UN8x4 (s, ma);
671                 UN8x4_MUL_UN8 (ma, srca);
672                 ma = ~ma;
673                 UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
674
675                 *dst = CONVERT_8888_TO_0565 (d);
676             }
677             dst++;
678         }
679     }
680 }
681
682 static void
683 fast_composite_over_8888_8888 (pixman_implementation_t *imp,
684                                pixman_op_t              op,
685                                pixman_image_t *         src_image,
686                                pixman_image_t *         mask_image,
687                                pixman_image_t *         dst_image,
688                                int32_t                  src_x,
689                                int32_t                  src_y,
690                                int32_t                  mask_x,
691                                int32_t                  mask_y,
692                                int32_t                  dest_x,
693                                int32_t                  dest_y,
694                                int32_t                  width,
695                                int32_t                  height)
696 {
697     uint32_t    *dst_line, *dst;
698     uint32_t    *src_line, *src, s;
699     int dst_stride, src_stride;
700     uint8_t a;
701     int32_t w;
702
703     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
704     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
705
706     while (height--)
707     {
708         dst = dst_line;
709         dst_line += dst_stride;
710         src = src_line;
711         src_line += src_stride;
712         w = width;
713
714         while (w--)
715         {
716             s = *src++;
717             a = s >> 24;
718             if (a == 0xff)
719                 *dst = s;
720             else if (s)
721                 *dst = over (s, *dst);
722             dst++;
723         }
724     }
725 }
726
727 static void
728 fast_composite_src_x888_8888 (pixman_implementation_t *imp,
729                               pixman_op_t              op,
730                               pixman_image_t *         src_image,
731                               pixman_image_t *         mask_image,
732                               pixman_image_t *         dst_image,
733                               int32_t                  src_x,
734                               int32_t                  src_y,
735                               int32_t                  mask_x,
736                               int32_t                  mask_y,
737                               int32_t                  dest_x,
738                               int32_t                  dest_y,
739                               int32_t                  width,
740                               int32_t                  height)
741 {
742     uint32_t    *dst_line, *dst;
743     uint32_t    *src_line, *src;
744     int dst_stride, src_stride;
745     int32_t w;
746
747     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
748     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
749
750     while (height--)
751     {
752         dst = dst_line;
753         dst_line += dst_stride;
754         src = src_line;
755         src_line += src_stride;
756         w = width;
757
758         while (w--)
759             *dst++ = (*src++) | 0xff000000;
760     }
761 }
762
#if 0
/*
 * Compiled out by the surrounding #if 0.
 *
 * OVER with a 32-bit source and a packed 24-bit destination, no mask.
 * Source pixels whose top byte is 0 are skipped; a top byte of 0xff stores
 * the source directly, anything else is blended with over() against the
 * pixel read by fetch_24().
 */
static void
fast_composite_over_8888_0888 (pixman_implementation_t *imp,
                               pixman_op_t              op,
                               pixman_image_t *         src_image,
                               pixman_image_t *         mask_image,
                               pixman_image_t *         dst_image,
                               int32_t                  src_x,
                               int32_t                  src_y,
                               int32_t                  mask_x,
                               int32_t                  mask_y,
                               int32_t                  dest_x,
                               int32_t                  dest_y,
                               int32_t                  width,
                               int32_t                  height)
{
    uint8_t  *dst_row, *dst_px;
    uint32_t *src_row, *src_px;
    uint32_t  pixel, result;
    uint8_t   alpha;
    int       dst_stride, src_stride;
    int32_t   rows, cols;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_row, 1);
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_row, 3);

    for (rows = height; rows-- != 0; )
    {
        dst_px = dst_row;
        src_px = src_row;

        /* The destination advances three bytes per pixel. */
        for (cols = width; cols-- != 0; dst_px += 3)
        {
            pixel = *src_px++;
            alpha = pixel >> 24;

            if (alpha == 0)
                continue;

            if (alpha == 0xff)
                result = pixel;
            else
                result = over (pixel, fetch_24 (dst_px));

            store_24 (dst_px, result);
        }

        dst_row += dst_stride;
        src_row += src_stride;
    }
}
#endif
815
816 static void
817 fast_composite_over_8888_0565 (pixman_implementation_t *imp,
818                                pixman_op_t              op,
819                                pixman_image_t *         src_image,
820                                pixman_image_t *         mask_image,
821                                pixman_image_t *         dst_image,
822                                int32_t                  src_x,
823                                int32_t                  src_y,
824                                int32_t                  mask_x,
825                                int32_t                  mask_y,
826                                int32_t                  dest_x,
827                                int32_t                  dest_y,
828                                int32_t                  width,
829                                int32_t                  height)
830 {
831     uint16_t    *dst_line, *dst;
832     uint32_t d;
833     uint32_t    *src_line, *src, s;
834     uint8_t a;
835     int dst_stride, src_stride;
836     int32_t w;
837
838     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
839     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
840
841     while (height--)
842     {
843         dst = dst_line;
844         dst_line += dst_stride;
845         src = src_line;
846         src_line += src_stride;
847         w = width;
848
849         while (w--)
850         {
851             s = *src++;
852             a = s >> 24;
853             if (s)
854             {
855                 if (a == 0xff)
856                 {
857                     d = s;
858                 }
859                 else
860                 {
861                     d = *dst;
862                     d = over (s, CONVERT_0565_TO_0888 (d));
863                 }
864                 *dst = CONVERT_8888_TO_0565 (d);
865             }
866             dst++;
867         }
868     }
869 }
870
871 static void
872 fast_composite_src_x888_0565 (pixman_implementation_t *imp,
873                               pixman_op_t              op,
874                               pixman_image_t *         src_image,
875                               pixman_image_t *         mask_image,
876                               pixman_image_t *         dst_image,
877                               int32_t                  src_x,
878                               int32_t                  src_y,
879                               int32_t                  mask_x,
880                               int32_t                  mask_y,
881                               int32_t                  dest_x,
882                               int32_t                  dest_y,
883                               int32_t                  width,
884                               int32_t                  height)
885 {
886     uint16_t    *dst_line, *dst;
887     uint32_t    *src_line, *src, s;
888     int dst_stride, src_stride;
889     int32_t w;
890
891     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
892     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
893
894     while (height--)
895     {
896         dst = dst_line;
897         dst_line += dst_stride;
898         src = src_line;
899         src_line += src_stride;
900         w = width;
901
902         while (w--)
903         {
904             s = *src++;
905             *dst = CONVERT_8888_TO_0565 (s);
906             dst++;
907         }
908     }
909 }
910
911 static void
912 fast_composite_add_8000_8000 (pixman_implementation_t *imp,
913                               pixman_op_t              op,
914                               pixman_image_t *         src_image,
915                               pixman_image_t *         mask_image,
916                               pixman_image_t *         dst_image,
917                               int32_t                  src_x,
918                               int32_t                  src_y,
919                               int32_t                  mask_x,
920                               int32_t                  mask_y,
921                               int32_t                  dest_x,
922                               int32_t                  dest_y,
923                               int32_t                  width,
924                               int32_t                  height)
925 {
926     uint8_t     *dst_line, *dst;
927     uint8_t     *src_line, *src;
928     int dst_stride, src_stride;
929     int32_t w;
930     uint8_t s, d;
931     uint16_t t;
932
933     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
934     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
935
936     while (height--)
937     {
938         dst = dst_line;
939         dst_line += dst_stride;
940         src = src_line;
941         src_line += src_stride;
942         w = width;
943
944         while (w--)
945         {
946             s = *src++;
947             if (s)
948             {
949                 if (s != 0xff)
950                 {
951                     d = *dst;
952                     t = d + s;
953                     s = t | (0 - (t >> 8));
954                 }
955                 *dst = s;
956             }
957             dst++;
958         }
959     }
960 }
961
962 static void
963 fast_composite_add_8888_8888 (pixman_implementation_t *imp,
964                               pixman_op_t              op,
965                               pixman_image_t *         src_image,
966                               pixman_image_t *         mask_image,
967                               pixman_image_t *         dst_image,
968                               int32_t                  src_x,
969                               int32_t                  src_y,
970                               int32_t                  mask_x,
971                               int32_t                  mask_y,
972                               int32_t                  dest_x,
973                               int32_t                  dest_y,
974                               int32_t                  width,
975                               int32_t                  height)
976 {
977     uint32_t    *dst_line, *dst;
978     uint32_t    *src_line, *src;
979     int dst_stride, src_stride;
980     int32_t w;
981     uint32_t s, d;
982
983     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
984     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
985
986     while (height--)
987     {
988         dst = dst_line;
989         dst_line += dst_stride;
990         src = src_line;
991         src_line += src_stride;
992         w = width;
993
994         while (w--)
995         {
996             s = *src++;
997             if (s)
998             {
999                 if (s != 0xffffffff)
1000                 {
1001                     d = *dst;
1002                     if (d)
1003                         UN8x4_ADD_UN8x4 (s, d);
1004                 }
1005                 *dst = s;
1006             }
1007             dst++;
1008         }
1009     }
1010 }
1011
1012 static void
1013 fast_composite_add_n_8_8 (pixman_implementation_t *imp,
1014                           pixman_op_t              op,
1015                           pixman_image_t *         src_image,
1016                           pixman_image_t *         mask_image,
1017                           pixman_image_t *         dst_image,
1018                           int32_t                  src_x,
1019                           int32_t                  src_y,
1020                           int32_t                  mask_x,
1021                           int32_t                  mask_y,
1022                           int32_t                  dest_x,
1023                           int32_t                  dest_y,
1024                           int32_t                  width,
1025                           int32_t                  height)
1026 {
1027     uint8_t     *dst_line, *dst;
1028     uint8_t     *mask_line, *mask;
1029     int dst_stride, mask_stride;
1030     int32_t w;
1031     uint32_t src;
1032     uint8_t sa;
1033
1034     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
1035     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
1036     src = _pixman_image_get_solid (src_image, dst_image->bits.format);
1037     sa = (src >> 24);
1038
1039     while (height--)
1040     {
1041         dst = dst_line;
1042         dst_line += dst_stride;
1043         mask = mask_line;
1044         mask_line += mask_stride;
1045         w = width;
1046
1047         while (w--)
1048         {
1049             uint16_t tmp;
1050             uint16_t a;
1051             uint32_t m, d;
1052             uint32_t r;
1053
1054             a = *mask++;
1055             d = *dst;
1056
1057             m = MUL_UN8 (sa, a, tmp);
1058             r = ADD_UN8 (m, d, tmp);
1059
1060             *dst++ = r;
1061         }
1062     }
1063 }
1064
/*
 * Helpers for addressing single bits in arrays of 32-bit words.
 *
 * The bit that represents position 0 within a word depends on
 * WORDS_BIGENDIAN: the most significant bit when it is defined, the least
 * significant bit otherwise.
 *
 * CREATE_BITMASK(n)  - mask selecting bit position n (0..31) of a word.
 * UPDATE_BITMASK(m)  - mask for the position following the one selected by
 *                      m; yields 0 in a 32-bit unsigned variable once the
 *                      last position of a word has been passed.
 *
 * The masks are unsigned so that position 31 does not shift a 1 into the
 * sign bit of an int, which would be undefined behaviour.
 */
#ifdef WORDS_BIGENDIAN
#define CREATE_BITMASK(n) (0x80000000U >> (n))
#define UPDATE_BITMASK(n) ((n) >> 1)
#else
#define CREATE_BITMASK(n) (1U << (n))
#define UPDATE_BITMASK(n) ((n) << 1)
#endif

/*
 * TEST_BIT(p, n) - non-zero if bit n is set in the word array p.
 * SET_BIT(p, n)  - set bit n in the word array p.
 *
 * n is evaluated more than once, so it must not have side effects.
 * SET_BIT deliberately has no trailing semicolon: the caller supplies it,
 * which keeps the macro usable as the body of an if/else.
 */
#define TEST_BIT(p, n)                                  \
    (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
#define SET_BIT(p, n)                                                   \
    do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0)
1077
1078 static void
1079 fast_composite_add_1000_1000 (pixman_implementation_t *imp,
1080                               pixman_op_t              op,
1081                               pixman_image_t *         src_image,
1082                               pixman_image_t *         mask_image,
1083                               pixman_image_t *         dst_image,
1084                               int32_t                  src_x,
1085                               int32_t                  src_y,
1086                               int32_t                  mask_x,
1087                               int32_t                  mask_y,
1088                               int32_t                  dest_x,
1089                               int32_t                  dest_y,
1090                               int32_t                  width,
1091                               int32_t                  height)
1092 {
1093     uint32_t     *dst_line, *dst;
1094     uint32_t     *src_line, *src;
1095     int           dst_stride, src_stride;
1096     int32_t       w;
1097
1098     PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
1099                            src_stride, src_line, 1);
1100     PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t,
1101                            dst_stride, dst_line, 1);
1102
1103     while (height--)
1104     {
1105         dst = dst_line;
1106         dst_line += dst_stride;
1107         src = src_line;
1108         src_line += src_stride;
1109         w = width;
1110
1111         while (w--)
1112         {
1113             /*
1114              * TODO: improve performance by processing uint32_t data instead
1115              *       of individual bits
1116              */
1117             if (TEST_BIT (src, src_x + w))
1118                 SET_BIT (dst, dest_x + w);
1119         }
1120     }
1121 }
1122
1123 static void
1124 fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
1125                               pixman_op_t              op,
1126                               pixman_image_t *         src_image,
1127                               pixman_image_t *         mask_image,
1128                               pixman_image_t *         dst_image,
1129                               int32_t                  src_x,
1130                               int32_t                  src_y,
1131                               int32_t                  mask_x,
1132                               int32_t                  mask_y,
1133                               int32_t                  dest_x,
1134                               int32_t                  dest_y,
1135                               int32_t                  width,
1136                               int32_t                  height)
1137 {
1138     uint32_t     src, srca;
1139     uint32_t    *dst, *dst_line;
1140     uint32_t    *mask, *mask_line;
1141     int          mask_stride, dst_stride;
1142     uint32_t     bitcache, bitmask;
1143     int32_t      w;
1144
1145     if (width <= 0)
1146         return;
1147
1148     src = _pixman_image_get_solid (src_image, dst_image->bits.format);
1149     srca = src >> 24;
1150     if (src == 0)
1151         return;
1152
1153     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t,
1154                            dst_stride, dst_line, 1);
1155     PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
1156                            mask_stride, mask_line, 1);
1157     mask_line += mask_x >> 5;
1158
1159     if (srca == 0xff)
1160     {
1161         while (height--)
1162         {
1163             dst = dst_line;
1164             dst_line += dst_stride;
1165             mask = mask_line;
1166             mask_line += mask_stride;
1167             w = width;
1168
1169             bitcache = *mask++;
1170             bitmask = CREATE_BITMASK (mask_x & 31);
1171
1172             while (w--)
1173             {
1174                 if (bitmask == 0)
1175                 {
1176                     bitcache = *mask++;
1177                     bitmask = CREATE_BITMASK (0);
1178                 }
1179                 if (bitcache & bitmask)
1180                     *dst = src;
1181                 bitmask = UPDATE_BITMASK (bitmask);
1182                 dst++;
1183             }
1184         }
1185     }
1186     else
1187     {
1188         while (height--)
1189         {
1190             dst = dst_line;
1191             dst_line += dst_stride;
1192             mask = mask_line;
1193             mask_line += mask_stride;
1194             w = width;
1195
1196             bitcache = *mask++;
1197             bitmask = CREATE_BITMASK (mask_x & 31);
1198
1199             while (w--)
1200             {
1201                 if (bitmask == 0)
1202                 {
1203                     bitcache = *mask++;
1204                     bitmask = CREATE_BITMASK (0);
1205                 }
1206                 if (bitcache & bitmask)
1207                     *dst = over (src, *dst);
1208                 bitmask = UPDATE_BITMASK (bitmask);
1209                 dst++;
1210             }
1211         }
1212     }
1213 }
1214
1215 static void
1216 fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
1217                               pixman_op_t              op,
1218                               pixman_image_t *         src_image,
1219                               pixman_image_t *         mask_image,
1220                               pixman_image_t *         dst_image,
1221                               int32_t                  src_x,
1222                               int32_t                  src_y,
1223                               int32_t                  mask_x,
1224                               int32_t                  mask_y,
1225                               int32_t                  dest_x,
1226                               int32_t                  dest_y,
1227                               int32_t                  width,
1228                               int32_t                  height)
1229 {
1230     uint32_t     src, srca;
1231     uint16_t    *dst, *dst_line;
1232     uint32_t    *mask, *mask_line;
1233     int          mask_stride, dst_stride;
1234     uint32_t     bitcache, bitmask;
1235     int32_t      w;
1236     uint32_t     d;
1237     uint16_t     src565;
1238
1239     if (width <= 0)
1240         return;
1241
1242     src = _pixman_image_get_solid (src_image, dst_image->bits.format);
1243     srca = src >> 24;
1244     if (src == 0)
1245         return;
1246
1247     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
1248                            dst_stride, dst_line, 1);
1249     PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
1250                            mask_stride, mask_line, 1);
1251     mask_line += mask_x >> 5;
1252
1253     if (srca == 0xff)
1254     {
1255         src565 = CONVERT_8888_TO_0565 (src);
1256         while (height--)
1257         {
1258             dst = dst_line;
1259             dst_line += dst_stride;
1260             mask = mask_line;
1261             mask_line += mask_stride;
1262             w = width;
1263
1264             bitcache = *mask++;
1265             bitmask = CREATE_BITMASK (mask_x & 31);
1266
1267             while (w--)
1268             {
1269                 if (bitmask == 0)
1270                 {
1271                     bitcache = *mask++;
1272                     bitmask = CREATE_BITMASK (0);
1273                 }
1274                 if (bitcache & bitmask)
1275                     *dst = src565;
1276                 bitmask = UPDATE_BITMASK (bitmask);
1277                 dst++;
1278             }
1279         }
1280     }
1281     else
1282     {
1283         while (height--)
1284         {
1285             dst = dst_line;
1286             dst_line += dst_stride;
1287             mask = mask_line;
1288             mask_line += mask_stride;
1289             w = width;
1290
1291             bitcache = *mask++;
1292             bitmask = CREATE_BITMASK (mask_x & 31);
1293
1294             while (w--)
1295             {
1296                 if (bitmask == 0)
1297                 {
1298                     bitcache = *mask++;
1299                     bitmask = CREATE_BITMASK (0);
1300                 }
1301                 if (bitcache & bitmask)
1302                 {
1303                     d = over (src, CONVERT_0565_TO_0888 (*dst));
1304                     *dst = CONVERT_8888_TO_0565 (d);
1305                 }
1306                 bitmask = UPDATE_BITMASK (bitmask);
1307                 dst++;
1308             }
1309         }
1310     }
1311 }
1312
1313 /*
1314  * Simple bitblt
1315  */
1316
1317 static void
1318 fast_composite_solid_fill (pixman_implementation_t *imp,
1319                            pixman_op_t              op,
1320                            pixman_image_t *         src_image,
1321                            pixman_image_t *         mask_image,
1322                            pixman_image_t *         dst_image,
1323                            int32_t                  src_x,
1324                            int32_t                  src_y,
1325                            int32_t                  mask_x,
1326                            int32_t                  mask_y,
1327                            int32_t                  dest_x,
1328                            int32_t                  dest_y,
1329                            int32_t                  width,
1330                            int32_t                  height)
1331 {
1332     uint32_t src;
1333
1334     src = _pixman_image_get_solid (src_image, dst_image->bits.format);
1335
1336     if (dst_image->bits.format == PIXMAN_a8)
1337     {
1338         src = src >> 24;
1339     }
1340     else if (dst_image->bits.format == PIXMAN_r5g6b5 ||
1341              dst_image->bits.format == PIXMAN_b5g6r5)
1342     {
1343         src = CONVERT_8888_TO_0565 (src);
1344     }
1345
1346     pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
1347                  PIXMAN_FORMAT_BPP (dst_image->bits.format),
1348                  dest_x, dest_y,
1349                  width, height,
1350                  src);
1351 }
1352
1353 static void
1354 fast_composite_src_memcpy (pixman_implementation_t *imp,
1355                            pixman_op_t              op,
1356                            pixman_image_t *         src_image,
1357                            pixman_image_t *         mask_image,
1358                            pixman_image_t *         dst_image,
1359                            int32_t                  src_x,
1360                            int32_t                  src_y,
1361                            int32_t                  mask_x,
1362                            int32_t                  mask_y,
1363                            int32_t                  dest_x,
1364                            int32_t                  dest_y,
1365                            int32_t                  width,
1366                            int32_t                  height)
1367 {
1368     int bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format) / 8;
1369     uint32_t n_bytes = width * bpp;
1370     int dst_stride, src_stride;
1371     uint8_t    *dst;
1372     uint8_t    *src;
1373
1374     src_stride = src_image->bits.rowstride * 4;
1375     dst_stride = dst_image->bits.rowstride * 4;
1376
1377     src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
1378     dst = (uint8_t *)dst_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
1379
1380     while (height--)
1381     {
1382         memcpy (dst, src, n_bytes);
1383
1384         dst += dst_stride;
1385         src += src_stride;
1386     }
1387 }
1388
1389 static force_inline pixman_bool_t
1390 repeat (pixman_repeat_t repeat, int *c, int size)
1391 {
1392     if (repeat == PIXMAN_REPEAT_NONE)
1393     {
1394         if (*c < 0 || *c >= size)
1395             return FALSE;
1396     }
1397     else if (repeat == PIXMAN_REPEAT_NORMAL)
1398     {
1399         while (*c >= size)
1400             *c -= size;
1401         while (*c < 0)
1402             *c += size;
1403     }
1404     else if (repeat == PIXMAN_REPEAT_PAD)
1405     {
1406         *c = CLIP (*c, 0, size - 1);
1407     }
1408     else /* REFLECT */
1409     {
1410         *c = MOD (*c, size * 2);
1411         if (*c >= size)
1412             *c = size * 2 - *c - 1;
1413     }
1414     return TRUE;
1415 }
1416
/* A macroified version of specialized nearest scalers for some
 * common 8888 and 565 formats. It supports SRC and OVER ops.
 *
 * There are two repeat versions: one that handles repeat normal,
 * and one without repeat handling that only works if the src region
 * used is completely covered by the pre-repeated source samples.
 *
 * The loops are unrolled to process two pixels per iteration for better
 * performance on most CPU architectures (superscalar processors
 * can issue several operations simultaneously, other processors can hide
 * instruction latencies by pipelining operations). Unrolling more
 * does not make much sense because the compiler will start running out
 * of spare registers soon.
 */
1431
/* Alpha accessors, one per source format name: the top byte of a 32-bit
 * 8888 pixel, and a constant 0xff for 565 pixels.
 */
#define GET_8888_ALPHA(pixel) ((pixel) >> 24)
/* Not actually used, since there is no OVER with a 565 source, but the
 * definition is needed for the code to build.
 */
#define GET_0565_ALPHA(pixel) (0xff)
1436
1437 #define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,                                   \
1438                      src_type_t, dst_type_t, OP, repeat_mode)                                   \
1439 static void                                                                                     \
1440 fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementation_t *imp,     \
1441                                                               pixman_op_t              op,      \
1442                                                               pixman_image_t *         src_image, \
1443                                                               pixman_image_t *         mask_image, \
1444                                                               pixman_image_t *         dst_image, \
1445                                                               int32_t                  src_x,   \
1446                                                               int32_t                  src_y,   \
1447                                                               int32_t                  mask_x,  \
1448                                                               int32_t                  mask_y,  \
1449                                                               int32_t                  dst_x,   \
1450                                                               int32_t                  dst_y,   \
1451                                                               int32_t                  width,   \
1452                                                               int32_t                  height)  \
1453 {                                                                                               \
1454     dst_type_t *dst_line;                                                                       \
1455     src_type_t *src_first_line;                                                                 \
1456     uint32_t   d;                                                                               \
1457     src_type_t s1, s2;                                                                          \
1458     uint8_t   a1, a2;                                                                           \
1459     int       w;                                                                                \
1460     int       x1, x2, y;                                                                        \
1461     pixman_fixed_t orig_vx;                                                                     \
1462     pixman_fixed_t max_vx, max_vy;                                                              \
1463     pixman_vector_t v;                                                                          \
1464     pixman_fixed_t vx, vy;                                                                      \
1465     pixman_fixed_t unit_x, unit_y;                                                              \
1466                                                                                                 \
1467     src_type_t *src;                                                                            \
1468     dst_type_t *dst;                                                                            \
1469     int       src_stride, dst_stride;                                                           \
1470                                                                                                 \
1471     if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER)                \
1472         abort();                                                                                \
1473                                                                                                 \
1474     if (PIXMAN_REPEAT_ ## repeat_mode != PIXMAN_REPEAT_NORMAL           &&                      \
1475         PIXMAN_REPEAT_ ## repeat_mode != PIXMAN_REPEAT_NONE)                                    \
1476     {                                                                                           \
1477         abort();                                                                                \
1478     }                                                                                           \
1479                                                                                                 \
1480     PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1);       \
1481     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be                  \
1482      * transformed from destination space to source space */                                    \
1483     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);         \
1484                                                                                                 \
1485     /* reference point is the center of the pixel */                                            \
1486     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;                             \
1487     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;                             \
1488     v.vector[2] = pixman_fixed_1;                                                               \
1489                                                                                                 \
1490     if (!pixman_transform_point_3d (src_image->common.transform, &v))                           \
1491         return;                                                                                 \
1492                                                                                                 \
1493     unit_x = src_image->common.transform->matrix[0][0];                                         \
1494     unit_y = src_image->common.transform->matrix[1][1];                                         \
1495                                                                                                 \
1496     /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */                   \
1497     v.vector[0] -= pixman_fixed_e;                                                              \
1498     v.vector[1] -= pixman_fixed_e;                                                              \
1499                                                                                                 \
1500     vx = v.vector[0];                                                                           \
1501     vy = v.vector[1];                                                                           \
1502                                                                                                 \
1503     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                                  \
1504     {                                                                                           \
1505         /* Clamp repeating positions inside the actual samples */                               \
1506         max_vx = src_image->bits.width << 16;                                                   \
1507         max_vy = src_image->bits.height << 16;                                                  \
1508                                                                                                 \
1509         repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx);                                             \
1510         repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                             \
1511     }                                                                                           \
1512                                                                                                 \
1513     orig_vx = vx;                                                                               \
1514                                                                                                 \
1515     while (--height >= 0)                                                                       \
1516     {                                                                                           \
1517         dst = dst_line;                                                                         \
1518         dst_line += dst_stride;                                                                 \
1519                                                                                                 \
1520         y = vy >> 16;                                                                           \
1521         vy += unit_y;                                                                           \
1522         if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                              \
1523             repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);                                         \
1524                                                                                                 \
1525         src = src_first_line + src_stride * y;                                                  \
1526                                                                                                 \
1527         w = width;                                                                              \
1528         vx = orig_vx;                                                                           \
1529         while ((w -= 2) >= 0)                                                                   \
1530         {                                                                                       \
1531             x1 = vx >> 16;                                                                      \
1532             vx += unit_x;                                                                       \
1533             if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                          \
1534             {                                                                                   \
1535                 /* This works because we know that unit_x is positive */                        \
1536                 while (vx >= max_vx)                                                            \
1537                     vx -= max_vx;                                                               \
1538             }                                                                                   \
1539             s1 = src[x1];                                                                       \
1540                                                                                                 \
1541             x2 = vx >> 16;                                                                      \
1542             vx += unit_x;                                                                       \
1543             if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                          \
1544             {                                                                                   \
1545                 /* This works because we know that unit_x is positive */                        \
1546                 while (vx >= max_vx)                                                            \
1547                     vx -= max_vx;                                                               \
1548             }                                                                                   \
1549             s2 = src[x2];                                                                       \
1550                                                                                                 \
1551             if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)                                             \
1552             {                                                                                   \
1553                 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);                                          \
1554                 a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2);                                          \
1555                                                                                                 \
1556                 if (a1 == 0xff)                                                                 \
1557                 {                                                                               \
1558                     *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);                   \
1559                 }                                                                               \
1560                 else if (s1)                                                                    \
1561                 {                                                                               \
1562                     d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst);                              \
1563                     s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1);                               \
1564                     a1 ^= 0xff;                                                                 \
1565                     UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);                                        \
1566                     *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);                                  \
1567                 }                                                                               \
1568                 dst++;                                                                          \
1569                                                                                                 \
1570                 if (a2 == 0xff)                                                                 \
1571                 {                                                                               \
1572                     *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2);                   \
1573                 }                                                                               \
1574                 else if (s2)                                                                    \
1575                 {                                                                               \
1576                     d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst);                               \
1577                     s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2);                                \
1578                     a2 ^= 0xff;                                                                 \
1579                     UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2);                                        \
1580                     *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);                                  \
1581                 }                                                                               \
1582                 dst++;                                                                          \
1583             }                                                                                   \
1584             else /* PIXMAN_OP_SRC */                                                            \
1585             {                                                                                   \
1586                 *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);                     \
1587                 *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2);                     \
1588             }                                                                                   \
1589         }                                                                                       \
1590                                                                                                 \
1591         if (w & 1)                                                                              \
1592         {                                                                                       \
1593             x1 = vx >> 16;                                                                      \
1594             vx += unit_x;                                                                       \
1595             if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)                          \
1596             {                                                                                   \
1597                 /* This works because we know that unit_x is positive */                        \
1598                 while (vx >= max_vx)                                                            \
1599                     vx -= max_vx;                                                               \
1600             }                                                                                   \
1601             s1 = src[x1];                                                                       \
1602                                                                                                 \
1603             if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)                                             \
1604             {                                                                                   \
1605                 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);                                          \
1606                                                                                                 \
1607                 if (a1 == 0xff)                                                                 \
1608                 {                                                                               \
1609                     *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);                   \
1610                 }                                                                               \
1611                 else if (s1)                                                                    \
1612                 {                                                                               \
1613                     d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst);                               \
1614                     s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1);                               \
1615                     a1 ^= 0xff;                                                                 \
1616                     UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);                                        \
1617                     *dst = CONVERT_8888_TO_ ## DST_FORMAT (d);                                  \
1618                 }                                                                               \
1619                 dst++;                                                                          \
1620             }                                                                                   \
1621             else /* PIXMAN_OP_SRC */                                                            \
1622             {                                                                                   \
1623                 *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1);                     \
1624             }                                                                                   \
1625         }                                                                                       \
1626     }                                                                                           \
1627 }
1628
1629 FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE);
1630 FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL);
1631 FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE);
1632 FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL);
1633 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE);
1634 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL);
1635 FAST_NEAREST (565_565_none, 0565, 0565, uint16_t, uint16_t, SRC, NONE);
1636 FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL);
1637 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE);
1638 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL);
1639
1640 static force_inline uint32_t
1641 fetch_nearest (pixman_repeat_t src_repeat,
1642                pixman_format_code_t format,
1643                uint32_t *src, int x, int src_width)
1644 {
1645     if (repeat (src_repeat, &x, src_width))
1646     {
1647         if (format == PIXMAN_x8r8g8b8)
1648             return *(src + x) | 0xff000000;
1649         else
1650             return *(src + x);
1651     }
1652     else
1653     {
1654         return 0;
1655     }
1656 }
1657
1658 static force_inline void
1659 combine_over (uint32_t s, uint32_t *dst)
1660 {
1661     if (s)
1662     {
1663         uint8_t ia = 0xff - (s >> 24);
1664
1665         if (ia)
1666             UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
1667         else
1668             *dst = s;
1669     }
1670 }
1671
1672 static force_inline void
1673 combine_src (uint32_t s, uint32_t *dst)
1674 {
1675     *dst = s;
1676 }
1677
1678 static void
1679 fast_composite_scaled_nearest (pixman_implementation_t *imp,
1680                                pixman_op_t              op,
1681                                pixman_image_t *         src_image,
1682                                pixman_image_t *         mask_image,
1683                                pixman_image_t *         dst_image,
1684                                int32_t                  src_x,
1685                                int32_t                  src_y,
1686                                int32_t                  mask_x,
1687                                int32_t                  mask_y,
1688                                int32_t                  dest_x,
1689                                int32_t                  dest_y,
1690                                int32_t                  width,
1691                                int32_t                  height)
1692 {
1693     uint32_t       *dst_line;
1694     uint32_t       *src_line;
1695     int             dst_stride, src_stride;
1696     int             src_width, src_height;
1697     pixman_repeat_t src_repeat;
1698     pixman_fixed_t unit_x, unit_y;
1699     pixman_format_code_t src_format;
1700     pixman_vector_t v;
1701     pixman_fixed_t vy;
1702
1703     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
1704     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
1705      * transformed from destination space to source space
1706      */
1707     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
1708
1709     /* reference point is the center of the pixel */
1710     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
1711     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
1712     v.vector[2] = pixman_fixed_1;
1713
1714     if (!pixman_transform_point_3d (src_image->common.transform, &v))
1715         return;
1716
1717     unit_x = src_image->common.transform->matrix[0][0];
1718     unit_y = src_image->common.transform->matrix[1][1];
1719
1720     /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
1721     v.vector[0] -= pixman_fixed_e;
1722     v.vector[1] -= pixman_fixed_e;
1723
1724     src_height = src_image->bits.height;
1725     src_width = src_image->bits.width;
1726     src_repeat = src_image->common.repeat;
1727     src_format = src_image->bits.format;
1728
1729     vy = v.vector[1];
1730     while (height--)
1731     {
1732         pixman_fixed_t vx = v.vector[0];
1733         int y = pixman_fixed_to_int (vy);
1734         uint32_t *dst = dst_line;
1735
1736         dst_line += dst_stride;
1737
1738         /* adjust the y location by a unit vector in the y direction
1739          * this is equivalent to transforming y+1 of the destination point to source space */
1740         vy += unit_y;
1741
1742         if (!repeat (src_repeat, &y, src_height))
1743         {
1744             if (op == PIXMAN_OP_SRC)
1745                 memset (dst, 0, sizeof (*dst) * width);
1746         }
1747         else
1748         {
1749             int w = width;
1750
1751             uint32_t *src = src_line + y * src_stride;
1752
1753             while (w >= 2)
1754             {
1755                 uint32_t s1, s2;
1756                 int x1, x2;
1757
1758                 x1 = pixman_fixed_to_int (vx);
1759                 vx += unit_x;
1760
1761                 x2 = pixman_fixed_to_int (vx);
1762                 vx += unit_x;
1763
1764                 w -= 2;
1765
1766                 s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
1767                 s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
1768
1769                 if (op == PIXMAN_OP_OVER)
1770                 {
1771                     combine_over (s1, dst++);
1772                     combine_over (s2, dst++);
1773                 }
1774                 else
1775                 {
1776                     combine_src (s1, dst++);
1777                     combine_src (s2, dst++);
1778                 }
1779             }
1780
1781             while (w--)
1782             {
1783                 uint32_t s;
1784                 int x;
1785
1786                 x = pixman_fixed_to_int (vx);
1787                 vx += unit_x;
1788
1789                 s = fetch_nearest (src_repeat, src_format, src, x, src_width);
1790
1791                 if (op == PIXMAN_OP_OVER)
1792                     combine_over (s, dst++);
1793                 else
1794                     combine_src (s, dst++);
1795             }
1796         }
1797     }
1798 }
1799
1800 static const pixman_fast_path_t c_fast_paths[] =
1801 {
1802     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
1803     PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
1804     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
1805     PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
1806     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
1807     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
1808     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
1809     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
1810     PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
1811     PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
1812     PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
1813     PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
1814     PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5,   fast_composite_over_n_1_0565),
1815     PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5,   fast_composite_over_n_1_0565),
1816     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
1817     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
1818     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
1819     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
1820     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
1821     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
1822     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
1823     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
1824     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
1825     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
1826     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
1827     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
1828     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
1829     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
1830     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
1831     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
1832     PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
1833     PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
1834     PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8000_8000),
1835     PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000),
1836     PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
1837     PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
1838     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
1839     PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
1840     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
1841     PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
1842     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
1843     PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
1844     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
1845     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
1846     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
1847     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
1848     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
1849     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
1850     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
1851     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
1852     PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
1853     PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
1854     PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
1855     PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
1856     PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
1857     PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
1858     PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
1859     PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
1860     PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
1861     PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
1862     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
1863     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
1864     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
1865     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
1866     PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
1867     PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
1868
1869 #define SCALED_NEAREST_FLAGS                                            \
1870     (FAST_PATH_SCALE_TRANSFORM  |                                       \
1871      FAST_PATH_NO_ALPHA_MAP     |                                       \
1872      FAST_PATH_NEAREST_FILTER   |                                       \
1873      FAST_PATH_NO_ACCESSORS     |                                       \
1874      FAST_PATH_NO_WIDE_FORMAT)
1875
1876 #define HAS_NORMAL_REPEAT_FLAGS                                         \
1877     (FAST_PATH_NO_REFLECT_REPEAT |                                      \
1878      FAST_PATH_NO_PAD_REPEAT     |                                      \
1879      FAST_PATH_NO_NONE_REPEAT)
1880
1881 #define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)                           \
1882     {   PIXMAN_OP_ ## op,                                               \
1883         PIXMAN_ ## s,                                                   \
1884         SCALED_NEAREST_FLAGS | HAS_NORMAL_REPEAT_FLAGS | FAST_PATH_16BIT_SAFE | FAST_PATH_X_UNIT_POSITIVE, \
1885         PIXMAN_null, 0,                                                 \
1886         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
1887         fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,   \
1888     },                                                                  \
1889     {   PIXMAN_OP_ ## op,                                               \
1890         PIXMAN_ ## s,                                                   \
1891         SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,            \
1892         PIXMAN_null, 0,                                                 \
1893         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                         \
1894         fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,     \
1895     }
1896     SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
1897     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
1898     SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
1899     SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
1900
1901     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
1902     SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
1903
1904     SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
1905     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),
1906
1907     SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
1908
1909     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
1910     SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
1911     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
1912     SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),
1913
1914     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
1915
1916 #define NEAREST_FAST_PATH(op,s,d)               \
1917     {   PIXMAN_OP_ ## op,                       \
1918         PIXMAN_ ## s, SCALED_NEAREST_FLAGS,     \
1919         PIXMAN_null, 0,                         \
1920         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1921         fast_composite_scaled_nearest,          \
1922     }
1923
1924     NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
1925     NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
1926     NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
1927     NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
1928
1929     NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
1930     NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
1931     NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
1932     NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
1933
1934     NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
1935     NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
1936     NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
1937     NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
1938
1939     NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
1940     NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
1941     NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
1942     NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
1943
1944     {   PIXMAN_OP_NONE  },
1945 };
1946
/*
 * Fill a width x height rectangle of an 8 bpp buffer with the low byte
 * of xor.
 *
 * bits:   start of the pixel buffer
 * stride: distance between rows, in uint32_t units
 * x, y:   top-left corner of the rectangle, in pixels
 */
static void
pixman_fill8 (uint32_t *bits,
              int       stride,
              int       x,
              int       y,
              int       width,
              int       height,
              uint32_t xor)
{
    const int     row_bytes = stride * (int) sizeof (uint32_t);
    const uint8_t value = xor & 0xff;
    uint8_t      *row = (uint8_t *) bits + y * row_bytes + x;

    for (; height != 0; height--, row += row_bytes)
    {
        uint8_t *p = row;
        int      left;

        for (left = width; left > 0; left--)
            *p++ = value;
    }
}
1971
/*
 * Fill a width x height rectangle of a 16 bpp buffer with the low 16 bits
 * of xor.
 *
 * bits:   start of the pixel buffer
 * stride: distance between rows, in uint32_t units
 * x, y:   top-left corner of the rectangle, in pixels
 */
static void
pixman_fill16 (uint32_t *bits,
               int       stride,
               int       x,
               int       y,
               int       width,
               int       height,
               uint32_t xor)
{
    /* Row pitch converted from uint32_t units to uint16_t units */
    const int      row_shorts =
        (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
    const uint16_t value = xor & 0xffff;
    uint16_t      *row = (uint16_t *)bits + y * row_shorts + x;

    for (; height != 0; height--, row += row_shorts)
    {
        uint16_t *p = row;
        int       left;

        for (left = width; left > 0; left--)
            *p++ = value;
    }
}
1997
/*
 * Fill a width x height rectangle of a 32 bpp buffer with xor.
 *
 * bits:   start of the pixel buffer
 * stride: distance between rows, in uint32_t units
 * x, y:   top-left corner of the rectangle, in pixels
 */
static void
pixman_fill32 (uint32_t *bits,
               int       stride,
               int       x,
               int       y,
               int       width,
               int       height,
               uint32_t  xor)
{
    uint32_t *row = bits + y * stride + x;

    for (; height != 0; height--, row += stride)
    {
        uint32_t *p = row;
        int       left;

        for (left = width; left > 0; left--)
            *p++ = xor;
    }
}
2019
2020 static pixman_bool_t
2021 fast_path_fill (pixman_implementation_t *imp,
2022                 uint32_t *               bits,
2023                 int                      stride,
2024                 int                      bpp,
2025                 int                      x,
2026                 int                      y,
2027                 int                      width,
2028                 int                      height,
2029                 uint32_t                 xor)
2030 {
2031     switch (bpp)
2032     {
2033     case 8:
2034         pixman_fill8 (bits, stride, x, y, width, height, xor);
2035         break;
2036
2037     case 16:
2038         pixman_fill16 (bits, stride, x, y, width, height, xor);
2039         break;
2040
2041     case 32:
2042         pixman_fill32 (bits, stride, x, y, width, height, xor);
2043         break;
2044
2045     default:
2046         return _pixman_implementation_fill (
2047             imp->delegate, bits, stride, bpp, x, y, width, height, xor);
2048         break;
2049     }
2050
2051     return TRUE;
2052 }
2053
2054 pixman_implementation_t *
2055 _pixman_implementation_create_fast_path (void)
2056 {
2057     pixman_implementation_t *general = _pixman_implementation_create_general ();
2058     pixman_implementation_t *imp = _pixman_implementation_create (general, c_fast_paths);
2059
2060     imp->fill = fast_path_fill;
2061
2062     return imp;
2063 }