Add definitions of INT64_MIN and INT64_MAX
[profile/ivi/pixman.git] / pixman / pixman-fast-path.c
1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3  * Copyright © 2000 SuSE, Inc.
4  * Copyright © 2007 Red Hat, Inc.
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of SuSE not be used in advertising or
11  * publicity pertaining to distribution of the software without specific,
12  * written prior permission.  SuSE makes no representations about the
13  * suitability of this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  *
16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  * Author:  Keith Packard, SuSE, Inc.
24  */
25
26 #ifdef HAVE_CONFIG_H
27 #include <config.h>
28 #endif
29 #include <string.h>
30 #include <stdlib.h>
31 #include "pixman-private.h"
32 #include "pixman-combine32.h"
33 #include "pixman-inlines.h"
34
35 static force_inline uint32_t
36 fetch_24 (uint8_t *a)
37 {
38     if (((unsigned long)a) & 1)
39     {
40 #ifdef WORDS_BIGENDIAN
41         return (*a << 16) | (*(uint16_t *)(a + 1));
42 #else
43         return *a | (*(uint16_t *)(a + 1) << 8);
44 #endif
45     }
46     else
47     {
48 #ifdef WORDS_BIGENDIAN
49         return (*(uint16_t *)a << 8) | *(a + 2);
50 #else
51         return *(uint16_t *)a | (*(a + 2) << 16);
52 #endif
53     }
54 }
55
56 static force_inline void
57 store_24 (uint8_t *a,
58           uint32_t v)
59 {
60     if (((unsigned long)a) & 1)
61     {
62 #ifdef WORDS_BIGENDIAN
63         *a = (uint8_t) (v >> 16);
64         *(uint16_t *)(a + 1) = (uint16_t) (v);
65 #else
66         *a = (uint8_t) (v);
67         *(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
68 #endif
69     }
70     else
71     {
72 #ifdef WORDS_BIGENDIAN
73         *(uint16_t *)a = (uint16_t)(v >> 8);
74         *(a + 2) = (uint8_t)v;
75 #else
76         *(uint16_t *)a = (uint16_t)v;
77         *(a + 2) = (uint8_t)(v >> 16);
78 #endif
79     }
80 }
81
82 static force_inline uint32_t
83 over (uint32_t src,
84       uint32_t dest)
85 {
86     uint32_t a = ~src >> 24;
87
88     UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
89
90     return dest;
91 }
92
/*
 * Apply the 8-bit factor 'y' to the 32-bit pixel 'x' and return the result.
 *
 * The work is done by UN8x4_MUL_UN8, which updates 'x' in place; going by
 * its name it multiplies each 8-bit channel of 'x' by the factor -- see
 * pixman-combine32.h to confirm.
 */
static uint32_t
in (uint32_t x,
    uint8_t  y)
{
    /* The factor is widened to 16 bits before being handed to the macro. */
    uint16_t factor = y;

    UN8x4_MUL_UN8 (x, factor);

    return x;
}
103
104 /*
105  * Naming convention:
106  *
107  *  op_src_mask_dest
108  */
109 static void
110 fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
111                                  pixman_composite_info_t *info)
112 {
113     PIXMAN_COMPOSITE_ARGS (info);
114     uint32_t    *src, *src_line;
115     uint32_t    *dst, *dst_line;
116     uint8_t     *mask, *mask_line;
117     int src_stride, mask_stride, dst_stride;
118     uint8_t m;
119     uint32_t s, d;
120     int32_t w;
121
122     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
123     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
124     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
125
126     while (height--)
127     {
128         src = src_line;
129         src_line += src_stride;
130         dst = dst_line;
131         dst_line += dst_stride;
132         mask = mask_line;
133         mask_line += mask_stride;
134
135         w = width;
136         while (w--)
137         {
138             m = *mask++;
139             if (m)
140             {
141                 s = *src | 0xff000000;
142
143                 if (m == 0xff)
144                 {
145                     *dst = s;
146                 }
147                 else
148                 {
149                     d = in (s, m);
150                     *dst = over (d, *dst);
151                 }
152             }
153             src++;
154             dst++;
155         }
156     }
157 }
158
159 static void
160 fast_composite_in_n_8_8 (pixman_implementation_t *imp,
161                          pixman_composite_info_t *info)
162 {
163     PIXMAN_COMPOSITE_ARGS (info);
164     uint32_t src, srca;
165     uint8_t     *dst_line, *dst;
166     uint8_t     *mask_line, *mask, m;
167     int dst_stride, mask_stride;
168     int32_t w;
169     uint16_t t;
170
171     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
172
173     srca = src >> 24;
174
175     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
176     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
177
178     if (srca == 0xff)
179     {
180         while (height--)
181         {
182             dst = dst_line;
183             dst_line += dst_stride;
184             mask = mask_line;
185             mask_line += mask_stride;
186             w = width;
187
188             while (w--)
189             {
190                 m = *mask++;
191
192                 if (m == 0)
193                     *dst = 0;
194                 else if (m != 0xff)
195                     *dst = MUL_UN8 (m, *dst, t);
196
197                 dst++;
198             }
199         }
200     }
201     else
202     {
203         while (height--)
204         {
205             dst = dst_line;
206             dst_line += dst_stride;
207             mask = mask_line;
208             mask_line += mask_stride;
209             w = width;
210
211             while (w--)
212             {
213                 m = *mask++;
214                 m = MUL_UN8 (m, srca, t);
215
216                 if (m == 0)
217                     *dst = 0;
218                 else if (m != 0xff)
219                     *dst = MUL_UN8 (m, *dst, t);
220
221                 dst++;
222             }
223         }
224     }
225 }
226
227 static void
228 fast_composite_in_8_8 (pixman_implementation_t *imp,
229                        pixman_composite_info_t *info)
230 {
231     PIXMAN_COMPOSITE_ARGS (info);
232     uint8_t     *dst_line, *dst;
233     uint8_t     *src_line, *src;
234     int dst_stride, src_stride;
235     int32_t w;
236     uint8_t s;
237     uint16_t t;
238
239     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
240     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
241
242     while (height--)
243     {
244         dst = dst_line;
245         dst_line += dst_stride;
246         src = src_line;
247         src_line += src_stride;
248         w = width;
249
250         while (w--)
251         {
252             s = *src++;
253
254             if (s == 0)
255                 *dst = 0;
256             else if (s != 0xff)
257                 *dst = MUL_UN8 (s, *dst, t);
258
259             dst++;
260         }
261     }
262 }
263
264 static void
265 fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
266                               pixman_composite_info_t *info)
267 {
268     PIXMAN_COMPOSITE_ARGS (info);
269     uint32_t src, srca;
270     uint32_t    *dst_line, *dst, d;
271     uint8_t     *mask_line, *mask, m;
272     int dst_stride, mask_stride;
273     int32_t w;
274
275     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
276
277     srca = src >> 24;
278     if (src == 0)
279         return;
280
281     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
282     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
283
284     while (height--)
285     {
286         dst = dst_line;
287         dst_line += dst_stride;
288         mask = mask_line;
289         mask_line += mask_stride;
290         w = width;
291
292         while (w--)
293         {
294             m = *mask++;
295             if (m == 0xff)
296             {
297                 if (srca == 0xff)
298                     *dst = src;
299                 else
300                     *dst = over (src, *dst);
301             }
302             else if (m)
303             {
304                 d = in (src, m);
305                 *dst = over (d, *dst);
306             }
307             dst++;
308         }
309     }
310 }
311
312 static void
313 fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
314                                    pixman_composite_info_t *info)
315 {
316     PIXMAN_COMPOSITE_ARGS (info);
317     uint32_t src, s;
318     uint32_t    *dst_line, *dst, d;
319     uint32_t    *mask_line, *mask, ma;
320     int dst_stride, mask_stride;
321     int32_t w;
322
323     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
324
325     if (src == 0)
326         return;
327
328     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
329     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
330
331     while (height--)
332     {
333         dst = dst_line;
334         dst_line += dst_stride;
335         mask = mask_line;
336         mask_line += mask_stride;
337         w = width;
338
339         while (w--)
340         {
341             ma = *mask++;
342
343             if (ma)
344             {
345                 d = *dst;
346                 s = src;
347
348                 UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
349
350                 *dst = s;
351             }
352
353             dst++;
354         }
355     }
356 }
357
358 static void
359 fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
360                                     pixman_composite_info_t *info)
361 {
362     PIXMAN_COMPOSITE_ARGS (info);
363     uint32_t src, srca, s;
364     uint32_t    *dst_line, *dst, d;
365     uint32_t    *mask_line, *mask, ma;
366     int dst_stride, mask_stride;
367     int32_t w;
368
369     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
370
371     srca = src >> 24;
372     if (src == 0)
373         return;
374
375     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
376     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
377
378     while (height--)
379     {
380         dst = dst_line;
381         dst_line += dst_stride;
382         mask = mask_line;
383         mask_line += mask_stride;
384         w = width;
385
386         while (w--)
387         {
388             ma = *mask++;
389             if (ma == 0xffffffff)
390             {
391                 if (srca == 0xff)
392                     *dst = src;
393                 else
394                     *dst = over (src, *dst);
395             }
396             else if (ma)
397             {
398                 d = *dst;
399                 s = src;
400
401                 UN8x4_MUL_UN8x4 (s, ma);
402                 UN8x4_MUL_UN8 (ma, srca);
403                 ma = ~ma;
404                 UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
405
406                 *dst = d;
407             }
408
409             dst++;
410         }
411     }
412 }
413
414 static void
415 fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
416                               pixman_composite_info_t *info)
417 {
418     PIXMAN_COMPOSITE_ARGS (info);
419     uint32_t src, srca;
420     uint8_t     *dst_line, *dst;
421     uint32_t d;
422     uint8_t     *mask_line, *mask, m;
423     int dst_stride, mask_stride;
424     int32_t w;
425
426     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
427
428     srca = src >> 24;
429     if (src == 0)
430         return;
431
432     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
433     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
434
435     while (height--)
436     {
437         dst = dst_line;
438         dst_line += dst_stride;
439         mask = mask_line;
440         mask_line += mask_stride;
441         w = width;
442
443         while (w--)
444         {
445             m = *mask++;
446             if (m == 0xff)
447             {
448                 if (srca == 0xff)
449                 {
450                     d = src;
451                 }
452                 else
453                 {
454                     d = fetch_24 (dst);
455                     d = over (src, d);
456                 }
457                 store_24 (dst, d);
458             }
459             else if (m)
460             {
461                 d = over (in (src, m), fetch_24 (dst));
462                 store_24 (dst, d);
463             }
464             dst += 3;
465         }
466     }
467 }
468
469 static void
470 fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
471                               pixman_composite_info_t *info)
472 {
473     PIXMAN_COMPOSITE_ARGS (info);
474     uint32_t src, srca;
475     uint16_t    *dst_line, *dst;
476     uint32_t d;
477     uint8_t     *mask_line, *mask, m;
478     int dst_stride, mask_stride;
479     int32_t w;
480
481     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
482
483     srca = src >> 24;
484     if (src == 0)
485         return;
486
487     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
488     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
489
490     while (height--)
491     {
492         dst = dst_line;
493         dst_line += dst_stride;
494         mask = mask_line;
495         mask_line += mask_stride;
496         w = width;
497
498         while (w--)
499         {
500             m = *mask++;
501             if (m == 0xff)
502             {
503                 if (srca == 0xff)
504                 {
505                     d = src;
506                 }
507                 else
508                 {
509                     d = *dst;
510                     d = over (src, CONVERT_0565_TO_0888 (d));
511                 }
512                 *dst = CONVERT_8888_TO_0565 (d);
513             }
514             else if (m)
515             {
516                 d = *dst;
517                 d = over (in (src, m), CONVERT_0565_TO_0888 (d));
518                 *dst = CONVERT_8888_TO_0565 (d);
519             }
520             dst++;
521         }
522     }
523 }
524
525 static void
526 fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
527                                     pixman_composite_info_t *info)
528 {
529     PIXMAN_COMPOSITE_ARGS (info);
530     uint32_t  src, srca, s;
531     uint16_t  src16;
532     uint16_t *dst_line, *dst;
533     uint32_t  d;
534     uint32_t *mask_line, *mask, ma;
535     int dst_stride, mask_stride;
536     int32_t w;
537
538     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
539
540     srca = src >> 24;
541     if (src == 0)
542         return;
543
544     src16 = CONVERT_8888_TO_0565 (src);
545
546     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
547     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
548
549     while (height--)
550     {
551         dst = dst_line;
552         dst_line += dst_stride;
553         mask = mask_line;
554         mask_line += mask_stride;
555         w = width;
556
557         while (w--)
558         {
559             ma = *mask++;
560             if (ma == 0xffffffff)
561             {
562                 if (srca == 0xff)
563                 {
564                     *dst = src16;
565                 }
566                 else
567                 {
568                     d = *dst;
569                     d = over (src, CONVERT_0565_TO_0888 (d));
570                     *dst = CONVERT_8888_TO_0565 (d);
571                 }
572             }
573             else if (ma)
574             {
575                 d = *dst;
576                 d = CONVERT_0565_TO_0888 (d);
577
578                 s = src;
579
580                 UN8x4_MUL_UN8x4 (s, ma);
581                 UN8x4_MUL_UN8 (ma, srca);
582                 ma = ~ma;
583                 UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
584
585                 *dst = CONVERT_8888_TO_0565 (d);
586             }
587             dst++;
588         }
589     }
590 }
591
592 static void
593 fast_composite_over_8888_8888 (pixman_implementation_t *imp,
594                                pixman_composite_info_t *info)
595 {
596     PIXMAN_COMPOSITE_ARGS (info);
597     uint32_t    *dst_line, *dst;
598     uint32_t    *src_line, *src, s;
599     int dst_stride, src_stride;
600     uint8_t a;
601     int32_t w;
602
603     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
604     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
605
606     while (height--)
607     {
608         dst = dst_line;
609         dst_line += dst_stride;
610         src = src_line;
611         src_line += src_stride;
612         w = width;
613
614         while (w--)
615         {
616             s = *src++;
617             a = s >> 24;
618             if (a == 0xff)
619                 *dst = s;
620             else if (s)
621                 *dst = over (s, *dst);
622             dst++;
623         }
624     }
625 }
626
627 static void
628 fast_composite_src_x888_8888 (pixman_implementation_t *imp,
629                               pixman_composite_info_t *info)
630 {
631     PIXMAN_COMPOSITE_ARGS (info);
632     uint32_t    *dst_line, *dst;
633     uint32_t    *src_line, *src;
634     int dst_stride, src_stride;
635     int32_t w;
636
637     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
638     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
639
640     while (height--)
641     {
642         dst = dst_line;
643         dst_line += dst_stride;
644         src = src_line;
645         src_line += src_stride;
646         w = width;
647
648         while (w--)
649             *dst++ = (*src++) | 0xff000000;
650     }
651 }
652
#if 0
/*
 * Disabled (compiled out by the surrounding #if 0).
 *
 * Composite a 32-bit source onto a destination made of packed 3-byte
 * pixels (accessed with fetch_24 () / store_24 ()).
 *
 * Per pixel:
 *   - source top byte 0:     destination is left alone;
 *   - source top byte 0xff:  the source is stored directly;
 *   - other:                 the source is merged with the fetched pixel
 *                            using over () and stored.
 */
static void
fast_composite_over_8888_0888 (pixman_implementation_t *imp,
                               pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t  *dst_row, *dst_px;
    uint32_t *src_row, *src_px;
    uint32_t  pixel, result;
    uint8_t   alpha;
    int       dst_stride, src_stride;
    int32_t   remaining;

    /* The destination is addressed in bytes, three per pixel. */
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_row, 3);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_row, 1);

    for (; height != 0; height--)
    {
        dst_px = dst_row;
        src_px = src_row;

        for (remaining = width; remaining != 0; remaining--, dst_px += 3)
        {
            pixel = *src_px++;
            alpha = pixel >> 24;

            if (alpha == 0)
                continue;

            if (alpha != 0xff)
                result = over (pixel, fetch_24 (dst_px));
            else
                result = pixel;

            store_24 (dst_px, result);
        }

        dst_row += dst_stride;
        src_row += src_stride;
    }
}
#endif
695
696 static void
697 fast_composite_over_8888_0565 (pixman_implementation_t *imp,
698                                pixman_composite_info_t *info)
699 {
700     PIXMAN_COMPOSITE_ARGS (info);
701     uint16_t    *dst_line, *dst;
702     uint32_t d;
703     uint32_t    *src_line, *src, s;
704     uint8_t a;
705     int dst_stride, src_stride;
706     int32_t w;
707
708     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
709     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
710
711     while (height--)
712     {
713         dst = dst_line;
714         dst_line += dst_stride;
715         src = src_line;
716         src_line += src_stride;
717         w = width;
718
719         while (w--)
720         {
721             s = *src++;
722             a = s >> 24;
723             if (s)
724             {
725                 if (a == 0xff)
726                 {
727                     d = s;
728                 }
729                 else
730                 {
731                     d = *dst;
732                     d = over (s, CONVERT_0565_TO_0888 (d));
733                 }
734                 *dst = CONVERT_8888_TO_0565 (d);
735             }
736             dst++;
737         }
738     }
739 }
740
741 static void
742 fast_composite_src_x888_0565 (pixman_implementation_t *imp,
743                               pixman_composite_info_t *info)
744 {
745     PIXMAN_COMPOSITE_ARGS (info);
746     uint16_t    *dst_line, *dst;
747     uint32_t    *src_line, *src, s;
748     int dst_stride, src_stride;
749     int32_t w;
750
751     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
752     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
753
754     while (height--)
755     {
756         dst = dst_line;
757         dst_line += dst_stride;
758         src = src_line;
759         src_line += src_stride;
760         w = width;
761
762         while (w--)
763         {
764             s = *src++;
765             *dst = CONVERT_8888_TO_0565 (s);
766             dst++;
767         }
768     }
769 }
770
771 static void
772 fast_composite_add_8_8 (pixman_implementation_t *imp,
773                         pixman_composite_info_t *info)
774 {
775     PIXMAN_COMPOSITE_ARGS (info);
776     uint8_t     *dst_line, *dst;
777     uint8_t     *src_line, *src;
778     int dst_stride, src_stride;
779     int32_t w;
780     uint8_t s, d;
781     uint16_t t;
782
783     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
784     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
785
786     while (height--)
787     {
788         dst = dst_line;
789         dst_line += dst_stride;
790         src = src_line;
791         src_line += src_stride;
792         w = width;
793
794         while (w--)
795         {
796             s = *src++;
797             if (s)
798             {
799                 if (s != 0xff)
800                 {
801                     d = *dst;
802                     t = d + s;
803                     s = t | (0 - (t >> 8));
804                 }
805                 *dst = s;
806             }
807             dst++;
808         }
809     }
810 }
811
812 static void
813 fast_composite_add_8888_8888 (pixman_implementation_t *imp,
814                               pixman_composite_info_t *info)
815 {
816     PIXMAN_COMPOSITE_ARGS (info);
817     uint32_t    *dst_line, *dst;
818     uint32_t    *src_line, *src;
819     int dst_stride, src_stride;
820     int32_t w;
821     uint32_t s, d;
822
823     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
824     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
825
826     while (height--)
827     {
828         dst = dst_line;
829         dst_line += dst_stride;
830         src = src_line;
831         src_line += src_stride;
832         w = width;
833
834         while (w--)
835         {
836             s = *src++;
837             if (s)
838             {
839                 if (s != 0xffffffff)
840                 {
841                     d = *dst;
842                     if (d)
843                         UN8x4_ADD_UN8x4 (s, d);
844                 }
845                 *dst = s;
846             }
847             dst++;
848         }
849     }
850 }
851
852 static void
853 fast_composite_add_n_8_8 (pixman_implementation_t *imp,
854                           pixman_composite_info_t *info)
855 {
856     PIXMAN_COMPOSITE_ARGS (info);
857     uint8_t     *dst_line, *dst;
858     uint8_t     *mask_line, *mask;
859     int dst_stride, mask_stride;
860     int32_t w;
861     uint32_t src;
862     uint8_t sa;
863
864     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
865     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
866     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
867     sa = (src >> 24);
868
869     while (height--)
870     {
871         dst = dst_line;
872         dst_line += dst_stride;
873         mask = mask_line;
874         mask_line += mask_stride;
875         w = width;
876
877         while (w--)
878         {
879             uint16_t tmp;
880             uint16_t a;
881             uint32_t m, d;
882             uint32_t r;
883
884             a = *mask++;
885             d = *dst;
886
887             m = MUL_UN8 (sa, a, tmp);
888             r = ADD_UN8 (m, d, tmp);
889
890             *dst++ = r;
891         }
892     }
893 }
894
/*
 * Helpers for addressing single bits inside arrays of 32-bit words.
 *
 * CREATE_BITMASK (n)   mask selecting bit position n (0..31) of a word;
 *                      position 0 is the most significant bit when
 *                      WORDS_BIGENDIAN is defined, the least significant
 *                      bit otherwise.
 * UPDATE_BITMASK (m)   mask m moved on by one bit position; the result is
 *                      0 once the bit has been shifted out of a 32-bit
 *                      word.
 * TEST_BIT (p, n)      non-zero when bit n of the word array p is set.
 * SET_BIT (p, n)       sets bit n of the word array p.
 *
 * The little-endian mask is built from an unsigned 1 because shifting a
 * signed 1 into bit 31 is undefined behaviour.  SET_BIT deliberately has
 * no trailing semicolon so that "SET_BIT (p, n);" is exactly one statement
 * and can be used in an un-braced if/else.
 */
#ifdef WORDS_BIGENDIAN
#define CREATE_BITMASK(n) (0x80000000 >> (n))
#define UPDATE_BITMASK(n) ((n) >> 1)
#else
#define CREATE_BITMASK(n) (1U << (n))
#define UPDATE_BITMASK(n) ((n) << 1)
#endif

#define TEST_BIT(p, n)                                  \
    (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
#define SET_BIT(p, n)                                                   \
    do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0)
907
908 static void
909 fast_composite_add_1000_1000 (pixman_implementation_t *imp,
910                               pixman_composite_info_t *info)
911 {
912     PIXMAN_COMPOSITE_ARGS (info);
913     uint32_t     *dst_line, *dst;
914     uint32_t     *src_line, *src;
915     int           dst_stride, src_stride;
916     int32_t       w;
917
918     PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
919                            src_stride, src_line, 1);
920     PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t,
921                            dst_stride, dst_line, 1);
922
923     while (height--)
924     {
925         dst = dst_line;
926         dst_line += dst_stride;
927         src = src_line;
928         src_line += src_stride;
929         w = width;
930
931         while (w--)
932         {
933             /*
934              * TODO: improve performance by processing uint32_t data instead
935              *       of individual bits
936              */
937             if (TEST_BIT (src, src_x + w))
938                 SET_BIT (dst, dest_x + w);
939         }
940     }
941 }
942
943 static void
944 fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
945                               pixman_composite_info_t *info)
946 {
947     PIXMAN_COMPOSITE_ARGS (info);
948     uint32_t     src, srca;
949     uint32_t    *dst, *dst_line;
950     uint32_t    *mask, *mask_line;
951     int          mask_stride, dst_stride;
952     uint32_t     bitcache, bitmask;
953     int32_t      w;
954
955     if (width <= 0)
956         return;
957
958     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
959     srca = src >> 24;
960     if (src == 0)
961         return;
962
963     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
964                            dst_stride, dst_line, 1);
965     PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
966                            mask_stride, mask_line, 1);
967     mask_line += mask_x >> 5;
968
969     if (srca == 0xff)
970     {
971         while (height--)
972         {
973             dst = dst_line;
974             dst_line += dst_stride;
975             mask = mask_line;
976             mask_line += mask_stride;
977             w = width;
978
979             bitcache = *mask++;
980             bitmask = CREATE_BITMASK (mask_x & 31);
981
982             while (w--)
983             {
984                 if (bitmask == 0)
985                 {
986                     bitcache = *mask++;
987                     bitmask = CREATE_BITMASK (0);
988                 }
989                 if (bitcache & bitmask)
990                     *dst = src;
991                 bitmask = UPDATE_BITMASK (bitmask);
992                 dst++;
993             }
994         }
995     }
996     else
997     {
998         while (height--)
999         {
1000             dst = dst_line;
1001             dst_line += dst_stride;
1002             mask = mask_line;
1003             mask_line += mask_stride;
1004             w = width;
1005
1006             bitcache = *mask++;
1007             bitmask = CREATE_BITMASK (mask_x & 31);
1008
1009             while (w--)
1010             {
1011                 if (bitmask == 0)
1012                 {
1013                     bitcache = *mask++;
1014                     bitmask = CREATE_BITMASK (0);
1015                 }
1016                 if (bitcache & bitmask)
1017                     *dst = over (src, *dst);
1018                 bitmask = UPDATE_BITMASK (bitmask);
1019                 dst++;
1020             }
1021         }
1022     }
1023 }
1024
1025 static void
1026 fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
1027                               pixman_composite_info_t *info)
1028 {
1029     PIXMAN_COMPOSITE_ARGS (info);
1030     uint32_t     src, srca;
1031     uint16_t    *dst, *dst_line;
1032     uint32_t    *mask, *mask_line;
1033     int          mask_stride, dst_stride;
1034     uint32_t     bitcache, bitmask;
1035     int32_t      w;
1036     uint32_t     d;
1037     uint16_t     src565;
1038
1039     if (width <= 0)
1040         return;
1041
1042     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
1043     srca = src >> 24;
1044     if (src == 0)
1045         return;
1046
1047     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t,
1048                            dst_stride, dst_line, 1);
1049     PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
1050                            mask_stride, mask_line, 1);
1051     mask_line += mask_x >> 5;
1052
1053     if (srca == 0xff)
1054     {
1055         src565 = CONVERT_8888_TO_0565 (src);
1056         while (height--)
1057         {
1058             dst = dst_line;
1059             dst_line += dst_stride;
1060             mask = mask_line;
1061             mask_line += mask_stride;
1062             w = width;
1063
1064             bitcache = *mask++;
1065             bitmask = CREATE_BITMASK (mask_x & 31);
1066
1067             while (w--)
1068             {
1069                 if (bitmask == 0)
1070                 {
1071                     bitcache = *mask++;
1072                     bitmask = CREATE_BITMASK (0);
1073                 }
1074                 if (bitcache & bitmask)
1075                     *dst = src565;
1076                 bitmask = UPDATE_BITMASK (bitmask);
1077                 dst++;
1078             }
1079         }
1080     }
1081     else
1082     {
1083         while (height--)
1084         {
1085             dst = dst_line;
1086             dst_line += dst_stride;
1087             mask = mask_line;
1088             mask_line += mask_stride;
1089             w = width;
1090
1091             bitcache = *mask++;
1092             bitmask = CREATE_BITMASK (mask_x & 31);
1093
1094             while (w--)
1095             {
1096                 if (bitmask == 0)
1097                 {
1098                     bitcache = *mask++;
1099                     bitmask = CREATE_BITMASK (0);
1100                 }
1101                 if (bitcache & bitmask)
1102                 {
1103                     d = over (src, CONVERT_0565_TO_0888 (*dst));
1104                     *dst = CONVERT_8888_TO_0565 (d);
1105                 }
1106                 bitmask = UPDATE_BITMASK (bitmask);
1107                 dst++;
1108             }
1109         }
1110     }
1111 }
1112
1113 /*
1114  * Simple bitblt
1115  */
1116
1117 static void
1118 fast_composite_solid_fill (pixman_implementation_t *imp,
1119                            pixman_composite_info_t *info)
1120 {
1121     PIXMAN_COMPOSITE_ARGS (info);
1122     uint32_t src;
1123
1124     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
1125
1126     if (dest_image->bits.format == PIXMAN_a1)
1127     {
1128         src = src >> 31;
1129     }
1130     else if (dest_image->bits.format == PIXMAN_a8)
1131     {
1132         src = src >> 24;
1133     }
1134     else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
1135              dest_image->bits.format == PIXMAN_b5g6r5)
1136     {
1137         src = CONVERT_8888_TO_0565 (src);
1138     }
1139
1140     pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
1141                  PIXMAN_FORMAT_BPP (dest_image->bits.format),
1142                  dest_x, dest_y,
1143                  width, height,
1144                  src);
1145 }
1146
1147 static void
1148 fast_composite_src_memcpy (pixman_implementation_t *imp,
1149                            pixman_composite_info_t *info)
1150 {
1151     PIXMAN_COMPOSITE_ARGS (info);
1152     int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;
1153     uint32_t n_bytes = width * bpp;
1154     int dst_stride, src_stride;
1155     uint8_t    *dst;
1156     uint8_t    *src;
1157
1158     src_stride = src_image->bits.rowstride * 4;
1159     dst_stride = dest_image->bits.rowstride * 4;
1160
1161     src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
1162     dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
1163
1164     while (height--)
1165     {
1166         memcpy (dst, src, n_bytes);
1167
1168         dst += dst_stride;
1169         src += src_stride;
1170     }
1171 }
1172
/*
 * Instantiate the nearest-neighbour scaling paths through the FAST_NEAREST
 * macro, which is not defined in this part of the file.
 *
 * NOTE(review): judging by the arguments, each line gives the variant name,
 * the source and destination formats, the source and destination pixel
 * types, the operator (SRC or OVER) and the repeat handling (COVER, NONE,
 * PAD or NORMAL) -- confirm against the macro definition.
 */
1173 FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
1174 FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
1175 FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
1176 FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
1177 FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER)
1178 FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD)
1179 FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL)
1180 FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
1181 FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
1182 FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
1183 FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
1184 FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
1185 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
1186 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
1187 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
1188 FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
1189 FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
1190 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
1191 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
1192 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
1193
1194 #define REPEAT_MIN_WIDTH    32
1195
1196 static void
1197 fast_composite_tiled_repeat (pixman_implementation_t *imp,
1198                              pixman_composite_info_t *info)
1199 {
1200     PIXMAN_COMPOSITE_ARGS (info);
1201     pixman_composite_func_t func;
1202     pixman_format_code_t mask_format;
1203     uint32_t src_flags, mask_flags;
1204
1205     src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
1206                     FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
1207
1208     if (mask_image)
1209     {
1210         mask_format = mask_image->common.extended_format_code;
1211         mask_flags = info->mask_flags;
1212     }
1213     else
1214     {
1215         mask_format = PIXMAN_null;
1216         mask_flags = FAST_PATH_IS_OPAQUE;
1217     }
1218
1219     if (_pixman_lookup_composite_function (
1220             imp->toplevel, info->op,
1221             src_image->common.extended_format_code, src_flags,
1222             mask_format, mask_flags,
1223             dest_image->common.extended_format_code, info->dest_flags,
1224             &imp, &func))
1225     {
1226         int32_t sx, sy;
1227         int32_t width_remain;
1228         int32_t num_pixels;
1229         int32_t src_width;
1230         int32_t i, j;
1231         pixman_image_t extended_src_image;
1232         uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
1233         pixman_bool_t need_src_extension;
1234         uint32_t *src_line;
1235         int32_t src_stride;
1236         int32_t src_bpp;
1237         pixman_composite_info_t info2 = *info;
1238
1239         src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
1240
1241         if (src_image->bits.width < REPEAT_MIN_WIDTH &&
1242             (src_bpp == 32 || src_bpp == 16 || src_bpp == 8))
1243         {
1244             sx = src_x;
1245             sx = MOD (sx, src_image->bits.width);
1246             sx += width;
1247             src_width = 0;
1248
1249             while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
1250                 src_width += src_image->bits.width;
1251
1252             src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
1253
1254             /* Initialize/validate stack-allocated temporary image */
1255             _pixman_bits_image_init (&extended_src_image, src_image->bits.format,
1256                                      src_width, 1, &extended_src[0], src_stride);
1257             _pixman_image_validate (&extended_src_image);
1258
1259             info2.src_image = &extended_src_image;
1260             need_src_extension = TRUE;
1261         }
1262         else
1263         {
1264             src_width = src_image->bits.width;
1265             need_src_extension = FALSE;
1266         }
1267
1268         sx = src_x;
1269         sy = src_y;
1270
1271         while (--height >= 0)
1272         {
1273             sx = MOD (sx, src_width);
1274             sy = MOD (sy, src_image->bits.height);
1275
1276             if (need_src_extension)
1277             {
1278                 if (src_bpp == 32)
1279                 {
1280                     PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
1281
1282                     for (i = 0; i < src_width; )
1283                     {
1284                         for (j = 0; j < src_image->bits.width; j++, i++)
1285                             extended_src[i] = src_line[j];
1286                     }
1287                 }
1288                 else if (src_bpp == 16)
1289                 {
1290                     uint16_t *src_line_16;
1291
1292                     PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
1293                                            src_line_16, 1);
1294                     src_line = (uint32_t*)src_line_16;
1295
1296                     for (i = 0; i < src_width; )
1297                     {
1298                         for (j = 0; j < src_image->bits.width; j++, i++)
1299                             ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
1300                     }
1301                 }
1302                 else if (src_bpp == 8)
1303                 {
1304                     uint8_t *src_line_8;
1305
1306                     PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
1307                                            src_line_8, 1);
1308                     src_line = (uint32_t*)src_line_8;
1309
1310                     for (i = 0; i < src_width; )
1311                     {
1312                         for (j = 0; j < src_image->bits.width; j++, i++)
1313                             ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
1314                     }
1315                 }
1316
1317                 info2.src_y = 0;
1318             }
1319             else
1320             {
1321                 info2.src_y = sy;
1322             }
1323
1324             width_remain = width;
1325
1326             while (width_remain > 0)
1327             {
1328                 num_pixels = src_width - sx;
1329
1330                 if (num_pixels > width_remain)
1331                     num_pixels = width_remain;
1332
1333                 info2.src_x = sx;
1334                 info2.width = num_pixels;
1335                 info2.height = 1;
1336
1337                 func (imp, &info2);
1338
1339                 width_remain -= num_pixels;
1340                 info2.mask_x += num_pixels;
1341                 info2.dest_x += num_pixels;
1342                 sx = 0;
1343             }
1344
1345             sx = src_x;
1346             sy++;
1347             info2.mask_x = info->mask_x;
1348             info2.mask_y++;
1349             info2.dest_x = info->dest_x;
1350             info2.dest_y++;
1351         }
1352
1353         if (need_src_extension)
1354             _pixman_image_fini (&extended_src_image);
1355     }
1356     else
1357     {
1358         _pixman_log_error (FUNC, "Didn't find a suitable function ");
1359     }
1360 }
1361
1362 /* Use more unrolling for src_0565_0565 because it is typically CPU bound */
1363 static force_inline void
1364 scaled_nearest_scanline_565_565_SRC (uint16_t *       dst,
1365                                      const uint16_t * src,
1366                                      int32_t          w,
1367                                      pixman_fixed_t   vx,
1368                                      pixman_fixed_t   unit_x,
1369                                      pixman_fixed_t   max_vx,
1370                                      pixman_bool_t    fully_transparent_src)
1371 {
1372     uint16_t tmp1, tmp2, tmp3, tmp4;
1373     while ((w -= 4) >= 0)
1374     {
1375         tmp1 = src[pixman_fixed_to_int (vx)];
1376         vx += unit_x;
1377         tmp2 = src[pixman_fixed_to_int (vx)];
1378         vx += unit_x;
1379         tmp3 = src[pixman_fixed_to_int (vx)];
1380         vx += unit_x;
1381         tmp4 = src[pixman_fixed_to_int (vx)];
1382         vx += unit_x;
1383         *dst++ = tmp1;
1384         *dst++ = tmp2;
1385         *dst++ = tmp3;
1386         *dst++ = tmp4;
1387     }
1388     if (w & 2)
1389     {
1390         tmp1 = src[pixman_fixed_to_int (vx)];
1391         vx += unit_x;
1392         tmp2 = src[pixman_fixed_to_int (vx)];
1393         vx += unit_x;
1394         *dst++ = tmp1;
1395         *dst++ = tmp2;
1396     }
1397     if (w & 1)
1398         *dst++ = src[pixman_fixed_to_int (vx)];
1399 }
1400
/*
 * Instantiate the 565 -> 565 SRC main loops for the COVER, NONE and PAD
 * cases around the hand-unrolled scaled_nearest_scanline_565_565_SRC
 * scanline function.  FAST_NEAREST_MAINLOOP is not defined in this part of
 * the file.
 *
 * NOTE(review): the two uint16_t arguments look like the source and
 * destination pixel types -- confirm against the macro definition.
 */
1401 FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
1402                        scaled_nearest_scanline_565_565_SRC,
1403                        uint16_t, uint16_t, COVER)
1404 FAST_NEAREST_MAINLOOP (565_565_none_SRC,
1405                        scaled_nearest_scanline_565_565_SRC,
1406                        uint16_t, uint16_t, NONE)
1407 FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
1408                        scaled_nearest_scanline_565_565_SRC,
1409                        uint16_t, uint16_t, PAD)
1410
1411 static force_inline uint32_t
1412 fetch_nearest (pixman_repeat_t src_repeat,
1413                pixman_format_code_t format,
1414                uint32_t *src, int x, int src_width)
1415 {
1416     if (repeat (src_repeat, &x, src_width))
1417     {
1418         if (format == PIXMAN_x8r8g8b8)
1419             return *(src + x) | 0xff000000;
1420         else
1421             return *(src + x);
1422     }
1423     else
1424     {
1425         return 0;
1426     }
1427 }
1428
1429 static force_inline void
1430 combine_over (uint32_t s, uint32_t *dst)
1431 {
1432     if (s)
1433     {
1434         uint8_t ia = 0xff - (s >> 24);
1435
1436         if (ia)
1437             UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
1438         else
1439             *dst = s;
1440     }
1441 }
1442
1443 static force_inline void
1444 combine_src (uint32_t s, uint32_t *dst)
1445 {
1446     *dst = s;
1447 }
1448
1449 static void
1450 fast_composite_scaled_nearest (pixman_implementation_t *imp,
1451                                pixman_composite_info_t *info)
1452 {
1453     PIXMAN_COMPOSITE_ARGS (info);
1454     uint32_t       *dst_line;
1455     uint32_t       *src_line;
1456     int             dst_stride, src_stride;
1457     int             src_width, src_height;
1458     pixman_repeat_t src_repeat;
1459     pixman_fixed_t unit_x, unit_y;
1460     pixman_format_code_t src_format;
1461     pixman_vector_t v;
1462     pixman_fixed_t vy;
1463
1464     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
1465     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
1466      * transformed from destination space to source space
1467      */
1468     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
1469
1470     /* reference point is the center of the pixel */
1471     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
1472     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
1473     v.vector[2] = pixman_fixed_1;
1474
1475     if (!pixman_transform_point_3d (src_image->common.transform, &v))
1476         return;
1477
1478     unit_x = src_image->common.transform->matrix[0][0];
1479     unit_y = src_image->common.transform->matrix[1][1];
1480
1481     /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
1482     v.vector[0] -= pixman_fixed_e;
1483     v.vector[1] -= pixman_fixed_e;
1484
1485     src_height = src_image->bits.height;
1486     src_width = src_image->bits.width;
1487     src_repeat = src_image->common.repeat;
1488     src_format = src_image->bits.format;
1489
1490     vy = v.vector[1];
1491     while (height--)
1492     {
1493         pixman_fixed_t vx = v.vector[0];
1494         int y = pixman_fixed_to_int (vy);
1495         uint32_t *dst = dst_line;
1496
1497         dst_line += dst_stride;
1498
1499         /* adjust the y location by a unit vector in the y direction
1500          * this is equivalent to transforming y+1 of the destination point to source space */
1501         vy += unit_y;
1502
1503         if (!repeat (src_repeat, &y, src_height))
1504         {
1505             if (op == PIXMAN_OP_SRC)
1506                 memset (dst, 0, sizeof (*dst) * width);
1507         }
1508         else
1509         {
1510             int w = width;
1511
1512             uint32_t *src = src_line + y * src_stride;
1513
1514             while (w >= 2)
1515             {
1516                 uint32_t s1, s2;
1517                 int x1, x2;
1518
1519                 x1 = pixman_fixed_to_int (vx);
1520                 vx += unit_x;
1521
1522                 x2 = pixman_fixed_to_int (vx);
1523                 vx += unit_x;
1524
1525                 w -= 2;
1526
1527                 s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
1528                 s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
1529
1530                 if (op == PIXMAN_OP_OVER)
1531                 {
1532                     combine_over (s1, dst++);
1533                     combine_over (s2, dst++);
1534                 }
1535                 else
1536                 {
1537                     combine_src (s1, dst++);
1538                     combine_src (s2, dst++);
1539                 }
1540             }
1541
1542             while (w--)
1543             {
1544                 uint32_t s;
1545                 int x;
1546
1547                 x = pixman_fixed_to_int (vx);
1548                 vx += unit_x;
1549
1550                 s = fetch_nearest (src_repeat, src_format, src, x, src_width);
1551
1552                 if (op == PIXMAN_OP_OVER)
1553                     combine_over (s, dst++);
1554                 else
1555                     combine_src (s, dst++);
1556             }
1557         }
1558     }
1559 }
1560
1561 #define CACHE_LINE_SIZE 64
1562
1563 #define FAST_SIMPLE_ROTATE(suffix, pix_type)                                  \
1564                                                                               \
1565 static void                                                                   \
1566 blt_rotated_90_trivial_##suffix (pix_type       *dst,                         \
1567                                  int             dst_stride,                  \
1568                                  const pix_type *src,                         \
1569                                  int             src_stride,                  \
1570                                  int             w,                           \
1571                                  int             h)                           \
1572 {                                                                             \
1573     int x, y;                                                                 \
1574     for (y = 0; y < h; y++)                                                   \
1575     {                                                                         \
1576         const pix_type *s = src + (h - y - 1);                                \
1577         pix_type *d = dst + dst_stride * y;                                   \
1578         for (x = 0; x < w; x++)                                               \
1579         {                                                                     \
1580             *d++ = *s;                                                        \
1581             s += src_stride;                                                  \
1582         }                                                                     \
1583     }                                                                         \
1584 }                                                                             \
1585                                                                               \
1586 static void                                                                   \
1587 blt_rotated_270_trivial_##suffix (pix_type       *dst,                        \
1588                                   int             dst_stride,                 \
1589                                   const pix_type *src,                        \
1590                                   int             src_stride,                 \
1591                                   int             w,                          \
1592                                   int             h)                          \
1593 {                                                                             \
1594     int x, y;                                                                 \
1595     for (y = 0; y < h; y++)                                                   \
1596     {                                                                         \
1597         const pix_type *s = src + src_stride * (w - 1) + y;                   \
1598         pix_type *d = dst + dst_stride * y;                                   \
1599         for (x = 0; x < w; x++)                                               \
1600         {                                                                     \
1601             *d++ = *s;                                                        \
1602             s -= src_stride;                                                  \
1603         }                                                                     \
1604     }                                                                         \
1605 }                                                                             \
1606                                                                               \
1607 static void                                                                   \
1608 blt_rotated_90_##suffix (pix_type       *dst,                                 \
1609                          int             dst_stride,                          \
1610                          const pix_type *src,                                 \
1611                          int             src_stride,                          \
1612                          int             W,                                   \
1613                          int             H)                                   \
1614 {                                                                             \
1615     int x;                                                                    \
1616     int leading_pixels = 0, trailing_pixels = 0;                              \
1617     const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
1618                                                                               \
1619     /*                                                                        \
1620      * split processing into handling destination as TILE_SIZExH cache line   \
1621      * aligned vertical stripes (optimistically assuming that destination     \
1622      * stride is a multiple of cache line, if not - it will be just a bit     \
1623      * slower)                                                                \
1624      */                                                                       \
1625                                                                               \
1626     if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
1627     {                                                                         \
1628         leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
1629                             (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
1630         if (leading_pixels > W)                                               \
1631             leading_pixels = W;                                               \
1632                                                                               \
1633         /* unaligned leading part NxH (where N < TILE_SIZE) */                \
1634         blt_rotated_90_trivial_##suffix (                                     \
1635             dst,                                                              \
1636             dst_stride,                                                       \
1637             src,                                                              \
1638             src_stride,                                                       \
1639             leading_pixels,                                                   \
1640             H);                                                               \
1641                                                                               \
1642         dst += leading_pixels;                                                \
1643         src += leading_pixels * src_stride;                                   \
1644         W -= leading_pixels;                                                  \
1645     }                                                                         \
1646                                                                               \
1647     if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
1648     {                                                                         \
1649         trailing_pixels = (((uintptr_t)(dst + W) &                            \
1650                             (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
1651         if (trailing_pixels > W)                                              \
1652             trailing_pixels = W;                                              \
1653         W -= trailing_pixels;                                                 \
1654     }                                                                         \
1655                                                                               \
1656     for (x = 0; x < W; x += TILE_SIZE)                                        \
1657     {                                                                         \
1658         /* aligned middle part TILE_SIZExH */                                 \
1659         blt_rotated_90_trivial_##suffix (                                     \
1660             dst + x,                                                          \
1661             dst_stride,                                                       \
1662             src + src_stride * x,                                             \
1663             src_stride,                                                       \
1664             TILE_SIZE,                                                        \
1665             H);                                                               \
1666     }                                                                         \
1667                                                                               \
1668     if (trailing_pixels)                                                      \
1669     {                                                                         \
1670         /* unaligned trailing part NxH (where N < TILE_SIZE) */               \
1671         blt_rotated_90_trivial_##suffix (                                     \
1672             dst + W,                                                          \
1673             dst_stride,                                                       \
1674             src + W * src_stride,                                             \
1675             src_stride,                                                       \
1676             trailing_pixels,                                                  \
1677             H);                                                               \
1678     }                                                                         \
1679 }                                                                             \
1680                                                                               \
1681 static void                                                                   \
1682 blt_rotated_270_##suffix (pix_type       *dst,                                \
1683                           int             dst_stride,                         \
1684                           const pix_type *src,                                \
1685                           int             src_stride,                         \
1686                           int             W,                                  \
1687                           int             H)                                  \
1688 {                                                                             \
1689     int x;                                                                    \
1690     int leading_pixels = 0, trailing_pixels = 0;                              \
1691     const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
1692                                                                               \
1693     /*                                                                        \
1694      * split processing into handling destination as TILE_SIZExH cache line   \
1695      * aligned vertical stripes (optimistically assuming that destination     \
1696      * stride is a multiple of cache line, if not - it will be just a bit     \
1697      * slower)                                                                \
1698      */                                                                       \
1699                                                                               \
1700     if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
1701     {                                                                         \
1702         leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
1703                             (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
1704         if (leading_pixels > W)                                               \
1705             leading_pixels = W;                                               \
1706                                                                               \
1707         /* unaligned leading part NxH (where N < TILE_SIZE) */                \
1708         blt_rotated_270_trivial_##suffix (                                    \
1709             dst,                                                              \
1710             dst_stride,                                                       \
1711             src + src_stride * (W - leading_pixels),                          \
1712             src_stride,                                                       \
1713             leading_pixels,                                                   \
1714             H);                                                               \
1715                                                                               \
1716         dst += leading_pixels;                                                \
1717         W -= leading_pixels;                                                  \
1718     }                                                                         \
1719                                                                               \
1720     if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
1721     {                                                                         \
1722         trailing_pixels = (((uintptr_t)(dst + W) &                            \
1723                             (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
1724         if (trailing_pixels > W)                                              \
1725             trailing_pixels = W;                                              \
1726         W -= trailing_pixels;                                                 \
1727         src += trailing_pixels * src_stride;                                  \
1728     }                                                                         \
1729                                                                               \
1730     for (x = 0; x < W; x += TILE_SIZE)                                        \
1731     {                                                                         \
1732         /* aligned middle part TILE_SIZExH */                                 \
1733         blt_rotated_270_trivial_##suffix (                                    \
1734             dst + x,                                                          \
1735             dst_stride,                                                       \
1736             src + src_stride * (W - x - TILE_SIZE),                           \
1737             src_stride,                                                       \
1738             TILE_SIZE,                                                        \
1739             H);                                                               \
1740     }                                                                         \
1741                                                                               \
1742     if (trailing_pixels)                                                      \
1743     {                                                                         \
1744         /* unaligned trailing part NxH (where N < TILE_SIZE) */               \
1745         blt_rotated_270_trivial_##suffix (                                    \
1746             dst + W,                                                          \
1747             dst_stride,                                                       \
1748             src - trailing_pixels * src_stride,                               \
1749             src_stride,                                                       \
1750             trailing_pixels,                                                  \
1751             H);                                                               \
1752     }                                                                         \
1753 }                                                                             \
1754                                                                               \
1755 static void                                                                   \
1756 fast_composite_rotate_90_##suffix (pixman_implementation_t *imp,              \
1757                                    pixman_composite_info_t *info)             \
1758 {                                                                             \
1759     PIXMAN_COMPOSITE_ARGS (info);                                             \
1760     pix_type       *dst_line;                                                 \
1761     pix_type       *src_line;                                                 \
1762     int             dst_stride, src_stride;                                   \
1763     int             src_x_t, src_y_t;                                         \
1764                                                                               \
1765     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
1766                            dst_stride, dst_line, 1);                          \
1767     src_x_t = -src_y + pixman_fixed_to_int (                                  \
1768                                 src_image->common.transform->matrix[0][2] +   \
1769                                 pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
1770     src_y_t = src_x + pixman_fixed_to_int (                                   \
1771                                 src_image->common.transform->matrix[1][2] +   \
1772                                 pixman_fixed_1 / 2 - pixman_fixed_e);         \
1773     PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
1774                            src_stride, src_line, 1);                          \
1775     blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride,      \
1776                              width, height);                                  \
1777 }                                                                             \
1778                                                                               \
1779 static void                                                                   \
1780 fast_composite_rotate_270_##suffix (pixman_implementation_t *imp,             \
1781                                     pixman_composite_info_t *info)            \
1782 {                                                                             \
1783     PIXMAN_COMPOSITE_ARGS (info);                                             \
1784     pix_type       *dst_line;                                                 \
1785     pix_type       *src_line;                                                 \
1786     int             dst_stride, src_stride;                                   \
1787     int             src_x_t, src_y_t;                                         \
1788                                                                               \
1789     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
1790                            dst_stride, dst_line, 1);                          \
1791     src_x_t = src_y + pixman_fixed_to_int (                                   \
1792                                 src_image->common.transform->matrix[0][2] +   \
1793                                 pixman_fixed_1 / 2 - pixman_fixed_e);         \
1794     src_y_t = -src_x + pixman_fixed_to_int (                                  \
1795                                 src_image->common.transform->matrix[1][2] +   \
1796                                 pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
1797     PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
1798                            src_stride, src_line, 1);                          \
1799     blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride,     \
1800                               width, height);                                 \
1801 }
1802
/*
 * Instantiate the rotation helpers once per pixel size: suffix 8 with
 * uint8_t pixels, suffix 565 with uint16_t pixels and suffix 8888 with
 * uint32_t pixels.  The c_fast_paths table refers to the resulting
 * fast_composite_rotate_90_<suffix> and fast_composite_rotate_270_<suffix>
 * functions.
 * NOTE(review): the #define line of FAST_SIMPLE_ROTATE is not visible in
 * this part of the file; confirm its parameter order is (suffix, pix_type).
 */
1803 FAST_SIMPLE_ROTATE (8, uint8_t)
1804 FAST_SIMPLE_ROTATE (565, uint16_t)
1805 FAST_SIMPLE_ROTATE (8888, uint32_t)
1806
/*
 * Table of the compositing fast paths implemented in this file.
 *
 * As the helper macros defined inside the table show, an entry consists of
 * the operator, the source format and flags, the mask format and flags, the
 * destination format and flags, and the function that performs the
 * operation.  The table ends with a PIXMAN_OP_NONE entry and is handed to
 * _pixman_implementation_create () by
 * _pixman_implementation_create_fast_path ().
 *
 * NOTE(review): entries are presumably matched in table order, so do not
 * reorder them without confirming the lookup rules.
 */
1807 static const pixman_fast_path_t c_fast_paths[] =
1808 {
1809     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
1810     PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
1811     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
1812     PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
1813     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
1814     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
1815     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
1816     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
1817     PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
1818     PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
1819     PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
1820     PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
1821     PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5,   fast_composite_over_n_1_0565),
1822     PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5,   fast_composite_over_n_1_0565),
1823     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
1824     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
1825     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
1826     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
1827     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
1828     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
1829     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
1830     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
1831     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
1832     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
1833     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
1834     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
1835     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
1836     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
1837     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
1838     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
1839     PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
1840     PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
1841     PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
1842     PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000),
1843     PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
1844     PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
1845     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
1846     PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
1847     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
1848     PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
1849     PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
1850     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
1851     PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
1852     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
1853     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
1854     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
1855     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
1856     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
1857     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
1858     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
1859     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
1860     PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
1861     PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
1862     PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
1863     PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
1864     PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
1865     PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
1866     PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
1867     PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
1868     PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
1869     PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
1870     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
1871     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
1872     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
1873     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
1874     PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
1875     PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
1876
    /*
     * Entries produced by the SIMPLE_NEAREST_FAST_PATH* macros, which are
     * defined outside this table.
     * NOTE(review): the last argument looks like a function-name suffix
     * (source size, then destination size); confirm against the macro
     * definitions.
     */
1877     SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
1878     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
1879     SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
1880     SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
1881
1882     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
1883     SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
1884
1885     SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
1886     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),
1887
1888     SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
1889
1890     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
1891     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
1892     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
1893     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
1894     SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
1895     SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
1896
1897     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
1898     SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
1899     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
1900     SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),
1901
1902     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
1903
    /*
     * Entry for the general nearest scaler: source format s with
     * SCALED_NEAREST_FLAGS, no mask, destination format d with the standard
     * destination flags, all handled by fast_composite_scaled_nearest.
     */
1904 #define NEAREST_FAST_PATH(op,s,d)               \
1905     {   PIXMAN_OP_ ## op,                       \
1906         PIXMAN_ ## s, SCALED_NEAREST_FLAGS,     \
1907         PIXMAN_null, 0,                         \
1908         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1909         fast_composite_scaled_nearest,          \
1910     }
1911
1912     NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
1913     NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
1914     NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
1915     NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
1916
1917     NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
1918     NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
1919     NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
1920     NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
1921
1922     NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
1923     NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
1924     NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
1925     NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
1926
1927     NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
1928     NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
1929     NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
1930     NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
1931
    /*
     * Source flags used by the rotation entries: the ROTATE_<angle>
     * transform flag combined with the nearest-filter, samples-cover-clip
     * and standard flags.
     */
1932 #define SIMPLE_ROTATE_FLAGS(angle)                                        \
1933     (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM   |                         \
1934      FAST_PATH_NEAREST_FILTER                   |                         \
1935      FAST_PATH_SAMPLES_COVER_CLIP_NEAREST       |                         \
1936      FAST_PATH_STANDARD_FLAGS)
1937
    /*
     * Expands to two table entries, one for the 90 degree and one for the
     * 270 degree transform, both without a mask.  The suffix selects the
     * fast_composite_rotate_90_<suffix> / fast_composite_rotate_270_<suffix>
     * pair.
     */
1938 #define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix)                            \
1939     {   PIXMAN_OP_ ## op,                                                 \
1940         PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90),                           \
1941         PIXMAN_null, 0,                                                   \
1942         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                           \
1943         fast_composite_rotate_90_##suffix,                                \
1944     },                                                                    \
1945     {   PIXMAN_OP_ ## op,                                                 \
1946         PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270),                          \
1947         PIXMAN_null, 0,                                                   \
1948         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                           \
1949         fast_composite_rotate_270_##suffix,                               \
1950     }
1951
1952     SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
1953     SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
1954     SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
1955     SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
1956     SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
1957
1958     /* Simple repeat fast path entry. */
1959     {   PIXMAN_OP_any,
1960         PIXMAN_any,
1961         (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
1962          FAST_PATH_NORMAL_REPEAT),
1963         PIXMAN_any, 0,
1964         PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
1965         fast_composite_tiled_repeat
1966     },
1967
    /* End-of-table marker. */
1968     {   PIXMAN_OP_NONE  },
1969 };
1970
/*
 * A1_FILL_MASK (n, offs): 32-bit mask with n consecutive bits set, covering
 * pixels offs .. offs + n - 1 of one word of a1 data.  On big-endian builds
 * pixel 0 is the most significant bit of the word, otherwise it is the
 * least significant bit.
 *
 * The mask is built from an unsigned 32-bit one so that n == 31 and masks
 * that reach bit 31 (n + offs == 32) do not shift into the sign bit of an
 * int, which is undefined behaviour.
 *
 * Requirements: 0 <= n <= 31, offs >= 0, n + offs <= 32, and the resulting
 * shift count must be below 32.  Both arguments are evaluated more than
 * once, so they must be free of side effects.
 */
#ifdef WORDS_BIGENDIAN
#define A1_FILL_MASK(n, offs) ((((uint32_t) 1 << (n)) - 1) << (32 - (offs) - (n)))
#else
#define A1_FILL_MASK(n, offs) ((((uint32_t) 1 << (n)) - 1) << (offs))
#endif
1976
1977 static force_inline void
1978 pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
1979 {
1980     if (offs)
1981     {
1982         int leading_pixels = 32 - offs;
1983         if (leading_pixels >= width)
1984         {
1985             if (v)
1986                 *dst |= A1_FILL_MASK (width, offs);
1987             else
1988                 *dst &= ~A1_FILL_MASK (width, offs);
1989             return;
1990         }
1991         else
1992         {
1993             if (v)
1994                 *dst++ |= A1_FILL_MASK (leading_pixels, offs);
1995             else
1996                 *dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
1997             width -= leading_pixels;
1998         }
1999     }
2000     while (width >= 32)
2001     {
2002         if (v)
2003             *dst++ = 0xFFFFFFFF;
2004         else
2005             *dst++ = 0;
2006         width -= 32;
2007     }
2008     if (width > 0)
2009     {
2010         if (v)
2011             *dst |= A1_FILL_MASK (width, 0);
2012         else
2013             *dst &= ~A1_FILL_MASK (width, 0);
2014     }
2015 }
2016
/*
 * Fill a rectangle of 1 bpp pixels.
 *
 * bits   - start of the pixel data
 * stride - distance between scanlines, in uint32_t units
 * x, y   - top-left corner of the rectangle, in pixels
 * width  - rectangle width in pixels
 * height - rectangle height in scanlines
 * xor    - fill value; only bit 0 is used (set or clear the pixels)
 *
 * Empty or negative rectangles are ignored.  Without this check a negative
 * height keeps `while (height--)' running past the rectangle and a negative
 * width ends up as a negative shift count in A1_FILL_MASK.
 */
static void
pixman_fill1 (uint32_t *bits,
              int       stride,
              int       x,
              int       y,
              int       width,
              int       height,
              uint32_t  xor)
{
    uint32_t *dst;
    int offs;

    if (width <= 0 || height <= 0)
        return;

    /* 32 pixels per word: x >> 5 selects the word, x & 31 the bit in it. */
    dst = bits + y * stride + (x >> 5);
    offs = x & 31;

    /*
     * Two separate loops so that the inlined line filler is called with a
     * constant value argument in each of them.
     */
    if (xor & 1)
    {
        while (height--)
        {
            pixman_fill1_line (dst, offs, width, 1);
            dst += stride;
        }
    }
    else
    {
        while (height--)
        {
            pixman_fill1_line (dst, offs, width, 0);
            dst += stride;
        }
    }
}
2046
/*
 * Fill a rectangle of 8 bpp pixels with the low 8 bits of `xor'.
 *
 * bits   - start of the pixel data
 * stride - distance between scanlines, in uint32_t units
 * x, y   - top-left corner of the rectangle, in pixels
 * width  - rectangle width in pixels
 * height - rectangle height in scanlines
 *
 * Empty or negative rectangles are ignored; a negative height would
 * otherwise keep `while (height--)' running past the rectangle.
 */
static void
pixman_fill8 (uint32_t *bits,
              int       stride,
              int       x,
              int       y,
              int       width,
              int       height,
              uint32_t xor)
{
    int byte_stride = stride * (int) sizeof (uint32_t);
    uint8_t *dst = (uint8_t *) bits;
    uint8_t v = xor & 0xff;

    if (width <= 0 || height <= 0)
        return;

    dst = dst + y * byte_stride + x;

    while (height--)
    {
        /* One pixel is one byte, so a scanline is a plain memset. */
        memset (dst, v, (size_t) width);

        dst += byte_stride;
    }
}
2071
/*
 * Fill a rectangle of 16 bpp pixels with the low 16 bits of `xor'.
 *
 * bits   - start of the pixel data
 * stride - distance between scanlines, in uint32_t units
 * x, y   - top-left corner of the rectangle, in pixels
 * width  - rectangle width in pixels
 * height - rectangle height in scanlines
 *
 * Empty or negative rectangles are ignored; a negative height would
 * otherwise keep `while (height--)' running past the rectangle.
 */
static void
pixman_fill16 (uint32_t *bits,
               int       stride,
               int       x,
               int       y,
               int       width,
               int       height,
               uint32_t xor)
{
    /* The stride is given in 32-bit words; convert it to 16-bit pixels. */
    int short_stride =
        (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
    uint16_t *dst = (uint16_t *)bits;
    uint16_t v = xor & 0xffff;
    int i;

    if (width <= 0 || height <= 0)
        return;

    dst = dst + y * short_stride + x;

    while (height--)
    {
        for (i = 0; i < width; ++i)
            dst[i] = v;

        dst += short_stride;
    }
}
2097
/*
 * Fill a rectangle of 32 bpp pixels with the value `xor'.
 *
 * bits   - start of the pixel data
 * stride - distance between scanlines, in uint32_t units (one per pixel)
 * x, y   - top-left corner of the rectangle, in pixels
 * width  - rectangle width in pixels
 * height - rectangle height in scanlines
 *
 * Empty or negative rectangles are ignored; a negative height would
 * otherwise keep `while (height--)' running past the rectangle.
 */
static void
pixman_fill32 (uint32_t *bits,
               int       stride,
               int       x,
               int       y,
               int       width,
               int       height,
               uint32_t  xor)
{
    int i;

    if (width <= 0 || height <= 0)
        return;

    bits = bits + y * stride + x;

    while (height--)
    {
        for (i = 0; i < width; ++i)
            bits[i] = xor;

        bits += stride;
    }
}
2119
2120 static pixman_bool_t
2121 fast_path_fill (pixman_implementation_t *imp,
2122                 uint32_t *               bits,
2123                 int                      stride,
2124                 int                      bpp,
2125                 int                      x,
2126                 int                      y,
2127                 int                      width,
2128                 int                      height,
2129                 uint32_t                 xor)
2130 {
2131     switch (bpp)
2132     {
2133     case 1:
2134         pixman_fill1 (bits, stride, x, y, width, height, xor);
2135         break;
2136
2137     case 8:
2138         pixman_fill8 (bits, stride, x, y, width, height, xor);
2139         break;
2140
2141     case 16:
2142         pixman_fill16 (bits, stride, x, y, width, height, xor);
2143         break;
2144
2145     case 32:
2146         pixman_fill32 (bits, stride, x, y, width, height, xor);
2147         break;
2148
2149     default:
2150         return _pixman_implementation_fill (
2151             imp->delegate, bits, stride, bpp, x, y, width, height, xor);
2152         break;
2153     }
2154
2155     return TRUE;
2156 }
2157
2158 pixman_implementation_t *
2159 _pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
2160 {
2161     pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
2162
2163     imp->fill = fast_path_fill;
2164
2165     return imp;
2166 }