mmx: fix formats in commented code
[profile/ivi/pixman.git] / pixman / pixman-fast-path.c
1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3  * Copyright © 2000 SuSE, Inc.
4  * Copyright © 2007 Red Hat, Inc.
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of SuSE not be used in advertising or
11  * publicity pertaining to distribution of the software without specific,
12  * written prior permission.  SuSE makes no representations about the
13  * suitability of this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  *
16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  * Author:  Keith Packard, SuSE, Inc.
24  */
25
26 #ifdef HAVE_CONFIG_H
27 #include <config.h>
28 #endif
29 #include <string.h>
30 #include <stdlib.h>
31 #include "pixman-private.h"
32 #include "pixman-combine32.h"
33 #include "pixman-inlines.h"
34
35 static force_inline uint32_t
36 fetch_24 (uint8_t *a)
37 {
38     if (((unsigned long)a) & 1)
39     {
40 #ifdef WORDS_BIGENDIAN
41         return (*a << 16) | (*(uint16_t *)(a + 1));
42 #else
43         return *a | (*(uint16_t *)(a + 1) << 8);
44 #endif
45     }
46     else
47     {
48 #ifdef WORDS_BIGENDIAN
49         return (*(uint16_t *)a << 8) | *(a + 2);
50 #else
51         return *(uint16_t *)a | (*(a + 2) << 16);
52 #endif
53     }
54 }
55
56 static force_inline void
57 store_24 (uint8_t *a,
58           uint32_t v)
59 {
60     if (((unsigned long)a) & 1)
61     {
62 #ifdef WORDS_BIGENDIAN
63         *a = (uint8_t) (v >> 16);
64         *(uint16_t *)(a + 1) = (uint16_t) (v);
65 #else
66         *a = (uint8_t) (v);
67         *(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
68 #endif
69     }
70     else
71     {
72 #ifdef WORDS_BIGENDIAN
73         *(uint16_t *)a = (uint16_t)(v >> 8);
74         *(a + 2) = (uint8_t)v;
75 #else
76         *(uint16_t *)a = (uint16_t)v;
77         *(a + 2) = (uint8_t)(v >> 16);
78 #endif
79     }
80 }
81
82 static force_inline uint32_t
83 over (uint32_t src,
84       uint32_t dest)
85 {
86     uint32_t a = ~src >> 24;
87
88     UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
89
90     return dest;
91 }
92
93 static uint32_t
94 in (uint32_t x,
95     uint8_t  y)
96 {
97     uint16_t a = y;
98
99     UN8x4_MUL_UN8 (x, a);
100
101     return x;
102 }
103
104 /*
105  * Naming convention:
106  *
107  *  op_src_mask_dest
108  */
109 static void
110 fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
111                                  pixman_composite_info_t *info)
112 {
113     PIXMAN_COMPOSITE_ARGS (info);
114     uint32_t    *src, *src_line;
115     uint32_t    *dst, *dst_line;
116     uint8_t     *mask, *mask_line;
117     int src_stride, mask_stride, dst_stride;
118     uint8_t m;
119     uint32_t s, d;
120     int32_t w;
121
122     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
123     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
124     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
125
126     while (height--)
127     {
128         src = src_line;
129         src_line += src_stride;
130         dst = dst_line;
131         dst_line += dst_stride;
132         mask = mask_line;
133         mask_line += mask_stride;
134
135         w = width;
136         while (w--)
137         {
138             m = *mask++;
139             if (m)
140             {
141                 s = *src | 0xff000000;
142
143                 if (m == 0xff)
144                 {
145                     *dst = s;
146                 }
147                 else
148                 {
149                     d = in (s, m);
150                     *dst = over (d, *dst);
151                 }
152             }
153             src++;
154             dst++;
155         }
156     }
157 }
158
159 static void
160 fast_composite_in_n_8_8 (pixman_implementation_t *imp,
161                          pixman_composite_info_t *info)
162 {
163     PIXMAN_COMPOSITE_ARGS (info);
164     uint32_t src, srca;
165     uint8_t     *dst_line, *dst;
166     uint8_t     *mask_line, *mask, m;
167     int dst_stride, mask_stride;
168     int32_t w;
169     uint16_t t;
170
171     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
172
173     srca = src >> 24;
174
175     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
176     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
177
178     if (srca == 0xff)
179     {
180         while (height--)
181         {
182             dst = dst_line;
183             dst_line += dst_stride;
184             mask = mask_line;
185             mask_line += mask_stride;
186             w = width;
187
188             while (w--)
189             {
190                 m = *mask++;
191
192                 if (m == 0)
193                     *dst = 0;
194                 else if (m != 0xff)
195                     *dst = MUL_UN8 (m, *dst, t);
196
197                 dst++;
198             }
199         }
200     }
201     else
202     {
203         while (height--)
204         {
205             dst = dst_line;
206             dst_line += dst_stride;
207             mask = mask_line;
208             mask_line += mask_stride;
209             w = width;
210
211             while (w--)
212             {
213                 m = *mask++;
214                 m = MUL_UN8 (m, srca, t);
215
216                 if (m == 0)
217                     *dst = 0;
218                 else if (m != 0xff)
219                     *dst = MUL_UN8 (m, *dst, t);
220
221                 dst++;
222             }
223         }
224     }
225 }
226
227 static void
228 fast_composite_in_8_8 (pixman_implementation_t *imp,
229                        pixman_composite_info_t *info)
230 {
231     PIXMAN_COMPOSITE_ARGS (info);
232     uint8_t     *dst_line, *dst;
233     uint8_t     *src_line, *src;
234     int dst_stride, src_stride;
235     int32_t w;
236     uint8_t s;
237     uint16_t t;
238
239     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
240     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
241
242     while (height--)
243     {
244         dst = dst_line;
245         dst_line += dst_stride;
246         src = src_line;
247         src_line += src_stride;
248         w = width;
249
250         while (w--)
251         {
252             s = *src++;
253
254             if (s == 0)
255                 *dst = 0;
256             else if (s != 0xff)
257                 *dst = MUL_UN8 (s, *dst, t);
258
259             dst++;
260         }
261     }
262 }
263
264 static void
265 fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
266                               pixman_composite_info_t *info)
267 {
268     PIXMAN_COMPOSITE_ARGS (info);
269     uint32_t src, srca;
270     uint32_t    *dst_line, *dst, d;
271     uint8_t     *mask_line, *mask, m;
272     int dst_stride, mask_stride;
273     int32_t w;
274
275     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
276
277     srca = src >> 24;
278     if (src == 0)
279         return;
280
281     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
282     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
283
284     while (height--)
285     {
286         dst = dst_line;
287         dst_line += dst_stride;
288         mask = mask_line;
289         mask_line += mask_stride;
290         w = width;
291
292         while (w--)
293         {
294             m = *mask++;
295             if (m == 0xff)
296             {
297                 if (srca == 0xff)
298                     *dst = src;
299                 else
300                     *dst = over (src, *dst);
301             }
302             else if (m)
303             {
304                 d = in (src, m);
305                 *dst = over (d, *dst);
306             }
307             dst++;
308         }
309     }
310 }
311
312 static void
313 fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
314                                    pixman_composite_info_t *info)
315 {
316     PIXMAN_COMPOSITE_ARGS (info);
317     uint32_t src, s;
318     uint32_t    *dst_line, *dst, d;
319     uint32_t    *mask_line, *mask, ma;
320     int dst_stride, mask_stride;
321     int32_t w;
322
323     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
324
325     if (src == 0)
326         return;
327
328     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
329     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
330
331     while (height--)
332     {
333         dst = dst_line;
334         dst_line += dst_stride;
335         mask = mask_line;
336         mask_line += mask_stride;
337         w = width;
338
339         while (w--)
340         {
341             ma = *mask++;
342
343             if (ma)
344             {
345                 d = *dst;
346                 s = src;
347
348                 UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
349
350                 *dst = s;
351             }
352
353             dst++;
354         }
355     }
356 }
357
358 static void
359 fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
360                                     pixman_composite_info_t *info)
361 {
362     PIXMAN_COMPOSITE_ARGS (info);
363     uint32_t src, srca, s;
364     uint32_t    *dst_line, *dst, d;
365     uint32_t    *mask_line, *mask, ma;
366     int dst_stride, mask_stride;
367     int32_t w;
368
369     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
370
371     srca = src >> 24;
372     if (src == 0)
373         return;
374
375     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
376     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
377
378     while (height--)
379     {
380         dst = dst_line;
381         dst_line += dst_stride;
382         mask = mask_line;
383         mask_line += mask_stride;
384         w = width;
385
386         while (w--)
387         {
388             ma = *mask++;
389             if (ma == 0xffffffff)
390             {
391                 if (srca == 0xff)
392                     *dst = src;
393                 else
394                     *dst = over (src, *dst);
395             }
396             else if (ma)
397             {
398                 d = *dst;
399                 s = src;
400
401                 UN8x4_MUL_UN8x4 (s, ma);
402                 UN8x4_MUL_UN8 (ma, srca);
403                 ma = ~ma;
404                 UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
405
406                 *dst = d;
407             }
408
409             dst++;
410         }
411     }
412 }
413
414 static void
415 fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
416                               pixman_composite_info_t *info)
417 {
418     PIXMAN_COMPOSITE_ARGS (info);
419     uint32_t src, srca;
420     uint8_t     *dst_line, *dst;
421     uint32_t d;
422     uint8_t     *mask_line, *mask, m;
423     int dst_stride, mask_stride;
424     int32_t w;
425
426     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
427
428     srca = src >> 24;
429     if (src == 0)
430         return;
431
432     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
433     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
434
435     while (height--)
436     {
437         dst = dst_line;
438         dst_line += dst_stride;
439         mask = mask_line;
440         mask_line += mask_stride;
441         w = width;
442
443         while (w--)
444         {
445             m = *mask++;
446             if (m == 0xff)
447             {
448                 if (srca == 0xff)
449                 {
450                     d = src;
451                 }
452                 else
453                 {
454                     d = fetch_24 (dst);
455                     d = over (src, d);
456                 }
457                 store_24 (dst, d);
458             }
459             else if (m)
460             {
461                 d = over (in (src, m), fetch_24 (dst));
462                 store_24 (dst, d);
463             }
464             dst += 3;
465         }
466     }
467 }
468
469 static void
470 fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
471                               pixman_composite_info_t *info)
472 {
473     PIXMAN_COMPOSITE_ARGS (info);
474     uint32_t src, srca;
475     uint16_t    *dst_line, *dst;
476     uint32_t d;
477     uint8_t     *mask_line, *mask, m;
478     int dst_stride, mask_stride;
479     int32_t w;
480
481     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
482
483     srca = src >> 24;
484     if (src == 0)
485         return;
486
487     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
488     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
489
490     while (height--)
491     {
492         dst = dst_line;
493         dst_line += dst_stride;
494         mask = mask_line;
495         mask_line += mask_stride;
496         w = width;
497
498         while (w--)
499         {
500             m = *mask++;
501             if (m == 0xff)
502             {
503                 if (srca == 0xff)
504                 {
505                     d = src;
506                 }
507                 else
508                 {
509                     d = *dst;
510                     d = over (src, CONVERT_0565_TO_0888 (d));
511                 }
512                 *dst = CONVERT_8888_TO_0565 (d);
513             }
514             else if (m)
515             {
516                 d = *dst;
517                 d = over (in (src, m), CONVERT_0565_TO_0888 (d));
518                 *dst = CONVERT_8888_TO_0565 (d);
519             }
520             dst++;
521         }
522     }
523 }
524
525 static void
526 fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
527                                     pixman_composite_info_t *info)
528 {
529     PIXMAN_COMPOSITE_ARGS (info);
530     uint32_t  src, srca, s;
531     uint16_t  src16;
532     uint16_t *dst_line, *dst;
533     uint32_t  d;
534     uint32_t *mask_line, *mask, ma;
535     int dst_stride, mask_stride;
536     int32_t w;
537
538     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
539
540     srca = src >> 24;
541     if (src == 0)
542         return;
543
544     src16 = CONVERT_8888_TO_0565 (src);
545
546     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
547     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
548
549     while (height--)
550     {
551         dst = dst_line;
552         dst_line += dst_stride;
553         mask = mask_line;
554         mask_line += mask_stride;
555         w = width;
556
557         while (w--)
558         {
559             ma = *mask++;
560             if (ma == 0xffffffff)
561             {
562                 if (srca == 0xff)
563                 {
564                     *dst = src16;
565                 }
566                 else
567                 {
568                     d = *dst;
569                     d = over (src, CONVERT_0565_TO_0888 (d));
570                     *dst = CONVERT_8888_TO_0565 (d);
571                 }
572             }
573             else if (ma)
574             {
575                 d = *dst;
576                 d = CONVERT_0565_TO_0888 (d);
577
578                 s = src;
579
580                 UN8x4_MUL_UN8x4 (s, ma);
581                 UN8x4_MUL_UN8 (ma, srca);
582                 ma = ~ma;
583                 UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
584
585                 *dst = CONVERT_8888_TO_0565 (d);
586             }
587             dst++;
588         }
589     }
590 }
591
592 static void
593 fast_composite_over_8888_8888 (pixman_implementation_t *imp,
594                                pixman_composite_info_t *info)
595 {
596     PIXMAN_COMPOSITE_ARGS (info);
597     uint32_t    *dst_line, *dst;
598     uint32_t    *src_line, *src, s;
599     int dst_stride, src_stride;
600     uint8_t a;
601     int32_t w;
602
603     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
604     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
605
606     while (height--)
607     {
608         dst = dst_line;
609         dst_line += dst_stride;
610         src = src_line;
611         src_line += src_stride;
612         w = width;
613
614         while (w--)
615         {
616             s = *src++;
617             a = s >> 24;
618             if (a == 0xff)
619                 *dst = s;
620             else if (s)
621                 *dst = over (s, *dst);
622             dst++;
623         }
624     }
625 }
626
627 static void
628 fast_composite_src_x888_8888 (pixman_implementation_t *imp,
629                               pixman_composite_info_t *info)
630 {
631     PIXMAN_COMPOSITE_ARGS (info);
632     uint32_t    *dst_line, *dst;
633     uint32_t    *src_line, *src;
634     int dst_stride, src_stride;
635     int32_t w;
636
637     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
638     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
639
640     while (height--)
641     {
642         dst = dst_line;
643         dst_line += dst_stride;
644         src = src_line;
645         src_line += src_stride;
646         w = width;
647
648         while (w--)
649             *dst++ = (*src++) | 0xff000000;
650     }
651 }
652
653 #if 0
654 static void
655 fast_composite_over_8888_0888 (pixman_implementation_t *imp,
656                                pixman_composite_info_t *info)
657 {
658     PIXMAN_COMPOSITE_ARGS (info);
659     uint8_t     *dst_line, *dst;
660     uint32_t d;
661     uint32_t    *src_line, *src, s;
662     uint8_t a;
663     int dst_stride, src_stride;
664     int32_t w;
665
666     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
667     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
668
669     while (height--)
670     {
671         dst = dst_line;
672         dst_line += dst_stride;
673         src = src_line;
674         src_line += src_stride;
675         w = width;
676
677         while (w--)
678         {
679             s = *src++;
680             a = s >> 24;
681             if (a)
682             {
683                 if (a == 0xff)
684                     d = s;
685                 else
686                     d = over (s, fetch_24 (dst));
687
688                 store_24 (dst, d);
689             }
690             dst += 3;
691         }
692     }
693 }
694 #endif
695
696 static void
697 fast_composite_over_8888_0565 (pixman_implementation_t *imp,
698                                pixman_composite_info_t *info)
699 {
700     PIXMAN_COMPOSITE_ARGS (info);
701     uint16_t    *dst_line, *dst;
702     uint32_t d;
703     uint32_t    *src_line, *src, s;
704     uint8_t a;
705     int dst_stride, src_stride;
706     int32_t w;
707
708     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
709     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
710
711     while (height--)
712     {
713         dst = dst_line;
714         dst_line += dst_stride;
715         src = src_line;
716         src_line += src_stride;
717         w = width;
718
719         while (w--)
720         {
721             s = *src++;
722             a = s >> 24;
723             if (s)
724             {
725                 if (a == 0xff)
726                 {
727                     d = s;
728                 }
729                 else
730                 {
731                     d = *dst;
732                     d = over (s, CONVERT_0565_TO_0888 (d));
733                 }
734                 *dst = CONVERT_8888_TO_0565 (d);
735             }
736             dst++;
737         }
738     }
739 }
740
741 static void
742 fast_composite_src_x888_0565 (pixman_implementation_t *imp,
743                               pixman_composite_info_t *info)
744 {
745     PIXMAN_COMPOSITE_ARGS (info);
746     uint16_t    *dst_line, *dst;
747     uint32_t    *src_line, *src, s;
748     int dst_stride, src_stride;
749     int32_t w;
750
751     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
752     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
753
754     while (height--)
755     {
756         dst = dst_line;
757         dst_line += dst_stride;
758         src = src_line;
759         src_line += src_stride;
760         w = width;
761
762         while (w--)
763         {
764             s = *src++;
765             *dst = CONVERT_8888_TO_0565 (s);
766             dst++;
767         }
768     }
769 }
770
771 static void
772 fast_composite_add_8_8 (pixman_implementation_t *imp,
773                         pixman_composite_info_t *info)
774 {
775     PIXMAN_COMPOSITE_ARGS (info);
776     uint8_t     *dst_line, *dst;
777     uint8_t     *src_line, *src;
778     int dst_stride, src_stride;
779     int32_t w;
780     uint8_t s, d;
781     uint16_t t;
782
783     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
784     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
785
786     while (height--)
787     {
788         dst = dst_line;
789         dst_line += dst_stride;
790         src = src_line;
791         src_line += src_stride;
792         w = width;
793
794         while (w--)
795         {
796             s = *src++;
797             if (s)
798             {
799                 if (s != 0xff)
800                 {
801                     d = *dst;
802                     t = d + s;
803                     s = t | (0 - (t >> 8));
804                 }
805                 *dst = s;
806             }
807             dst++;
808         }
809     }
810 }
811
812 static void
813 fast_composite_add_8888_8888 (pixman_implementation_t *imp,
814                               pixman_composite_info_t *info)
815 {
816     PIXMAN_COMPOSITE_ARGS (info);
817     uint32_t    *dst_line, *dst;
818     uint32_t    *src_line, *src;
819     int dst_stride, src_stride;
820     int32_t w;
821     uint32_t s, d;
822
823     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
824     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
825
826     while (height--)
827     {
828         dst = dst_line;
829         dst_line += dst_stride;
830         src = src_line;
831         src_line += src_stride;
832         w = width;
833
834         while (w--)
835         {
836             s = *src++;
837             if (s)
838             {
839                 if (s != 0xffffffff)
840                 {
841                     d = *dst;
842                     if (d)
843                         UN8x4_ADD_UN8x4 (s, d);
844                 }
845                 *dst = s;
846             }
847             dst++;
848         }
849     }
850 }
851
852 static void
853 fast_composite_add_n_8_8 (pixman_implementation_t *imp,
854                           pixman_composite_info_t *info)
855 {
856     PIXMAN_COMPOSITE_ARGS (info);
857     uint8_t     *dst_line, *dst;
858     uint8_t     *mask_line, *mask;
859     int dst_stride, mask_stride;
860     int32_t w;
861     uint32_t src;
862     uint8_t sa;
863
864     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
865     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
866     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
867     sa = (src >> 24);
868
869     while (height--)
870     {
871         dst = dst_line;
872         dst_line += dst_stride;
873         mask = mask_line;
874         mask_line += mask_stride;
875         w = width;
876
877         while (w--)
878         {
879             uint16_t tmp;
880             uint16_t a;
881             uint32_t m, d;
882             uint32_t r;
883
884             a = *mask++;
885             d = *dst;
886
887             m = MUL_UN8 (sa, a, tmp);
888             r = ADD_UN8 (m, d, tmp);
889
890             *dst++ = r;
891         }
892     }
893 }
894
895 #ifdef WORDS_BIGENDIAN
896 #define CREATE_BITMASK(n) (0x80000000 >> (n))
897 #define UPDATE_BITMASK(n) ((n) >> 1)
898 #else
899 #define CREATE_BITMASK(n) (1 << (n))
900 #define UPDATE_BITMASK(n) ((n) << 1)
901 #endif
902
903 #define TEST_BIT(p, n)                                  \
904     (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
905 #define SET_BIT(p, n)                                                   \
906     do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0);
907
908 static void
909 fast_composite_add_1000_1000 (pixman_implementation_t *imp,
910                               pixman_composite_info_t *info)
911 {
912     PIXMAN_COMPOSITE_ARGS (info);
913     uint32_t     *dst_line, *dst;
914     uint32_t     *src_line, *src;
915     int           dst_stride, src_stride;
916     int32_t       w;
917
918     PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
919                            src_stride, src_line, 1);
920     PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t,
921                            dst_stride, dst_line, 1);
922
923     while (height--)
924     {
925         dst = dst_line;
926         dst_line += dst_stride;
927         src = src_line;
928         src_line += src_stride;
929         w = width;
930
931         while (w--)
932         {
933             /*
934              * TODO: improve performance by processing uint32_t data instead
935              *       of individual bits
936              */
937             if (TEST_BIT (src, src_x + w))
938                 SET_BIT (dst, dest_x + w);
939         }
940     }
941 }
942
943 static void
944 fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
945                               pixman_composite_info_t *info)
946 {
947     PIXMAN_COMPOSITE_ARGS (info);
948     uint32_t     src, srca;
949     uint32_t    *dst, *dst_line;
950     uint32_t    *mask, *mask_line;
951     int          mask_stride, dst_stride;
952     uint32_t     bitcache, bitmask;
953     int32_t      w;
954
955     if (width <= 0)
956         return;
957
958     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
959     srca = src >> 24;
960     if (src == 0)
961         return;
962
963     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
964                            dst_stride, dst_line, 1);
965     PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
966                            mask_stride, mask_line, 1);
967     mask_line += mask_x >> 5;
968
969     if (srca == 0xff)
970     {
971         while (height--)
972         {
973             dst = dst_line;
974             dst_line += dst_stride;
975             mask = mask_line;
976             mask_line += mask_stride;
977             w = width;
978
979             bitcache = *mask++;
980             bitmask = CREATE_BITMASK (mask_x & 31);
981
982             while (w--)
983             {
984                 if (bitmask == 0)
985                 {
986                     bitcache = *mask++;
987                     bitmask = CREATE_BITMASK (0);
988                 }
989                 if (bitcache & bitmask)
990                     *dst = src;
991                 bitmask = UPDATE_BITMASK (bitmask);
992                 dst++;
993             }
994         }
995     }
996     else
997     {
998         while (height--)
999         {
1000             dst = dst_line;
1001             dst_line += dst_stride;
1002             mask = mask_line;
1003             mask_line += mask_stride;
1004             w = width;
1005
1006             bitcache = *mask++;
1007             bitmask = CREATE_BITMASK (mask_x & 31);
1008
1009             while (w--)
1010             {
1011                 if (bitmask == 0)
1012                 {
1013                     bitcache = *mask++;
1014                     bitmask = CREATE_BITMASK (0);
1015                 }
1016                 if (bitcache & bitmask)
1017                     *dst = over (src, *dst);
1018                 bitmask = UPDATE_BITMASK (bitmask);
1019                 dst++;
1020             }
1021         }
1022     }
1023 }
1024
1025 static void
1026 fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
1027                               pixman_composite_info_t *info)
1028 {
1029     PIXMAN_COMPOSITE_ARGS (info);
1030     uint32_t     src, srca;
1031     uint16_t    *dst, *dst_line;
1032     uint32_t    *mask, *mask_line;
1033     int          mask_stride, dst_stride;
1034     uint32_t     bitcache, bitmask;
1035     int32_t      w;
1036     uint32_t     d;
1037     uint16_t     src565;
1038
1039     if (width <= 0)
1040         return;
1041
1042     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
1043     srca = src >> 24;
1044     if (src == 0)
1045         return;
1046
1047     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t,
1048                            dst_stride, dst_line, 1);
1049     PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
1050                            mask_stride, mask_line, 1);
1051     mask_line += mask_x >> 5;
1052
1053     if (srca == 0xff)
1054     {
1055         src565 = CONVERT_8888_TO_0565 (src);
1056         while (height--)
1057         {
1058             dst = dst_line;
1059             dst_line += dst_stride;
1060             mask = mask_line;
1061             mask_line += mask_stride;
1062             w = width;
1063
1064             bitcache = *mask++;
1065             bitmask = CREATE_BITMASK (mask_x & 31);
1066
1067             while (w--)
1068             {
1069                 if (bitmask == 0)
1070                 {
1071                     bitcache = *mask++;
1072                     bitmask = CREATE_BITMASK (0);
1073                 }
1074                 if (bitcache & bitmask)
1075                     *dst = src565;
1076                 bitmask = UPDATE_BITMASK (bitmask);
1077                 dst++;
1078             }
1079         }
1080     }
1081     else
1082     {
1083         while (height--)
1084         {
1085             dst = dst_line;
1086             dst_line += dst_stride;
1087             mask = mask_line;
1088             mask_line += mask_stride;
1089             w = width;
1090
1091             bitcache = *mask++;
1092             bitmask = CREATE_BITMASK (mask_x & 31);
1093
1094             while (w--)
1095             {
1096                 if (bitmask == 0)
1097                 {
1098                     bitcache = *mask++;
1099                     bitmask = CREATE_BITMASK (0);
1100                 }
1101                 if (bitcache & bitmask)
1102                 {
1103                     d = over (src, CONVERT_0565_TO_0888 (*dst));
1104                     *dst = CONVERT_8888_TO_0565 (d);
1105                 }
1106                 bitmask = UPDATE_BITMASK (bitmask);
1107                 dst++;
1108             }
1109         }
1110     }
1111 }
1112
1113 /*
1114  * Simple bitblt
1115  */
1116
1117 static void
1118 fast_composite_solid_fill (pixman_implementation_t *imp,
1119                            pixman_composite_info_t *info)
1120 {
1121     PIXMAN_COMPOSITE_ARGS (info);
1122     uint32_t src;
1123
1124     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
1125
1126     if (dest_image->bits.format == PIXMAN_a1)
1127     {
1128         src = src >> 31;
1129     }
1130     else if (dest_image->bits.format == PIXMAN_a8)
1131     {
1132         src = src >> 24;
1133     }
1134     else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
1135              dest_image->bits.format == PIXMAN_b5g6r5)
1136     {
1137         src = CONVERT_8888_TO_0565 (src);
1138     }
1139
1140     pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
1141                  PIXMAN_FORMAT_BPP (dest_image->bits.format),
1142                  dest_x, dest_y,
1143                  width, height,
1144                  src);
1145 }
1146
1147 static void
1148 fast_composite_src_memcpy (pixman_implementation_t *imp,
1149                            pixman_composite_info_t *info)
1150 {
1151     PIXMAN_COMPOSITE_ARGS (info);
1152     int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;
1153     uint32_t n_bytes = width * bpp;
1154     int dst_stride, src_stride;
1155     uint8_t    *dst;
1156     uint8_t    *src;
1157
1158     src_stride = src_image->bits.rowstride * 4;
1159     dst_stride = dest_image->bits.rowstride * 4;
1160
1161     src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
1162     dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
1163
1164     while (height--)
1165     {
1166         memcpy (dst, src, n_bytes);
1167
1168         dst += dst_stride;
1169         src += src_stride;
1170     }
1171 }
1172
1173 FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
1174 FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
1175 FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
1176 FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
1177 FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER)
1178 FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD)
1179 FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL)
1180 FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
1181 FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
1182 FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
1183 FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
1184 FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
1185 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
1186 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
1187 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
1188 FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
1189 FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
1190 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
1191 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
1192 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
1193
1194 /* Use more unrolling for src_0565_0565 because it is typically CPU bound */
1195 static force_inline void
1196 scaled_nearest_scanline_565_565_SRC (uint16_t *       dst,
1197                                      const uint16_t * src,
1198                                      int32_t          w,
1199                                      pixman_fixed_t   vx,
1200                                      pixman_fixed_t   unit_x,
1201                                      pixman_fixed_t   max_vx,
1202                                      pixman_bool_t    fully_transparent_src)
1203 {
1204     uint16_t tmp1, tmp2, tmp3, tmp4;
1205     while ((w -= 4) >= 0)
1206     {
1207         tmp1 = src[pixman_fixed_to_int (vx)];
1208         vx += unit_x;
1209         tmp2 = src[pixman_fixed_to_int (vx)];
1210         vx += unit_x;
1211         tmp3 = src[pixman_fixed_to_int (vx)];
1212         vx += unit_x;
1213         tmp4 = src[pixman_fixed_to_int (vx)];
1214         vx += unit_x;
1215         *dst++ = tmp1;
1216         *dst++ = tmp2;
1217         *dst++ = tmp3;
1218         *dst++ = tmp4;
1219     }
1220     if (w & 2)
1221     {
1222         tmp1 = src[pixman_fixed_to_int (vx)];
1223         vx += unit_x;
1224         tmp2 = src[pixman_fixed_to_int (vx)];
1225         vx += unit_x;
1226         *dst++ = tmp1;
1227         *dst++ = tmp2;
1228     }
1229     if (w & 1)
1230         *dst++ = src[pixman_fixed_to_int (vx)];
1231 }
1232
1233 FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
1234                        scaled_nearest_scanline_565_565_SRC,
1235                        uint16_t, uint16_t, COVER)
1236 FAST_NEAREST_MAINLOOP (565_565_none_SRC,
1237                        scaled_nearest_scanline_565_565_SRC,
1238                        uint16_t, uint16_t, NONE)
1239 FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
1240                        scaled_nearest_scanline_565_565_SRC,
1241                        uint16_t, uint16_t, PAD)
1242
1243 static force_inline uint32_t
1244 fetch_nearest (pixman_repeat_t src_repeat,
1245                pixman_format_code_t format,
1246                uint32_t *src, int x, int src_width)
1247 {
1248     if (repeat (src_repeat, &x, src_width))
1249     {
1250         if (format == PIXMAN_x8r8g8b8)
1251             return *(src + x) | 0xff000000;
1252         else
1253             return *(src + x);
1254     }
1255     else
1256     {
1257         return 0;
1258     }
1259 }
1260
1261 static force_inline void
1262 combine_over (uint32_t s, uint32_t *dst)
1263 {
1264     if (s)
1265     {
1266         uint8_t ia = 0xff - (s >> 24);
1267
1268         if (ia)
1269             UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
1270         else
1271             *dst = s;
1272     }
1273 }
1274
1275 static force_inline void
1276 combine_src (uint32_t s, uint32_t *dst)
1277 {
1278     *dst = s;
1279 }
1280
1281 static void
1282 fast_composite_scaled_nearest (pixman_implementation_t *imp,
1283                                pixman_composite_info_t *info)
1284 {
1285     PIXMAN_COMPOSITE_ARGS (info);
1286     uint32_t       *dst_line;
1287     uint32_t       *src_line;
1288     int             dst_stride, src_stride;
1289     int             src_width, src_height;
1290     pixman_repeat_t src_repeat;
1291     pixman_fixed_t unit_x, unit_y;
1292     pixman_format_code_t src_format;
1293     pixman_vector_t v;
1294     pixman_fixed_t vy;
1295
1296     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
1297     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
1298      * transformed from destination space to source space
1299      */
1300     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
1301
1302     /* reference point is the center of the pixel */
1303     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
1304     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
1305     v.vector[2] = pixman_fixed_1;
1306
1307     if (!pixman_transform_point_3d (src_image->common.transform, &v))
1308         return;
1309
1310     unit_x = src_image->common.transform->matrix[0][0];
1311     unit_y = src_image->common.transform->matrix[1][1];
1312
1313     /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
1314     v.vector[0] -= pixman_fixed_e;
1315     v.vector[1] -= pixman_fixed_e;
1316
1317     src_height = src_image->bits.height;
1318     src_width = src_image->bits.width;
1319     src_repeat = src_image->common.repeat;
1320     src_format = src_image->bits.format;
1321
1322     vy = v.vector[1];
1323     while (height--)
1324     {
1325         pixman_fixed_t vx = v.vector[0];
1326         int y = pixman_fixed_to_int (vy);
1327         uint32_t *dst = dst_line;
1328
1329         dst_line += dst_stride;
1330
1331         /* adjust the y location by a unit vector in the y direction
1332          * this is equivalent to transforming y+1 of the destination point to source space */
1333         vy += unit_y;
1334
1335         if (!repeat (src_repeat, &y, src_height))
1336         {
1337             if (op == PIXMAN_OP_SRC)
1338                 memset (dst, 0, sizeof (*dst) * width);
1339         }
1340         else
1341         {
1342             int w = width;
1343
1344             uint32_t *src = src_line + y * src_stride;
1345
1346             while (w >= 2)
1347             {
1348                 uint32_t s1, s2;
1349                 int x1, x2;
1350
1351                 x1 = pixman_fixed_to_int (vx);
1352                 vx += unit_x;
1353
1354                 x2 = pixman_fixed_to_int (vx);
1355                 vx += unit_x;
1356
1357                 w -= 2;
1358
1359                 s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
1360                 s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
1361
1362                 if (op == PIXMAN_OP_OVER)
1363                 {
1364                     combine_over (s1, dst++);
1365                     combine_over (s2, dst++);
1366                 }
1367                 else
1368                 {
1369                     combine_src (s1, dst++);
1370                     combine_src (s2, dst++);
1371                 }
1372             }
1373
1374             while (w--)
1375             {
1376                 uint32_t s;
1377                 int x;
1378
1379                 x = pixman_fixed_to_int (vx);
1380                 vx += unit_x;
1381
1382                 s = fetch_nearest (src_repeat, src_format, src, x, src_width);
1383
1384                 if (op == PIXMAN_OP_OVER)
1385                     combine_over (s, dst++);
1386                 else
1387                     combine_src (s, dst++);
1388             }
1389         }
1390     }
1391 }
1392
1393 #define CACHE_LINE_SIZE 64
1394
1395 #define FAST_SIMPLE_ROTATE(suffix, pix_type)                                  \
1396                                                                               \
1397 static void                                                                   \
1398 blt_rotated_90_trivial_##suffix (pix_type       *dst,                         \
1399                                  int             dst_stride,                  \
1400                                  const pix_type *src,                         \
1401                                  int             src_stride,                  \
1402                                  int             w,                           \
1403                                  int             h)                           \
1404 {                                                                             \
1405     int x, y;                                                                 \
1406     for (y = 0; y < h; y++)                                                   \
1407     {                                                                         \
1408         const pix_type *s = src + (h - y - 1);                                \
1409         pix_type *d = dst + dst_stride * y;                                   \
1410         for (x = 0; x < w; x++)                                               \
1411         {                                                                     \
1412             *d++ = *s;                                                        \
1413             s += src_stride;                                                  \
1414         }                                                                     \
1415     }                                                                         \
1416 }                                                                             \
1417                                                                               \
1418 static void                                                                   \
1419 blt_rotated_270_trivial_##suffix (pix_type       *dst,                        \
1420                                   int             dst_stride,                 \
1421                                   const pix_type *src,                        \
1422                                   int             src_stride,                 \
1423                                   int             w,                          \
1424                                   int             h)                          \
1425 {                                                                             \
1426     int x, y;                                                                 \
1427     for (y = 0; y < h; y++)                                                   \
1428     {                                                                         \
1429         const pix_type *s = src + src_stride * (w - 1) + y;                   \
1430         pix_type *d = dst + dst_stride * y;                                   \
1431         for (x = 0; x < w; x++)                                               \
1432         {                                                                     \
1433             *d++ = *s;                                                        \
1434             s -= src_stride;                                                  \
1435         }                                                                     \
1436     }                                                                         \
1437 }                                                                             \
1438                                                                               \
1439 static void                                                                   \
1440 blt_rotated_90_##suffix (pix_type       *dst,                                 \
1441                          int             dst_stride,                          \
1442                          const pix_type *src,                                 \
1443                          int             src_stride,                          \
1444                          int             W,                                   \
1445                          int             H)                                   \
1446 {                                                                             \
1447     int x;                                                                    \
1448     int leading_pixels = 0, trailing_pixels = 0;                              \
1449     const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
1450                                                                               \
1451     /*                                                                        \
1452      * split processing into handling destination as TILE_SIZExH cache line   \
1453      * aligned vertical stripes (optimistically assuming that destination     \
1454      * stride is a multiple of cache line, if not - it will be just a bit     \
1455      * slower)                                                                \
1456      */                                                                       \
1457                                                                               \
1458     if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
1459     {                                                                         \
1460         leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
1461                             (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
1462         if (leading_pixels > W)                                               \
1463             leading_pixels = W;                                               \
1464                                                                               \
1465         /* unaligned leading part NxH (where N < TILE_SIZE) */                \
1466         blt_rotated_90_trivial_##suffix (                                     \
1467             dst,                                                              \
1468             dst_stride,                                                       \
1469             src,                                                              \
1470             src_stride,                                                       \
1471             leading_pixels,                                                   \
1472             H);                                                               \
1473                                                                               \
1474         dst += leading_pixels;                                                \
1475         src += leading_pixels * src_stride;                                   \
1476         W -= leading_pixels;                                                  \
1477     }                                                                         \
1478                                                                               \
1479     if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
1480     {                                                                         \
1481         trailing_pixels = (((uintptr_t)(dst + W) &                            \
1482                             (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
1483         if (trailing_pixels > W)                                              \
1484             trailing_pixels = W;                                              \
1485         W -= trailing_pixels;                                                 \
1486     }                                                                         \
1487                                                                               \
1488     for (x = 0; x < W; x += TILE_SIZE)                                        \
1489     {                                                                         \
1490         /* aligned middle part TILE_SIZExH */                                 \
1491         blt_rotated_90_trivial_##suffix (                                     \
1492             dst + x,                                                          \
1493             dst_stride,                                                       \
1494             src + src_stride * x,                                             \
1495             src_stride,                                                       \
1496             TILE_SIZE,                                                        \
1497             H);                                                               \
1498     }                                                                         \
1499                                                                               \
1500     if (trailing_pixels)                                                      \
1501     {                                                                         \
1502         /* unaligned trailing part NxH (where N < TILE_SIZE) */               \
1503         blt_rotated_90_trivial_##suffix (                                     \
1504             dst + W,                                                          \
1505             dst_stride,                                                       \
1506             src + W * src_stride,                                             \
1507             src_stride,                                                       \
1508             trailing_pixels,                                                  \
1509             H);                                                               \
1510     }                                                                         \
1511 }                                                                             \
1512                                                                               \
1513 static void                                                                   \
1514 blt_rotated_270_##suffix (pix_type       *dst,                                \
1515                           int             dst_stride,                         \
1516                           const pix_type *src,                                \
1517                           int             src_stride,                         \
1518                           int             W,                                  \
1519                           int             H)                                  \
1520 {                                                                             \
1521     int x;                                                                    \
1522     int leading_pixels = 0, trailing_pixels = 0;                              \
1523     const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
1524                                                                               \
1525     /*                                                                        \
1526      * split processing into handling destination as TILE_SIZExH cache line   \
1527      * aligned vertical stripes (optimistically assuming that destination     \
1528      * stride is a multiple of cache line, if not - it will be just a bit     \
1529      * slower)                                                                \
1530      */                                                                       \
1531                                                                               \
1532     if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
1533     {                                                                         \
1534         leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
1535                             (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
1536         if (leading_pixels > W)                                               \
1537             leading_pixels = W;                                               \
1538                                                                               \
1539         /* unaligned leading part NxH (where N < TILE_SIZE) */                \
1540         blt_rotated_270_trivial_##suffix (                                    \
1541             dst,                                                              \
1542             dst_stride,                                                       \
1543             src + src_stride * (W - leading_pixels),                          \
1544             src_stride,                                                       \
1545             leading_pixels,                                                   \
1546             H);                                                               \
1547                                                                               \
1548         dst += leading_pixels;                                                \
1549         W -= leading_pixels;                                                  \
1550     }                                                                         \
1551                                                                               \
1552     if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
1553     {                                                                         \
1554         trailing_pixels = (((uintptr_t)(dst + W) &                            \
1555                             (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
1556         if (trailing_pixels > W)                                              \
1557             trailing_pixels = W;                                              \
1558         W -= trailing_pixels;                                                 \
1559         src += trailing_pixels * src_stride;                                  \
1560     }                                                                         \
1561                                                                               \
1562     for (x = 0; x < W; x += TILE_SIZE)                                        \
1563     {                                                                         \
1564         /* aligned middle part TILE_SIZExH */                                 \
1565         blt_rotated_270_trivial_##suffix (                                    \
1566             dst + x,                                                          \
1567             dst_stride,                                                       \
1568             src + src_stride * (W - x - TILE_SIZE),                           \
1569             src_stride,                                                       \
1570             TILE_SIZE,                                                        \
1571             H);                                                               \
1572     }                                                                         \
1573                                                                               \
1574     if (trailing_pixels)                                                      \
1575     {                                                                         \
1576         /* unaligned trailing part NxH (where N < TILE_SIZE) */               \
1577         blt_rotated_270_trivial_##suffix (                                    \
1578             dst + W,                                                          \
1579             dst_stride,                                                       \
1580             src - trailing_pixels * src_stride,                               \
1581             src_stride,                                                       \
1582             trailing_pixels,                                                  \
1583             H);                                                               \
1584     }                                                                         \
1585 }                                                                             \
1586                                                                               \
1587 static void                                                                   \
1588 fast_composite_rotate_90_##suffix (pixman_implementation_t *imp,              \
1589                                    pixman_composite_info_t *info)             \
1590 {                                                                             \
1591     PIXMAN_COMPOSITE_ARGS (info);                                             \
1592     pix_type       *dst_line;                                                 \
1593     pix_type       *src_line;                                                 \
1594     int             dst_stride, src_stride;                                   \
1595     int             src_x_t, src_y_t;                                         \
1596                                                                               \
1597     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
1598                            dst_stride, dst_line, 1);                          \
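    /* Compute where in the source image the requested (src_x, src_y)        \
     * lands under the 90 degree rotation transform: the axes are swapped    \
     * with one of them mirrored, and the transform's translation is         \
     * rounded to the nearest integer (the pixman_fixed_1 / 2 term).         \
     */                                                                       \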
1599     src_x_t = -src_y + pixman_fixed_to_int (                                  \
1600                                 src_image->common.transform->matrix[0][2] +   \
1601                                 pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
1602     src_y_t = src_x + pixman_fixed_to_int (                                   \
1603                                 src_image->common.transform->matrix[1][2] +   \
1604                                 pixman_fixed_1 / 2 - pixman_fixed_e);         \
1605     PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
1606                            src_stride, src_line, 1);                          \
1607     blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride,      \
1608                              width, height);                                  \
1609 }                                                                             \
1610                                                                               \
1611 static void                                                                   \
1612 fast_composite_rotate_270_##suffix (pixman_implementation_t *imp,             \
1613                                     pixman_composite_info_t *info)            \
1614 {                                                                             \
1615     PIXMAN_COMPOSITE_ARGS (info);                                             \
1616     pix_type       *dst_line;                                                 \
1617     pix_type       *src_line;                                                 \
1618     int             dst_stride, src_stride;                                   \
1619     int             src_x_t, src_y_t;                                         \
1620                                                                               \
1621     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
1622                            dst_stride, dst_line, 1);                          \
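    /* As in fast_composite_rotate_90 above, but for a 270 degree            \
     * rotation, so the mirrored axis is the other one.                      \
     */                                                                       \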
1623     src_x_t = src_y + pixman_fixed_to_int (                                   \
1624                                 src_image->common.transform->matrix[0][2] +   \
1625                                 pixman_fixed_1 / 2 - pixman_fixed_e);         \
1626     src_y_t = -src_x + pixman_fixed_to_int (                                  \
1627                                 src_image->common.transform->matrix[1][2] +   \
1628                                 pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
1629     PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
1630                            src_stride, src_line, 1);                          \
1631     blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride,     \
1632                               width, height);                                 \
1633 }
1634
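/*
 * Instantiate the simple rotation blitters and their compositing wrappers
 * for 8 bpp (a8), 16 bpp (r5g6b5) and 32 bpp (a8r8g8b8 / x8r8g8b8) pixels.
 */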
1635 FAST_SIMPLE_ROTATE (8, uint8_t)
1636 FAST_SIMPLE_ROTATE (565, uint16_t)
1637 FAST_SIMPLE_ROTATE (8888, uint32_t)
1638
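/*
 * Table of C fast paths, matched in order by the generic compositing code:
 * each entry describes an (operator, source, mask, destination) combination
 * and names the specialized routine to run.  For example, the first entry
 * selects fast_composite_over_n_8_0565 for OVER with a solid source, an a8
 * mask and an r5g6b5 destination.  The table ends with a PIXMAN_OP_NONE
 * sentinel.
 */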
1639 static const pixman_fast_path_t c_fast_paths[] =
1640 {
1641     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
1642     PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
1643     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
1644     PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
1645     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
1646     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
1647     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
1648     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
1649     PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
1650     PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
1651     PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
1652     PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
1653     PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5,   fast_composite_over_n_1_0565),
1654     PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5,   fast_composite_over_n_1_0565),
1655     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
1656     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
1657     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
1658     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
1659     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
1660     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
1661     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
1662     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
1663     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
1664     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
1665     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
1666     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
1667     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
1668     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
1669     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
1670     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
1671     PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
1672     PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
1673     PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
1674     PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000),
1675     PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
1676     PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
1677     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
1678     PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
1679     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
1680     PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
1681     PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
1682     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
1683     PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
1684     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
1685     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
1686     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
1687     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
1688     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
1689     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
1690     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
1691     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
1692     PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
1693     PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
1694     PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
1695     PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
1696     PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
1697     PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
1698     PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
1699     PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
1700     PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
1701     PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
1702     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
1703     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
1704     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
1705     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
1706     PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
1707     PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
1708
1709     SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
1710     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
1711     SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
1712     SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
1713
1714     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
1715     SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
1716
1717     SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
1718     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),
1719
1720     SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
1721
1722     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
1723     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
1724     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
1725     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
1726     SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
1727     SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
1728
1729     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
1730     SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
1731     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
1732     SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),
1733
1734     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
1735
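    /*
     * Generic nearest-neighbour scaling entries: unlike the SIMPLE_NEAREST
     * paths above these are not specialized per pixel format, and all of
     * them go through fast_composite_scaled_nearest.
     */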
1736 #define NEAREST_FAST_PATH(op,s,d)               \
1737     {   PIXMAN_OP_ ## op,                       \
1738         PIXMAN_ ## s, SCALED_NEAREST_FLAGS,     \
1739         PIXMAN_null, 0,                         \
1740         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1741         fast_composite_scaled_nearest,          \
1742     }
1743
1744     NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
1745     NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
1746     NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
1747     NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
1748
1749     NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
1750     NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
1751     NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
1752     NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
1753
1754     NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
1755     NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
1756     NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
1757     NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
1758
1759     NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
1760     NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
1761     NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
1762     NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
1763
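    /*
     * Simple rotation entries: these require a pure 90 or 270 degree
     * rotation transform, the nearest filter and samples that fully cover
     * the clip, and they dispatch to the fast_composite_rotate_* wrappers
     * generated by FAST_SIMPLE_ROTATE above.
     */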
1764 #define SIMPLE_ROTATE_FLAGS(angle)                                        \
1765     (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM   |                         \
1766      FAST_PATH_NEAREST_FILTER                   |                         \
1767      FAST_PATH_SAMPLES_COVER_CLIP_NEAREST       |                         \
1768      FAST_PATH_STANDARD_FLAGS)
1769
1770 #define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix)                            \
1771     {   PIXMAN_OP_ ## op,                                                 \
1772         PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90),                           \
1773         PIXMAN_null, 0,                                                   \
1774         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                           \
1775         fast_composite_rotate_90_##suffix,                                \
1776     },                                                                    \
1777     {   PIXMAN_OP_ ## op,                                                 \
1778         PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270),                          \
1779         PIXMAN_null, 0,                                                   \
1780         PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,                           \
1781         fast_composite_rotate_270_##suffix,                               \
1782     }
1783
1784     SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
1785     SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
1786     SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
1787     SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
1788     SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
1789
1790     {   PIXMAN_OP_NONE  },
1791 };
1792
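/*
 * A1_FILL_MASK (n, offs) builds a mask of n consecutive bits starting at
 * bit offset offs within a 32-bit word of an a1 image; the big-endian
 * variant counts the offset from the most significant bit.  The callers
 * below only ever pass n < 32.  For example, A1_FILL_MASK (4, 8) is
 * 0x00000f00 on a little-endian host and 0x00f00000 on a big-endian one.
 */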
1793 #ifdef WORDS_BIGENDIAN
1794 #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
1795 #else
1796 #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
1797 #endif
1798
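/*
 * Set (or clear, depending on v) 'width' consecutive bits of an a1
 * scanline starting at bit offset 'offs' into *dst: first the unaligned
 * bits up to the next 32-bit boundary, then whole words, then the
 * remaining trailing bits.
 */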
1799 static force_inline void
1800 pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
1801 {
1802     if (offs)
1803     {
1804         int leading_pixels = 32 - offs;
1805         if (leading_pixels >= width)
1806         {
1807             if (v)
1808                 *dst |= A1_FILL_MASK (width, offs);
1809             else
1810                 *dst &= ~A1_FILL_MASK (width, offs);
1811             return;
1812         }
1813         else
1814         {
1815             if (v)
1816                 *dst++ |= A1_FILL_MASK (leading_pixels, offs);
1817             else
1818                 *dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
1819             width -= leading_pixels;
1820         }
1821     }
1822     while (width >= 32)
1823     {
1824         if (v)
1825             *dst++ = 0xFFFFFFFF;
1826         else
1827             *dst++ = 0;
1828         width -= 32;
1829     }
1830     if (width > 0)
1831     {
1832         if (v)
1833             *dst |= A1_FILL_MASK (width, 0);
1834         else
1835             *dst &= ~A1_FILL_MASK (width, 0);
1836     }
1837 }
1838
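/*
 * Solid fill of a width x height rectangle in a 1 bpp image with the low
 * bit of xor; stride is in uint32_t units.  Testing xor outside the loop
 * lets the force_inline pixman_fill1_line specialize on a constant v.
 *
 * Worked example with hypothetical values: pixman_fill1 (bits, stride, 3,
 * 2, 70, 1, 1) sets bits 3..72 of row 2, touching a 29-bit leading part,
 * one full word and a 9-bit trailing part.
 */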
1839 static void
1840 pixman_fill1 (uint32_t *bits,
1841               int       stride,
1842               int       x,
1843               int       y,
1844               int       width,
1845               int       height,
1846               uint32_t  xor)
1847 {
1848     uint32_t *dst = bits + y * stride + (x >> 5);
1849     int offs = x & 31;
1850
1851     if (xor & 1)
1852     {
1853         while (height--)
1854         {
1855             pixman_fill1_line (dst, offs, width, 1);
1856             dst += stride;
1857         }
1858     }
1859     else
1860     {
1861         while (height--)
1862         {
1863             pixman_fill1_line (dst, offs, width, 0);
1864             dst += stride;
1865         }
1866     }
1867 }
1868
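/*
 * pixman_fill8/16/32 are straightforward solid fills.  'stride' is always
 * given in uint32_t units, so the 8 and 16 bpp variants first convert it
 * to byte and uint16_t units respectively, and only the low 8 or 16 bits
 * of xor are used.
 */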
1869 static void
1870 pixman_fill8 (uint32_t *bits,
1871               int       stride,
1872               int       x,
1873               int       y,
1874               int       width,
1875               int       height,
1876               uint32_t  xor)
1877 {
1878     int byte_stride = stride * (int) sizeof (uint32_t);
1879     uint8_t *dst = (uint8_t *) bits;
1880     uint8_t v = xor & 0xff;
1881     int i;
1882
1883     dst = dst + y * byte_stride + x;
1884
1885     while (height--)
1886     {
1887         for (i = 0; i < width; ++i)
1888             dst[i] = v;
1889
1890         dst += byte_stride;
1891     }
1892 }
1893
1894 static void
1895 pixman_fill16 (uint32_t *bits,
1896                int       stride,
1897                int       x,
1898                int       y,
1899                int       width,
1900                int       height,
1901                uint32_t  xor)
1902 {
1903     int short_stride =
1904         (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
1905     uint16_t *dst = (uint16_t *)bits;
1906     uint16_t v = xor & 0xffff;
1907     int i;
1908
1909     dst = dst + y * short_stride + x;
1910
1911     while (height--)
1912     {
1913         for (i = 0; i < width; ++i)
1914             dst[i] = v;
1915
1916         dst += short_stride;
1917     }
1918 }
1919
1920 static void
1921 pixman_fill32 (uint32_t *bits,
1922                int       stride,
1923                int       x,
1924                int       y,
1925                int       width,
1926                int       height,
1927                uint32_t  xor)
1928 {
1929     int i;
1930
1931     bits = bits + y * stride + x;
1932
1933     while (height--)
1934     {
1935         for (i = 0; i < width; ++i)
1936             bits[i] = xor;
1937
1938         bits += stride;
1939     }
1940 }
1941
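/*
 * Fill entry point installed in the implementation below: dispatch on the
 * destination depth to one of the specialized fillers above and hand any
 * other depth (e.g. 24 bpp) to the delegate implementation.
 */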
1942 static pixman_bool_t
1943 fast_path_fill (pixman_implementation_t *imp,
1944                 uint32_t *               bits,
1945                 int                      stride,
1946                 int                      bpp,
1947                 int                      x,
1948                 int                      y,
1949                 int                      width,
1950                 int                      height,
1951                 uint32_t                 xor)
1952 {
1953     switch (bpp)
1954     {
1955     case 1:
1956         pixman_fill1 (bits, stride, x, y, width, height, xor);
1957         break;
1958
1959     case 8:
1960         pixman_fill8 (bits, stride, x, y, width, height, xor);
1961         break;
1962
1963     case 16:
1964         pixman_fill16 (bits, stride, x, y, width, height, xor);
1965         break;
1966
1967     case 32:
1968         pixman_fill32 (bits, stride, x, y, width, height, xor);
1969         break;
1970
1971     default:
1972         return _pixman_implementation_fill (
1973             imp->delegate, bits, stride, bpp, x, y, width, height, xor);
1974         break;
1975     }
1976
1977     return TRUE;
1978 }
1979
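/*
 * Create the C fast path implementation layered on top of 'fallback',
 * registering the c_fast_paths table and the fill hook above.  A typical
 * construction, as done when pixman picks its implementation stack
 * (sketch; the exact call site may differ):
 *
 *     imp = _pixman_implementation_create_general ();
 *     imp = _pixman_implementation_create_fast_path (imp);
 */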
1980 pixman_implementation_t *
1981 _pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
1982 {
1983     pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
1984
1985     imp->fill = fast_path_fill;
1986
1987     return imp;
1988 }