441cd9a21987dc2d5d8d0c299d5464f4e806d49e
[platform/upstream/SDL.git] / src / video / SDL_blit_N.c
1 /*
2   Simple DirectMedia Layer
3   Copyright (C) 1997-2018 Sam Lantinga <slouken@libsdl.org>
4
5   This software is provided 'as-is', without any express or implied
6   warranty.  In no event will the authors be held liable for any damages
7   arising from the use of this software.
8
9   Permission is granted to anyone to use this software for any purpose,
10   including commercial applications, and to alter it and redistribute it
11   freely, subject to the following restrictions:
12
13   1. The origin of this software must not be misrepresented; you must not
14      claim that you wrote the original software. If you use this software
15      in a product, an acknowledgment in the product documentation would be
16      appreciated but is not required.
17   2. Altered source versions must be plainly marked as such, and must not be
18      misrepresented as being the original software.
19   3. This notice may not be removed or altered from any source distribution.
20 */
21 #include "../SDL_internal.h"
22
23 #include "SDL_video.h"
24 #include "SDL_endian.h"
25 #include "SDL_cpuinfo.h"
26 #include "SDL_blit.h"
27
28 #include "SDL_assert.h"
29
30 /* Functions to blit from N-bit surfaces to other surfaces */
31
32 #if SDL_ALTIVEC_BLITTERS
33 #ifdef HAVE_ALTIVEC_H
34 #include <altivec.h>
35 #endif
36 #ifdef __MACOSX__
37 #include <sys/sysctl.h>
/*
 * Query the "hw.l3cachesize" sysctl.
 * Returns the value reported by the kernel, or 0 if sysctlbyname() reports
 * an error.
 */
static size_t
GetL3CacheSize(void)
{
    const char sysctl_name[] = "hw.l3cachesize";
    u_int64_t cache_bytes = 0;
    size_t cache_bytes_len = sizeof(cache_bytes);

    if (sysctlbyname(sysctl_name, &cache_bytes, &cache_bytes_len, NULL, 0) != 0) {
        return 0;
    }

    return cache_bytes;
}
52 #else
/*
 * Fallback used when not building for __MACOSX__: there is no query here,
 * so report a fixed 2 MiB (the original author's guess for a G4).
 */
static size_t
GetL3CacheSize(void)
{
    /* XXX: Just guess G4 */
    const size_t guessed_l3_bytes = 2097152;

    return guessed_l3_bytes;
}
59 #endif /* __MACOSX__ */
60
/*
 * Vector literal helpers.  The parenthesised element list is used when
 * building for __MACOSX__ with GCC older than 4; the brace form is used
 * everywhere else.
 */
#if (defined(__MACOSX__) && (__GNUC__ < 4))
#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) ( a,b,c,d,e,f,g,h )
#else
#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) { a,b,c,d,e,f,g,h }
#endif

/* Non-zero when pointer expression x is not 16-byte aligned (low 4 address bits). */
#define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)

/*
 * Build a 16-byte permute vector that applies the same 4-byte pattern
 * (a,b,c,d) to each of the four 32-bit pixels in a vector.  Goes through
 * VECUINT8_LITERAL so the literal syntax matches the compiler in use.
 */
#define VSWIZZLE32(a,b,c,d) \
        VECUINT8_LITERAL(0x00+(a), 0x00+(b), 0x00+(c), 0x00+(d), \
                         0x04+(a), 0x04+(b), 0x04+(c), 0x04+(d), \
                         0x08+(a), 0x08+(b), 0x08+(c), 0x08+(d), \
                         0x0C+(a), 0x0C+(b), 0x0C+(c), 0x0C+(d))

/*
 * Pack four channel values into a 32-bit pixel using dstfmt's shifts and
 * masks.  Channels are widened to Uint32 before shifting so that an 8-bit
 * value >= 0x80 shifted by 24 does not overflow a signed int.
 */
#define MAKE8888(dstfmt, r, g, b, a)  \
    ( (((Uint32)(r) << (dstfmt)->Rshift) & (dstfmt)->Rmask) | \
      (((Uint32)(g) << (dstfmt)->Gshift) & (dstfmt)->Gmask) | \
      (((Uint32)(b) << (dstfmt)->Bshift) & (dstfmt)->Bmask) | \
      (((Uint32)(a) << (dstfmt)->Ashift) & (dstfmt)->Amask) )

/*
 * Data Stream Touch...Altivec cache prefetching.
 *
 *  Don't use this on a G5...however, the speed boost is very significant
 *   on a G4.
 */
#define DST_CHAN_SRC 1
#define DST_CHAN_DEST 2

/* macro to set DST control word value... */
#define DST_CTRL(size, count, stride) \
    (((size) << 24) | ((count) << 16) | (stride))

/*
 * Permute vector used to realign data fetched with a vec_ld(0)/vec_ld(15)
 * pair.  For an unaligned pointer this is vec_lvsl(0, src); for an aligned
 * pointer it is vec_lvsl(8, src) + 8, i.e. it selects entirely from the
 * second operand of the following vec_perm.
 */
#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    ? vec_lvsl(0, (src)) \
    : vec_add(vec_lvsl(8, (src)), vec_splat_u8(8)))
102
103 /* Calculate the permute vector used for 32->32 swizzling */
104 static vector unsigned char
105 calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
106 {
107     /*
108      * We have to assume that the bits that aren't used by other
109      *  colors is alpha, and it's one complete byte, since some formats
110      *  leave alpha with a zero mask, but we should still swizzle the bits.
111      */
112     /* ARGB */
113     const static const struct SDL_PixelFormat default_pixel_format = {
114         0, NULL, 0, 0,
115         {0, 0},
116         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
117         0, 0, 0, 0,
118         16, 8, 0, 24,
119         0, NULL
120     };
121     const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
122                                                        0x04, 0x04, 0x04, 0x04,
123                                                        0x08, 0x08, 0x08, 0x08,
124                                                        0x0C, 0x0C, 0x0C,
125                                                        0x0C);
126     vector unsigned char vswiz;
127     vector unsigned int srcvec;
128     Uint32 rmask, gmask, bmask, amask;
129
130     if (!srcfmt) {
131         srcfmt = &default_pixel_format;
132     }
133     if (!dstfmt) {
134         dstfmt = &default_pixel_format;
135     }
136
137 #define RESHIFT(X) (3 - ((X) >> 3))
138     rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
139     gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
140     bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
141
142     /* Use zero for alpha if either surface doesn't have alpha */
143     if (dstfmt->Amask) {
144         amask =
145             ((srcfmt->Amask) ? RESHIFT(srcfmt->
146                                        Ashift) : 0x10) << (dstfmt->Ashift);
147     } else {
148         amask =
149             0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
150                           0xFFFFFFFF);
151     }
152 #undef RESHIFT
153
154     ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
155     vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
156     return (vswiz);
157 }
158
159 static void Blit_RGB888_RGB565(SDL_BlitInfo * info);
160 static void
161 Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
162 {
163     int height = info->dst_h;
164     Uint8 *src = (Uint8 *) info->src;
165     int srcskip = info->src_skip;
166     Uint8 *dst = (Uint8 *) info->dst;
167     int dstskip = info->dst_skip;
168     SDL_PixelFormat *srcfmt = info->src_fmt;
169     vector unsigned char valpha = vec_splat_u8(0);
170     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
171     vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
172                                                     0x00, 0x0a, 0x00, 0x0e,
173                                                     0x00, 0x12, 0x00, 0x16,
174                                                     0x00, 0x1a, 0x00, 0x1e);
175     vector unsigned short v1 = vec_splat_u16(1);
176     vector unsigned short v3 = vec_splat_u16(3);
177     vector unsigned short v3f =
178         VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
179                           0x003f, 0x003f, 0x003f, 0x003f);
180     vector unsigned short vfc =
181         VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
182                           0x00fc, 0x00fc, 0x00fc, 0x00fc);
183     vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
184     vf800 = vec_sl(vf800, vec_splat_u16(8));
185
186     while (height--) {
187         vector unsigned char valigner;
188         vector unsigned char voverflow;
189         vector unsigned char vsrc;
190
191         int width = info->dst_w;
192         int extrawidth;
193
194         /* do scalar until we can align... */
195 #define ONE_PIXEL_BLEND(condition, widthvar) \
196         while (condition) { \
197             Uint32 Pixel; \
198             unsigned sR, sG, sB, sA; \
199             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
200                           sR, sG, sB, sA); \
201             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
202                                 ((sG << 3) & 0x000007E0) | \
203                                 ((sB >> 3) & 0x0000001F)); \
204             dst += 2; \
205             src += 4; \
206             widthvar--; \
207         }
208
209         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
210
211         /* After all that work, here's the vector part! */
212         extrawidth = (width % 8);       /* trailing unaligned stores */
213         width -= extrawidth;
214         vsrc = vec_ld(0, src);
215         valigner = VEC_ALIGNER(src);
216
217         while (width) {
218             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
219             vector unsigned int vsrc1, vsrc2;
220             vector unsigned char vdst;
221
222             voverflow = vec_ld(15, src);
223             vsrc = vec_perm(vsrc, voverflow, valigner);
224             vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
225             src += 16;
226             vsrc = voverflow;
227             voverflow = vec_ld(15, src);
228             vsrc = vec_perm(vsrc, voverflow, valigner);
229             vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
230             /* 1555 */
231             vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
232             vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
233             vgpixel = vec_and(vgpixel, vfc);
234             vgpixel = vec_sl(vgpixel, v3);
235             vrpixel = vec_sl(vpixel, v1);
236             vrpixel = vec_and(vrpixel, vf800);
237             vbpixel = vec_and(vpixel, v3f);
238             vdst =
239                 vec_or((vector unsigned char) vrpixel,
240                        (vector unsigned char) vgpixel);
241             /* 565 */
242             vdst = vec_or(vdst, (vector unsigned char) vbpixel);
243             vec_st(vdst, 0, dst);
244
245             width -= 8;
246             src += 16;
247             dst += 16;
248             vsrc = voverflow;
249         }
250
251         SDL_assert(width == 0);
252
253         /* do scalar until we can align... */
254         ONE_PIXEL_BLEND((extrawidth), extrawidth);
255 #undef ONE_PIXEL_BLEND
256
257         src += srcskip;         /* move to next row, accounting for pitch. */
258         dst += dstskip;
259     }
260
261
262 }
263
264 static void
265 Blit_RGB565_32Altivec(SDL_BlitInfo * info)
266 {
267     int height = info->dst_h;
268     Uint8 *src = (Uint8 *) info->src;
269     int srcskip = info->src_skip;
270     Uint8 *dst = (Uint8 *) info->dst;
271     int dstskip = info->dst_skip;
272     SDL_PixelFormat *srcfmt = info->src_fmt;
273     SDL_PixelFormat *dstfmt = info->dst_fmt;
274     unsigned alpha;
275     vector unsigned char valpha;
276     vector unsigned char vpermute;
277     vector unsigned short vf800;
278     vector unsigned int v8 = vec_splat_u32(8);
279     vector unsigned int v16 = vec_add(v8, v8);
280     vector unsigned short v2 = vec_splat_u16(2);
281     vector unsigned short v3 = vec_splat_u16(3);
282     /*
283        0x10 - 0x1f is the alpha
284        0x00 - 0x0e evens are the red
285        0x01 - 0x0f odds are zero
286      */
287     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
288                                                        0x10, 0x02, 0x01, 0x01,
289                                                        0x10, 0x04, 0x01, 0x01,
290                                                        0x10, 0x06, 0x01,
291                                                        0x01);
292     vector unsigned char vredalpha2 =
293         (vector unsigned
294          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
295         );
296     /*
297        0x00 - 0x0f is ARxx ARxx ARxx ARxx
298        0x11 - 0x0f odds are blue
299      */
300     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
301                                                    0x04, 0x05, 0x06, 0x13,
302                                                    0x08, 0x09, 0x0a, 0x15,
303                                                    0x0c, 0x0d, 0x0e, 0x17);
304     vector unsigned char vblue2 =
305         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
306         );
307     /*
308        0x00 - 0x0f is ARxB ARxB ARxB ARxB
309        0x10 - 0x0e evens are green
310      */
311     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
312                                                     0x04, 0x05, 0x12, 0x07,
313                                                     0x08, 0x09, 0x14, 0x0b,
314                                                     0x0c, 0x0d, 0x16, 0x0f);
315     vector unsigned char vgreen2 =
316         (vector unsigned
317          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
318         );
319
320     SDL_assert(srcfmt->BytesPerPixel == 2);
321     SDL_assert(dstfmt->BytesPerPixel == 4);
322
323     vf800 = (vector unsigned short) vec_splat_u8(-7);
324     vf800 = vec_sl(vf800, vec_splat_u16(8));
325
326     if (dstfmt->Amask && info->a) {
327         ((unsigned char *) &valpha)[0] = alpha = info->a;
328         valpha = vec_splat(valpha, 0);
329     } else {
330         alpha = 0;
331         valpha = vec_splat_u8(0);
332     }
333
334     vpermute = calc_swizzle32(NULL, dstfmt);
335     while (height--) {
336         vector unsigned char valigner;
337         vector unsigned char voverflow;
338         vector unsigned char vsrc;
339
340         int width = info->dst_w;
341         int extrawidth;
342
343         /* do scalar until we can align... */
344 #define ONE_PIXEL_BLEND(condition, widthvar) \
345         while (condition) { \
346             unsigned sR, sG, sB; \
347             unsigned short Pixel = *((unsigned short *)src); \
348             sR = (Pixel >> 8) & 0xf8; \
349             sG = (Pixel >> 3) & 0xfc; \
350             sB = (Pixel << 3) & 0xf8; \
351             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
352             src += 2; \
353             dst += 4; \
354             widthvar--; \
355         }
356         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
357
358         /* After all that work, here's the vector part! */
359         extrawidth = (width % 8);       /* trailing unaligned stores */
360         width -= extrawidth;
361         vsrc = vec_ld(0, src);
362         valigner = VEC_ALIGNER(src);
363
364         while (width) {
365             vector unsigned short vR, vG, vB;
366             vector unsigned char vdst1, vdst2;
367
368             voverflow = vec_ld(15, src);
369             vsrc = vec_perm(vsrc, voverflow, valigner);
370
371             vR = vec_and((vector unsigned short) vsrc, vf800);
372             vB = vec_sl((vector unsigned short) vsrc, v3);
373             vG = vec_sl(vB, v2);
374
375             vdst1 =
376                 (vector unsigned char) vec_perm((vector unsigned char) vR,
377                                                 valpha, vredalpha1);
378             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
379             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
380             vdst1 = vec_perm(vdst1, valpha, vpermute);
381             vec_st(vdst1, 0, dst);
382
383             vdst2 =
384                 (vector unsigned char) vec_perm((vector unsigned char) vR,
385                                                 valpha, vredalpha2);
386             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
387             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
388             vdst2 = vec_perm(vdst2, valpha, vpermute);
389             vec_st(vdst2, 16, dst);
390
391             width -= 8;
392             dst += 32;
393             src += 16;
394             vsrc = voverflow;
395         }
396
397         SDL_assert(width == 0);
398
399
400         /* do scalar until we can align... */
401         ONE_PIXEL_BLEND((extrawidth), extrawidth);
402 #undef ONE_PIXEL_BLEND
403
404         src += srcskip;         /* move to next row, accounting for pitch. */
405         dst += dstskip;
406     }
407
408 }
409
410
411 static void
412 Blit_RGB555_32Altivec(SDL_BlitInfo * info)
413 {
414     int height = info->dst_h;
415     Uint8 *src = (Uint8 *) info->src;
416     int srcskip = info->src_skip;
417     Uint8 *dst = (Uint8 *) info->dst;
418     int dstskip = info->dst_skip;
419     SDL_PixelFormat *srcfmt = info->src_fmt;
420     SDL_PixelFormat *dstfmt = info->dst_fmt;
421     unsigned alpha;
422     vector unsigned char valpha;
423     vector unsigned char vpermute;
424     vector unsigned short vf800;
425     vector unsigned int v8 = vec_splat_u32(8);
426     vector unsigned int v16 = vec_add(v8, v8);
427     vector unsigned short v1 = vec_splat_u16(1);
428     vector unsigned short v3 = vec_splat_u16(3);
429     /*
430        0x10 - 0x1f is the alpha
431        0x00 - 0x0e evens are the red
432        0x01 - 0x0f odds are zero
433      */
434     vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
435                                                        0x10, 0x02, 0x01, 0x01,
436                                                        0x10, 0x04, 0x01, 0x01,
437                                                        0x10, 0x06, 0x01,
438                                                        0x01);
439     vector unsigned char vredalpha2 =
440         (vector unsigned
441          char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
442         );
443     /*
444        0x00 - 0x0f is ARxx ARxx ARxx ARxx
445        0x11 - 0x0f odds are blue
446      */
447     vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
448                                                    0x04, 0x05, 0x06, 0x13,
449                                                    0x08, 0x09, 0x0a, 0x15,
450                                                    0x0c, 0x0d, 0x0e, 0x17);
451     vector unsigned char vblue2 =
452         (vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
453         );
454     /*
455        0x00 - 0x0f is ARxB ARxB ARxB ARxB
456        0x10 - 0x0e evens are green
457      */
458     vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
459                                                     0x04, 0x05, 0x12, 0x07,
460                                                     0x08, 0x09, 0x14, 0x0b,
461                                                     0x0c, 0x0d, 0x16, 0x0f);
462     vector unsigned char vgreen2 =
463         (vector unsigned
464          char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
465         );
466
467     SDL_assert(srcfmt->BytesPerPixel == 2);
468     SDL_assert(dstfmt->BytesPerPixel == 4);
469
470     vf800 = (vector unsigned short) vec_splat_u8(-7);
471     vf800 = vec_sl(vf800, vec_splat_u16(8));
472
473     if (dstfmt->Amask && info->a) {
474         ((unsigned char *) &valpha)[0] = alpha = info->a;
475         valpha = vec_splat(valpha, 0);
476     } else {
477         alpha = 0;
478         valpha = vec_splat_u8(0);
479     }
480
481     vpermute = calc_swizzle32(NULL, dstfmt);
482     while (height--) {
483         vector unsigned char valigner;
484         vector unsigned char voverflow;
485         vector unsigned char vsrc;
486
487         int width = info->dst_w;
488         int extrawidth;
489
490         /* do scalar until we can align... */
491 #define ONE_PIXEL_BLEND(condition, widthvar) \
492         while (condition) { \
493             unsigned sR, sG, sB; \
494             unsigned short Pixel = *((unsigned short *)src); \
495             sR = (Pixel >> 7) & 0xf8; \
496             sG = (Pixel >> 2) & 0xf8; \
497             sB = (Pixel << 3) & 0xf8; \
498             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
499             src += 2; \
500             dst += 4; \
501             widthvar--; \
502         }
503         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
504
505         /* After all that work, here's the vector part! */
506         extrawidth = (width % 8);       /* trailing unaligned stores */
507         width -= extrawidth;
508         vsrc = vec_ld(0, src);
509         valigner = VEC_ALIGNER(src);
510
511         while (width) {
512             vector unsigned short vR, vG, vB;
513             vector unsigned char vdst1, vdst2;
514
515             voverflow = vec_ld(15, src);
516             vsrc = vec_perm(vsrc, voverflow, valigner);
517
518             vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
519             vB = vec_sl((vector unsigned short) vsrc, v3);
520             vG = vec_sl(vB, v3);
521
522             vdst1 =
523                 (vector unsigned char) vec_perm((vector unsigned char) vR,
524                                                 valpha, vredalpha1);
525             vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
526             vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
527             vdst1 = vec_perm(vdst1, valpha, vpermute);
528             vec_st(vdst1, 0, dst);
529
530             vdst2 =
531                 (vector unsigned char) vec_perm((vector unsigned char) vR,
532                                                 valpha, vredalpha2);
533             vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
534             vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
535             vdst2 = vec_perm(vdst2, valpha, vpermute);
536             vec_st(vdst2, 16, dst);
537
538             width -= 8;
539             dst += 32;
540             src += 16;
541             vsrc = voverflow;
542         }
543
544         SDL_assert(width == 0);
545
546
547         /* do scalar until we can align... */
548         ONE_PIXEL_BLEND((extrawidth), extrawidth);
549 #undef ONE_PIXEL_BLEND
550
551         src += srcskip;         /* move to next row, accounting for pitch. */
552         dst += dstskip;
553     }
554
555 }
556
557 static void BlitNtoNKey(SDL_BlitInfo * info);
558 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
559 static void
560 Blit32to32KeyAltivec(SDL_BlitInfo * info)
561 {
562     int height = info->dst_h;
563     Uint32 *srcp = (Uint32 *) info->src;
564     int srcskip = info->src_skip / 4;
565     Uint32 *dstp = (Uint32 *) info->dst;
566     int dstskip = info->dst_skip / 4;
567     SDL_PixelFormat *srcfmt = info->src_fmt;
568     int srcbpp = srcfmt->BytesPerPixel;
569     SDL_PixelFormat *dstfmt = info->dst_fmt;
570     int dstbpp = dstfmt->BytesPerPixel;
571     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
572     unsigned alpha = dstfmt->Amask ? info->a : 0;
573     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
574     Uint32 ckey = info->colorkey;
575     vector unsigned int valpha;
576     vector unsigned char vpermute;
577     vector unsigned char vzero;
578     vector unsigned int vckey;
579     vector unsigned int vrgbmask;
580     vpermute = calc_swizzle32(srcfmt, dstfmt);
581     if (info->dst_w < 16) {
582         if (copy_alpha) {
583             BlitNtoNKeyCopyAlpha(info);
584         } else {
585             BlitNtoNKey(info);
586         }
587         return;
588     }
589     vzero = vec_splat_u8(0);
590     if (alpha) {
591         ((unsigned char *) &valpha)[0] = (unsigned char) alpha;
592         valpha =
593             (vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
594     } else {
595         valpha = (vector unsigned int) vzero;
596     }
597     ckey &= rgbmask;
598     ((unsigned int *) (char *) &vckey)[0] = ckey;
599     vckey = vec_splat(vckey, 0);
600     ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
601     vrgbmask = vec_splat(vrgbmask, 0);
602
603     while (height--) {
604 #define ONE_PIXEL_BLEND(condition, widthvar) \
605         if (copy_alpha) { \
606             while (condition) { \
607                 Uint32 Pixel; \
608                 unsigned sR, sG, sB, sA; \
609                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
610                           sR, sG, sB, sA); \
611                 if ( (Pixel & rgbmask) != ckey ) { \
612                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
613                             sR, sG, sB, sA); \
614                 } \
615                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
616                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
617                 widthvar--; \
618             } \
619         } else { \
620             while (condition) { \
621                 Uint32 Pixel; \
622                 unsigned sR, sG, sB; \
623                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
624                 if ( Pixel != ckey ) { \
625                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
626                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
627                               sR, sG, sB, alpha); \
628                 } \
629                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
630                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
631                 widthvar--; \
632             } \
633         }
634         int width = info->dst_w;
635         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
636         SDL_assert(width > 0);
637         if (width > 0) {
638             int extrawidth = (width % 4);
639             vector unsigned char valigner = VEC_ALIGNER(srcp);
640             vector unsigned int vs = vec_ld(0, srcp);
641             width -= extrawidth;
642             SDL_assert(width >= 4);
643             while (width) {
644                 vector unsigned char vsel;
645                 vector unsigned int vd;
646                 vector unsigned int voverflow = vec_ld(15, srcp);
647                 /* load the source vec */
648                 vs = vec_perm(vs, voverflow, valigner);
649                 /* vsel is set for items that match the key */
650                 vsel = (vector unsigned char) vec_and(vs, vrgbmask);
651                 vsel = (vector unsigned char) vec_cmpeq(vs, vckey);
652                 /* permute the src vec to the dest format */
653                 vs = vec_perm(vs, valpha, vpermute);
654                 /* load the destination vec */
655                 vd = vec_ld(0, dstp);
656                 /* select the source and dest into vs */
657                 vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
658                                                    (vector unsigned char) vd,
659                                                    vsel);
660
661                 vec_st(vd, 0, dstp);
662                 srcp += 4;
663                 width -= 4;
664                 dstp += 4;
665                 vs = voverflow;
666             }
667             ONE_PIXEL_BLEND((extrawidth), extrawidth);
668 #undef ONE_PIXEL_BLEND
669             srcp += srcskip;
670             dstp += dstskip;
671         }
672     }
673 }
674
675 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
676 /* Use this on a G5 */
/*
 * Variant without data-stream prefetch hints.
 *
 * For every row: convert single pixels with scalar code until dst is
 * 16-byte aligned, then four pixels per iteration with vec_perm (realigned
 * source load, aligned store), then the remaining width % 4 pixels with
 * scalar code.  When the destination has an alpha mask and the source does
 * not, info->a is used as the alpha value.
 */
677 static void
678 ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
679 {
680     int height = info->dst_h;
681     Uint32 *src = (Uint32 *) info->src;
    /* skips are divided by 4 because src/dst advance in Uint32 units */
682     int srcskip = info->src_skip / 4;
683     Uint32 *dst = (Uint32 *) info->dst;
684     int dstskip = info->dst_skip / 4;
685     SDL_PixelFormat *srcfmt = info->src_fmt;
686     SDL_PixelFormat *dstfmt = info->dst_fmt;
    /* second operand of the swizzling vec_perm: zeros, or info->a in every byte */
687     vector unsigned int vzero = vec_splat_u32(0);
688     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
689     if (dstfmt->Amask && !srcfmt->Amask) {
690         if (info->a) {
691             vector unsigned char valpha;
            /* only byte 0 is written; the splat below reads only that byte */
692             ((unsigned char *) &valpha)[0] = info->a;
693             vzero = (vector unsigned int) vec_splat(valpha, 0);
694         }
695     }
696
697     SDL_assert(srcfmt->BytesPerPixel == 4);
698     SDL_assert(dstfmt->BytesPerPixel == 4);
699
700     while (height--) {
701         vector unsigned char valigner;
702         vector unsigned int vbits;
703         vector unsigned int voverflow;
704         Uint32 bits;
705         Uint8 r, g, b, a;
706
707         int width = info->dst_w;
708         int extrawidth;
709
710         /* do scalar until we can align... */
711         while ((UNALIGNED_PTR(dst)) && (width)) {
712             bits = *(src++);
713             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
714             if(!srcfmt->Amask)
715               a = info->a;
716             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
717             width--;
718         }
719
720         /* After all that work, here's the vector part! */
721         extrawidth = (width % 4);
722         width -= extrawidth;
723         valigner = VEC_ALIGNER(src);
724         vbits = vec_ld(0, src);
725
726         while (width) {
727             voverflow = vec_ld(15, src);
728             src += 4;
729             width -= 4;
730             vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
731             vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
732             vec_st(vbits, 0, dst);      /* store it back out. */
733             dst += 4;
            /* the block just loaded is the first half of the next realigned read */
734             vbits = voverflow;
735         }
736
737         SDL_assert(width == 0);
738
739         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
740         while (extrawidth) {
741             bits = *(src++);    /* at most 3 pixels (width % 4), don't bother with prefetch. */
742             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
743             if(!srcfmt->Amask)
744               a = info->a;
745             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
746             extrawidth--;
747         }
748
749         src += srcskip;
750         dst += dstskip;
751     }
752
753 }
754
755 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
756 /* Use this on a G4 */
/*
 * Variant that issues data-stream prefetch hints (vec_dstt / vec_dstst)
 * ahead of the source and destination pointers while it works, and stops
 * both streams with vec_dss before returning.
 *
 * For every row: convert single pixels with scalar code until dst is
 * 16-byte aligned, then four pixels per iteration with vec_perm (realigned
 * source load, aligned store), then the remaining width % 4 pixels with
 * scalar code.  When the destination has an alpha mask and the source does
 * not, info->a is used as the alpha value.
 */
757 static void
758 ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
759 {
    /*
     * NOTE(review): these leads are added to Uint32 pointers below, so with a
     * 4-byte Uint32 they advance 16 and 64 pixels rather than 16 and 64
     * bytes - confirm the intended prefetch distance.
     */
760     const int scalar_dst_lead = sizeof(Uint32) * 4;
761     const int vector_dst_lead = sizeof(Uint32) * 16;
762
763     int height = info->dst_h;
764     Uint32 *src = (Uint32 *) info->src;
    /* skips are divided by 4 because src/dst advance in Uint32 units */
765     int srcskip = info->src_skip / 4;
766     Uint32 *dst = (Uint32 *) info->dst;
767     int dstskip = info->dst_skip / 4;
768     SDL_PixelFormat *srcfmt = info->src_fmt;
769     SDL_PixelFormat *dstfmt = info->dst_fmt;
    /* second operand of the swizzling vec_perm: zeros, or info->a in every byte */
770     vector unsigned int vzero = vec_splat_u32(0);
771     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
772     if (dstfmt->Amask && !srcfmt->Amask) {
773         if (info->a) {
774             vector unsigned char valpha;
            /* only byte 0 is written; the splat below reads only that byte */
775             ((unsigned char *) &valpha)[0] = info->a;
776             vzero = (vector unsigned int) vec_splat(valpha, 0);
777         }
778     }
779
780     SDL_assert(srcfmt->BytesPerPixel == 4);
781     SDL_assert(dstfmt->BytesPerPixel == 4);
782
783     while (height--) {
784         vector unsigned char valigner;
785         vector unsigned int vbits;
786         vector unsigned int voverflow;
787         Uint32 bits;
788         Uint8 r, g, b, a;
789
790         int width = info->dst_w;
791         int extrawidth;
792
793         /* do scalar until we can align... */
794         while ((UNALIGNED_PTR(dst)) && (width)) {
795             vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
796                      DST_CHAN_SRC);
797             vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
798                       DST_CHAN_DEST);
799             bits = *(src++);
800             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
801             if(!srcfmt->Amask)
802               a = info->a;
803             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
804             width--;
805         }
806
807         /* After all that work, here's the vector part! */
808         extrawidth = (width % 4);
809         width -= extrawidth;
810         valigner = VEC_ALIGNER(src);
811         vbits = vec_ld(0, src);
812
813         while (width) {
814             vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
815                      DST_CHAN_SRC);
816             vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
817                       DST_CHAN_DEST);
818             voverflow = vec_ld(15, src);
819             src += 4;
820             width -= 4;
821             vbits = vec_perm(vbits, voverflow, valigner);       /* src is ready. */
822             vbits = vec_perm(vbits, vzero, vpermute);   /* swizzle it. */
823             vec_st(vbits, 0, dst);      /* store it back out. */
824             dst += 4;
            /* the block just loaded is the first half of the next realigned read */
825             vbits = voverflow;
826         }
827
828         SDL_assert(width == 0);
829
830         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
831         while (extrawidth) {
832             bits = *(src++);    /* at most 3 pixels (width % 4), don't bother with prefetch. */
833             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
834             if(!srcfmt->Amask)
835               a = info->a;
836             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
837             extrawidth--;
838         }
839
840         src += srcskip;
841         dst += dstskip;
842     }
843
    /* stop both prefetch streams started above */
844     vec_dss(DST_CHAN_SRC);
845     vec_dss(DST_CHAN_DEST);
846 }
847
848 static Uint32
849 GetBlitFeatures(void)
850 {
851     static Uint32 features = 0xffffffff;
852     if (features == 0xffffffff) {
853         /* Provide an override for testing .. */
854         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
855         if (override) {
856             features = 0;
857             SDL_sscanf(override, "%u", &features);
858         } else {
859             features = (0
860                         /* Feature 1 is has-MMX */
861                         | ((SDL_HasMMX())? 1 : 0)
862                         /* Feature 2 is has-AltiVec */
863                         | ((SDL_HasAltiVec())? 2 : 0)
864                         /* Feature 4 is dont-use-prefetch */
865                         /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
866                         | ((GetL3CacheSize() == 0) ? 4 : 0)
867                 );
868         }
869     }
870     return features;
871 }
872
873 #if __MWERKS__
874 #pragma altivec_model off
875 #endif
876 #else
877 /* Feature 1 is has-MMX */
878 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
879 #endif
880
881 /* This is now endian dependent */
882 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
883 #define HI  1
884 #define LO  0
885 #else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
886 #define HI  0
887 #define LO  1
888 #endif
889
890 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
891 #define RGB888_RGB332(dst, src) { \
892     dst = (Uint8)((((src)&0x00E00000)>>16)| \
893                   (((src)&0x0000E000)>>11)| \
894                   (((src)&0x000000C0)>>6)); \
895 }
896 static void
897 Blit_RGB888_index8(SDL_BlitInfo * info)
898 {
899 #ifndef USE_DUFFS_LOOP
900     int c;
901 #endif
902     int width, height;
903     Uint32 *src;
904     const Uint8 *map;
905     Uint8 *dst;
906     int srcskip, dstskip;
907
908     /* Set up some basic variables */
909     width = info->dst_w;
910     height = info->dst_h;
911     src = (Uint32 *) info->src;
912     srcskip = info->src_skip / 4;
913     dst = info->dst;
914     dstskip = info->dst_skip;
915     map = info->table;
916
917     if (map == NULL) {
918         while (height--) {
919 #ifdef USE_DUFFS_LOOP
920             /* *INDENT-OFF* */
921             DUFFS_LOOP(
922                 RGB888_RGB332(*dst++, *src);
923             , width);
924             /* *INDENT-ON* */
925 #else
926             for (c = width / 4; c; --c) {
927                 /* Pack RGB into 8bit pixel */
928                 ++src;
929                 RGB888_RGB332(*dst++, *src);
930                 ++src;
931                 RGB888_RGB332(*dst++, *src);
932                 ++src;
933                 RGB888_RGB332(*dst++, *src);
934                 ++src;
935             }
936             switch (width & 3) {
937             case 3:
938                 RGB888_RGB332(*dst++, *src);
939                 ++src;
940             case 2:
941                 RGB888_RGB332(*dst++, *src);
942                 ++src;
943             case 1:
944                 RGB888_RGB332(*dst++, *src);
945                 ++src;
946             }
947 #endif /* USE_DUFFS_LOOP */
948             src += srcskip;
949             dst += dstskip;
950         }
951     } else {
952         int Pixel;
953
954         while (height--) {
955 #ifdef USE_DUFFS_LOOP
956             /* *INDENT-OFF* */
957             DUFFS_LOOP(
958                 RGB888_RGB332(Pixel, *src);
959                 *dst++ = map[Pixel];
960                 ++src;
961             , width);
962             /* *INDENT-ON* */
963 #else
964             for (c = width / 4; c; --c) {
965                 /* Pack RGB into 8bit pixel */
966                 RGB888_RGB332(Pixel, *src);
967                 *dst++ = map[Pixel];
968                 ++src;
969                 RGB888_RGB332(Pixel, *src);
970                 *dst++ = map[Pixel];
971                 ++src;
972                 RGB888_RGB332(Pixel, *src);
973                 *dst++ = map[Pixel];
974                 ++src;
975                 RGB888_RGB332(Pixel, *src);
976                 *dst++ = map[Pixel];
977                 ++src;
978             }
979             switch (width & 3) {
980             case 3:
981                 RGB888_RGB332(Pixel, *src);
982                 *dst++ = map[Pixel];
983                 ++src;
984             case 2:
985                 RGB888_RGB332(Pixel, *src);
986                 *dst++ = map[Pixel];
987                 ++src;
988             case 1:
989                 RGB888_RGB332(Pixel, *src);
990                 *dst++ = map[Pixel];
991                 ++src;
992             }
993 #endif /* USE_DUFFS_LOOP */
994             src += srcskip;
995             dst += dstskip;
996         }
997     }
998 }
999
1000 /* Special optimized blit for RGB 10-10-10 --> RGB 3-3-2 */
1001 #define RGB101010_RGB332(dst, src) { \
1002     dst = (Uint8)((((src)&0x38000000)>>22)| \
1003                   (((src)&0x000E0000)>>15)| \
1004                   (((src)&0x00000300)>>8)); \
1005 }
1006 static void
1007 Blit_RGB101010_index8(SDL_BlitInfo * info)
1008 {
1009 #ifndef USE_DUFFS_LOOP
1010     int c;
1011 #endif
1012     int width, height;
1013     Uint32 *src;
1014     const Uint8 *map;
1015     Uint8 *dst;
1016     int srcskip, dstskip;
1017
1018     /* Set up some basic variables */
1019     width = info->dst_w;
1020     height = info->dst_h;
1021     src = (Uint32 *) info->src;
1022     srcskip = info->src_skip / 4;
1023     dst = info->dst;
1024     dstskip = info->dst_skip;
1025     map = info->table;
1026
1027     if (map == NULL) {
1028         while (height--) {
1029 #ifdef USE_DUFFS_LOOP
1030             /* *INDENT-OFF* */
1031             DUFFS_LOOP(
1032                 RGB101010_RGB332(*dst++, *src);
1033             , width);
1034             /* *INDENT-ON* */
1035 #else
1036             for (c = width / 4; c; --c) {
1037                 /* Pack RGB into 8bit pixel */
1038                 ++src;
1039                 RGB101010_RGB332(*dst++, *src);
1040                 ++src;
1041                 RGB101010_RGB332(*dst++, *src);
1042                 ++src;
1043                 RGB101010_RGB332(*dst++, *src);
1044                 ++src;
1045             }
1046             switch (width & 3) {
1047             case 3:
1048                 RGB101010_RGB332(*dst++, *src);
1049                 ++src;
1050             case 2:
1051                 RGB101010_RGB332(*dst++, *src);
1052                 ++src;
1053             case 1:
1054                 RGB101010_RGB332(*dst++, *src);
1055                 ++src;
1056             }
1057 #endif /* USE_DUFFS_LOOP */
1058             src += srcskip;
1059             dst += dstskip;
1060         }
1061     } else {
1062         int Pixel;
1063
1064         while (height--) {
1065 #ifdef USE_DUFFS_LOOP
1066             /* *INDENT-OFF* */
1067             DUFFS_LOOP(
1068                 RGB101010_RGB332(Pixel, *src);
1069                 *dst++ = map[Pixel];
1070                 ++src;
1071             , width);
1072             /* *INDENT-ON* */
1073 #else
1074             for (c = width / 4; c; --c) {
1075                 /* Pack RGB into 8bit pixel */
1076                 RGB101010_RGB332(Pixel, *src);
1077                 *dst++ = map[Pixel];
1078                 ++src;
1079                 RGB101010_RGB332(Pixel, *src);
1080                 *dst++ = map[Pixel];
1081                 ++src;
1082                 RGB101010_RGB332(Pixel, *src);
1083                 *dst++ = map[Pixel];
1084                 ++src;
1085                 RGB101010_RGB332(Pixel, *src);
1086                 *dst++ = map[Pixel];
1087                 ++src;
1088             }
1089             switch (width & 3) {
1090             case 3:
1091                 RGB101010_RGB332(Pixel, *src);
1092                 *dst++ = map[Pixel];
1093                 ++src;
1094             case 2:
1095                 RGB101010_RGB332(Pixel, *src);
1096                 *dst++ = map[Pixel];
1097                 ++src;
1098             case 1:
1099                 RGB101010_RGB332(Pixel, *src);
1100                 *dst++ = map[Pixel];
1101                 ++src;
1102             }
1103 #endif /* USE_DUFFS_LOOP */
1104             src += srcskip;
1105             dst += dstskip;
1106         }
1107     }
1108 }
1109
1110 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
1111 #define RGB888_RGB555(dst, src) { \
1112     *(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
1113                                 (((*src)&0x0000F800)>>6)| \
1114                                 (((*src)&0x000000F8)>>3)); \
1115 }
1116 #ifndef USE_DUFFS_LOOP
1117 #define RGB888_RGB555_TWO(dst, src) { \
1118     *(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
1119                          (((src[HI])&0x0000F800)>>6)| \
1120                          (((src[HI])&0x000000F8)>>3))<<16)| \
1121                          (((src[LO])&0x00F80000)>>9)| \
1122                          (((src[LO])&0x0000F800)>>6)| \
1123                          (((src[LO])&0x000000F8)>>3); \
1124 }
1125 #endif
1126 static void
1127 Blit_RGB888_RGB555(SDL_BlitInfo * info)
1128 {
1129 #ifndef USE_DUFFS_LOOP
1130     int c;
1131 #endif
1132     int width, height;
1133     Uint32 *src;
1134     Uint16 *dst;
1135     int srcskip, dstskip;
1136
1137     /* Set up some basic variables */
1138     width = info->dst_w;
1139     height = info->dst_h;
1140     src = (Uint32 *) info->src;
1141     srcskip = info->src_skip / 4;
1142     dst = (Uint16 *) info->dst;
1143     dstskip = info->dst_skip / 2;
1144
1145 #ifdef USE_DUFFS_LOOP
1146     while (height--) {
1147         /* *INDENT-OFF* */
1148         DUFFS_LOOP(
1149             RGB888_RGB555(dst, src);
1150             ++src;
1151             ++dst;
1152         , width);
1153         /* *INDENT-ON* */
1154         src += srcskip;
1155         dst += dstskip;
1156     }
1157 #else
1158     /* Memory align at 4-byte boundary, if necessary */
1159     if ((long) dst & 0x03) {
1160         /* Don't do anything if width is 0 */
1161         if (width == 0) {
1162             return;
1163         }
1164         --width;
1165
1166         while (height--) {
1167             /* Perform copy alignment */
1168             RGB888_RGB555(dst, src);
1169             ++src;
1170             ++dst;
1171
1172             /* Copy in 4 pixel chunks */
1173             for (c = width / 4; c; --c) {
1174                 RGB888_RGB555_TWO(dst, src);
1175                 src += 2;
1176                 dst += 2;
1177                 RGB888_RGB555_TWO(dst, src);
1178                 src += 2;
1179                 dst += 2;
1180             }
1181             /* Get any leftovers */
1182             switch (width & 3) {
1183             case 3:
1184                 RGB888_RGB555(dst, src);
1185                 ++src;
1186                 ++dst;
1187             case 2:
1188                 RGB888_RGB555_TWO(dst, src);
1189                 src += 2;
1190                 dst += 2;
1191                 break;
1192             case 1:
1193                 RGB888_RGB555(dst, src);
1194                 ++src;
1195                 ++dst;
1196                 break;
1197             }
1198             src += srcskip;
1199             dst += dstskip;
1200         }
1201     } else {
1202         while (height--) {
1203             /* Copy in 4 pixel chunks */
1204             for (c = width / 4; c; --c) {
1205                 RGB888_RGB555_TWO(dst, src);
1206                 src += 2;
1207                 dst += 2;
1208                 RGB888_RGB555_TWO(dst, src);
1209                 src += 2;
1210                 dst += 2;
1211             }
1212             /* Get any leftovers */
1213             switch (width & 3) {
1214             case 3:
1215                 RGB888_RGB555(dst, src);
1216                 ++src;
1217                 ++dst;
1218             case 2:
1219                 RGB888_RGB555_TWO(dst, src);
1220                 src += 2;
1221                 dst += 2;
1222                 break;
1223             case 1:
1224                 RGB888_RGB555(dst, src);
1225                 ++src;
1226                 ++dst;
1227                 break;
1228             }
1229             src += srcskip;
1230             dst += dstskip;
1231         }
1232     }
1233 #endif /* USE_DUFFS_LOOP */
1234 }
1235
1236 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
1237 #define RGB888_RGB565(dst, src) { \
1238     *(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
1239                                 (((*src)&0x0000FC00)>>5)| \
1240                                 (((*src)&0x000000F8)>>3)); \
1241 }
1242 #ifndef USE_DUFFS_LOOP
1243 #define RGB888_RGB565_TWO(dst, src) { \
1244     *(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
1245                          (((src[HI])&0x0000FC00)>>5)| \
1246                          (((src[HI])&0x000000F8)>>3))<<16)| \
1247                          (((src[LO])&0x00F80000)>>8)| \
1248                          (((src[LO])&0x0000FC00)>>5)| \
1249                          (((src[LO])&0x000000F8)>>3); \
1250 }
1251 #endif
1252 static void
1253 Blit_RGB888_RGB565(SDL_BlitInfo * info)
1254 {
1255 #ifndef USE_DUFFS_LOOP
1256     int c;
1257 #endif
1258     int width, height;
1259     Uint32 *src;
1260     Uint16 *dst;
1261     int srcskip, dstskip;
1262
1263     /* Set up some basic variables */
1264     width = info->dst_w;
1265     height = info->dst_h;
1266     src = (Uint32 *) info->src;
1267     srcskip = info->src_skip / 4;
1268     dst = (Uint16 *) info->dst;
1269     dstskip = info->dst_skip / 2;
1270
1271 #ifdef USE_DUFFS_LOOP
1272     while (height--) {
1273         /* *INDENT-OFF* */
1274         DUFFS_LOOP(
1275             RGB888_RGB565(dst, src);
1276             ++src;
1277             ++dst;
1278         , width);
1279         /* *INDENT-ON* */
1280         src += srcskip;
1281         dst += dstskip;
1282     }
1283 #else
1284     /* Memory align at 4-byte boundary, if necessary */
1285     if ((long) dst & 0x03) {
1286         /* Don't do anything if width is 0 */
1287         if (width == 0) {
1288             return;
1289         }
1290         --width;
1291
1292         while (height--) {
1293             /* Perform copy alignment */
1294             RGB888_RGB565(dst, src);
1295             ++src;
1296             ++dst;
1297
1298             /* Copy in 4 pixel chunks */
1299             for (c = width / 4; c; --c) {
1300                 RGB888_RGB565_TWO(dst, src);
1301                 src += 2;
1302                 dst += 2;
1303                 RGB888_RGB565_TWO(dst, src);
1304                 src += 2;
1305                 dst += 2;
1306             }
1307             /* Get any leftovers */
1308             switch (width & 3) {
1309             case 3:
1310                 RGB888_RGB565(dst, src);
1311                 ++src;
1312                 ++dst;
1313             case 2:
1314                 RGB888_RGB565_TWO(dst, src);
1315                 src += 2;
1316                 dst += 2;
1317                 break;
1318             case 1:
1319                 RGB888_RGB565(dst, src);
1320                 ++src;
1321                 ++dst;
1322                 break;
1323             }
1324             src += srcskip;
1325             dst += dstskip;
1326         }
1327     } else {
1328         while (height--) {
1329             /* Copy in 4 pixel chunks */
1330             for (c = width / 4; c; --c) {
1331                 RGB888_RGB565_TWO(dst, src);
1332                 src += 2;
1333                 dst += 2;
1334                 RGB888_RGB565_TWO(dst, src);
1335                 src += 2;
1336                 dst += 2;
1337             }
1338             /* Get any leftovers */
1339             switch (width & 3) {
1340             case 3:
1341                 RGB888_RGB565(dst, src);
1342                 ++src;
1343                 ++dst;
1344             case 2:
1345                 RGB888_RGB565_TWO(dst, src);
1346                 src += 2;
1347                 dst += 2;
1348                 break;
1349             case 1:
1350                 RGB888_RGB565(dst, src);
1351                 ++src;
1352                 ++dst;
1353                 break;
1354             }
1355             src += srcskip;
1356             dst += dstskip;
1357         }
1358     }
1359 #endif /* USE_DUFFS_LOOP */
1360 }
1361
1362
1363 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
1364 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
1365 static void
1366 Blit_RGB565_32(SDL_BlitInfo * info, const Uint32 * map)
1367 {
1368 #ifndef USE_DUFFS_LOOP
1369     int c;
1370 #endif
1371     int width, height;
1372     Uint8 *src;
1373     Uint32 *dst;
1374     int srcskip, dstskip;
1375
1376     /* Set up some basic variables */
1377     width = info->dst_w;
1378     height = info->dst_h;
1379     src = (Uint8 *) info->src;
1380     srcskip = info->src_skip;
1381     dst = (Uint32 *) info->dst;
1382     dstskip = info->dst_skip / 4;
1383
1384 #ifdef USE_DUFFS_LOOP
1385     while (height--) {
1386         /* *INDENT-OFF* */
1387         DUFFS_LOOP(
1388         {
1389             *dst++ = RGB565_32(dst, src, map);
1390             src += 2;
1391         },
1392         width);
1393         /* *INDENT-ON* */
1394         src += srcskip;
1395         dst += dstskip;
1396     }
1397 #else
1398     while (height--) {
1399         /* Copy in 4 pixel chunks */
1400         for (c = width / 4; c; --c) {
1401             *dst++ = RGB565_32(dst, src, map);
1402             src += 2;
1403             *dst++ = RGB565_32(dst, src, map);
1404             src += 2;
1405             *dst++ = RGB565_32(dst, src, map);
1406             src += 2;
1407             *dst++ = RGB565_32(dst, src, map);
1408             src += 2;
1409         }
1410         /* Get any leftovers */
1411         switch (width & 3) {
1412         case 3:
1413             *dst++ = RGB565_32(dst, src, map);
1414             src += 2;
1415         case 2:
1416             *dst++ = RGB565_32(dst, src, map);
1417             src += 2;
1418         case 1:
1419             *dst++ = RGB565_32(dst, src, map);
1420             src += 2;
1421             break;
1422         }
1423         src += srcskip;
1424         dst += dstskip;
1425     }
1426 #endif /* USE_DUFFS_LOOP */
1427 }
1428
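/* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8.
 *
 * Lookup table consumed by RGB565_32(): for a byte value b, entry [2*b] is
 * the contribution of the pixel's LO byte and entry [2*b+1] the contribution
 * of its HI byte (the odd entries also carry the 0xff alpha).  Adding the two
 * entries gives the converted 32-bit pixel.
 */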
1430 static const Uint32 RGB565_ARGB8888_LUT[512] = {
1431     0x00000000, 0xff000000, 0x00000008, 0xff002000,
1432     0x00000010, 0xff004000, 0x00000018, 0xff006100,
1433     0x00000020, 0xff008100, 0x00000029, 0xff00a100,
1434     0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
1435     0x00000041, 0xff080000, 0x0000004a, 0xff082000,
1436     0x00000052, 0xff084000, 0x0000005a, 0xff086100,
1437     0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
1438     0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
1439     0x00000083, 0xff100000, 0x0000008b, 0xff102000,
1440     0x00000094, 0xff104000, 0x0000009c, 0xff106100,
1441     0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
1442     0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
1443     0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
1444     0x000000d5, 0xff184000, 0x000000de, 0xff186100,
1445     0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
1446     0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
1447     0x00000400, 0xff200000, 0x00000408, 0xff202000,
1448     0x00000410, 0xff204000, 0x00000418, 0xff206100,
1449     0x00000420, 0xff208100, 0x00000429, 0xff20a100,
1450     0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
1451     0x00000441, 0xff290000, 0x0000044a, 0xff292000,
1452     0x00000452, 0xff294000, 0x0000045a, 0xff296100,
1453     0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
1454     0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
1455     0x00000483, 0xff310000, 0x0000048b, 0xff312000,
1456     0x00000494, 0xff314000, 0x0000049c, 0xff316100,
1457     0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
1458     0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
1459     0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
1460     0x000004d5, 0xff394000, 0x000004de, 0xff396100,
1461     0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
1462     0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
1463     0x00000800, 0xff410000, 0x00000808, 0xff412000,
1464     0x00000810, 0xff414000, 0x00000818, 0xff416100,
1465     0x00000820, 0xff418100, 0x00000829, 0xff41a100,
1466     0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
1467     0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
1468     0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
1469     0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
1470     0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
1471     0x00000883, 0xff520000, 0x0000088b, 0xff522000,
1472     0x00000894, 0xff524000, 0x0000089c, 0xff526100,
1473     0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
1474     0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
1475     0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
1476     0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
1477     0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
1478     0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
1479     0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
1480     0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
1481     0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
1482     0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
1483     0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
1484     0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
1485     0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
1486     0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
1487     0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
1488     0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
1489     0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
1490     0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
1491     0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
1492     0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
1493     0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
1494     0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
1495     0x00001000, 0xff830000, 0x00001008, 0xff832000,
1496     0x00001010, 0xff834000, 0x00001018, 0xff836100,
1497     0x00001020, 0xff838100, 0x00001029, 0xff83a100,
1498     0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
1499     0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
1500     0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
1501     0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
1502     0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
1503     0x00001083, 0xff940000, 0x0000108b, 0xff942000,
1504     0x00001094, 0xff944000, 0x0000109c, 0xff946100,
1505     0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
1506     0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
1507     0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
1508     0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
1509     0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
1510     0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
1511     0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
1512     0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
1513     0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
1514     0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
1515     0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
1516     0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
1517     0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
1518     0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
1519     0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
1520     0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
1521     0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
1522     0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
1523     0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
1524     0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
1525     0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
1526     0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
1527     0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
1528     0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
1529     0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
1530     0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
1531     0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
1532     0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
1533     0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
1534     0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
1535     0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
1536     0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
1537     0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
1538     0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
1539     0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
1540     0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
1541     0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
1542     0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
1543     0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
1544     0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
1545     0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
1546     0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
1547     0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
1548     0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
1549     0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
1550     0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
1551     0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
1552     0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
1553     0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
1554     0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
1555     0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
1556     0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
1557     0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
1558     0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
1559 };
1560
1561 static void
1562 Blit_RGB565_ARGB8888(SDL_BlitInfo * info)
1563 {
1564     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
1565 }
1566
1567 /* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
1568 static const Uint32 RGB565_ABGR8888_LUT[512] = {
1569     0xff000000, 0x00000000, 0xff080000, 0x00002000,
1570     0xff100000, 0x00004000, 0xff180000, 0x00006100,
1571     0xff200000, 0x00008100, 0xff290000, 0x0000a100,
1572     0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
1573     0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
1574     0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
1575     0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
1576     0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
1577     0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
1578     0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
1579     0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
1580     0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
1581     0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
1582     0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
1583     0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
1584     0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
1585     0xff000400, 0x00000020, 0xff080400, 0x00002020,
1586     0xff100400, 0x00004020, 0xff180400, 0x00006120,
1587     0xff200400, 0x00008120, 0xff290400, 0x0000a120,
1588     0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
1589     0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
1590     0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
1591     0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
1592     0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
1593     0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
1594     0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
1595     0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
1596     0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
1597     0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
1598     0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
1599     0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
1600     0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
1601     0xff000800, 0x00000041, 0xff080800, 0x00002041,
1602     0xff100800, 0x00004041, 0xff180800, 0x00006141,
1603     0xff200800, 0x00008141, 0xff290800, 0x0000a141,
1604     0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
1605     0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
1606     0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
1607     0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
1608     0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
1609     0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
1610     0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
1611     0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
1612     0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
1613     0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
1614     0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
1615     0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
1616     0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
1617     0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
1618     0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
1619     0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
1620     0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
1621     0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
1622     0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
1623     0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
1624     0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
1625     0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
1626     0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
1627     0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
1628     0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
1629     0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
1630     0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
1631     0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
1632     0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
1633     0xff001000, 0x00000083, 0xff081000, 0x00002083,
1634     0xff101000, 0x00004083, 0xff181000, 0x00006183,
1635     0xff201000, 0x00008183, 0xff291000, 0x0000a183,
1636     0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
1637     0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
1638     0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
1639     0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
1640     0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
1641     0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
1642     0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
1643     0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
1644     0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
1645     0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
1646     0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
1647     0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
1648     0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
1649     0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
1650     0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
1651     0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
1652     0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
1653     0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
1654     0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
1655     0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
1656     0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
1657     0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
1658     0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
1659     0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
1660     0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
1661     0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
1662     0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
1663     0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
1664     0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
1665     0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
1666     0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
1667     0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
1668     0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
1669     0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
1670     0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
1671     0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
1672     0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
1673     0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
1674     0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
1675     0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
1676     0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
1677     0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
1678     0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
1679     0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
1680     0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
1681     0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
1682     0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
1683     0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
1684     0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
1685     0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
1686     0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
1687     0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
1688     0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
1689     0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
1690     0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
1691     0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
1692     0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
1693     0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
1694     0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
1695     0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
1696     0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
1697 };
1698
1699 static void
1700 Blit_RGB565_ABGR8888(SDL_BlitInfo * info)
1701 {
1702     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
1703 }
1704
/* Lookup table (512 entries) for the special optimized blit RGB 5-6-5 --> RGBA 8-8-8-8; handed to Blit_RGB565_32() by Blit_RGB565_RGBA8888() */
1706 static const Uint32 RGB565_RGBA8888_LUT[512] = {
1707     0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
1708     0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
1709     0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
1710     0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
1711     0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
1712     0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
1713     0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
1714     0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
1715     0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
1716     0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
1717     0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
1718     0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
1719     0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
1720     0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
1721     0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
1722     0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
1723     0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
1724     0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
1725     0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
1726     0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
1727     0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
1728     0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
1729     0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
1730     0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
1731     0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
1732     0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
1733     0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
1734     0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
1735     0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
1736     0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
1737     0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
1738     0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
1739     0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
1740     0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
1741     0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
1742     0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
1743     0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
1744     0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
1745     0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
1746     0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
1747     0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
1748     0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
1749     0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
1750     0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
1751     0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
1752     0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
1753     0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
1754     0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
1755     0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
1756     0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
1757     0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
1758     0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
1759     0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
1760     0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
1761     0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
1762     0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
1763     0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
1764     0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
1765     0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
1766     0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
1767     0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
1768     0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
1769     0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
1770     0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
1771     0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
1772     0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
1773     0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
1774     0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
1775     0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
1776     0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
1777     0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
1778     0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
1779     0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
1780     0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
1781     0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
1782     0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
1783     0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
1784     0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
1785     0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
1786     0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
1787     0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
1788     0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
1789     0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
1790     0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
1791     0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
1792     0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
1793     0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
1794     0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
1795     0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
1796     0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
1797     0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
1798     0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
1799     0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
1800     0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
1801     0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
1802     0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
1803     0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
1804     0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
1805     0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
1806     0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
1807     0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
1808     0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
1809     0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
1810     0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
1811     0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
1812     0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
1813     0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
1814     0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
1815     0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
1816     0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
1817     0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
1818     0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
1819     0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
1820     0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
1821     0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
1822     0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
1823     0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
1824     0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
1825     0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
1826     0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
1827     0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
1828     0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
1829     0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
1830     0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
1831     0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
1832     0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
1833     0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
1834     0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
1835 };
1836
1837 static void
1838 Blit_RGB565_RGBA8888(SDL_BlitInfo * info)
1839 {
1840     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
1841 }
1842
/* Lookup table (512 entries) for the special optimized blit RGB 5-6-5 --> BGRA 8-8-8-8; handed to Blit_RGB565_32() by Blit_RGB565_BGRA8888() */
1844 static const Uint32 RGB565_BGRA8888_LUT[512] = {
1845     0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
1846     0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
1847     0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
1848     0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
1849     0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
1850     0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
1851     0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
1852     0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
1853     0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
1854     0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
1855     0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
1856     0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
1857     0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
1858     0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
1859     0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
1860     0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
1861     0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
1862     0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
1863     0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
1864     0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
1865     0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
1866     0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
1867     0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
1868     0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
1869     0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
1870     0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
1871     0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
1872     0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
1873     0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
1874     0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
1875     0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
1876     0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
1877     0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
1878     0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
1879     0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
1880     0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
1881     0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
1882     0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
1883     0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
1884     0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
1885     0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
1886     0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
1887     0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
1888     0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
1889     0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
1890     0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
1891     0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
1892     0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
1893     0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
1894     0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
1895     0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
1896     0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
1897     0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
1898     0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
1899     0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
1900     0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
1901     0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
1902     0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
1903     0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
1904     0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
1905     0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
1906     0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
1907     0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
1908     0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
1909     0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
1910     0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
1911     0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
1912     0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
1913     0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
1914     0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
1915     0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
1916     0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
1917     0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
1918     0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
1919     0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
1920     0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
1921     0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
1922     0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
1923     0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
1924     0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
1925     0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
1926     0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
1927     0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
1928     0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
1929     0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
1930     0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
1931     0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
1932     0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
1933     0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
1934     0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
1935     0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
1936     0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
1937     0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
1938     0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
1939     0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
1940     0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
1941     0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
1942     0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
1943     0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
1944     0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
1945     0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
1946     0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
1947     0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
1948     0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
1949     0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
1950     0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
1951     0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
1952     0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
1953     0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
1954     0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
1955     0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
1956     0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
1957     0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
1958     0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
1959     0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
1960     0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
1961     0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
1962     0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
1963     0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
1964     0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
1965     0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
1966     0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
1967     0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
1968     0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
1969     0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
1970     0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
1971     0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
1972     0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
1973 };
1974
1975 static void
1976 Blit_RGB565_BGRA8888(SDL_BlitInfo * info)
1977 {
1978     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
1979 }
1980
1981 static void
1982 BlitNto1(SDL_BlitInfo * info)
1983 {
1984 #ifndef USE_DUFFS_LOOP
1985     int c;
1986 #endif
1987     int width, height;
1988     Uint8 *src;
1989     const Uint8 *map;
1990     Uint8 *dst;
1991     int srcskip, dstskip;
1992     int srcbpp;
1993     Uint32 Pixel;
1994     int sR, sG, sB;
1995     SDL_PixelFormat *srcfmt;
1996
1997     /* Set up some basic variables */
1998     width = info->dst_w;
1999     height = info->dst_h;
2000     src = info->src;
2001     srcskip = info->src_skip;
2002     dst = info->dst;
2003     dstskip = info->dst_skip;
2004     map = info->table;
2005     srcfmt = info->src_fmt;
2006     srcbpp = srcfmt->BytesPerPixel;
2007
2008     if (map == NULL) {
2009         while (height--) {
2010 #ifdef USE_DUFFS_LOOP
2011             /* *INDENT-OFF* */
2012             DUFFS_LOOP(
2013                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
2014                                 sR, sG, sB);
2015                 if ( 1 ) {
2016                     /* Pack RGB into 8bit pixel */
2017                     *dst = ((sR>>5)<<(3+2))|
2018                             ((sG>>5)<<(2)) |
2019                             ((sB>>6)<<(0)) ;
2020                 }
2021                 dst++;
2022                 src += srcbpp;
2023             , width);
2024             /* *INDENT-ON* */
2025 #else
2026             for (c = width; c; --c) {
2027                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
2028                 if (1) {
2029                     /* Pack RGB into 8bit pixel */
2030                     *dst = ((sR >> 5) << (3 + 2)) |
2031                         ((sG >> 5) << (2)) | ((sB >> 6) << (0));
2032                 }
2033                 dst++;
2034                 src += srcbpp;
2035             }
2036 #endif
2037             src += srcskip;
2038             dst += dstskip;
2039         }
2040     } else {
2041         while (height--) {
2042 #ifdef USE_DUFFS_LOOP
2043             /* *INDENT-OFF* */
2044             DUFFS_LOOP(
2045                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
2046                                 sR, sG, sB);
2047                 if ( 1 ) {
2048                     /* Pack RGB into 8bit pixel */
2049                     *dst = map[((sR>>5)<<(3+2))|
2050                            ((sG>>5)<<(2))  |
2051                            ((sB>>6)<<(0))  ];
2052                 }
2053                 dst++;
2054                 src += srcbpp;
2055             , width);
2056             /* *INDENT-ON* */
2057 #else
2058             for (c = width; c; --c) {
2059                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
2060                 if (1) {
2061                     /* Pack RGB into 8bit pixel */
2062                     *dst = map[((sR >> 5) << (3 + 2)) |
2063                                ((sG >> 5) << (2)) | ((sB >> 6) << (0))];
2064                 }
2065                 dst++;
2066                 src += srcbpp;
2067             }
2068 #endif /* USE_DUFFS_LOOP */
2069             src += srcskip;
2070             dst += dstskip;
2071         }
2072     }
2073 }
2074
2075 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
2076 static void
2077 Blit4to4MaskAlpha(SDL_BlitInfo * info)
2078 {
2079     int width = info->dst_w;
2080     int height = info->dst_h;
2081     Uint32 *src = (Uint32 *) info->src;
2082     int srcskip = info->src_skip;
2083     Uint32 *dst = (Uint32 *) info->dst;
2084     int dstskip = info->dst_skip;
2085     SDL_PixelFormat *srcfmt = info->src_fmt;
2086     SDL_PixelFormat *dstfmt = info->dst_fmt;
2087
2088     if (dstfmt->Amask) {
2089         /* RGB->RGBA, SET_ALPHA */
2090         Uint32 mask = (info->a >> dstfmt->Aloss) << dstfmt->Ashift;
2091
2092         while (height--) {
2093             /* *INDENT-OFF* */
2094             DUFFS_LOOP(
2095             {
2096                 *dst = *src | mask;
2097                 ++dst;
2098                 ++src;
2099             },
2100             width);
2101             /* *INDENT-ON* */
2102             src = (Uint32 *) ((Uint8 *) src + srcskip);
2103             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
2104         }
2105     } else {
2106         /* RGBA->RGB, NO_ALPHA */
2107         Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
2108
2109         while (height--) {
2110             /* *INDENT-OFF* */
2111             DUFFS_LOOP(
2112             {
2113                 *dst = *src & mask;
2114                 ++dst;
2115                 ++src;
2116             },
2117             width);
2118             /* *INDENT-ON* */
2119             src = (Uint32 *) ((Uint8 *) src + srcskip);
2120             dst = (Uint32 *) ((Uint8 *) dst + dstskip);
2121         }
2122     }
2123 }
2124
2125 /* blits 32 bit RGBA<->RGBA with both surfaces having the same R,G,B,A fields */
2126 static void
2127 Blit4to4CopyAlpha(SDL_BlitInfo * info)
2128 {
2129     int width = info->dst_w;
2130     int height = info->dst_h;
2131     Uint32 *src = (Uint32 *) info->src;
2132     int srcskip = info->src_skip;
2133     Uint32 *dst = (Uint32 *) info->dst;
2134     int dstskip = info->dst_skip;
2135
2136     /* RGBA->RGBA, COPY_ALPHA */
2137     while (height--) {
2138         /* *INDENT-OFF* */
2139         DUFFS_LOOP(
2140         {
2141             *dst = *src;
2142             ++dst;
2143             ++src;
2144         },
2145         width);
2146         /* *INDENT-ON* */
2147         src = (Uint32 *) ((Uint8 *) src + srcskip);
2148         dst = (Uint32 *) ((Uint8 *) dst + dstskip);
2149     }
2150 }
2151
2152 static void
2153 BlitNtoN(SDL_BlitInfo * info)
2154 {
2155     int width = info->dst_w;
2156     int height = info->dst_h;
2157     Uint8 *src = info->src;
2158     int srcskip = info->src_skip;
2159     Uint8 *dst = info->dst;
2160     int dstskip = info->dst_skip;
2161     SDL_PixelFormat *srcfmt = info->src_fmt;
2162     int srcbpp = srcfmt->BytesPerPixel;
2163     SDL_PixelFormat *dstfmt = info->dst_fmt;
2164     int dstbpp = dstfmt->BytesPerPixel;
2165     unsigned alpha = dstfmt->Amask ? info->a : 0;
2166
2167     while (height--) {
2168         /* *INDENT-OFF* */
2169         DUFFS_LOOP(
2170         {
2171             Uint32 Pixel;
2172             unsigned sR;
2173             unsigned sG;
2174             unsigned sB;
2175             DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
2176             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
2177             dst += dstbpp;
2178             src += srcbpp;
2179         },
2180         width);
2181         /* *INDENT-ON* */
2182         src += srcskip;
2183         dst += dstskip;
2184     }
2185 }
2186
2187 static void
2188 BlitNtoNCopyAlpha(SDL_BlitInfo * info)
2189 {
2190     int width = info->dst_w;
2191     int height = info->dst_h;
2192     Uint8 *src = info->src;
2193     int srcskip = info->src_skip;
2194     Uint8 *dst = info->dst;
2195     int dstskip = info->dst_skip;
2196     SDL_PixelFormat *srcfmt = info->src_fmt;
2197     int srcbpp = srcfmt->BytesPerPixel;
2198     SDL_PixelFormat *dstfmt = info->dst_fmt;
2199     int dstbpp = dstfmt->BytesPerPixel;
2200     int c;
2201
2202     while (height--) {
2203         for (c = width; c; --c) {
2204             Uint32 Pixel;
2205             unsigned sR, sG, sB, sA;
2206             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
2207             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
2208             dst += dstbpp;
2209             src += srcbpp;
2210         }
2211         src += srcskip;
2212         dst += dstskip;
2213     }
2214 }
2215
2216 static void
2217 BlitNto1Key(SDL_BlitInfo * info)
2218 {
2219     int width = info->dst_w;
2220     int height = info->dst_h;
2221     Uint8 *src = info->src;
2222     int srcskip = info->src_skip;
2223     Uint8 *dst = info->dst;
2224     int dstskip = info->dst_skip;
2225     SDL_PixelFormat *srcfmt = info->src_fmt;
2226     const Uint8 *palmap = info->table;
2227     Uint32 ckey = info->colorkey;
2228     Uint32 rgbmask = ~srcfmt->Amask;
2229     int srcbpp;
2230     Uint32 Pixel;
2231     unsigned sR, sG, sB;
2232
2233     /* Set up some basic variables */
2234     srcbpp = srcfmt->BytesPerPixel;
2235     ckey &= rgbmask;
2236
2237     if (palmap == NULL) {
2238         while (height--) {
2239             /* *INDENT-OFF* */
2240             DUFFS_LOOP(
2241             {
2242                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
2243                                 sR, sG, sB);
2244                 if ( (Pixel & rgbmask) != ckey ) {
2245                     /* Pack RGB into 8bit pixel */
2246                     *dst = (Uint8)(((sR>>5)<<(3+2))|
2247                                    ((sG>>5)<<(2)) |
2248                                    ((sB>>6)<<(0)));
2249                 }
2250                 dst++;
2251                 src += srcbpp;
2252             },
2253             width);
2254             /* *INDENT-ON* */
2255             src += srcskip;
2256             dst += dstskip;
2257         }
2258     } else {
2259         while (height--) {
2260             /* *INDENT-OFF* */
2261             DUFFS_LOOP(
2262             {
2263                 DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
2264                                 sR, sG, sB);
2265                 if ( (Pixel & rgbmask) != ckey ) {
2266                     /* Pack RGB into 8bit pixel */
2267                     *dst = (Uint8)palmap[((sR>>5)<<(3+2))|
2268                                          ((sG>>5)<<(2))  |
2269                                          ((sB>>6)<<(0))  ];
2270                 }
2271                 dst++;
2272                 src += srcbpp;
2273             },
2274             width);
2275             /* *INDENT-ON* */
2276             src += srcskip;
2277             dst += dstskip;
2278         }
2279     }
2280 }
2281
2282 static void
2283 Blit2to2Key(SDL_BlitInfo * info)
2284 {
2285     int width = info->dst_w;
2286     int height = info->dst_h;
2287     Uint16 *srcp = (Uint16 *) info->src;
2288     int srcskip = info->src_skip;
2289     Uint16 *dstp = (Uint16 *) info->dst;
2290     int dstskip = info->dst_skip;
2291     Uint32 ckey = info->colorkey;
2292     Uint32 rgbmask = ~info->src_fmt->Amask;
2293
2294     /* Set up some basic variables */
2295     srcskip /= 2;
2296     dstskip /= 2;
2297     ckey &= rgbmask;
2298
2299     while (height--) {
2300         /* *INDENT-OFF* */
2301         DUFFS_LOOP(
2302         {
2303             if ( (*srcp & rgbmask) != ckey ) {
2304                 *dstp = *srcp;
2305             }
2306             dstp++;
2307             srcp++;
2308         },
2309         width);
2310         /* *INDENT-ON* */
2311         srcp += srcskip;
2312         dstp += dstskip;
2313     }
2314 }
2315
2316 static void
2317 BlitNtoNKey(SDL_BlitInfo * info)
2318 {
2319     int width = info->dst_w;
2320     int height = info->dst_h;
2321     Uint8 *src = info->src;
2322     int srcskip = info->src_skip;
2323     Uint8 *dst = info->dst;
2324     int dstskip = info->dst_skip;
2325     Uint32 ckey = info->colorkey;
2326     SDL_PixelFormat *srcfmt = info->src_fmt;
2327     SDL_PixelFormat *dstfmt = info->dst_fmt;
2328     int srcbpp = srcfmt->BytesPerPixel;
2329     int dstbpp = dstfmt->BytesPerPixel;
2330     unsigned alpha = dstfmt->Amask ? info->a : 0;
2331     Uint32 rgbmask = ~srcfmt->Amask;
2332
2333     /* Set up some basic variables */
2334     ckey &= rgbmask;
2335
2336     while (height--) {
2337         /* *INDENT-OFF* */
2338         DUFFS_LOOP(
2339         {
2340             Uint32 Pixel;
2341             unsigned sR;
2342             unsigned sG;
2343             unsigned sB;
2344             RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
2345             if ( (Pixel & rgbmask) != ckey ) {
2346                 RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
2347                 ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
2348             }
2349             dst += dstbpp;
2350             src += srcbpp;
2351         },
2352         width);
2353         /* *INDENT-ON* */
2354         src += srcskip;
2355         dst += dstskip;
2356     }
2357 }
2358
2359 static void
2360 BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info)
2361 {
2362     int width = info->dst_w;
2363     int height = info->dst_h;
2364     Uint8 *src = info->src;
2365     int srcskip = info->src_skip;
2366     Uint8 *dst = info->dst;
2367     int dstskip = info->dst_skip;
2368     Uint32 ckey = info->colorkey;
2369     SDL_PixelFormat *srcfmt = info->src_fmt;
2370     SDL_PixelFormat *dstfmt = info->dst_fmt;
2371     Uint32 rgbmask = ~srcfmt->Amask;
2372
2373     Uint8 srcbpp;
2374     Uint8 dstbpp;
2375     Uint32 Pixel;
2376     unsigned sR, sG, sB, sA;
2377
2378     /* Set up some basic variables */
2379     srcbpp = srcfmt->BytesPerPixel;
2380     dstbpp = dstfmt->BytesPerPixel;
2381     ckey &= rgbmask;
2382
2383     while (height--) {
2384         /* *INDENT-OFF* */
2385         DUFFS_LOOP(
2386         {
2387             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
2388             if ( (Pixel & rgbmask) != ckey ) {
2389                   ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
2390             }
2391             dst += dstbpp;
2392             src += srcbpp;
2393         },
2394         width);
2395         /* *INDENT-ON* */
2396         src += srcskip;
2397         dst += dstskip;
2398     }
2399 }
2400
2401 /* Special optimized blit for ARGB 2-10-10-10 --> RGBA */
2402 static void
2403 Blit2101010toN(SDL_BlitInfo * info)
2404 {
2405     int width = info->dst_w;
2406     int height = info->dst_h;
2407     Uint8 *src = info->src;
2408     int srcskip = info->src_skip;
2409     Uint8 *dst = info->dst;
2410     int dstskip = info->dst_skip;
2411     SDL_PixelFormat *dstfmt = info->dst_fmt;
2412     int dstbpp = dstfmt->BytesPerPixel;
2413     Uint32 Pixel;
2414     unsigned sR, sG, sB, sA;
2415
2416     while (height--) {
2417         /* *INDENT-OFF* */
2418         DUFFS_LOOP(
2419         {
2420             Pixel = *(Uint32 *)src;
2421             RGBA_FROM_ARGB2101010(Pixel, sR, sG, sB, sA);
2422             ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
2423             dst += dstbpp;
2424             src += 4;
2425         },
2426         width);
2427         /* *INDENT-ON* */
2428         src += srcskip;
2429         dst += dstskip;
2430     }
2431 }
2432
2433 /* Special optimized blit for RGBA --> ARGB 2-10-10-10 */
2434 static void
2435 BlitNto2101010(SDL_BlitInfo * info)
2436 {
2437     int width = info->dst_w;
2438     int height = info->dst_h;
2439     Uint8 *src = info->src;
2440     int srcskip = info->src_skip;
2441     Uint8 *dst = info->dst;
2442     int dstskip = info->dst_skip;
2443     SDL_PixelFormat *srcfmt = info->src_fmt;
2444     int srcbpp = srcfmt->BytesPerPixel;
2445     Uint32 Pixel;
2446     unsigned sR, sG, sB, sA;
2447
2448     while (height--) {
2449         /* *INDENT-OFF* */
2450         DUFFS_LOOP(
2451         {
2452             DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
2453             ARGB2101010_FROM_RGBA(Pixel, sR, sG, sB, sA);
2454             *(Uint32 *)dst = Pixel;
2455             dst += 4;
2456             src += srcbpp;
2457         },
2458         width);
2459         /* *INDENT-ON* */
2460         src += srcskip;
2461         dst += dstskip;
2462     }
2463 }
2464
2465 /* Normal N to N optimized blitters */
2466 #define NO_ALPHA   1
2467 #define SET_ALPHA  2
2468 #define COPY_ALPHA 4
2469 struct blit_table
2470 {
2471     Uint32 srcR, srcG, srcB;
2472     int dstbpp;
2473     Uint32 dstR, dstG, dstB;
2474     Uint32 blit_features;
2475     SDL_BlitFunc blitfunc;
2476     Uint32 alpha;  /* bitwise NO_ALPHA, SET_ALPHA, COPY_ALPHA */
2477 };
2478 static const struct blit_table normal_blit_1[] = {
2479     /* Default for 8-bit RGB source, never optimized */
2480     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
2481 };
2482
2483 static const struct blit_table normal_blit_2[] = {
2484 #if SDL_ALTIVEC_BLITTERS
2485     /* has-altivec */
2486     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
2487      2, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
2488     {0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
2489      2, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
2490 #endif
2491     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
2492      0, Blit_RGB565_ARGB8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
2493     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
2494      0, Blit_RGB565_ABGR8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
2495     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000, 0x0000FF00,
2496      0, Blit_RGB565_RGBA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
2497     {0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000, 0xFF000000,
2498      0, Blit_RGB565_BGRA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
2499
2500     /* Default for 16-bit RGB source, used if no other blitter matches */
2501     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
2502 };
2503
2504 static const struct blit_table normal_blit_3[] = {
2505     /* Default for 24-bit RGB source, never optimized */
2506     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
2507 };
2508
2509 static const struct blit_table normal_blit_4[] = {
2510 #if SDL_ALTIVEC_BLITTERS
2511     /* has-altivec | dont-use-prefetch */
2512     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
2513      6, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
2514     /* has-altivec */
2515     {0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
2516      2, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
2517     /* has-altivec */
2518     {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
2519      2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
2520 #endif
2521     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F,
2522      0, Blit_RGB888_RGB565, NO_ALPHA},
2523     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, 0x0000001F,
2524      0, Blit_RGB888_RGB555, NO_ALPHA},
2525     /* Default for 32-bit RGB source, used if no other blitter matches */
2526     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
2527 };
2528
2529 static const struct blit_table *const normal_blit[] = {
2530     normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
2531 };
2532
/*
 * True when format mask x equals table mask y, or when the table mask y is
 * zero and therefore accepts any format mask.
 */
#define MASKOK(x, y) ((x) == (y) || (y) == 0)
2535
2536 SDL_BlitFunc
2537 SDL_CalculateBlitN(SDL_Surface * surface)
2538 {
2539     SDL_PixelFormat *srcfmt;
2540     SDL_PixelFormat *dstfmt;
2541     const struct blit_table *table;
2542     int which;
2543     SDL_BlitFunc blitfun;
2544
2545     /* Set up data for choosing the blit */
2546     srcfmt = surface->format;
2547     dstfmt = surface->map->dst->format;
2548
2549     /* We don't support destinations less than 8-bits */
2550     if (dstfmt->BitsPerPixel < 8) {
2551         return (NULL);
2552     }
2553
2554     switch (surface->map->info.flags & ~SDL_COPY_RLE_MASK) {
2555     case 0:
2556         blitfun = NULL;
2557         if (dstfmt->BitsPerPixel == 8) {
2558             if ((srcfmt->BytesPerPixel == 4) &&
2559                 (srcfmt->Rmask == 0x00FF0000) &&
2560                 (srcfmt->Gmask == 0x0000FF00) &&
2561                 (srcfmt->Bmask == 0x000000FF)) {
2562                 blitfun = Blit_RGB888_index8;
2563             } else if ((srcfmt->BytesPerPixel == 4) &&
2564                 (srcfmt->Rmask == 0x3FF00000) &&
2565                 (srcfmt->Gmask == 0x000FFC00) &&
2566                 (srcfmt->Bmask == 0x000003FF)) {
2567                 blitfun = Blit_RGB101010_index8;
2568             } else {
2569                 blitfun = BlitNto1;
2570             }
2571         } else {
2572             /* Now the meat, choose the blitter we want */
2573             int a_need = NO_ALPHA;
2574             if (dstfmt->Amask)
2575                 a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
2576             table = normal_blit[srcfmt->BytesPerPixel - 1];
2577             for (which = 0; table[which].dstbpp; ++which) {
2578                 if (MASKOK(srcfmt->Rmask, table[which].srcR) &&
2579                     MASKOK(srcfmt->Gmask, table[which].srcG) &&
2580                     MASKOK(srcfmt->Bmask, table[which].srcB) &&
2581                     MASKOK(dstfmt->Rmask, table[which].dstR) &&
2582                     MASKOK(dstfmt->Gmask, table[which].dstG) &&
2583                     MASKOK(dstfmt->Bmask, table[which].dstB) &&
2584                     dstfmt->BytesPerPixel == table[which].dstbpp &&
2585                     (a_need & table[which].alpha) == a_need &&
2586                     ((table[which].blit_features & GetBlitFeatures()) ==
2587                      table[which].blit_features))
2588                     break;
2589             }
2590             blitfun = table[which].blitfunc;
2591
2592             if (blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
2593                 if (srcfmt->format == SDL_PIXELFORMAT_ARGB2101010) {
2594                     blitfun = Blit2101010toN;
2595                 } else if (dstfmt->format == SDL_PIXELFORMAT_ARGB2101010) {
2596                     blitfun = BlitNto2101010;
2597                 } else if (srcfmt->BytesPerPixel == 4 &&
2598                             dstfmt->BytesPerPixel == 4 &&
2599                             srcfmt->Rmask == dstfmt->Rmask &&
2600                             srcfmt->Gmask == dstfmt->Gmask &&
2601                             srcfmt->Bmask == dstfmt->Bmask) {
2602                     if (a_need == COPY_ALPHA) {
2603                         if (srcfmt->Amask == dstfmt->Amask) {
2604                             /* Fastpath C fallback: 32bit RGBA<->RGBA blit with matching RGBA */
2605                             blitfun = Blit4to4CopyAlpha;
2606                         } else {
2607                             blitfun = BlitNtoNCopyAlpha;
2608                         }
2609                     } else {
2610                         /* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
2611                         blitfun = Blit4to4MaskAlpha;
2612                     }
2613                 } else if (a_need == COPY_ALPHA) {
2614                     blitfun = BlitNtoNCopyAlpha;
2615                 }
2616             }
2617         }
2618         return (blitfun);
2619
2620     case SDL_COPY_COLORKEY:
2621         /* colorkey blit: Here we don't have too many options, mostly
2622            because RLE is the preferred fast way to deal with this.
2623            If a particular case turns out to be useful we'll add it. */
2624
2625         if (srcfmt->BytesPerPixel == 2 && surface->map->identity)
2626             return Blit2to2Key;
2627         else if (dstfmt->BytesPerPixel == 1)
2628             return BlitNto1Key;
2629         else {
2630 #if SDL_ALTIVEC_BLITTERS
2631             if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
2632                 && SDL_HasAltiVec()) {
2633                 return Blit32to32KeyAltivec;
2634             } else
2635 #endif
2636             if (srcfmt->Amask && dstfmt->Amask) {
2637                 return BlitNtoNKeyCopyAlpha;
2638             } else {
2639                 return BlitNtoNKey;
2640             }
2641         }
2642     }
2643
2644     return NULL;
2645 }
2646
2647 /* vi: set ts=4 sw=4 expandtab: */