src/video/SDL_RLEaccel.c

   1 /*
   2   Simple DirectMedia Layer
   3   Copyright (C) 1997-2016 Sam Lantinga <slouken@libsdl.org>
   4
   5   This software is provided 'as-is', without any express or implied
   6   warranty.  In no event will the authors be held liable for any damages
   7   arising from the use of this software.
   8
   9   Permission is granted to anyone to use this software for any purpose,
  10   including commercial applications, and to alter it and redistribute it
  11   freely, subject to the following restrictions:
  12
  13   1. The origin of this software must not be misrepresented; you must not
  14      claim that you wrote the original software. If you use this software
  15      in a product, an acknowledgment in the product documentation would be
  16      appreciated but is not required.
  17   2. Altered source versions must be plainly marked as such, and must not be
  18      misrepresented as being the original software.
  19   3. This notice may not be removed or altered from any source distribution.
  20 */
  21 #include "../SDL_internal.h"
  22
  23 /*
  24  * RLE encoding for software colorkey and alpha-channel acceleration
  25  *
  26  * Original version by Sam Lantinga
  27  *
  28  * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
  29  * decoder. Added per-surface alpha blitter. Added per-pixel alpha
  30  * format, encoder and blitter.
  31  *
  32  * Many thanks to Xark and johns for hints, benchmarks and useful comments
  33  * leading to this code.
  34  *
  35  * Welcome to Macro Mayhem.
  36  */
  37
  38 /*
  39  * The encoding translates the image data to a stream of segments of the form
  40  *
  41  * <skip> <run> <data>
  42  *
  43  * where <skip> is the number of transparent pixels to skip,
  44  *       <run>  is the number of opaque pixels to blit,
  45  * and   <data> are the pixels themselves.
  46  *
  47  * This basic structure is used both for colorkeyed surfaces, used for simple
  48  * binary transparency and for per-surface alpha blending, and for surfaces
  49  * with per-pixel alpha. The details differ, however:
  50  *
  51  * Encoding of colorkeyed surfaces:
  52  *
  53  *   Encoded pixels always have the same format as the target surface.
  54  *   <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
  55  *   where they are 16 bit. This makes the pixel data aligned at all times.
  56  *   Segments never wrap around from one scan line to the next.
  57  *
   58  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
  59  *   beginning of a line.
  60  *
  61  * Encoding of surfaces with per-pixel alpha:
  62  *
  63  *   The sequence begins with a struct RLEDestFormat describing the target
  64  *   pixel format, to provide reliable un-encoding.
  65  *
  66  *   Each scan line is encoded twice: First all completely opaque pixels,
  67  *   encoded in the target format as described above, and then all
  68  *   partially transparent (translucent) pixels (where 1 <= alpha <= 254),
  69  *   in the following 32-bit format:
  70  *
  71  *   For 32-bit targets, each pixel has the target RGB format but with
  72  *   the alpha value occupying the highest 8 bits. The <skip> and <run>
  73  *   counts are 16 bit.
  74  *
  75  *   For 16-bit targets, each pixel has the target RGB format, but with
  76  *   the middle component (usually green) shifted 16 steps to the left,
  77  *   and the hole filled with the 5 most significant bits of the alpha value.
  78  *   i.e. if the target has the format         rrrrrggggggbbbbb,
  79  *   the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
  80  *   The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
  81  *   for the translucent lines. Two padding bytes may be inserted
  82  *   before each translucent line to keep them 32-bit aligned.
  83  *
  84  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
  85  *   beginning of an opaque line.
  86  */
  87
  88 #include "SDL_video.h"
  89 #include "SDL_sysvideo.h"
  90 #include "SDL_blit.h"
  91 #include "SDL_RLEaccel_c.h"
   92 /* Fallback MAX/MIN helpers, defined only if the names are not already
       * macros. Each expands its arguments more than once, so operands must be
       * free of side effects. */
   93 #ifndef MAX
   94 #define MAX(a, b) ((a) > (b) ? (a) : (b))
   95 #endif
   96 #ifndef MIN
   97 #define MIN(a, b) ((a) < (b) ? (a) : (b))
   98 #endif
   99 /* Copy `len` pixels of `bpp` bytes each from `from` to `to` with SDL_memcpy;
       * the byte count is formed in size_t. */
  100 #define PIXEL_COPY(to, from, len, bpp)          \
  101     SDL_memcpy(to, from, (size_t)(len) * (bpp))
 102
 103 /*
 104  * Various colorkey blit methods, for opaque and per-surface alpha
 105  */
  106 /* Opaque case: a plain pixel copy. `alpha` is accepted but not used, so the
       * macro has the same argument list as the ALPHA_BLIT* macros. */
  107 #define OPAQUE_BLIT(to, from, length, bpp, alpha)   \
  108     PIXEL_COPY(to, from, length, bpp)
 109
  110 /*
  111  * For 32bpp pixels of the form 0x00rrggbb:
  112  * If we treat the middle component separately, we can process the two
  113  * remaining in parallel. This is safe to do because of the gap to the left
  114  * of each component, so the bits from the multiplication don't collide.
  115  * This can be used for any RGB permutation of course.
       *
       * ALPHA_BLIT32_888 blends `length` 32-bit pixels read from `from` into the
       * pixels at `to`, computing d + ((s - d) * alpha >> 8) per component.
       * The outer components (mask 0xff00ff) are blended together and the
       * middle one (mask 0xff00) on its own; bits 24-31 of every written pixel
       * are zero. `bpp` is not used. `alpha` is expanded unparenthesized, so
       * pass a simple variable or constant.
  116  */
  117 #define ALPHA_BLIT32_888(to, from, length, bpp, alpha)      \
  118     do {                                                    \
  119         int i;                                              \
  120         Uint32 *src = (Uint32 *)(from);                     \
  121         Uint32 *dst = (Uint32 *)(to);                       \
  122         for (i = 0; i < (int)(length); i++) {               \
  123             Uint32 s = *src++;                              \
  124             Uint32 d = *dst;                                \
  125             Uint32 s1 = s & 0xff00ff;                       \
  126             Uint32 d1 = d & 0xff00ff;                       \
  127             d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \
  128             s &= 0xff00;                                    \
  129             d &= 0xff00;                                    \
  130             d = (d + ((s - d) * alpha >> 8)) & 0xff00;      \
  131             *dst++ = d1 | d;                                \
  132         }                                                   \
  133     } while (0)
 134
  135 /*
  136  * For 16bpp pixels we can go a step further: put the middle component
  137  * in the high 16 bits of a 32 bit word, and process all three RGB
  138  * components at the same time. Since the smallest gap is here just
  139  * 5 bits, we have to scale alpha down to 5 bits as well.
       *
       * ALPHA_BLIT16_565 blends `length` 16-bit pixels read from `from` into the
       * pixels at `to`. Mask 0x07e0f81f keeps the 6-bit field 0x07e0 in the
       * upper half-word and the two 5-bit fields 0xf81f in the lower one;
       * `alpha >> 3` keeps the top five bits of an 8-bit alpha to go with the
       * final `>> 5`. `bpp` is not used.
  140  */
  141 #define ALPHA_BLIT16_565(to, from, length, bpp, alpha)  \
  142     do {                                                \
  143         int i;                                          \
  144         Uint16 *src = (Uint16 *)(from);                 \
  145         Uint16 *dst = (Uint16 *)(to);                   \
  146         Uint32 ALPHA = alpha >> 3;                      \
  147         for(i = 0; i < (int)(length); i++) {            \
  148             Uint32 s = *src++;                          \
  149             Uint32 d = *dst;                            \
  150             s = (s | s << 16) & 0x07e0f81f;             \
  151             d = (d | d << 16) & 0x07e0f81f;             \
  152             d += (s - d) * ALPHA >> 5;                  \
  153             d &= 0x07e0f81f;                            \
  154             *dst++ = (Uint16)(d | d >> 16);             \
  155         }                                               \
  156     } while(0)
  157 /* Same blend as ALPHA_BLIT16_565, for layouts with three 5-bit fields:
       * mask 0x03e07c1f moves field 0x03e0 to the upper half-word and keeps
       * 0x7c1f below. Bit 15 of every written pixel is zero. `bpp` is not used. */
  158 #define ALPHA_BLIT16_555(to, from, length, bpp, alpha)  \
  159     do {                                                \
  160         int i;                                          \
  161         Uint16 *src = (Uint16 *)(from);                 \
  162         Uint16 *dst = (Uint16 *)(to);                   \
  163         Uint32 ALPHA = alpha >> 3;                      \
  164         for(i = 0; i < (int)(length); i++) {            \
  165             Uint32 s = *src++;                          \
  166             Uint32 d = *dst;                            \
  167             s = (s | s << 16) & 0x03e07c1f;             \
  168             d = (d | d << 16) & 0x03e07c1f;             \
  169             d += (s - d) * ALPHA >> 5;                  \
  170             d &= 0x03e07c1f;                            \
  171             *dst++ = (Uint16)(d | d >> 16);             \
  172         }                                               \
  173     } while(0)
 174
  175 /*
  176  * The general slow catch-all function, for remaining depths and formats
       *
       * For each of `length` pixels: load the source and destination pixel
       * according to `bpp` (2: one Uint16, 3: three bytes assembled in the
       * host byte order, 4: one Uint32), split both into R, G and B with
       * RGB_FROM_PIXEL, blend each channel as d + ((s - d) * alpha >> 8),
       * rebuild the pixel with PIXEL_FROM_RGB and store it back the same way.
       *
       * `fmt` is not a macro parameter: a pixel format pointer of that name
       * must be in scope wherever the macro is expanded. RGB_FROM_PIXEL and
       * PIXEL_FROM_RGB are defined outside this section. For a `bpp` other
       * than 2, 3 or 4 neither `s` nor `d` is assigned before use.
  177  */
  178 #define ALPHA_BLIT_ANY(to, from, length, bpp, alpha)            \
  179     do {                                                        \
  180         int i;                                                  \
  181         Uint8 *src = from;                                      \
  182         Uint8 *dst = to;                                        \
  183         for (i = 0; i < (int)(length); i++) {                   \
  184             Uint32 s, d;                                        \
  185             unsigned rs, gs, bs, rd, gd, bd;                    \
  186             switch (bpp) {                                      \
  187             case 2:                                             \
  188                 s = *(Uint16 *)src;                             \
  189                 d = *(Uint16 *)dst;                             \
  190                 break;                                          \
  191             case 3:                                             \
  192                 if (SDL_BYTEORDER == SDL_BIG_ENDIAN) {          \
  193                     s = (src[0] << 16) | (src[1] << 8) | src[2]; \
  194                     d = (dst[0] << 16) | (dst[1] << 8) | dst[2]; \
  195                 } else {                                        \
  196                     s = (src[2] << 16) | (src[1] << 8) | src[0]; \
  197                     d = (dst[2] << 16) | (dst[1] << 8) | dst[0]; \
  198                 }                                               \
  199                 break;                                          \
  200             case 4:                                             \
  201                 s = *(Uint32 *)src;                             \
  202                 d = *(Uint32 *)dst;                             \
  203                 break;                                          \
  204             }                                                   \
  205             RGB_FROM_PIXEL(s, fmt, rs, gs, bs);                 \
  206             RGB_FROM_PIXEL(d, fmt, rd, gd, bd);                 \
  207             rd += (rs - rd) * alpha >> 8;                       \
  208             gd += (gs - gd) * alpha >> 8;                       \
  209             bd += (bs - bd) * alpha >> 8;                       \
  210             PIXEL_FROM_RGB(d, fmt, rd, gd, bd);                 \
  211             switch (bpp) {                                      \
  212             case 2:                                             \
  213                 *(Uint16 *)dst = (Uint16)d;                     \
  214                 break;                                          \
  215             case 3:                                             \
  216                 if (SDL_BYTEORDER == SDL_BIG_ENDIAN) {          \
  217                     dst[0] = (Uint8)(d >> 16);                  \
  218                     dst[1] = (Uint8)(d >> 8);                   \
  219                     dst[2] = (Uint8)(d);                        \
  220                 } else {                                        \
  221                     dst[0] = (Uint8)d;                          \
  222                     dst[1] = (Uint8)(d >> 8);                   \
  223                     dst[2] = (Uint8)(d >> 16);                  \
  224                 }                                               \
  225                 break;                                          \
  226             case 4:                                             \
  227                 *(Uint32 *)dst = d;                             \
  228                 break;                                          \
  229             }                                                   \
  230             src += bpp;                                         \
  231             dst += bpp;                                         \
  232         }                                                       \
  233     } while(0)
 234
  235 /*
  236  * Special case: 50% alpha (alpha=128)
  237  * This is treated specially because it can be optimized very well, and
  238  * since it is good for many cases of semi-translucency.
  239  * The theory is to do all three components at the same time:
  240  * First zero the lowest bit of each component, which gives us room to
  241  * add them. Then shift right and add the sum of the lowest bits.
       *
       * ALPHA_BLIT32_888_50 averages `length` 32-bit pixels read from `from`
       * with the pixels at `to`. 0x00fefefe clears the lowest bit of each
       * 8-bit component before the add; `s & d & 0x00010101` puts a low bit
       * back where both inputs had it set. Bits 24-31 of every written pixel
       * are zero. `bpp` and `alpha` are accepted but not used.
  242  */
  243 #define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha)       \
  244     do {                                                        \
  245         int i;                                                  \
  246         Uint32 *src = (Uint32 *)(from);                         \
  247         Uint32 *dst = (Uint32 *)(to);                           \
  248         for(i = 0; i < (int)(length); i++) {                    \
  249             Uint32 s = *src++;                                  \
  250             Uint32 d = *dst;                                    \
  251             *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \
  252                  + (s & d & 0x00010101);                        \
  253         }                                                       \
  254     } while(0)
 255
 256 /*
 257  * For 16bpp, we can actually blend two pixels in parallel, if we take
 258  * care to shift before we add, not after.
 259  */
 260
  261 /* helper: blend a single 16 bit pixel at 50%.
       * `dst` and `src` must be Uint16 pointer variables; both are advanced by
       * one pixel as a side effect. `mask` selects the bits that take part in
       * the add (every bit except the lowest one of each component), and
       * `~mask & 0xffff` selects those lowest bits, which are restored where
       * both pixels have them set. */
  262 #define BLEND16_50(dst, src, mask)                              \
  263     do {                                                        \
  264         Uint32 s = *src++;                                      \
  265         Uint32 d = *dst;                                        \
  266         *dst++ = (Uint16)((((s & mask) + (d & mask)) >> 1) +    \
  267                           (s & d & (~mask & 0xffff)));          \
  268     } while(0)
 269
  270 /* basic 16bpp blender. mask is the set of bits to keep when adding.
       *
       * If `from` and `to` differ in their alignment modulo 4, every pixel goes
       * through BLEND16_50. Otherwise one leading pixel is blended when the
       * source is not 4-byte aligned, the bulk is handled two pixels at a time
       * through 32-bit loads and stores, and a trailing odd pixel is blended
       * last. `bpp` and `alpha` are accepted but not used.
       *
       * NOTE(review): `mask | mask << 16` is evaluated in the type of `mask`;
       * with a plain int constant that has bit 15 set the shift overflows a
       * signed int, so an unsigned mask is the safer argument.
       * NOTE(review): the aligned branch blends the leading pixel without
       * testing `n`; this appears to assume `length` is never zero - confirm
       * against callers. */
  271 #define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask)     \
  272     do {                                                        \
  273         unsigned n = (length);                                  \
  274         Uint16 *src = (Uint16 *)(from);                         \
  275         Uint16 *dst = (Uint16 *)(to);                           \
  276         if (((uintptr_t)src ^ (uintptr_t)dst) & 3) {            \
  277             /* source and destination not in phase, blit one by one */ \
  278             while (n--)                                         \
  279                 BLEND16_50(dst, src, mask);                     \
  280         } else {                                                \
  281             if ((uintptr_t)src & 3) {                           \
  282                 /* first odd pixel */                           \
  283                 BLEND16_50(dst, src, mask);                     \
  284                 n--;                                            \
  285             }                                                   \
  286             for (; n > 1; n -= 2) {                             \
  287                 Uint32 s = *(Uint32 *)src;                      \
  288                 Uint32 d = *(Uint32 *)dst;                      \
  289                 *(Uint32 *)dst = ((s & (mask | mask << 16)) >> 1) \
  290                     + ((d & (mask | mask << 16)) >> 1)          \
  291                     + (s & d & (~(mask | mask << 16)));         \
  292                 src += 2;                                       \
  293                 dst += 2;                                       \
  294             }                                                   \
  295             if (n)                                              \
  296                 BLEND16_50(dst, src, mask); /* last odd pixel */ \
  297         }                                                       \
  298     } while(0)
 299
 300 #define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha)       \
 301     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de)
 302
 303 #define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha)       \
 304     ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde)
  305 /* Pick the blit macro that matches `alpha` and the pixel layout in `fmt`,
       * then expand `blitter(bpp, CountType, do_blit)` with it.
       *
       * - CountType is Uint8 for 1-3 bytes per pixel and Uint16 for 4, matching
       *   the count widths described for the colorkey encoding.
       * - alpha == 255 selects OPAQUE_BLIT. With 1 byte per pixel and any other
       *   alpha nothing is expanded, so no pixels are written.
       * - 2 bytes per pixel: a combined RGB mask of 0xffff with one 0x07e0
       *   component selects the 565 blenders, 0x7fff with one 0x03e0 component
       *   the 555 blenders; every other layout uses ALPHA_BLIT_ANY.
       * - 3 bytes per pixel always uses ALPHA_BLIT_ANY.
       * - 4 bytes per pixel: a combined RGB mask of 0x00ffffff with one 0xff00
       *   component selects the 888 blenders, anything else ALPHA_BLIT_ANY.
       * - alpha == 128 selects the dedicated 50% variants where they exist.
       *
       * The fixed `general16` label limits this macro to one expansion per
       * function, and `alpha` and `fmt` are evaluated several times. */
  306 #define CHOOSE_BLIT(blitter, alpha, fmt)                        \
  307     do {                                                        \
  308         if (alpha == 255) {                                     \
  309             switch (fmt->BytesPerPixel) {                       \
  310             case 1: blitter(1, Uint8, OPAQUE_BLIT); break;      \
  311             case 2: blitter(2, Uint8, OPAQUE_BLIT); break;      \
  312             case 3: blitter(3, Uint8, OPAQUE_BLIT); break;      \
  313             case 4: blitter(4, Uint16, OPAQUE_BLIT); break;     \
  314             }                                                   \
  315         } else {                                                \
  316             switch (fmt->BytesPerPixel) {                       \
  317             case 1:                                             \
  318                 /* No 8bpp alpha blitting */                    \
  319                 break;                                          \
  320                                                                 \
  321             case 2:                                             \
  322                 switch (fmt->Rmask | fmt->Gmask | fmt->Bmask) { \
  323                 case 0xffff:                                    \
  324                     if (fmt->Gmask == 0x07e0                    \
  325                         || fmt->Rmask == 0x07e0                 \
  326                         || fmt->Bmask == 0x07e0) {              \
  327                         if (alpha == 128) {                     \
  328                             blitter(2, Uint8, ALPHA_BLIT16_565_50); \
  329                         } else {                                \
  330                             blitter(2, Uint8, ALPHA_BLIT16_565); \
  331                         }                                       \
  332                     } else                                      \
  333                         goto general16;                         \
  334                     break;                                      \
  335                                                                 \
  336                 case 0x7fff:                                    \
  337                     if (fmt->Gmask == 0x03e0                    \
  338                         || fmt->Rmask == 0x03e0                 \
  339                         || fmt->Bmask == 0x03e0) {              \
  340                         if (alpha == 128) {                     \
  341                             blitter(2, Uint8, ALPHA_BLIT16_555_50); \
  342                         } else {                                \
  343                             blitter(2, Uint8, ALPHA_BLIT16_555); \
  344                         }                                       \
  345                         break;                                  \
  346                     } else                                      \
  347                         goto general16;                         \
  348                     break;                                      \
  349                                                                 \
  350                 default:                                        \
  351     general16:                                                  \
  352                     blitter(2, Uint8, ALPHA_BLIT_ANY);          \
  353                 }                                               \
  354                 break;                                          \
  355                                                                 \
  356             case 3:                                             \
  357                 blitter(3, Uint8, ALPHA_BLIT_ANY);              \
  358                 break;                                          \
  359                                                                 \
  360             case 4:                                             \
  361                 if ((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff \
  362                     && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00 \
  363                     || fmt->Bmask == 0xff00)) {                 \
  364                     if (alpha == 128) {                         \
  365                         blitter(4, Uint16, ALPHA_BLIT32_888_50); \
  366                     } else {                                    \
  367                         blitter(4, Uint16, ALPHA_BLIT32_888);   \
  368                     }                                           \
  369                 } else                                          \
  370                     blitter(4, Uint16, ALPHA_BLIT_ANY);         \
  371                 break;                                          \
  372             }                                                   \
  373         }                                                       \
  374     } while(0)
 375
 376 /*
 377  * Set a pixel value using the given format, except that the alpha value is
 378  * placed in the top byte. This is the format used for RLE with alpha.
 379  */
 380 #define RLEPIXEL_FROM_RGBA(Pixel, fmt, r, g, b, a)                      \
 381 {                                                                       \
 382     Pixel = ((r>>fmt->Rloss)<<fmt->Rshift)|                             \
 383         ((g>>fmt->Gloss)<<fmt->Gshift)|                                 \
 384         ((b>>fmt->Bloss)<<fmt->Bshift)|                                 \
 385         (a<<24);                                                        \
 386 }
 387
  388 /*
  389  * This takes care of the case when the surface is clipped on the left and/or
  390  * right. Top clipping has already been taken care of.
       *
       * w        - width of one encoded scan line in pixels; a line is complete
       *            when the running column offset equals w
       * srcbuf   - encoded stream, decoded from this address onward
       * surf_dst - destination surface; its format selects the blit macro and
       *            its pitch advances dstbuf from row to row
       * dstbuf   - destination address at which source column srcrect->x of
       *            the first line is drawn
       * srcrect  - x and w give the visible column range [x, x + w), h the
       *            number of lines to draw; y is not read here
       * alpha    - per-surface alpha handed to the blit macros; 255 selects the
       *            opaque copy
  391  */
  392 static void
  393 RLEClipBlit(int w, Uint8 * srcbuf, SDL_Surface * surf_dst,
  394             Uint8 * dstbuf, SDL_Rect * srcrect, unsigned alpha)
  395 {
  396     SDL_PixelFormat *fmt = surf_dst->format; /* read by CHOOSE_BLIT and ALPHA_BLIT_ANY */
  397 /* RLECLIPBLIT walks the <skip>,<run> segments line by line. A run is trimmed
       * to the columns [left, right) before do_blit is applied; a run that lies
       * entirely outside is only stepped over. Type is the integer type of the
       * stored counts. dstbuf is moved back by `left` pixels once, so that
       * absolute source columns can be used as destination offsets. The nocopy
       * label is pasted with bpp and do_blit so each expansion made by
       * CHOOSE_BLIT gets a label of its own. The loop stops at a segment whose
       * run is zero and which leaves the column offset at zero, or after
       * srcrect->h lines. */
  398 #define RLECLIPBLIT(bpp, Type, do_blit)                         \
  399     do {                                                        \
  400         int linecount = srcrect->h;                             \
  401         int ofs = 0;                                            \
  402         int left = srcrect->x;                                  \
  403         int right = left + srcrect->w;                          \
  404         dstbuf -= left * bpp;                                   \
  405         for (;;) {                                              \
  406             int run;                                            \
  407             ofs += *(Type *)srcbuf;                             \
  408             run = ((Type *)srcbuf)[1];                          \
  409             srcbuf += 2 * sizeof(Type);                         \
  410             if (run) {                                          \
  411                 /* clip to left and right borders */            \
  412                 if (ofs < right) {                              \
  413                     int start = 0;                              \
  414                     int len = run;                              \
  415                     int startcol;                               \
  416                     if (left - ofs > 0) {                       \
  417                         start = left - ofs;                     \
  418                         len -= start;                           \
  419                         if (len <= 0)                           \
  420                             goto nocopy ## bpp ## do_blit;      \
  421                     }                                           \
  422                     startcol = ofs + start;                     \
  423                     if (len > right - startcol)                 \
  424                         len = right - startcol;                 \
  425                     do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \
  426                         len, bpp, alpha);                       \
  427                 }                                               \
  428     nocopy ## bpp ## do_blit:                                   \
  429                 srcbuf += run * bpp;                            \
  430                 ofs += run;                                     \
  431             } else if (!ofs)                                    \
  432                 break;                                          \
  433                                                                 \
  434             if (ofs == w) {                                     \
  435                 ofs = 0;                                        \
  436                 dstbuf += surf_dst->pitch;                      \
  437                 if (!--linecount)                               \
  438                     break;                                      \
  439             }                                                   \
  440         }                                                       \
  441     } while(0)
  442
  443     CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt); /* expands RLECLIPBLIT once per candidate blit macro */
  444
  445 #undef RLECLIPBLIT
  446
  447 }
 448
 449
  450 /* Blit a colorkeyed RLE surface.
       *
       * Reads the encoded stream from surf_src->map->data and draws it into
       * surf_dst starting at (dstrect->x, dstrect->y). srcrect selects the part
       * of the source to draw: its first srcrect->y encoded lines are skipped,
       * then srcrect->h lines are drawn. If srcrect->x is nonzero or srcrect->w
       * differs from the source width, the drawing is handed to RLEClipBlit.
       * The per-surface alpha is taken from surf_src->map->info.a.
       *
       * The destination is locked for the duration when SDL_MUSTLOCK asks for
       * it. Returns -1 if that lock cannot be taken, otherwise 0 (also when
       * the stream ends while lines are still being skipped).
       *
       * NOTE(review): the destination column offset is computed with the
       * source format's BytesPerPixel, which presumes both surfaces use the
       * same pixel size - confirm against the code that selects this blitter.
       */
  451 int
  452 SDL_RLEBlit(SDL_Surface * surf_src, SDL_Rect * srcrect,
  453             SDL_Surface * surf_dst, SDL_Rect * dstrect)
  454 {
  455     Uint8 *dstbuf;
  456     Uint8 *srcbuf;
  457     int x, y;
  458     int w = surf_src->w; /* encoded line width: ofs == w marks the end of a line */
  459     unsigned alpha;
  460
  461     /* Lock the destination if necessary */
  462     if (SDL_MUSTLOCK(surf_dst)) {
  463         if (SDL_LockSurface(surf_dst) < 0) {
  464             return (-1);
  465         }
  466     }
  467
  468     /* Set up the source and destination pointers */
  469     x = dstrect->x;
  470     y = dstrect->y;
  471     dstbuf = (Uint8 *) surf_dst->pixels
  472         + y * surf_dst->pitch + x * surf_src->format->BytesPerPixel;
  473     srcbuf = (Uint8 *) surf_src->map->data;
  474
  475     {
  476         /* skip lines at the top if necessary */
  477         int vskip = srcrect->y;
  478         int ofs = 0;
  479         if (vskip) {
  480             /* RLESKIP steps over whole encoded lines without drawing: it follows the <skip>,<run> counts of type Type and jumps to `done` if the end marker is reached first. */
  481 #define RLESKIP(bpp, Type)          \
  482         for(;;) {           \
  483             int run;            \
  484             ofs += *(Type *)srcbuf; \
  485             run = ((Type *)srcbuf)[1];  \
  486             srcbuf += sizeof(Type) * 2; \
  487             if(run) {           \
  488             srcbuf += run * bpp;    \
  489             ofs += run;     \
  490             } else if(!ofs)     \
  491             goto done;      \
  492             if(ofs == w) {      \
  493             ofs = 0;        \
  494             if(!--vskip)        \
  495                 break;      \
  496             }               \
  497         }
  498
  499             switch (surf_src->format->BytesPerPixel) {
  500             case 1:
  501                 RLESKIP(1, Uint8);
  502                 break;
  503             case 2:
  504                 RLESKIP(2, Uint8);
  505                 break;
  506             case 3:
  507                 RLESKIP(3, Uint8);
  508                 break;
  509             case 4:
  510                 RLESKIP(4, Uint16); /* 4-byte pixels use 16-bit counts */
  511                 break;
  512             }
  513
  514 #undef RLESKIP
  515
  516         }
  517     }
  518
  519     alpha = surf_src->map->info.a;
  520     /* if left or right edge clipping needed, call clip blit */
  521     if (srcrect->x || srcrect->w != surf_src->w) {
  522         RLEClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect, alpha);
  523     } else {
  524         SDL_PixelFormat *fmt = surf_src->format; /* read by CHOOSE_BLIT and ALPHA_BLIT_ANY */
  525         /* RLEBLIT is the unclipped blitter: every run is drawn in full at its column offset, and the loop stops at the end marker or after srcrect->h lines. */
  526 #define RLEBLIT(bpp, Type, do_blit)                       \
  527         do {                                  \
  528         int linecount = srcrect->h;                   \
  529         int ofs = 0;                              \
  530         for(;;) {                             \
  531             unsigned run;                         \
  532             ofs += *(Type *)srcbuf;                   \
  533             run = ((Type *)srcbuf)[1];                    \
  534             srcbuf += 2 * sizeof(Type);                   \
  535             if(run) {                             \
  536             do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
  537             srcbuf += run * bpp;                      \
  538             ofs += run;                       \
  539             } else if(!ofs)                       \
  540             break;                            \
  541             if(ofs == w) {                        \
  542             ofs = 0;                          \
  543             dstbuf += surf_dst->pitch;                     \
  544             if(!--linecount)                      \
  545                 break;                        \
  546             }                                 \
  547         }                                 \
  548         } while(0)
  549
  550         CHOOSE_BLIT(RLEBLIT, alpha, fmt);
  551
  552 #undef RLEBLIT
  553     }
  554
  555   done:
  556     /* Unlock the destination if necessary */
  557     if (SDL_MUSTLOCK(surf_dst)) {
  558         SDL_UnlockSurface(surf_dst);
  559     }
  560     return (0);
  561 }
  562 /* OPAQUE_BLIT is retired here; nothing in the code visible below this line
       * refers to it. */
  563 #undef OPAQUE_BLIT
 564
 565 /*
 566  * Per-pixel blitting macros for translucent pixels:
 567  * These use the same techniques as the per-surface blitting macros
 568  */
 569
  570 /*
  571  * For 32bpp pixels, we have made sure the alpha is stored in the top
  572  * 8 bits, so proceed as usual
       *
       * BLIT_TRANSL_888 blends one encoded pixel `src` (alpha in bits 24-31)
       * into `dst`, which must be an lvalue: it is read once and then
       * assigned. The two outer components (mask 0xff00ff) are blended
       * together and the middle one (mask 0xff00) separately. The top byte of
       * the stored result is always 0xff.
  573  */
  574 #define BLIT_TRANSL_888(src, dst)               \
  575     do {                            \
  576         Uint32 s = src;                     \
  577     Uint32 d = dst;                     \
  578     unsigned alpha = s >> 24;               \
  579     Uint32 s1 = s & 0xff00ff;               \
  580     Uint32 d1 = d & 0xff00ff;               \
  581     d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;    \
  582     s &= 0xff00;                        \
  583     d &= 0xff00;                        \
  584     d = (d + ((s - d) * alpha >> 8)) & 0xff00;      \
  585     dst = d1 | d | 0xff000000;              \
  586     } while(0)
 587
  588 /*
  589  * For 16bpp pixels, we have stored the 5 most significant alpha bits in
  590  * bits 5-9. As before, we can process all 3 RGB components at the same time.
       *
       * BLIT_TRANSL_565 blends one encoded 32-bit pixel `src` into the 16-bit
       * lvalue `dst`. The alpha is extracted with mask 0x3e0; the source is
       * only masked with 0x07e0f81f because it is already stored in the spread
       * layout, while the destination is spread first (d | d << 16) and folded
       * back to 16 bits after the blend.
  591  */
  592 #define BLIT_TRANSL_565(src, dst)       \
  593     do {                    \
  594     Uint32 s = src;             \
  595     Uint32 d = dst;             \
  596     unsigned alpha = (s & 0x3e0) >> 5;  \
  597     s &= 0x07e0f81f;            \
  598     d = (d | d << 16) & 0x07e0f81f;     \
  599     d += (s - d) * alpha >> 5;      \
  600     d &= 0x07e0f81f;            \
  601     dst = (Uint16)(d | d >> 16);            \
  602     } while(0)
  603 /* Same as BLIT_TRANSL_565 for layouts with three 5-bit fields: the 5-bit
       * alpha is again taken from mask 0x3e0 of the encoded pixel, and mask
       * 0x03e07c1f replaces 0x07e0f81f. `dst` must be a 16-bit lvalue. */
  604 #define BLIT_TRANSL_555(src, dst)       \
  605     do {                    \
  606     Uint32 s = src;             \
  607     Uint32 d = dst;             \
  608     unsigned alpha = (s & 0x3e0) >> 5;  \
  609     s &= 0x03e07c1f;            \
  610     d = (d | d << 16) & 0x03e07c1f;     \
  611     d += (s - d) * alpha >> 5;      \
  612     d &= 0x03e07c1f;            \
  613     dst = (Uint16)(d | d >> 16);            \
  614     } while(0)
 615
  616 /* used to save the destination format in the encoding. Designed to be
  617    macro-compatible with SDL_PixelFormat but without the unneeded fields.
         "Macro-compatible" means the member names match those the pixel macros
         access through a format pointer (BytesPerPixel, the masks, the loss and
         shift values). Per the format notes at the top of this file, a
         per-pixel-alpha encoding begins with one of these structures. */
  618 typedef struct
  619 {
  620     Uint8 BytesPerPixel;
  621     Uint8 padding[3]; /* presumably keeps the Uint32 masks at a 4-byte offset - confirm */
  622     Uint32 Rmask;
  623     Uint32 Gmask;
  624     Uint32 Bmask;
  625     Uint32 Amask;
  626     Uint8 Rloss;
  627     Uint8 Gloss;
  628     Uint8 Bloss;
  629     Uint8 Aloss;
  630     Uint8 Rshift;
  631     Uint8 Gshift;
  632     Uint8 Bshift;
  633     Uint8 Ashift;
  634 } RLEDestFormat;
 635
/*
 * Blit a pixel-alpha RLE surface clipped at the right and/or left edges.
 *
 *  w        - line width used to detect the end of each encoded line
 *             (segments are consumed until the running offset reaches w)
 *  srcbuf   - encoded stream, positioned at the first line to draw
 *  surf_dst - destination surface; its format selects the blitter and its
 *             pitch is used to advance dstbuf from line to line
 *  dstbuf   - destination address corresponding to the left edge of srcrect
 *  srcrect  - x/w give the horizontal clip window, h the number of lines
 *
 * Returns early (from inside the macro) when the end-of-stream marker, an
 * opaque segment with zero skip and zero run at offset 0, is reached.
 * Destination formats other than 2 or 4 bytes per pixel are ignored.
 */
static void
RLEAlphaClipBlit(int w, Uint8 * srcbuf, SDL_Surface * surf_dst,
                 Uint8 * dstbuf, SDL_Rect * srcrect)
{
    SDL_PixelFormat *df = surf_dst->format;
    /*
     * clipped blitter: Ptype is the destination pixel type,
     * Ctype the type of the opaque skip/run counts (translucent counts
     * are always read as Uint16), and do_blend the macro
     * to blend one pixel.
     *
     * dstbuf is moved back by `left` pixels up front so that offsets
     * counted from source column 0 can be used to index it directly.
     */
#define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend)              \
    do {                                  \
    int linecount = srcrect->h;                   \
    int left = srcrect->x;                        \
    int right = left + srcrect->w;                    \
    dstbuf -= left * sizeof(Ptype);                   \
    do {                                  \
        int ofs = 0;                          \
        /* blit opaque pixels on one line */              \
        do {                              \
        unsigned run;                         \
        ofs += ((Ctype *)srcbuf)[0];                  \
        run = ((Ctype *)srcbuf)[1];               \
        srcbuf += 2 * sizeof(Ctype);                  \
        if(run) {                         \
            /* clip to left and right borders */          \
            int cofs = ofs;                   \
            int crun = run;                   \
            if(left - cofs > 0) {                 \
            crun -= left - cofs;                  \
            cofs = left;                      \
            }                             \
            if(crun > right - cofs)               \
            crun = right - cofs;                  \
            if(crun > 0)                      \
            PIXEL_COPY(dstbuf + cofs * sizeof(Ptype),     \
                   srcbuf + (cofs - ofs) * sizeof(Ptype), \
                   (unsigned)crun, sizeof(Ptype));    \
            srcbuf += run * sizeof(Ptype);            \
            ofs += run;                       \
        } else if(!ofs)                       \
            return;                       \
        } while(ofs < w);                         \
        /* skip padding if necessary */               \
        if(sizeof(Ptype) == 2)                    \
        srcbuf += (uintptr_t)srcbuf & 2;              \
        /* blit translucent pixels on the same line */        \
        ofs = 0;                              \
        do {                              \
        unsigned run;                         \
        ofs += ((Uint16 *)srcbuf)[0];                 \
        run = ((Uint16 *)srcbuf)[1];                  \
        srcbuf += 4;                          \
        if(run) {                         \
            /* clip to left and right borders */          \
            int cofs = ofs;                   \
            int crun = run;                   \
            if(left - cofs > 0) {                 \
            crun -= left - cofs;                  \
            cofs = left;                      \
            }                             \
            if(crun > right - cofs)               \
            crun = right - cofs;                  \
            if(crun > 0) {                    \
            Ptype *dst = (Ptype *)dstbuf + cofs;          \
            Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs);    \
            int i;                        \
            for(i = 0; i < crun; i++)             \
                do_blend(src[i], dst[i]);             \
            }                             \
            srcbuf += run * 4;                    \
            ofs += run;                       \
        }                             \
        } while(ofs < w);                         \
        dstbuf += surf_dst->pitch;                     \
    } while(--linecount);                         \
    } while(0)

    /* pick the blitter matching the destination pixel layout */
    switch (df->BytesPerPixel) {
    case 2:
        /* a 6-bit channel mask (0x07e0) identifies a 565-style layout */
        if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0)
            RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
        else
            RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
        break;
    case 4:
        RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
        break;
    }
}
 727
/*
 * Blit a pixel-alpha RLE surface.
 *
 *  surf_src - RLE-encoded source; the encoded stream is read from
 *             surf_src->map->data, after the leading RLEDestFormat header
 *  srcrect  - part of the source to draw (y lines are skipped at the top,
 *             h lines are drawn, x/w select the horizontal window)
 *  surf_dst - destination surface (locked here if SDL_MUSTLOCK says so)
 *  dstrect  - x/y give the destination position
 *
 * Returns -1 if the destination cannot be locked, 0 otherwise. Reaching the
 * end-of-stream marker while skipping or drawing simply ends the blit.
 */
int
SDL_RLEAlphaBlit(SDL_Surface * surf_src, SDL_Rect * srcrect,
                 SDL_Surface * surf_dst, SDL_Rect * dstrect)
{
    int x, y;
    int w = surf_src->w;
    Uint8 *srcbuf, *dstbuf;
    SDL_PixelFormat *df = surf_dst->format;

    /* Lock the destination if necessary */
    if (SDL_MUSTLOCK(surf_dst)) {
        if (SDL_LockSurface(surf_dst) < 0) {
            return -1;
        }
    }

    x = dstrect->x;
    y = dstrect->y;
    dstbuf = (Uint8 *) surf_dst->pixels + y * surf_dst->pitch + x * df->BytesPerPixel;
    /* the encoded data starts right after the saved destination format */
    srcbuf = (Uint8 *) surf_src->map->data + sizeof(RLEDestFormat);

    {
        /* skip lines at the top if necessary */
        int vskip = srcrect->y;
        if (vskip) {
            int ofs;
            if (df->BytesPerPixel == 2) {
                /* the 16/32 interleaved format */
                do {
                    /* skip opaque line (byte counts, 2 bytes per pixel) */
                    ofs = 0;
                    do {
                        int run;
                        ofs += srcbuf[0];
                        run = srcbuf[1];
                        srcbuf += 2;
                        if (run) {
                            srcbuf += 2 * run;
                            ofs += run;
                        } else if (!ofs)
                            goto done;  /* end-of-stream marker */
                    } while (ofs < w);

                    /* skip padding */
                    srcbuf += (uintptr_t) srcbuf & 2;

                    /* skip translucent line (16-bit counts, 4 bytes per pixel) */
                    ofs = 0;
                    do {
                        int run;
                        ofs += ((Uint16 *) srcbuf)[0];
                        run = ((Uint16 *) srcbuf)[1];
                        srcbuf += 4 * (run + 1);
                        ofs += run;
                    } while (ofs < w);
                } while (--vskip);
            } else {
                /* the 32/32 interleaved format */
                vskip <<= 1;    /* opaque and translucent have same format */
                do {
                    ofs = 0;
                    do {
                        int run;
                        ofs += ((Uint16 *) srcbuf)[0];
                        run = ((Uint16 *) srcbuf)[1];
                        srcbuf += 4;
                        if (run) {
                            srcbuf += 4 * run;
                            ofs += run;
                        } else if (!ofs)
                            goto done;  /* end-of-stream marker */
                    } while (ofs < w);
                } while (--vskip);
            }
        }
    }

    /* if left or right edge clipping needed, call clip blit */
    if (srcrect->x || srcrect->w != surf_src->w) {
        RLEAlphaClipBlit(w, srcbuf, surf_dst, dstbuf, srcrect);
    } else {

        /*
         * non-clipped blitter. Ptype is the destination pixel type,
         * Ctype the type of the opaque skip/run counts (translucent
         * counts are always read as Uint16), and do_blend the
         * macro to blend one pixel.
         */
#define RLEALPHABLIT(Ptype, Ctype, do_blend)                 \
    do {                                 \
        int linecount = srcrect->h;                  \
        do {                             \
        int ofs = 0;                         \
        /* blit opaque pixels on one line */             \
        do {                             \
            unsigned run;                    \
            ofs += ((Ctype *)srcbuf)[0];             \
            run = ((Ctype *)srcbuf)[1];              \
            srcbuf += 2 * sizeof(Ctype);             \
            if(run) {                        \
            PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
                   run, sizeof(Ptype));          \
            srcbuf += run * sizeof(Ptype);           \
            ofs += run;                  \
            } else if(!ofs)                  \
            goto done;                   \
        } while(ofs < w);                    \
        /* skip padding if necessary */              \
        if(sizeof(Ptype) == 2)                   \
            srcbuf += (uintptr_t)srcbuf & 2;             \
        /* blit translucent pixels on the same line */       \
        ofs = 0;                         \
        do {                             \
            unsigned run;                    \
            ofs += ((Uint16 *)srcbuf)[0];            \
            run = ((Uint16 *)srcbuf)[1];             \
            srcbuf += 4;                     \
            if(run) {                        \
            Ptype *dst = (Ptype *)dstbuf + ofs;      \
            unsigned i;                  \
            for(i = 0; i < run; i++) {           \
                Uint32 src = *(Uint32 *)srcbuf;      \
                do_blend(src, *dst);             \
                srcbuf += 4;                 \
                dst++;                   \
            }                        \
            ofs += run;                  \
            }                            \
        } while(ofs < w);                    \
        dstbuf += surf_dst->pitch;                    \
        } while(--linecount);                    \
    } while(0)

        /* pick the blitter matching the destination pixel layout */
        switch (df->BytesPerPixel) {
        case 2:
            /* a 6-bit channel mask (0x07e0) identifies a 565-style layout */
            if (df->Gmask == 0x07e0 || df->Rmask == 0x07e0
                || df->Bmask == 0x07e0)
                RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
            else
                RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
            break;
        case 4:
            RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
            break;
        }
    }

  done:
    /* Unlock the destination if necessary */
    if (SDL_MUSTLOCK(surf_dst)) {
        SDL_UnlockSurface(surf_dst);
    }
    return 0;
}
 882
 883 /*
 884  * Auxiliary functions:
 885  * The encoding functions take 32bpp rgb + a, and
 886  * return the number of bytes copied to the destination.
 887  * The decoding functions copy to 32bpp rgb + a, and
 888  * return the number of bytes copied from the source.
 889  * These are only used in the encoder and un-RLE code and are therefore not
 890  * highly optimised.
 891  */
 892
 893 /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */
 894 static int
 895 copy_opaque_16(void *dst, Uint32 * src, int n,
 896                SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
 897 {
 898     int i;
 899     Uint16 *d = dst;
 900     for (i = 0; i < n; i++) {
 901         unsigned r, g, b;
 902         RGB_FROM_PIXEL(*src, sfmt, r, g, b);
 903         PIXEL_FROM_RGB(*d, dfmt, r, g, b);
 904         src++;
 905         d++;
 906     }
 907     return n * 2;
 908 }
 909
 910 /* decode opaque pixels from 16bpp to 32bpp rgb + a */
 911 static int
 912 uncopy_opaque_16(Uint32 * dst, void *src, int n,
 913                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
 914 {
 915     int i;
 916     Uint16 *s = src;
 917     unsigned alpha = dfmt->Amask ? 255 : 0;
 918     for (i = 0; i < n; i++) {
 919         unsigned r, g, b;
 920         RGB_FROM_PIXEL(*s, sfmt, r, g, b);
 921         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha);
 922         s++;
 923         dst++;
 924     }
 925     return n * 2;
 926 }
 927
 928
 929
 930 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */
 931 static int
 932 copy_transl_565(void *dst, Uint32 * src, int n,
 933                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
 934 {
 935     int i;
 936     Uint32 *d = dst;
 937     for (i = 0; i < n; i++) {
 938         unsigned r, g, b, a;
 939         Uint16 pix;
 940         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
 941         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
 942         *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0);
 943         src++;
 944         d++;
 945     }
 946     return n * 4;
 947 }
 948
 949 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */
 950 static int
 951 copy_transl_555(void *dst, Uint32 * src, int n,
 952                 SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
 953 {
 954     int i;
 955     Uint32 *d = dst;
 956     for (i = 0; i < n; i++) {
 957         unsigned r, g, b, a;
 958         Uint16 pix;
 959         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
 960         PIXEL_FROM_RGB(pix, dfmt, r, g, b);
 961         *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0);
 962         src++;
 963         d++;
 964     }
 965     return n * 4;
 966 }
 967
 968 /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */
 969 static int
 970 uncopy_transl_16(Uint32 * dst, void *src, int n,
 971                  RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
 972 {
 973     int i;
 974     Uint32 *s = src;
 975     for (i = 0; i < n; i++) {
 976         unsigned r, g, b, a;
 977         Uint32 pix = *s++;
 978         a = (pix & 0x3e0) >> 2;
 979         pix = (pix & ~0x3e0) | pix >> 16;
 980         RGB_FROM_PIXEL(pix, sfmt, r, g, b);
 981         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
 982         dst++;
 983     }
 984     return n * 4;
 985 }
 986
 987 /* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
 988 static int
 989 copy_32(void *dst, Uint32 * src, int n,
 990         SDL_PixelFormat * sfmt, SDL_PixelFormat * dfmt)
 991 {
 992     int i;
 993     Uint32 *d = dst;
 994     for (i = 0; i < n; i++) {
 995         unsigned r, g, b, a;
 996         RGBA_FROM_8888(*src, sfmt, r, g, b, a);
 997         RLEPIXEL_FROM_RGBA(*d, dfmt, r, g, b, a);
 998         d++;
 999         src++;
1000     }
1001     return n * 4;
1002 }
1003
1004 /* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
1005 static int
1006 uncopy_32(Uint32 * dst, void *src, int n,
1007           RLEDestFormat * sfmt, SDL_PixelFormat * dfmt)
1008 {
1009     int i;
1010     Uint32 *s = src;
1011     for (i = 0; i < n; i++) {
1012         unsigned r, g, b, a;
1013         Uint32 pixel = *s++;
1014         RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
1015         a = pixel >> 24;
1016         PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
1017         dst++;
1018     }
1019     return n * 4;
1020 }
1021
/*
 * True when the alpha channel of `pixel`, extracted with the Amask/Ashift
 * members of the format pointed to by `fmt`, equals 255.
 * `fmt` is parenthesized so that any pointer expression (e.g. `&format`)
 * can be passed safely.
 */
#define ISOPAQUE(pixel, fmt) \
    ((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) == 255)
1023
/*
 * True when the alpha channel of `pixel` is strictly between 0 and 255,
 * i.e. in 1..254. Subtracting 1 in unsigned arithmetic makes alpha 0 wrap
 * to a huge value, so a single comparison covers both bounds.
 * `fmt` is parenthesized so that any pointer expression can be passed.
 */
#define ISTRANSL(pixel, fmt)    \
    ((unsigned)((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) - 1U) < 254U)
1026
/*
 * Convert surface to be quickly alpha-blittable onto dest, if possible.
 *
 * The destination is taken from surface->map->dst. Only 32bpp sources are
 * handled, and only 16bpp (565/555) or 32bpp (RGB in the low 24 bits)
 * destinations.
 *
 * On success the encoded stream, prefixed by an RLEDestFormat header, is
 * stored in surface->map->data, and the original pixels are freed (unless
 * the surface is SDL_PREALLOC).
 *
 * Returns 0 on success, -1 when the conversion is not supported, or the
 * result of SDL_OutOfMemory() when the encode buffer cannot be allocated.
 */
static int
RLEAlphaSurface(SDL_Surface * surface)
{
    SDL_Surface *dest;
    SDL_PixelFormat *df;
    int maxsize = 0;
    int max_opaque_run;
    int max_transl_run = 65535;
    unsigned masksum;
    Uint8 *rlebuf, *dst;
    int (*copy_opaque) (void *, Uint32 *, int,
                        SDL_PixelFormat *, SDL_PixelFormat *);
    int (*copy_transl) (void *, Uint32 *, int,
                        SDL_PixelFormat *, SDL_PixelFormat *);

    dest = surface->map->dst;
    if (!dest)
        return -1;
    df = dest->format;
    if (surface->format->BitsPerPixel != 32)
        return -1;              /* only 32bpp source supported */

    /* find out whether the destination is one we support,
       and determine the max size of the encoded result */
    masksum = df->Rmask | df->Gmask | df->Bmask;
    switch (df->BytesPerPixel) {
    case 2:
        /* 16bpp: only support 565 and 555 formats */
        switch (masksum) {
        case 0xffff:
            if (df->Gmask == 0x07e0
                || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) {
                copy_opaque = copy_opaque_16;
                copy_transl = copy_transl_565;
            } else
                return -1;
            break;
        case 0x7fff:
            if (df->Gmask == 0x03e0
                || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) {
                copy_opaque = copy_opaque_16;
                copy_transl = copy_transl_555;
            } else
                return -1;
            break;
        default:
            return -1;
        }
        max_opaque_run = 255;   /* runs stored as bytes */

        /* worst case is alternating opaque and translucent pixels,
           with room for alignment padding between lines */
        maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2;
        break;
    case 4:
        if (masksum != 0x00ffffff)
            return -1;          /* requires unused high byte */
        copy_opaque = copy_32;
        copy_transl = copy_32;
        /* counts are stored as Uint16 for this depth (see
           ADD_OPAQUE_COUNTS below), yet opaque runs are still capped
           at 255 here */
        max_opaque_run = 255;

        /* worst case is alternating opaque and translucent pixels */
        maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4;
        break;
    default:
        return -1;              /* anything else unsupported right now */
    }

    maxsize += sizeof(RLEDestFormat);
    rlebuf = (Uint8 *) SDL_malloc(maxsize);
    if (!rlebuf) {
        return SDL_OutOfMemory();
    }
    {
        /* save the destination format so we can undo the encoding later */
        RLEDestFormat *r = (RLEDestFormat *) rlebuf;
        r->BytesPerPixel = df->BytesPerPixel;
        r->Rmask = df->Rmask;
        r->Gmask = df->Gmask;
        r->Bmask = df->Bmask;
        r->Amask = df->Amask;
        r->Rloss = df->Rloss;
        r->Gloss = df->Gloss;
        r->Bloss = df->Bloss;
        r->Aloss = df->Aloss;
        r->Rshift = df->Rshift;
        r->Gshift = df->Gshift;
        r->Bshift = df->Bshift;
        r->Ashift = df->Ashift;
    }
    dst = rlebuf + sizeof(RLEDestFormat);

    /* Do the actual encoding */
    {
        int x, y;
        int h = surface->h, w = surface->w;
        SDL_PixelFormat *sf = surface->format;
        Uint32 *src = (Uint32 *) surface->pixels;
        Uint8 *lastline = dst;  /* end of last non-blank line */

        /* opaque counts are 8 or 16 bits, depending on target depth */
#define ADD_OPAQUE_COUNTS(n, m)         \
    if(df->BytesPerPixel == 4) {        \
        ((Uint16 *)dst)[0] = n;     \
        ((Uint16 *)dst)[1] = m;     \
        dst += 4;               \
    } else {                \
        dst[0] = n;             \
        dst[1] = m;             \
        dst += 2;               \
    }

        /* translucent counts are always 16 bit */
#define ADD_TRANSL_COUNTS(n, m)     \
    (((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4)

        for (y = 0; y < h; y++) {
            int runstart, skipstart;
            int blankline = 0;
            /* First encode all opaque pixels of a scan line */
            x = 0;
            do {
                int run, skip, len;
                skipstart = x;
                while (x < w && !ISOPAQUE(src[x], sf))
                    x++;
                runstart = x;
                while (x < w && ISOPAQUE(src[x], sf))
                    x++;
                skip = runstart - skipstart;
                if (skip == w)
                    blankline = 1;
                run = x - runstart;
                /* a skip too long for one count is split into
                   (max, 0) segments */
                while (skip > max_opaque_run) {
                    ADD_OPAQUE_COUNTS(max_opaque_run, 0);
                    skip -= max_opaque_run;
                }
                len = MIN(run, max_opaque_run);
                ADD_OPAQUE_COUNTS(skip, len);
                dst += copy_opaque(dst, src + runstart, len, sf, df);
                runstart += len;
                run -= len;
                /* the rest of a long run follows as (0, len) segments */
                while (run) {
                    len = MIN(run, max_opaque_run);
                    ADD_OPAQUE_COUNTS(0, len);
                    dst += copy_opaque(dst, src + runstart, len, sf, df);
                    runstart += len;
                    run -= len;
                }
            } while (x < w);

            /* Make sure the next output address is 32-bit aligned */
            dst += (uintptr_t) dst & 2;

            /* Next, encode all translucent pixels of the same scan line */
            x = 0;
            do {
                int run, skip, len;
                skipstart = x;
                while (x < w && !ISTRANSL(src[x], sf))
                    x++;
                runstart = x;
                while (x < w && ISTRANSL(src[x], sf))
                    x++;
                skip = runstart - skipstart;
                /* the line stays blank only if it has no translucent
                   pixels either */
                blankline &= (skip == w);
                run = x - runstart;
                while (skip > max_transl_run) {
                    ADD_TRANSL_COUNTS(max_transl_run, 0);
                    skip -= max_transl_run;
                }
                len = MIN(run, max_transl_run);
                ADD_TRANSL_COUNTS(skip, len);
                dst += copy_transl(dst, src + runstart, len, sf, df);
                runstart += len;
                run -= len;
                while (run) {
                    len = MIN(run, max_transl_run);
                    ADD_TRANSL_COUNTS(0, len);
                    dst += copy_transl(dst, src + runstart, len, sf, df);
                    runstart += len;
                    run -= len;
                }
                if (!blankline)
                    lastline = dst;
            } while (x < w);

            /* pitch is in bytes; src advances in 32-bit pixels */
            src += surface->pitch >> 2;
        }
        dst = lastline;         /* back up past trailing blank lines */
        /* terminate the stream with a zero skip / zero run marker */
        ADD_OPAQUE_COUNTS(0, 0);
    }

#undef ADD_OPAQUE_COUNTS
#undef ADD_TRANSL_COUNTS

    /* Now that we have it encoded, release the original pixels */
    if (!(surface->flags & SDL_PREALLOC)) {
        SDL_free(surface->pixels);
        surface->pixels = NULL;
    }

    /* realloc the buffer to release unused memory */
    {
        /* if the shrink fails, keep using the original buffer */
        Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
        if (!p)
            p = rlebuf;
        surface->map->data = p;
    }

    return 0;
}
1240
1241 static Uint32
1242 getpix_8(Uint8 * srcbuf)
1243 {
1244     return *srcbuf;
1245 }
1246
1247 static Uint32
1248 getpix_16(Uint8 * srcbuf)
1249 {
1250     return *(Uint16 *) srcbuf;
1251 }
1252
1253 static Uint32
1254 getpix_24(Uint8 * srcbuf)
1255 {
1256 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
1257     return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16);
1258 #else
1259     return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2];
1260 #endif
1261 }
1262
1263 static Uint32
1264 getpix_32(Uint8 * srcbuf)
1265 {
1266     return *(Uint32 *) srcbuf;
1267 }
1268
1269 typedef Uint32(*getpix_func) (Uint8 *);
1270
1271 static const getpix_func getpixes[4] = {
1272     getpix_8, getpix_16, getpix_24, getpix_32
1273 };
1274
1275 static int
1276 RLEColorkeySurface(SDL_Surface * surface)
1277 {
1278     Uint8 *rlebuf, *dst;
1279     int maxn;
1280     int y;
1281     Uint8 *srcbuf, *lastline;
1282     int maxsize = 0;
1283     int bpp = surface->format->BytesPerPixel;
1284     getpix_func getpix;
1285     Uint32 ckey, rgbmask;
1286     int w, h;
1287
1288     /* calculate the worst case size for the compressed surface */
1289     switch (bpp) {
1290     case 1:
1291         /* worst case is alternating opaque and transparent pixels,
1292            starting with an opaque pixel */
1293         maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2;
1294         break;
1295     case 2:
1296     case 3:
1297         /* worst case is solid runs, at most 255 pixels wide */
1298         maxsize = surface->h * (2 * (surface->w / 255 + 1)
1299                                 + surface->w * bpp) + 2;
1300         break;
1301     case 4:
1302         /* worst case is solid runs, at most 65535 pixels wide */
1303         maxsize = surface->h * (4 * (surface->w / 65535 + 1)
1304                                 + surface->w * 4) + 4;
1305         break;
1306     }
1307
1308     rlebuf = (Uint8 *) SDL_malloc(maxsize);
1309     if (rlebuf == NULL) {
1310         return SDL_OutOfMemory();
1311     }
1312
1313     /* Set up the conversion */
1314     srcbuf = (Uint8 *) surface->pixels;
1315     maxn = bpp == 4 ? 65535 : 255;
1316     dst = rlebuf;
1317     rgbmask = ~surface->format->Amask;
1318     ckey = surface->map->info.colorkey & rgbmask;
1319     lastline = dst;
1320     getpix = getpixes[bpp - 1];
1321     w = surface->w;
1322     h = surface->h;
1323
1324 #define ADD_COUNTS(n, m)            \
1325     if(bpp == 4) {              \
1326         ((Uint16 *)dst)[0] = n;     \
1327         ((Uint16 *)dst)[1] = m;     \
1328         dst += 4;               \
1329     } else {                \
1330         dst[0] = n;             \
1331         dst[1] = m;             \
1332         dst += 2;               \
1333     }
1334
1335     for (y = 0; y < h; y++) {
1336         int x = 0;
1337         int blankline = 0;
1338         do {
1339             int run, skip, len;
1340             int runstart;
1341             int skipstart = x;
1342
1343             /* find run of transparent, then opaque pixels */
1344             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey)
1345                 x++;
1346             runstart = x;
1347             while (x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey)
1348                 x++;
1349             skip = runstart - skipstart;
1350             if (skip == w)
1351                 blankline = 1;
1352             run = x - runstart;
1353
1354             /* encode segment */
1355             while (skip > maxn) {
1356                 ADD_COUNTS(maxn, 0);
1357                 skip -= maxn;
1358             }
1359             len = MIN(run, maxn);
1360             ADD_COUNTS(skip, len);
1361             SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
1362             dst += len * bpp;
1363             run -= len;
1364             runstart += len;
1365             while (run) {
1366                 len = MIN(run, maxn);
1367                 ADD_COUNTS(0, len);
1368                 SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
1369                 dst += len * bpp;
1370                 runstart += len;
1371                 run -= len;
1372             }
1373             if (!blankline)
1374                 lastline = dst;
1375         } while (x < w);
1376
1377         srcbuf += surface->pitch;
1378     }
1379     dst = lastline;             /* back up bast trailing blank lines */
1380     ADD_COUNTS(0, 0);
1381
1382 #undef ADD_COUNTS
1383
1384     /* Now that we have it encoded, release the original pixels */
1385     if (!(surface->flags & SDL_PREALLOC)) {
1386         SDL_free(surface->pixels);
1387         surface->pixels = NULL;
1388     }
1389
1390     /* realloc the buffer to release unused memory */
1391     {
1392         /* If realloc returns NULL, the original block is left intact */
1393         Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
1394         if (!p)
1395             p = rlebuf;
1396         surface->map->data = p;
1397     }
1398
1399     return (0);
1400 }
1401
1402 int
1403 SDL_RLESurface(SDL_Surface * surface)
1404 {
1405     int flags;
1406
1407     /* Clear any previous RLE conversion */
1408     if ((surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL) {
1409         SDL_UnRLESurface(surface, 1);
1410     }
1411
1412     /* We don't support RLE encoding of bitmaps */
1413     if (surface->format->BitsPerPixel < 8) {
1414         return -1;
1415     }
1416
1417     /* Make sure the pixels are available */
1418     if (!surface->pixels) {
1419         return -1;
1420     }
1421
1422     /* If we don't have colorkey or blending, nothing to do... */
1423     flags = surface->map->info.flags;
1424     if (!(flags & (SDL_COPY_COLORKEY | SDL_COPY_BLEND))) {
1425         return -1;
1426     }
1427
1428     /* Pass on combinations not supported */
1429     if ((flags & SDL_COPY_MODULATE_COLOR) ||
1430         ((flags & SDL_COPY_MODULATE_ALPHA) && surface->format->Amask) ||
1431         (flags & (SDL_COPY_ADD | SDL_COPY_MOD)) ||
1432         (flags & SDL_COPY_NEAREST)) {
1433         return -1;
1434     }
1435
1436     /* Encode and set up the blit */
1437     if (!surface->format->Amask || !(flags & SDL_COPY_BLEND)) {
1438         if (!surface->map->identity) {
1439             return -1;
1440         }
1441         if (RLEColorkeySurface(surface) < 0) {
1442             return -1;
1443         }
1444         surface->map->blit = SDL_RLEBlit;
1445         surface->map->info.flags |= SDL_COPY_RLE_COLORKEY;
1446     } else {
1447         if (RLEAlphaSurface(surface) < 0) {
1448             return -1;
1449         }
1450         surface->map->blit = SDL_RLEAlphaBlit;
1451         surface->map->info.flags |= SDL_COPY_RLE_ALPHAKEY;
1452     }
1453
1454     /* The surface is now accelerated */
1455     surface->flags |= SDL_RLEACCEL;
1456
1457     return (0);
1458 }
1459
1460 /*
1461  * Un-RLE a surface with pixel alpha
1462  * This may not give back exactly the image before RLE-encoding; all
1463  * completely transparent pixels will be lost, and color and alpha depth
1464  * may have been reduced (when encoding for 16bpp targets).
1465  */
1466 static SDL_bool
1467 UnRLEAlpha(SDL_Surface * surface)
1468 {
1469     Uint8 *srcbuf;
1470     Uint32 *dst;
1471     SDL_PixelFormat *sf = surface->format;
1472     RLEDestFormat *df = surface->map->data;
1473     int (*uncopy_opaque) (Uint32 *, void *, int,
1474                           RLEDestFormat *, SDL_PixelFormat *);
1475     int (*uncopy_transl) (Uint32 *, void *, int,
1476                           RLEDestFormat *, SDL_PixelFormat *);
1477     int w = surface->w;
1478     int bpp = df->BytesPerPixel;
1479
1480     if (bpp == 2) {
1481         uncopy_opaque = uncopy_opaque_16;
1482         uncopy_transl = uncopy_transl_16;
1483     } else {
1484         uncopy_opaque = uncopy_transl = uncopy_32;
1485     }
1486
1487     surface->pixels = SDL_malloc(surface->h * surface->pitch);
1488     if (!surface->pixels) {
1489         return (SDL_FALSE);
1490     }
1491     /* fill background with transparent pixels */
1492     SDL_memset(surface->pixels, 0, surface->h * surface->pitch);
1493
1494     dst = surface->pixels;
1495     srcbuf = (Uint8 *) (df + 1);
1496     for (;;) {
1497         /* copy opaque pixels */
1498         int ofs = 0;
1499         do {
1500             unsigned run;
1501             if (bpp == 2) {
1502                 ofs += srcbuf[0];
1503                 run = srcbuf[1];
1504                 srcbuf += 2;
1505             } else {
1506                 ofs += ((Uint16 *) srcbuf)[0];
1507                 run = ((Uint16 *) srcbuf)[1];
1508                 srcbuf += 4;
1509             }
1510             if (run) {
1511                 srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf);
1512                 ofs += run;
1513             } else if (!ofs)
1514                 return (SDL_TRUE);
1515         } while (ofs < w);
1516
1517         /* skip padding if needed */
1518         if (bpp == 2)
1519             srcbuf += (uintptr_t) srcbuf & 2;
1520
1521         /* copy translucent pixels */
1522         ofs = 0;
1523         do {
1524             unsigned run;
1525             ofs += ((Uint16 *) srcbuf)[0];
1526             run = ((Uint16 *) srcbuf)[1];
1527             srcbuf += 4;
1528             if (run) {
1529                 srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf);
1530                 ofs += run;
1531             }
1532         } while (ofs < w);
1533         dst += surface->pitch >> 2;
1534     }
1535     /* Make the compiler happy */
1536     return (SDL_TRUE);
1537 }
1538
1539 void
1540 SDL_UnRLESurface(SDL_Surface * surface, int recode)
1541 {
1542     if (surface->flags & SDL_RLEACCEL) {
1543         surface->flags &= ~SDL_RLEACCEL;
1544
1545         if (recode && !(surface->flags & SDL_PREALLOC)) {
1546             if (surface->map->info.flags & SDL_COPY_RLE_COLORKEY) {
1547                 SDL_Rect full;
1548
1549                 /* re-create the original surface */
1550                 surface->pixels = SDL_malloc(surface->h * surface->pitch);
1551                 if (!surface->pixels) {
1552                     /* Oh crap... */
1553                     surface->flags |= SDL_RLEACCEL;
1554                     return;
1555                 }
1556
1557                 /* fill it with the background color */
1558                 SDL_FillRect(surface, NULL, surface->map->info.colorkey);
1559
1560                 /* now render the encoded surface */
1561                 full.x = full.y = 0;
1562                 full.w = surface->w;
1563                 full.h = surface->h;
1564                 SDL_RLEBlit(surface, &full, surface, &full);
1565             } else {
1566                 if (!UnRLEAlpha(surface)) {
1567                     /* Oh crap... */
1568                     surface->flags |= SDL_RLEACCEL;
1569                     return;
1570                 }
1571             }
1572         }
1573         surface->map->info.flags &=
1574             ~(SDL_COPY_RLE_COLORKEY | SDL_COPY_RLE_ALPHAKEY);
1575
1576         SDL_free(surface->map->data);
1577         surface->map->data = NULL;
1578     }
1579 }
1580
1581 /* vi: set ts=4 sw=4 expandtab: */