src/render/SDL_yuv_sw.c

   1 /*
   2   Simple DirectMedia Layer
   3   Copyright (C) 1997-2016 Sam Lantinga <slouken@libsdl.org>
   4
   5   This software is provided 'as-is', without any express or implied
   6   warranty.  In no event will the authors be held liable for any damages
   7   arising from the use of this software.
   8
   9   Permission is granted to anyone to use this software for any purpose,
  10   including commercial applications, and to alter it and redistribute it
  11   freely, subject to the following restrictions:
  12
  13   1. The origin of this software must not be misrepresented; you must not
  14      claim that you wrote the original software. If you use this software
  15      in a product, an acknowledgment in the product documentation would be
  16      appreciated but is not required.
  17   2. Altered source versions must be plainly marked as such, and must not be
  18      misrepresented as being the original software.
  19   3. This notice may not be removed or altered from any source distribution.
  20 */
  21 #include "../SDL_internal.h"
  22
  23 /* This is the software implementation of the YUV texture support */
  24
  25 /* This code was derived from code carrying the following copyright notices:
  26
  27  * Copyright (c) 1995 The Regents of the University of California.
  28  * All rights reserved.
  29  *
  30  * Permission to use, copy, modify, and distribute this software and its
  31  * documentation for any purpose, without fee, and without written agreement is
  32  * hereby granted, provided that the above copyright notice and the following
  33  * two paragraphs appear in all copies of this software.
  34  *
  35  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
  36  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
  37  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
  38  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  39  *
  40  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
  41  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  42  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  43  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
  44  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  45
  46  * Copyright (c) 1995 Erik Corry
  47  * All rights reserved.
  48  *
  49  * Permission to use, copy, modify, and distribute this software and its
  50  * documentation for any purpose, without fee, and without written agreement is
  51  * hereby granted, provided that the above copyright notice and the following
  52  * two paragraphs appear in all copies of this software.
  53  *
  54  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
  55  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
  56  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
  57  * OF THE POSSIBILITY OF SUCH DAMAGE.
  58  *
  59  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
  60  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  61  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
  62  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
  63  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  64
  65  * Portions of this software Copyright (c) 1995 Brown University.
  66  * All rights reserved.
  67  *
  68  * Permission to use, copy, modify, and distribute this software and its
  69  * documentation for any purpose, without fee, and without written agreement
  70  * is hereby granted, provided that the above copyright notice and the
  71  * following two paragraphs appear in all copies of this software.
  72  *
  73  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
  74  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
  75  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
  76  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  77  *
  78  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
  79  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  80  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
  81  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
  82  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  83  */
  84
  85 #include "SDL_assert.h"
  86 #include "SDL_video.h"
  87 #include "SDL_cpuinfo.h"
  88 #include "SDL_yuv_sw_c.h"
  89
  90
  91 /* The colorspace conversion functions */
  92
  93 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
  94 extern void Color565DitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
  95                                     unsigned char *lum, unsigned char *cr,
  96                                     unsigned char *cb, unsigned char *out,
  97                                     int rows, int cols, int mod);
  98 extern void ColorRGBDitherYV12MMX1X(int *colortab, Uint32 * rgb_2_pix,
  99                                     unsigned char *lum, unsigned char *cr,
 100                                     unsigned char *cb, unsigned char *out,
 101                                     int rows, int cols, int mod);
 102 #endif
 103
 104 static void
 105 Color16DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
 106                        unsigned char *lum, unsigned char *cr,
 107                        unsigned char *cb, unsigned char *out,
 108                        int rows, int cols, int mod)
 109 {
 110     unsigned short *row1;
 111     unsigned short *row2;
 112     unsigned char *lum2;
 113     int x, y;
 114     int cr_r;
 115     int crb_g;
 116     int cb_b;
 117     int cols_2 = cols / 2;
 118
 119     row1 = (unsigned short *) out;
 120     row2 = row1 + cols + mod;
 121     lum2 = lum + cols;
 122
 123     mod += cols + mod;
 124
 125     y = rows / 2;
 126     while (y--) {
 127         x = cols_2;
 128         while (x--) {
 129             register int L;
 130
 131             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
 132             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
 133                 + colortab[*cb + 2 * 256];
 134             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
 135             ++cr;
 136             ++cb;
 137
 138             L = *lum++;
 139             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
 140                                         rgb_2_pix[L + crb_g] |
 141                                         rgb_2_pix[L + cb_b]);
 142
 143             L = *lum++;
 144             *row1++ = (unsigned short) (rgb_2_pix[L + cr_r] |
 145                                         rgb_2_pix[L + crb_g] |
 146                                         rgb_2_pix[L + cb_b]);
 147
 148
 149             /* Now, do second row.  */
 150
 151             L = *lum2++;
 152             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
 153                                         rgb_2_pix[L + crb_g] |
 154                                         rgb_2_pix[L + cb_b]);
 155
 156             L = *lum2++;
 157             *row2++ = (unsigned short) (rgb_2_pix[L + cr_r] |
 158                                         rgb_2_pix[L + crb_g] |
 159                                         rgb_2_pix[L + cb_b]);
 160         }
 161
 162         /*
 163          * These values are at the start of the next line, (due
 164          * to the ++'s above),but they need to be at the start
 165          * of the line after that.
 166          */
 167         lum += cols;
 168         lum2 += cols;
 169         row1 += mod;
 170         row2 += mod;
 171     }
 172 }
 173
 174 static void
 175 Color24DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
 176                        unsigned char *lum, unsigned char *cr,
 177                        unsigned char *cb, unsigned char *out,
 178                        int rows, int cols, int mod)
 179 {
 180     unsigned int value;
 181     unsigned char *row1;
 182     unsigned char *row2;
 183     unsigned char *lum2;
 184     int x, y;
 185     int cr_r;
 186     int crb_g;
 187     int cb_b;
 188     int cols_2 = cols / 2;
 189
 190     row1 = out;
 191     row2 = row1 + cols * 3 + mod * 3;
 192     lum2 = lum + cols;
 193
 194     mod += cols + mod;
 195     mod *= 3;
 196
 197     y = rows / 2;
 198     while (y--) {
 199         x = cols_2;
 200         while (x--) {
 201             register int L;
 202
 203             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
 204             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
 205                 + colortab[*cb + 2 * 256];
 206             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
 207             ++cr;
 208             ++cb;
 209
 210             L = *lum++;
 211             value = (rgb_2_pix[L + cr_r] |
 212                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 213             *row1++ = (value) & 0xFF;
 214             *row1++ = (value >> 8) & 0xFF;
 215             *row1++ = (value >> 16) & 0xFF;
 216
 217             L = *lum++;
 218             value = (rgb_2_pix[L + cr_r] |
 219                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 220             *row1++ = (value) & 0xFF;
 221             *row1++ = (value >> 8) & 0xFF;
 222             *row1++ = (value >> 16) & 0xFF;
 223
 224
 225             /* Now, do second row.  */
 226
 227             L = *lum2++;
 228             value = (rgb_2_pix[L + cr_r] |
 229                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 230             *row2++ = (value) & 0xFF;
 231             *row2++ = (value >> 8) & 0xFF;
 232             *row2++ = (value >> 16) & 0xFF;
 233
 234             L = *lum2++;
 235             value = (rgb_2_pix[L + cr_r] |
 236                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 237             *row2++ = (value) & 0xFF;
 238             *row2++ = (value >> 8) & 0xFF;
 239             *row2++ = (value >> 16) & 0xFF;
 240         }
 241
 242         /*
 243          * These values are at the start of the next line, (due
 244          * to the ++'s above),but they need to be at the start
 245          * of the line after that.
 246          */
 247         lum += cols;
 248         lum2 += cols;
 249         row1 += mod;
 250         row2 += mod;
 251     }
 252 }
 253
 254 static void
 255 Color32DitherYV12Mod1X(int *colortab, Uint32 * rgb_2_pix,
 256                        unsigned char *lum, unsigned char *cr,
 257                        unsigned char *cb, unsigned char *out,
 258                        int rows, int cols, int mod)
 259 {
 260     unsigned int *row1;
 261     unsigned int *row2;
 262     unsigned char *lum2;
 263     int x, y;
 264     int cr_r;
 265     int crb_g;
 266     int cb_b;
 267     int cols_2 = cols / 2;
 268
 269     row1 = (unsigned int *) out;
 270     row2 = row1 + cols + mod;
 271     lum2 = lum + cols;
 272
 273     mod += cols + mod;
 274
 275     y = rows / 2;
 276     while (y--) {
 277         x = cols_2;
 278         while (x--) {
 279             register int L;
 280
 281             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
 282             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
 283                 + colortab[*cb + 2 * 256];
 284             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
 285             ++cr;
 286             ++cb;
 287
 288             L = *lum++;
 289             *row1++ = (rgb_2_pix[L + cr_r] |
 290                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 291
 292             L = *lum++;
 293             *row1++ = (rgb_2_pix[L + cr_r] |
 294                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 295
 296
 297             /* Now, do second row.  */
 298
 299             L = *lum2++;
 300             *row2++ = (rgb_2_pix[L + cr_r] |
 301                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 302
 303             L = *lum2++;
 304             *row2++ = (rgb_2_pix[L + cr_r] |
 305                        rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 306         }
 307
 308         /*
 309          * These values are at the start of the next line, (due
 310          * to the ++'s above),but they need to be at the start
 311          * of the line after that.
 312          */
 313         lum += cols;
 314         lum2 += cols;
 315         row1 += mod;
 316         row2 += mod;
 317     }
 318 }
 319
 320 /*
 321  * In this function I make use of a nasty trick. The tables have the lower
 322  * 16 bits replicated in the upper 16. This means I can write ints and get
 323  * the horisontal doubling for free (almost).
 324  */
 325 static void
 326 Color16DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
 327                        unsigned char *lum, unsigned char *cr,
 328                        unsigned char *cb, unsigned char *out,
 329                        int rows, int cols, int mod)
 330 {
 331     unsigned int *row1 = (unsigned int *) out;
 332     const int next_row = cols + (mod / 2);
 333     unsigned int *row2 = row1 + 2 * next_row;
 334     unsigned char *lum2;
 335     int x, y;
 336     int cr_r;
 337     int crb_g;
 338     int cb_b;
 339     int cols_2 = cols / 2;
 340
 341     lum2 = lum + cols;
 342
 343     mod = (next_row * 3) + (mod / 2);
 344
 345     y = rows / 2;
 346     while (y--) {
 347         x = cols_2;
 348         while (x--) {
 349             register int L;
 350
 351             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
 352             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
 353                 + colortab[*cb + 2 * 256];
 354             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
 355             ++cr;
 356             ++cb;
 357
 358             L = *lum++;
 359             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
 360                                         rgb_2_pix[L + crb_g] |
 361                                         rgb_2_pix[L + cb_b]);
 362             row1++;
 363
 364             L = *lum++;
 365             row1[0] = row1[next_row] = (rgb_2_pix[L + cr_r] |
 366                                         rgb_2_pix[L + crb_g] |
 367                                         rgb_2_pix[L + cb_b]);
 368             row1++;
 369
 370
 371             /* Now, do second row. */
 372
 373             L = *lum2++;
 374             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
 375                                         rgb_2_pix[L + crb_g] |
 376                                         rgb_2_pix[L + cb_b]);
 377             row2++;
 378
 379             L = *lum2++;
 380             row2[0] = row2[next_row] = (rgb_2_pix[L + cr_r] |
 381                                         rgb_2_pix[L + crb_g] |
 382                                         rgb_2_pix[L + cb_b]);
 383             row2++;
 384         }
 385
 386         /*
 387          * These values are at the start of the next line, (due
 388          * to the ++'s above),but they need to be at the start
 389          * of the line after that.
 390          */
 391         lum += cols;
 392         lum2 += cols;
 393         row1 += mod;
 394         row2 += mod;
 395     }
 396 }
 397
 398 static void
 399 Color24DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
 400                        unsigned char *lum, unsigned char *cr,
 401                        unsigned char *cb, unsigned char *out,
 402                        int rows, int cols, int mod)
 403 {
 404     unsigned int value;
 405     unsigned char *row1 = out;
 406     const int next_row = (cols * 2 + mod) * 3;
 407     unsigned char *row2 = row1 + 2 * next_row;
 408     unsigned char *lum2;
 409     int x, y;
 410     int cr_r;
 411     int crb_g;
 412     int cb_b;
 413     int cols_2 = cols / 2;
 414
 415     lum2 = lum + cols;
 416
 417     mod = next_row * 3 + mod * 3;
 418
 419     y = rows / 2;
 420     while (y--) {
 421         x = cols_2;
 422         while (x--) {
 423             register int L;
 424
 425             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
 426             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
 427                 + colortab[*cb + 2 * 256];
 428             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
 429             ++cr;
 430             ++cb;
 431
 432             L = *lum++;
 433             value = (rgb_2_pix[L + cr_r] |
 434                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 435             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
 436                 row1[next_row + 3 + 0] = (value) & 0xFF;
 437             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
 438                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
 439             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
 440                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
 441             row1 += 2 * 3;
 442
 443             L = *lum++;
 444             value = (rgb_2_pix[L + cr_r] |
 445                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 446             row1[0 + 0] = row1[3 + 0] = row1[next_row + 0] =
 447                 row1[next_row + 3 + 0] = (value) & 0xFF;
 448             row1[0 + 1] = row1[3 + 1] = row1[next_row + 1] =
 449                 row1[next_row + 3 + 1] = (value >> 8) & 0xFF;
 450             row1[0 + 2] = row1[3 + 2] = row1[next_row + 2] =
 451                 row1[next_row + 3 + 2] = (value >> 16) & 0xFF;
 452             row1 += 2 * 3;
 453
 454
 455             /* Now, do second row. */
 456
 457             L = *lum2++;
 458             value = (rgb_2_pix[L + cr_r] |
 459                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 460             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
 461                 row2[next_row + 3 + 0] = (value) & 0xFF;
 462             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
 463                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
 464             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
 465                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
 466             row2 += 2 * 3;
 467
 468             L = *lum2++;
 469             value = (rgb_2_pix[L + cr_r] |
 470                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 471             row2[0 + 0] = row2[3 + 0] = row2[next_row + 0] =
 472                 row2[next_row + 3 + 0] = (value) & 0xFF;
 473             row2[0 + 1] = row2[3 + 1] = row2[next_row + 1] =
 474                 row2[next_row + 3 + 1] = (value >> 8) & 0xFF;
 475             row2[0 + 2] = row2[3 + 2] = row2[next_row + 2] =
 476                 row2[next_row + 3 + 2] = (value >> 16) & 0xFF;
 477             row2 += 2 * 3;
 478         }
 479
 480         /*
 481          * These values are at the start of the next line, (due
 482          * to the ++'s above),but they need to be at the start
 483          * of the line after that.
 484          */
 485         lum += cols;
 486         lum2 += cols;
 487         row1 += mod;
 488         row2 += mod;
 489     }
 490 }
 491
 492 static void
 493 Color32DitherYV12Mod2X(int *colortab, Uint32 * rgb_2_pix,
 494                        unsigned char *lum, unsigned char *cr,
 495                        unsigned char *cb, unsigned char *out,
 496                        int rows, int cols, int mod)
 497 {
 498     unsigned int *row1 = (unsigned int *) out;
 499     const int next_row = cols * 2 + mod;
 500     unsigned int *row2 = row1 + 2 * next_row;
 501     unsigned char *lum2;
 502     int x, y;
 503     int cr_r;
 504     int crb_g;
 505     int cb_b;
 506     int cols_2 = cols / 2;
 507
 508     lum2 = lum + cols;
 509
 510     mod = (next_row * 3) + mod;
 511
 512     y = rows / 2;
 513     while (y--) {
 514         x = cols_2;
 515         while (x--) {
 516             register int L;
 517
 518             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
 519             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
 520                 + colortab[*cb + 2 * 256];
 521             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
 522             ++cr;
 523             ++cb;
 524
 525             L = *lum++;
 526             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
 527                 (rgb_2_pix[L + cr_r] |
 528                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 529             row1 += 2;
 530
 531             L = *lum++;
 532             row1[0] = row1[1] = row1[next_row] = row1[next_row + 1] =
 533                 (rgb_2_pix[L + cr_r] |
 534                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 535             row1 += 2;
 536
 537
 538             /* Now, do second row. */
 539
 540             L = *lum2++;
 541             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
 542                 (rgb_2_pix[L + cr_r] |
 543                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 544             row2 += 2;
 545
 546             L = *lum2++;
 547             row2[0] = row2[1] = row2[next_row] = row2[next_row + 1] =
 548                 (rgb_2_pix[L + cr_r] |
 549                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 550             row2 += 2;
 551         }
 552
 553         /*
 554          * These values are at the start of the next line, (due
 555          * to the ++'s above),but they need to be at the start
 556          * of the line after that.
 557          */
 558         lum += cols;
 559         lum2 += cols;
 560         row1 += mod;
 561         row2 += mod;
 562     }
 563 }
 564
 565 static void
 566 Color16DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
 567                        unsigned char *lum, unsigned char *cr,
 568                        unsigned char *cb, unsigned char *out,
 569                        int rows, int cols, int mod)
 570 {
 571     unsigned short *row;
 572     int x, y;
 573     int cr_r;
 574     int crb_g;
 575     int cb_b;
 576     int cols_2 = cols / 2;
 577
 578     row = (unsigned short *) out;
 579
 580     y = rows;
 581     while (y--) {
 582         x = cols_2;
 583         while (x--) {
 584             register int L;
 585
 586             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
 587             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
 588                 + colortab[*cb + 2 * 256];
 589             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
 590             cr += 4;
 591             cb += 4;
 592
 593             L = *lum;
 594             lum += 2;
 595             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
 596                                        rgb_2_pix[L + crb_g] |
 597                                        rgb_2_pix[L + cb_b]);
 598
 599             L = *lum;
 600             lum += 2;
 601             *row++ = (unsigned short) (rgb_2_pix[L + cr_r] |
 602                                        rgb_2_pix[L + crb_g] |
 603                                        rgb_2_pix[L + cb_b]);
 604
 605         }
 606
 607         row += mod;
 608     }
 609 }
 610
 611 static void
 612 Color24DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
 613                        unsigned char *lum, unsigned char *cr,
 614                        unsigned char *cb, unsigned char *out,
 615                        int rows, int cols, int mod)
 616 {
 617     unsigned int value;
 618     unsigned char *row;
 619     int x, y;
 620     int cr_r;
 621     int crb_g;
 622     int cb_b;
 623     int cols_2 = cols / 2;
 624
 625     row = (unsigned char *) out;
 626     mod *= 3;
 627     y = rows;
 628     while (y--) {
 629         x = cols_2;
 630         while (x--) {
 631             register int L;
 632
 633             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
 634             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
 635                 + colortab[*cb + 2 * 256];
 636             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
 637             cr += 4;
 638             cb += 4;
 639
 640             L = *lum;
 641             lum += 2;
 642             value = (rgb_2_pix[L + cr_r] |
 643                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 644             *row++ = (value) & 0xFF;
 645             *row++ = (value >> 8) & 0xFF;
 646             *row++ = (value >> 16) & 0xFF;
 647
 648             L = *lum;
 649             lum += 2;
 650             value = (rgb_2_pix[L + cr_r] |
 651                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 652             *row++ = (value) & 0xFF;
 653             *row++ = (value >> 8) & 0xFF;
 654             *row++ = (value >> 16) & 0xFF;
 655
 656         }
 657         row += mod;
 658     }
 659 }
 660
 661 static void
 662 Color32DitherYUY2Mod1X(int *colortab, Uint32 * rgb_2_pix,
 663                        unsigned char *lum, unsigned char *cr,
 664                        unsigned char *cb, unsigned char *out,
 665                        int rows, int cols, int mod)
 666 {
 667     unsigned int *row;
 668     int x, y;
 669     int cr_r;
 670     int crb_g;
 671     int cb_b;
 672     int cols_2 = cols / 2;
 673
 674     row = (unsigned int *) out;
 675     y = rows;
 676     while (y--) {
 677         x = cols_2;
 678         while (x--) {
 679             register int L;
 680
 681             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
 682             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
 683                 + colortab[*cb + 2 * 256];
 684             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
 685             cr += 4;
 686             cb += 4;
 687
 688             L = *lum;
 689             lum += 2;
 690             *row++ = (rgb_2_pix[L + cr_r] |
 691                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 692
 693             L = *lum;
 694             lum += 2;
 695             *row++ = (rgb_2_pix[L + cr_r] |
 696                       rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 697
 698
 699         }
 700         row += mod;
 701     }
 702 }
 703
 704 /*
 705  * In this function I make use of a nasty trick. The tables have the lower
 706  * 16 bits replicated in the upper 16. This means I can write ints and get
 707  * the horisontal doubling for free (almost).
 708  */
 709 static void
 710 Color16DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
 711                        unsigned char *lum, unsigned char *cr,
 712                        unsigned char *cb, unsigned char *out,
 713                        int rows, int cols, int mod)
 714 {
 715     unsigned int *row = (unsigned int *) out;
 716     const int next_row = cols + (mod / 2);
 717     int x, y;
 718     int cr_r;
 719     int crb_g;
 720     int cb_b;
 721     int cols_2 = cols / 2;
 722
 723     y = rows;
 724     while (y--) {
 725         x = cols_2;
 726         while (x--) {
 727             register int L;
 728
 729             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
 730             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
 731                 + colortab[*cb + 2 * 256];
 732             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
 733             cr += 4;
 734             cb += 4;
 735
 736             L = *lum;
 737             lum += 2;
 738             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
 739                                       rgb_2_pix[L + crb_g] |
 740                                       rgb_2_pix[L + cb_b]);
 741             row++;
 742
 743             L = *lum;
 744             lum += 2;
 745             row[0] = row[next_row] = (rgb_2_pix[L + cr_r] |
 746                                       rgb_2_pix[L + crb_g] |
 747                                       rgb_2_pix[L + cb_b]);
 748             row++;
 749
 750         }
 751         row += next_row;
 752     }
 753 }
 754
 755 static void
 756 Color24DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
 757                        unsigned char *lum, unsigned char *cr,
 758                        unsigned char *cb, unsigned char *out,
 759                        int rows, int cols, int mod)
 760 {
 761     unsigned int value;
 762     unsigned char *row = out;
 763     const int next_row = (cols * 2 + mod) * 3;
 764     int x, y;
 765     int cr_r;
 766     int crb_g;
 767     int cb_b;
 768     int cols_2 = cols / 2;
 769     y = rows;
 770     while (y--) {
 771         x = cols_2;
 772         while (x--) {
 773             register int L;
 774
 775             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
 776             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
 777                 + colortab[*cb + 2 * 256];
 778             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
 779             cr += 4;
 780             cb += 4;
 781
 782             L = *lum;
 783             lum += 2;
 784             value = (rgb_2_pix[L + cr_r] |
 785                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 786             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
 787                 row[next_row + 3 + 0] = (value) & 0xFF;
 788             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
 789                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
 790             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
 791                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
 792             row += 2 * 3;
 793
 794             L = *lum;
 795             lum += 2;
 796             value = (rgb_2_pix[L + cr_r] |
 797                      rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 798             row[0 + 0] = row[3 + 0] = row[next_row + 0] =
 799                 row[next_row + 3 + 0] = (value) & 0xFF;
 800             row[0 + 1] = row[3 + 1] = row[next_row + 1] =
 801                 row[next_row + 3 + 1] = (value >> 8) & 0xFF;
 802             row[0 + 2] = row[3 + 2] = row[next_row + 2] =
 803                 row[next_row + 3 + 2] = (value >> 16) & 0xFF;
 804             row += 2 * 3;
 805
 806         }
 807         row += next_row;
 808     }
 809 }
 810
 811 static void
 812 Color32DitherYUY2Mod2X(int *colortab, Uint32 * rgb_2_pix,
 813                        unsigned char *lum, unsigned char *cr,
 814                        unsigned char *cb, unsigned char *out,
 815                        int rows, int cols, int mod)
 816 {
 817     unsigned int *row = (unsigned int *) out;
 818     const int next_row = cols * 2 + mod;
 819     int x, y;
 820     int cr_r;
 821     int crb_g;
 822     int cb_b;
 823     int cols_2 = cols / 2;
 824     mod += mod;
 825     y = rows;
 826     while (y--) {
 827         x = cols_2;
 828         while (x--) {
 829             register int L;
 830
 831             cr_r = 0 * 768 + 256 + colortab[*cr + 0 * 256];
 832             crb_g = 1 * 768 + 256 + colortab[*cr + 1 * 256]
 833                 + colortab[*cb + 2 * 256];
 834             cb_b = 2 * 768 + 256 + colortab[*cb + 3 * 256];
 835             cr += 4;
 836             cb += 4;
 837
 838             L = *lum;
 839             lum += 2;
 840             row[0] = row[1] = row[next_row] = row[next_row + 1] =
 841                 (rgb_2_pix[L + cr_r] |
 842                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 843             row += 2;
 844
 845             L = *lum;
 846             lum += 2;
 847             row[0] = row[1] = row[next_row] = row[next_row + 1] =
 848                 (rgb_2_pix[L + cr_r] |
 849                  rgb_2_pix[L + crb_g] | rgb_2_pix[L + cb_b]);
 850             row += 2;
 851
 852
 853         }
 854
 855         row += next_row;
 856     }
 857 }
 858
 859 /*
 860  * How many 1 bits are there in the Uint32.
 861  * Low performance, do not call often.
 862  */
 863 static int
 864 number_of_bits_set(Uint32 a)
 865 {
 866     if (!a)
 867         return 0;
 868     if (a & 1)
 869         return 1 + number_of_bits_set(a >> 1);
 870     return (number_of_bits_set(a >> 1));
 871 }
 872
 873 /*
 874  * How many 0 bits are there at least significant end of Uint32.
 875  * Low performance, do not call often.
 876  */
 877 static int
 878 free_bits_at_bottom(Uint32 a)
 879 {
 880     /* assume char is 8 bits */
 881     if (!a)
 882         return sizeof(Uint32) * 8;
 883     if (((Sint32) a) & 1l)
 884         return 0;
 885     return 1 + free_bits_at_bottom(a >> 1);
 886 }
 887
 888 static int
 889 SDL_SW_SetupYUVDisplay(SDL_SW_YUVTexture * swdata, Uint32 target_format)
 890 {
 891     Uint32 *r_2_pix_alloc;
 892     Uint32 *g_2_pix_alloc;
 893     Uint32 *b_2_pix_alloc;
 894     int i;
 895     int bpp;
 896     Uint32 Rmask, Gmask, Bmask, Amask;
 897
 898     if (!SDL_PixelFormatEnumToMasks
 899         (target_format, &bpp, &Rmask, &Gmask, &Bmask, &Amask) || bpp < 15) {
 900         return SDL_SetError("Unsupported YUV destination format");
 901     }
 902
 903     swdata->target_format = target_format;
 904     r_2_pix_alloc = &swdata->rgb_2_pix[0 * 768];
 905     g_2_pix_alloc = &swdata->rgb_2_pix[1 * 768];
 906     b_2_pix_alloc = &swdata->rgb_2_pix[2 * 768];
 907
 908     /*
 909      * Set up entries 0-255 in rgb-to-pixel value tables.
 910      */
 911     for (i = 0; i < 256; ++i) {
 912         r_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Rmask));
 913         r_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Rmask);
 914         r_2_pix_alloc[i + 256] |= Amask;
 915         g_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Gmask));
 916         g_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Gmask);
 917         g_2_pix_alloc[i + 256] |= Amask;
 918         b_2_pix_alloc[i + 256] = i >> (8 - number_of_bits_set(Bmask));
 919         b_2_pix_alloc[i + 256] <<= free_bits_at_bottom(Bmask);
 920         b_2_pix_alloc[i + 256] |= Amask;
 921     }
 922
 923     /*
 924      * If we have 16-bit output depth, then we double the value
 925      * in the top word. This means that we can write out both
 926      * pixels in the pixel doubling mode with one op. It is
 927      * harmless in the normal case as storing a 32-bit value
 928      * through a short pointer will lose the top bits anyway.
 929      */
 930     if (SDL_BYTESPERPIXEL(target_format) == 2) {
 931         for (i = 0; i < 256; ++i) {
 932             r_2_pix_alloc[i + 256] |= (r_2_pix_alloc[i + 256]) << 16;
 933             g_2_pix_alloc[i + 256] |= (g_2_pix_alloc[i + 256]) << 16;
 934             b_2_pix_alloc[i + 256] |= (b_2_pix_alloc[i + 256]) << 16;
 935         }
 936     }
 937
 938     /*
 939      * Spread out the values we have to the rest of the array so that
 940      * we do not need to check for overflow.
 941      */
 942     for (i = 0; i < 256; ++i) {
 943         r_2_pix_alloc[i] = r_2_pix_alloc[256];
 944         r_2_pix_alloc[i + 512] = r_2_pix_alloc[511];
 945         g_2_pix_alloc[i] = g_2_pix_alloc[256];
 946         g_2_pix_alloc[i + 512] = g_2_pix_alloc[511];
 947         b_2_pix_alloc[i] = b_2_pix_alloc[256];
 948         b_2_pix_alloc[i + 512] = b_2_pix_alloc[511];
 949     }
 950
 951     /* You have chosen wisely... */
 952     switch (swdata->format) {
 953     case SDL_PIXELFORMAT_YV12:
 954     case SDL_PIXELFORMAT_IYUV:
 955         if (SDL_BYTESPERPIXEL(target_format) == 2) {
 956 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
 957             /* inline assembly functions */
 958             if (SDL_HasMMX() && (Rmask == 0xF800) &&
 959                 (Gmask == 0x07E0) && (Bmask == 0x001F)
 960                 && (swdata->w & 15) == 0) {
 961 /* printf("Using MMX 16-bit 565 dither\n"); */
 962                 swdata->Display1X = Color565DitherYV12MMX1X;
 963             } else {
 964 /* printf("Using C 16-bit dither\n"); */
 965                 swdata->Display1X = Color16DitherYV12Mod1X;
 966             }
 967 #else
 968             swdata->Display1X = Color16DitherYV12Mod1X;
 969 #endif
 970             swdata->Display2X = Color16DitherYV12Mod2X;
 971         }
 972         if (SDL_BYTESPERPIXEL(target_format) == 3) {
 973             swdata->Display1X = Color24DitherYV12Mod1X;
 974             swdata->Display2X = Color24DitherYV12Mod2X;
 975         }
 976         if (SDL_BYTESPERPIXEL(target_format) == 4) {
 977 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
 978             /* inline assembly functions */
 979             if (SDL_HasMMX() && (Rmask == 0x00FF0000) &&
 980                 (Gmask == 0x0000FF00) &&
 981                 (Bmask == 0x000000FF) && (swdata->w & 15) == 0) {
 982 /* printf("Using MMX 32-bit dither\n"); */
 983                 swdata->Display1X = ColorRGBDitherYV12MMX1X;
 984             } else {
 985 /* printf("Using C 32-bit dither\n"); */
 986                 swdata->Display1X = Color32DitherYV12Mod1X;
 987             }
 988 #else
 989             swdata->Display1X = Color32DitherYV12Mod1X;
 990 #endif
 991             swdata->Display2X = Color32DitherYV12Mod2X;
 992         }
 993         break;
 994     case SDL_PIXELFORMAT_YUY2:
 995     case SDL_PIXELFORMAT_UYVY:
 996     case SDL_PIXELFORMAT_YVYU:
 997         if (SDL_BYTESPERPIXEL(target_format) == 2) {
 998             swdata->Display1X = Color16DitherYUY2Mod1X;
 999             swdata->Display2X = Color16DitherYUY2Mod2X;
1000         }
1001         if (SDL_BYTESPERPIXEL(target_format) == 3) {
1002             swdata->Display1X = Color24DitherYUY2Mod1X;
1003             swdata->Display2X = Color24DitherYUY2Mod2X;
1004         }
1005         if (SDL_BYTESPERPIXEL(target_format) == 4) {
1006             swdata->Display1X = Color32DitherYUY2Mod1X;
1007             swdata->Display2X = Color32DitherYUY2Mod2X;
1008         }
1009         break;
1010     default:
1011         /* We should never get here (caught above) */
1012         break;
1013     }
1014
1015     SDL_FreeSurface(swdata->display);
1016     swdata->display = NULL;
1017     return 0;
1018 }
1019
1020 SDL_SW_YUVTexture *
1021 SDL_SW_CreateYUVTexture(Uint32 format, int w, int h)
1022 {
1023     SDL_SW_YUVTexture *swdata;
1024     int *Cr_r_tab;
1025     int *Cr_g_tab;
1026     int *Cb_g_tab;
1027     int *Cb_b_tab;
1028     int i;
1029     int CR, CB;
1030
1031     switch (format) {
1032     case SDL_PIXELFORMAT_YV12:
1033     case SDL_PIXELFORMAT_IYUV:
1034     case SDL_PIXELFORMAT_YUY2:
1035     case SDL_PIXELFORMAT_UYVY:
1036     case SDL_PIXELFORMAT_YVYU:
1037         break;
1038     default:
1039         SDL_SetError("Unsupported YUV format");
1040         return NULL;
1041     }
1042
1043     swdata = (SDL_SW_YUVTexture *) SDL_calloc(1, sizeof(*swdata));
1044     if (!swdata) {
1045         SDL_OutOfMemory();
1046         return NULL;
1047     }
1048
1049     swdata->format = format;
1050     swdata->target_format = SDL_PIXELFORMAT_UNKNOWN;
1051     swdata->w = w;
1052     swdata->h = h;
1053     swdata->pixels = (Uint8 *) SDL_malloc(w * h * 2);
1054     swdata->colortab = (int *) SDL_malloc(4 * 256 * sizeof(int));
1055     swdata->rgb_2_pix = (Uint32 *) SDL_malloc(3 * 768 * sizeof(Uint32));
1056     if (!swdata->pixels || !swdata->colortab || !swdata->rgb_2_pix) {
1057         SDL_SW_DestroyYUVTexture(swdata);
1058         SDL_OutOfMemory();
1059         return NULL;
1060     }
1061
1062     /* Generate the tables for the display surface */
1063     Cr_r_tab = &swdata->colortab[0 * 256];
1064     Cr_g_tab = &swdata->colortab[1 * 256];
1065     Cb_g_tab = &swdata->colortab[2 * 256];
1066     Cb_b_tab = &swdata->colortab[3 * 256];
1067     for (i = 0; i < 256; i++) {
1068         /* Gamma correction (luminescence table) and chroma correction
1069            would be done here.  See the Berkeley mpeg_play sources.
1070          */
1071         CB = CR = (i - 128);
1072         Cr_r_tab[i] = (int) ((0.419 / 0.299) * CR);
1073         Cr_g_tab[i] = (int) (-(0.299 / 0.419) * CR);
1074         Cb_g_tab[i] = (int) (-(0.114 / 0.331) * CB);
1075         Cb_b_tab[i] = (int) ((0.587 / 0.331) * CB);
1076     }
1077
1078     /* Find the pitch and offset values for the overlay */
1079     switch (format) {
1080     case SDL_PIXELFORMAT_YV12:
1081     case SDL_PIXELFORMAT_IYUV:
1082         swdata->pitches[0] = w;
1083         swdata->pitches[1] = swdata->pitches[0] / 2;
1084         swdata->pitches[2] = swdata->pitches[0] / 2;
1085         swdata->planes[0] = swdata->pixels;
1086         swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * h;
1087         swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * h / 2;
1088         break;
1089     case SDL_PIXELFORMAT_YUY2:
1090     case SDL_PIXELFORMAT_UYVY:
1091     case SDL_PIXELFORMAT_YVYU:
1092         swdata->pitches[0] = w * 2;
1093         swdata->planes[0] = swdata->pixels;
1094         break;
1095     default:
1096         SDL_assert(0 && "We should never get here (caught above)");
1097         break;
1098     }
1099
1100     /* We're all done.. */
1101     return (swdata);
1102 }
1103
1104 int
1105 SDL_SW_QueryYUVTexturePixels(SDL_SW_YUVTexture * swdata, void **pixels,
1106                              int *pitch)
1107 {
1108     *pixels = swdata->planes[0];
1109     *pitch = swdata->pitches[0];
1110     return 0;
1111 }
1112
1113 int
1114 SDL_SW_UpdateYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
1115                         const void *pixels, int pitch)
1116 {
1117     switch (swdata->format) {
1118     case SDL_PIXELFORMAT_YV12:
1119     case SDL_PIXELFORMAT_IYUV:
1120         if (rect->x == 0 && rect->y == 0 &&
1121             rect->w == swdata->w && rect->h == swdata->h) {
1122                 SDL_memcpy(swdata->pixels, pixels,
1123                            (swdata->h * swdata->w) + (swdata->h * swdata->w) / 2);
1124         } else {
1125             Uint8 *src, *dst;
1126             int row;
1127             size_t length;
1128
1129             /* Copy the Y plane */
1130             src = (Uint8 *) pixels;
1131             dst = swdata->pixels + rect->y * swdata->w + rect->x;
1132             length = rect->w;
1133             for (row = 0; row < rect->h; ++row) {
1134                 SDL_memcpy(dst, src, length);
1135                 src += pitch;
1136                 dst += swdata->w;
1137             }
1138
1139             /* Copy the next plane */
1140             src = (Uint8 *) pixels + rect->h * pitch;
1141             dst = swdata->pixels + swdata->h * swdata->w;
1142             dst += rect->y/2 * swdata->w/2 + rect->x/2;
1143             length = rect->w / 2;
1144             for (row = 0; row < rect->h/2; ++row) {
1145                 SDL_memcpy(dst, src, length);
1146                 src += pitch/2;
1147                 dst += swdata->w/2;
1148             }
1149
1150             /* Copy the next plane */
1151             src = (Uint8 *) pixels + rect->h * pitch + (rect->h * pitch) / 4;
1152             dst = swdata->pixels + swdata->h * swdata->w +
1153                   (swdata->h * swdata->w) / 4;
1154             dst += rect->y/2 * swdata->w/2 + rect->x/2;
1155             length = rect->w / 2;
1156             for (row = 0; row < rect->h/2; ++row) {
1157                 SDL_memcpy(dst, src, length);
1158                 src += pitch/2;
1159                 dst += swdata->w/2;
1160             }
1161         }
1162         break;
1163     case SDL_PIXELFORMAT_YUY2:
1164     case SDL_PIXELFORMAT_UYVY:
1165     case SDL_PIXELFORMAT_YVYU:
1166         {
1167             Uint8 *src, *dst;
1168             int row;
1169             size_t length;
1170
1171             src = (Uint8 *) pixels;
1172             dst =
1173                 swdata->planes[0] + rect->y * swdata->pitches[0] +
1174                 rect->x * 2;
1175             length = rect->w * 2;
1176             for (row = 0; row < rect->h; ++row) {
1177                 SDL_memcpy(dst, src, length);
1178                 src += pitch;
1179                 dst += swdata->pitches[0];
1180             }
1181         }
1182         break;
1183     }
1184     return 0;
1185 }
1186
1187 int
1188 SDL_SW_UpdateYUVTexturePlanar(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
1189                               const Uint8 *Yplane, int Ypitch,
1190                               const Uint8 *Uplane, int Upitch,
1191                               const Uint8 *Vplane, int Vpitch)
1192 {
1193     const Uint8 *src;
1194     Uint8 *dst;
1195     int row;
1196     size_t length;
1197
1198     /* Copy the Y plane */
1199     src = Yplane;
1200     dst = swdata->pixels + rect->y * swdata->w + rect->x;
1201     length = rect->w;
1202     for (row = 0; row < rect->h; ++row) {
1203         SDL_memcpy(dst, src, length);
1204         src += Ypitch;
1205         dst += swdata->w;
1206     }
1207
1208     /* Copy the U plane */
1209     src = Uplane;
1210     if (swdata->format == SDL_PIXELFORMAT_IYUV) {
1211         dst = swdata->pixels + swdata->h * swdata->w;
1212     } else {
1213         dst = swdata->pixels + swdata->h * swdata->w +
1214               (swdata->h * swdata->w) / 4;
1215     }
1216     dst += rect->y/2 * swdata->w/2 + rect->x/2;
1217     length = rect->w / 2;
1218     for (row = 0; row < rect->h/2; ++row) {
1219         SDL_memcpy(dst, src, length);
1220         src += Upitch;
1221         dst += swdata->w/2;
1222     }
1223
1224     /* Copy the V plane */
1225     src = Vplane;
1226     if (swdata->format == SDL_PIXELFORMAT_YV12) {
1227         dst = swdata->pixels + swdata->h * swdata->w;
1228     } else {
1229         dst = swdata->pixels + swdata->h * swdata->w +
1230               (swdata->h * swdata->w) / 4;
1231     }
1232     dst += rect->y/2 * swdata->w/2 + rect->x/2;
1233     length = rect->w / 2;
1234     for (row = 0; row < rect->h/2; ++row) {
1235         SDL_memcpy(dst, src, length);
1236         src += Vpitch;
1237         dst += swdata->w/2;
1238     }
1239     return 0;
1240 }
1241
1242 int
1243 SDL_SW_LockYUVTexture(SDL_SW_YUVTexture * swdata, const SDL_Rect * rect,
1244                       void **pixels, int *pitch)
1245 {
1246     switch (swdata->format) {
1247     case SDL_PIXELFORMAT_YV12:
1248     case SDL_PIXELFORMAT_IYUV:
1249         if (rect
1250             && (rect->x != 0 || rect->y != 0 || rect->w != swdata->w
1251                 || rect->h != swdata->h)) {
1252             return SDL_SetError
1253                 ("YV12 and IYUV textures only support full surface locks");
1254         }
1255         break;
1256     }
1257
1258     if (rect) {
1259         *pixels = swdata->planes[0] + rect->y * swdata->pitches[0] + rect->x * 2;
1260     } else {
1261         *pixels = swdata->planes[0];
1262     }
1263     *pitch = swdata->pitches[0];
1264     return 0;
1265 }
1266
1267 void
1268 SDL_SW_UnlockYUVTexture(SDL_SW_YUVTexture * swdata)
1269 {
1270 }
1271
1272 int
1273 SDL_SW_CopyYUVToRGB(SDL_SW_YUVTexture * swdata, const SDL_Rect * srcrect,
1274                     Uint32 target_format, int w, int h, void *pixels,
1275                     int pitch)
1276 {
1277     const int targetbpp = SDL_BYTESPERPIXEL(target_format);
1278     int stretch;
1279     int scale_2x;
1280     Uint8 *lum, *Cr, *Cb;
1281     int mod;
1282
1283     if (targetbpp == 0) {
1284         return SDL_SetError("Invalid target pixel format");
1285     }
1286
1287     /* Make sure we're set up to display in the desired format */
1288     if (target_format != swdata->target_format) {
1289         if (SDL_SW_SetupYUVDisplay(swdata, target_format) < 0) {
1290             return -1;
1291         }
1292     }
1293
1294     stretch = 0;
1295     scale_2x = 0;
1296     if (srcrect->x || srcrect->y || srcrect->w < swdata->w
1297         || srcrect->h < swdata->h) {
1298         /* The source rectangle has been clipped.
1299            Using a scratch surface is easier than adding clipped
1300            source support to all the blitters, plus that would
1301            slow them down in the general unclipped case.
1302          */
1303         stretch = 1;
1304     } else if ((srcrect->w != w) || (srcrect->h != h)) {
1305         if ((w == 2 * srcrect->w) && (h == 2 * srcrect->h)) {
1306             scale_2x = 1;
1307         } else {
1308             stretch = 1;
1309         }
1310     }
1311     if (stretch) {
1312         int bpp;
1313         Uint32 Rmask, Gmask, Bmask, Amask;
1314
1315         if (swdata->display) {
1316             swdata->display->w = w;
1317             swdata->display->h = h;
1318             swdata->display->pixels = pixels;
1319             swdata->display->pitch = pitch;
1320         } else {
1321             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
1322             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
1323                                        &Bmask, &Amask);
1324             swdata->display =
1325                 SDL_CreateRGBSurfaceFrom(pixels, w, h, bpp, pitch, Rmask,
1326                                          Gmask, Bmask, Amask);
1327             if (!swdata->display) {
1328                 return (-1);
1329             }
1330         }
1331         if (!swdata->stretch) {
1332             /* This must have succeeded in SDL_SW_SetupYUVDisplay() earlier */
1333             SDL_PixelFormatEnumToMasks(target_format, &bpp, &Rmask, &Gmask,
1334                                        &Bmask, &Amask);
1335             swdata->stretch =
1336                 SDL_CreateRGBSurface(0, swdata->w, swdata->h, bpp, Rmask,
1337                                      Gmask, Bmask, Amask);
1338             if (!swdata->stretch) {
1339                 return (-1);
1340             }
1341         }
1342         pixels = swdata->stretch->pixels;
1343         pitch = swdata->stretch->pitch;
1344     }
1345     switch (swdata->format) {
1346     case SDL_PIXELFORMAT_YV12:
1347         lum = swdata->planes[0];
1348         Cr = swdata->planes[1];
1349         Cb = swdata->planes[2];
1350         break;
1351     case SDL_PIXELFORMAT_IYUV:
1352         lum = swdata->planes[0];
1353         Cr = swdata->planes[2];
1354         Cb = swdata->planes[1];
1355         break;
1356     case SDL_PIXELFORMAT_YUY2:
1357         lum = swdata->planes[0];
1358         Cr = lum + 3;
1359         Cb = lum + 1;
1360         break;
1361     case SDL_PIXELFORMAT_UYVY:
1362         lum = swdata->planes[0] + 1;
1363         Cr = lum + 1;
1364         Cb = lum - 1;
1365         break;
1366     case SDL_PIXELFORMAT_YVYU:
1367         lum = swdata->planes[0];
1368         Cr = lum + 1;
1369         Cb = lum + 3;
1370         break;
1371     default:
1372         return SDL_SetError("Unsupported YUV format in copy");
1373     }
1374     mod = (pitch / targetbpp);
1375
1376     if (scale_2x) {
1377         mod -= (swdata->w * 2);
1378         swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
1379                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
1380     } else {
1381         mod -= swdata->w;
1382         swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
1383                           lum, Cr, Cb, pixels, swdata->h, swdata->w, mod);
1384     }
1385     if (stretch) {
1386         SDL_Rect rect = *srcrect;
1387         SDL_SoftStretch(swdata->stretch, &rect, swdata->display, NULL);
1388     }
1389     return 0;
1390 }
1391
1392 void
1393 SDL_SW_DestroyYUVTexture(SDL_SW_YUVTexture * swdata)
1394 {
1395     if (swdata) {
1396         SDL_free(swdata->pixels);
1397         SDL_free(swdata->colortab);
1398         SDL_free(swdata->rgb_2_pix);
1399         SDL_FreeSurface(swdata->stretch);
1400         SDL_FreeSurface(swdata->display);
1401         SDL_free(swdata);
1402     }
1403 }
1404
1405 /* vi: set ts=4 sw=4 expandtab: */