src/third_party/skia/src/utils/SkTextureCompressor_LATC.cpp

   1 /*
   2  * Copyright 2014 Google Inc.
   3  *
   4  * Use of this source code is governed by a BSD-style license that can be
   5  * found in the LICENSE file.
   6  */
   7
   8 #include "SkTextureCompressor_LATC.h"
   9 #include "SkTextureCompressor_Blitter.h"
  10 #include "SkTextureCompressor_Utils.h"
  11
  12 #include "SkBlitter.h"
  13 #include "SkEndian.h"
  14
  15 // Compression options. In general, the slow version is much more accurate, but
  16 // much slower. The fast option is much faster, but much less accurate. YMMV.
  17 #define COMPRESS_LATC_SLOW 0
  18 #define COMPRESS_LATC_FAST 1
  19
  20 ////////////////////////////////////////////////////////////////////////////////
  21
  22 // Generates an LATC palette. LATC constructs
  23 // a palette of eight colors from LUM0 and LUM1 using the algorithm:
  24 //
  25 // LUM0,              if lum0 > lum1 and code(x,y) == 0
  26 // LUM1,              if lum0 > lum1 and code(x,y) == 1
  27 // (6*LUM0+  LUM1)/7, if lum0 > lum1 and code(x,y) == 2
  28 // (5*LUM0+2*LUM1)/7, if lum0 > lum1 and code(x,y) == 3
  29 // (4*LUM0+3*LUM1)/7, if lum0 > lum1 and code(x,y) == 4
  30 // (3*LUM0+4*LUM1)/7, if lum0 > lum1 and code(x,y) == 5
  31 // (2*LUM0+5*LUM1)/7, if lum0 > lum1 and code(x,y) == 6
  32 // (  LUM0+6*LUM1)/7, if lum0 > lum1 and code(x,y) == 7
  33 //
  34 // LUM0,              if lum0 <= lum1 and code(x,y) == 0
  35 // LUM1,              if lum0 <= lum1 and code(x,y) == 1
  36 // (4*LUM0+  LUM1)/5, if lum0 <= lum1 and code(x,y) == 2
  37 // (3*LUM0+2*LUM1)/5, if lum0 <= lum1 and code(x,y) == 3
  38 // (2*LUM0+3*LUM1)/5, if lum0 <= lum1 and code(x,y) == 4
  39 // (  LUM0+4*LUM1)/5, if lum0 <= lum1 and code(x,y) == 5
  40 // 0,                 if lum0 <= lum1 and code(x,y) == 6
  41 // 255,               if lum0 <= lum1 and code(x,y) == 7
  42
  43 static const int kLATCPaletteSize = 8;
  44 static void generate_latc_palette(uint8_t palette[], uint8_t lum0, uint8_t lum1) {
  45     palette[0] = lum0;
  46     palette[1] = lum1;
  47     if (lum0 > lum1) {
  48         for (int i = 1; i < 7; i++) {
  49             palette[i+1] = ((7-i)*lum0 + i*lum1) / 7;
  50         }
  51     } else {
  52         for (int i = 1; i < 5; i++) {
  53             palette[i+1] = ((5-i)*lum0 + i*lum1) / 5;
  54         }
  55         palette[6] = 0;
  56         palette[7] = 255;
  57     }
  58 }
  59
  60 ////////////////////////////////////////////////////////////////////////////////
  61
  62 #if COMPRESS_LATC_SLOW
  63
  64 ////////////////////////////////////////////////////////////////////////////////
  65 //
  66 // Utility Functions
  67 //
  68 ////////////////////////////////////////////////////////////////////////////////
  69
  70 // Absolute difference between two values. More correct than SkTAbs(a - b)
  71 // because it works on unsigned values.
  72 template <typename T> inline T abs_diff(const T &a, const T &b) {
  73     return (a > b) ? (a - b) : (b - a);
  74 }
  75
  76 static bool is_extremal(uint8_t pixel) {
  77     return 0 == pixel || 255 == pixel;
  78 }
  79
  80 typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]);
  81
  82 // This function is used by both R11 EAC and LATC to compress 4x4 blocks
  83 // of 8-bit alpha into 64-bit values that comprise the compressed data.
  84 // For both formats, we need to make sure that the dimensions of the
  85 // src pixels are divisible by 4, and copy 4x4 blocks one at a time
  86 // for compression.
  87 static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src,
  88                                      int width, int height, int rowBytes,
  89                                      A84x4To64BitProc proc) {
  90     // Make sure that our data is well-formed enough to be considered for compression
  91     if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) {
  92         return false;
  93     }
  94
  95     int blocksX = width >> 2;
  96     int blocksY = height >> 2;
  97
  98     uint8_t block[16];
  99     uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst);
 100     for (int y = 0; y < blocksY; ++y) {
 101         for (int x = 0; x < blocksX; ++x) {
 102             // Load block
 103             for (int k = 0; k < 4; ++k) {
 104                 memcpy(block + k*4, src + k*rowBytes + 4*x, 4);
 105             }
 106
 107             // Compress it
 108             *encPtr = proc(block);
 109             ++encPtr;
 110         }
 111         src += 4 * rowBytes;
 112     }
 113
 114     return true;
 115 }
 116
 117 ////////////////////////////////////////////////////////////////////////////////
 118 //
 119 // LATC compressor
 120 //
 121 ////////////////////////////////////////////////////////////////////////////////
 122
 123 // LATC compressed texels down into square 4x4 blocks
 124 static const int kLATCBlockSize = 4;
 125 static const int kLATCPixelsPerBlock = kLATCBlockSize * kLATCBlockSize;
 126
 127 // Compress a block by using the bounding box of the pixels. It is assumed that
 128 // there are no extremal pixels in this block otherwise we would have used
 129 // compressBlockBBIgnoreExtremal.
 130 static uint64_t compress_latc_block_bb(const uint8_t pixels[]) {
 131     uint8_t minVal = 255;
 132     uint8_t maxVal = 0;
 133     for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
 134         minVal = SkTMin(pixels[i], minVal);
 135         maxVal = SkTMax(pixels[i], maxVal);
 136     }
 137
 138     SkASSERT(!is_extremal(minVal));
 139     SkASSERT(!is_extremal(maxVal));
 140
 141     uint8_t palette[kLATCPaletteSize];
 142     generate_latc_palette(palette, maxVal, minVal);
 143
 144     uint64_t indices = 0;
 145     for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
 146
 147         // Find the best palette index
 148         uint8_t bestError = abs_diff(pixels[i], palette[0]);
 149         uint8_t idx = 0;
 150         for (int j = 1; j < kLATCPaletteSize; ++j) {
 151             uint8_t error = abs_diff(pixels[i], palette[j]);
 152             if (error < bestError) {
 153                 bestError = error;
 154                 idx = j;
 155             }
 156         }
 157
 158         indices <<= 3;
 159         indices |= idx;
 160     }
 161
 162     return
 163         SkEndian_SwapLE64(
 164             static_cast<uint64_t>(maxVal) |
 165             (static_cast<uint64_t>(minVal) << 8) |
 166             (indices << 16));
 167 }
 168
 169 // Compress a block by using the bounding box of the pixels without taking into
 170 // account the extremal values. The generated palette will contain extremal values
 171 // and fewer points along the line segment to interpolate.
 172 static uint64_t compress_latc_block_bb_ignore_extremal(const uint8_t pixels[]) {
 173     uint8_t minVal = 255;
 174     uint8_t maxVal = 0;
 175     for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
 176         if (is_extremal(pixels[i])) {
 177             continue;
 178         }
 179
 180         minVal = SkTMin(pixels[i], minVal);
 181         maxVal = SkTMax(pixels[i], maxVal);
 182     }
 183
 184     SkASSERT(!is_extremal(minVal));
 185     SkASSERT(!is_extremal(maxVal));
 186
 187     uint8_t palette[kLATCPaletteSize];
 188     generate_latc_palette(palette, minVal, maxVal);
 189
 190     uint64_t indices = 0;
 191     for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
 192
 193         // Find the best palette index
 194         uint8_t idx = 0;
 195         if (is_extremal(pixels[i])) {
 196             if (0xFF == pixels[i]) {
 197                 idx = 7;
 198             } else if (0 == pixels[i]) {
 199                 idx = 6;
 200             } else {
 201                 SkFAIL("Pixel is extremal but not really?!");
 202             }
 203         } else {
 204             uint8_t bestError = abs_diff(pixels[i], palette[0]);
 205             for (int j = 1; j < kLATCPaletteSize - 2; ++j) {
 206                 uint8_t error = abs_diff(pixels[i], palette[j]);
 207                 if (error < bestError) {
 208                     bestError = error;
 209                     idx = j;
 210                 }
 211             }
 212         }
 213
 214         indices <<= 3;
 215         indices |= idx;
 216     }
 217
 218     return
 219         SkEndian_SwapLE64(
 220             static_cast<uint64_t>(minVal) |
 221             (static_cast<uint64_t>(maxVal) << 8) |
 222             (indices << 16));
 223 }
 224
 225
 226 // Compress LATC block. Each 4x4 block of pixels is decompressed by LATC from two
 227 // values LUM0 and LUM1, and an index into the generated palette. Details of how
 228 // the palette is generated can be found in the comments of generatePalette above.
 229 //
 230 // We choose which palette type to use based on whether or not 'pixels' contains
 231 // any extremal values (0 or 255). If there are extremal values, then we use the
 232 // palette that has the extremal values built in. Otherwise, we use the full bounding
 233 // box.
 234
 235 static uint64_t compress_latc_block(const uint8_t pixels[]) {
 236     // Collect unique pixels
 237     int nUniquePixels = 0;
 238     uint8_t uniquePixels[kLATCPixelsPerBlock];
 239     for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
 240         bool foundPixel = false;
 241         for (int j = 0; j < nUniquePixels; ++j) {
 242             foundPixel = foundPixel || uniquePixels[j] == pixels[i];
 243         }
 244
 245         if (!foundPixel) {
 246             uniquePixels[nUniquePixels] = pixels[i];
 247             ++nUniquePixels;
 248         }
 249     }
 250
 251     // If there's only one unique pixel, then our compression is easy.
 252     if (1 == nUniquePixels) {
 253         return SkEndian_SwapLE64(pixels[0] | (pixels[0] << 8));
 254
 255     // Similarly, if there are only two unique pixels, then our compression is
 256     // easy again: place the pixels in the block header, and assign the indices
 257     // with one or zero depending on which pixel they belong to.
 258     } else if (2 == nUniquePixels) {
 259         uint64_t outBlock = 0;
 260         for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
 261             int idx = 0;
 262             if (pixels[i] == uniquePixels[1]) {
 263                 idx = 1;
 264             }
 265
 266             outBlock <<= 3;
 267             outBlock |= idx;
 268         }
 269         outBlock <<= 16;
 270         outBlock |= (uniquePixels[0] | (uniquePixels[1] << 8));
 271         return SkEndian_SwapLE64(outBlock);
 272     }
 273
 274     // Count non-maximal pixel values
 275     int nonExtremalPixels = 0;
 276     for (int i = 0; i < nUniquePixels; ++i) {
 277         if (!is_extremal(uniquePixels[i])) {
 278             ++nonExtremalPixels;
 279         }
 280     }
 281
 282     // If all the pixels are nonmaximal then compute the palette using
 283     // the bounding box of all the pixels.
 284     if (nonExtremalPixels == nUniquePixels) {
 285         // This is really just for correctness, in all of my tests we
 286         // never take this step. We don't lose too much perf here because
 287         // most of the processing in this function is worth it for the
 288         // 1 == nUniquePixels optimization.
 289         return compress_latc_block_bb(pixels);
 290     } else {
 291         return compress_latc_block_bb_ignore_extremal(pixels);
 292     }
 293 }
 294
 295 #endif  // COMPRESS_LATC_SLOW
 296
 297 ////////////////////////////////////////////////////////////////////////////////
 298
 299 #if COMPRESS_LATC_FAST
 300
 301 // Take the top three bits of each index and pack them into the low 12
 302 // bits of the integer.
 303 static inline uint32_t pack_index(uint32_t x) {
 304     // Pack it in...
 305 #if defined (SK_CPU_BENDIAN)
 306     return
 307         (x >> 24) |
 308         ((x >> 13) & 0x38) |
 309         ((x >> 2) & 0x1C0) |
 310         ((x << 9) & 0xE00);
 311 #else
 312     return
 313         (x & 0x7) |
 314         ((x >> 5) & 0x38) |
 315         ((x >> 10) & 0x1C0) |
 316         ((x >> 15) & 0xE00);
 317 #endif
 318 }
 319
 320 // Converts each 8-bit byte in the integer into an LATC index, and then packs
 321 // the indices into the low 12 bits of the integer.
 322 static inline uint32_t convert_index(uint32_t x) {
 323     // Since the palette is
 324     // 255, 0, 219, 182, 146, 109, 73, 36
 325     // we need to map the high three bits of each byte in the integer
 326     // from
 327     // 0 1 2 3 4 5 6 7
 328     // to
 329     // 1 7 6 5 4 3 2 0
 330     //
 331     // This first operation takes the mapping from
 332     // 0 1 2 3 4 5 6 7  -->  7 6 5 4 3 2 1 0
 333     x = 0x07070707 - SkTextureCompressor::ConvertToThreeBitIndex(x);
 334
 335     // mask is 1 if index is non-zero
 336     const uint32_t mask = (x | (x >> 1) | (x >> 2)) & 0x01010101;
 337
 338     // add mask:
 339     // 7 6 5 4 3 2 1 0 --> 8 7 6 5 4 3 2 0
 340     x = (x + mask);
 341
 342     // Handle overflow:
 343     // 8 7 6 5 4 3 2 0 --> 9 7 6 5 4 3 2 0
 344     x |= (x >> 3) & 0x01010101;
 345
 346     // Mask out high bits:
 347     // 9 7 6 5 4 3 2 0 --> 1 7 6 5 4 3 2 0
 348     x &= 0x07070707;
 349
 350     return pack_index(x);
 351 }
 352
 353 typedef uint64_t (*PackIndicesProc)(const uint8_t* alpha, int rowBytes);
 354 template<PackIndicesProc packIndicesProc>
 355 static void compress_a8_latc_block(uint8_t** dstPtr, const uint8_t* src, int rowBytes) {
 356     *(reinterpret_cast<uint64_t*>(*dstPtr)) =
 357         SkEndian_SwapLE64(0xFF | (packIndicesProc(src, rowBytes) << 16));
 358     *dstPtr += 8;
 359 }
 360
 361 inline uint64_t PackRowMajor(const uint8_t *indices, int rowBytes) {
 362     uint64_t result = 0;
 363     for (int i = 0; i < 4; ++i) {
 364         const uint32_t idx = *(reinterpret_cast<const uint32_t*>(indices + i*rowBytes));
 365         result |= static_cast<uint64_t>(convert_index(idx)) << 12*i;
 366     }
 367     return result;
 368 }
 369
 370 inline uint64_t PackColumnMajor(const uint8_t *indices, int rowBytes) {
 371     // !SPEED! Blarg, this is kind of annoying. SSE4 can make this
 372     // a LOT faster.
 373     uint8_t transposed[16];
 374     for (int i = 0; i < 4; ++i) {
 375         for (int j = 0; j < 4; ++j) {
 376             transposed[j*4+i] = indices[i*rowBytes + j];
 377         }
 378     }
 379
 380     return PackRowMajor(transposed, 4);
 381 }
 382
 383 static bool compress_4x4_a8_latc(uint8_t* dst, const uint8_t* src,
 384                                  int width, int height, int rowBytes) {
 385
 386     if (width < 0 || ((width % 4) != 0) || height < 0 || ((height % 4) != 0)) {
 387         return false;
 388     }
 389
 390     uint8_t** dstPtr = &dst;
 391     for (int y = 0; y < height; y += 4) {
 392         for (int x = 0; x < width; x += 4) {
 393             compress_a8_latc_block<PackRowMajor>(dstPtr, src + y*rowBytes + x, rowBytes);
 394         }
 395     }
 396
 397     return true;
 398 }
 399
 400 void CompressA8LATCBlockVertical(uint8_t* dst, const uint8_t block[]) {
 401     compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
 402 }
 403
 404 #endif  // COMPRESS_LATC_FAST
 405
 406 void decompress_latc_block(uint8_t* dst, int dstRowBytes, const uint8_t* src) {
 407     uint64_t block = SkEndian_SwapLE64(*(reinterpret_cast<const uint64_t *>(src)));
 408     uint8_t lum0 = block & 0xFF;
 409     uint8_t lum1 = (block >> 8) & 0xFF;
 410
 411     uint8_t palette[kLATCPaletteSize];
 412     generate_latc_palette(palette, lum0, lum1);
 413
 414     block >>= 16;
 415     for (int j = 0; j < 4; ++j) {
 416         for (int i = 0; i < 4; ++i) {
 417             dst[i] = palette[block & 0x7];
 418             block >>= 3;
 419         }
 420         dst += dstRowBytes;
 421     }
 422 }
 423
 424 // This is the type passed as the CompressorType argument of the compressed
 425 // blitter for the LATC format. The static functions required to be in this
 426 // struct are documented in SkTextureCompressor_Blitter.h
 427 struct CompressorLATC {
 428     static inline void CompressA8Vertical(uint8_t* dst, const uint8_t block[]) {
 429         compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
 430     }
 431
 432     static inline void CompressA8Horizontal(uint8_t* dst, const uint8_t* src,
 433                                             int srcRowBytes) {
 434         compress_a8_latc_block<PackRowMajor>(&dst, src, srcRowBytes);
 435     }
 436
 437 #if PEDANTIC_BLIT_RECT
 438     static inline void UpdateBlock(uint8_t* dst, const uint8_t* src, int srcRowBytes,
 439                                    const uint8_t* mask) {
 440         // Pack the mask
 441         uint64_t cmpMask = 0;
 442         for (int i = 0; i < 4; ++i) {
 443             const uint32_t idx = *(reinterpret_cast<const uint32_t*>(src + i*srcRowBytes));
 444             cmpMask |= static_cast<uint64_t>(pack_index(idx)) << 12*i;
 445         }
 446         cmpMask = SkEndian_SwapLE64(cmpMask << 16); // avoid header
 447
 448         uint64_t cmpSrc;
 449         uint8_t *cmpSrcPtr = reinterpret_cast<uint8_t*>(&cmpSrc);
 450         compress_a8_latc_block<PackRowMajor>(&cmpSrcPtr, src, srcRowBytes);
 451
 452         // Mask out header
 453         cmpSrc = cmpSrc & cmpMask;
 454
 455         // Read destination encoding
 456         uint64_t *cmpDst = reinterpret_cast<uint64_t*>(dst);
 457
 458         // If the destination is the encoding for a blank block, then we need
 459         // to properly set the header
 460         if (0 == cmpDst) {
 461             *cmpDst = SkTEndian_SwapLE64(0x24924924924900FFULL);
 462         }
 463
 464         // Set the new indices
 465         *cmpDst &= ~cmpMask;
 466         *cmpDst |= cmpSrc;
 467     }
 468 #endif  // PEDANTIC_BLIT_RECT
 469 };
 470
 471 ////////////////////////////////////////////////////////////////////////////////
 472
 473 namespace SkTextureCompressor {
 474
 475 bool CompressA8ToLATC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) {
 476 #if COMPRESS_LATC_FAST
 477     return compress_4x4_a8_latc(dst, src, width, height, rowBytes);
 478 #elif COMPRESS_LATC_SLOW
 479     return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block);
 480 #else
 481 #error "Must choose either fast or slow LATC compression"
 482 #endif
 483 }
 484
 485 SkBlitter* CreateLATCBlitter(int width, int height, void* outputBuffer,
 486                              SkTBlitterAllocator* allocator) {
 487     if ((width % 4) != 0 || (height % 4) != 0) {
 488         return NULL;
 489     }
 490
 491 #if COMPRESS_LATC_FAST
 492     // Memset the output buffer to an encoding that decodes to zero. We must do this
 493     // in order to avoid having uninitialized values in the buffer if the blitter
 494     // decides not to write certain scanlines (and skip entire rows of blocks).
 495     // In the case of LATC, if everything is zero, then LUM0 and LUM1 are also zero,
 496     // and they will only be non-zero (0xFF) if the index is 7. So bzero will do just fine.
 497     // (8 bytes per block) * (w * h / 16 blocks) = w * h / 2
 498     sk_bzero(outputBuffer, width * height / 2);
 499
 500     return allocator->createT<
 501         SkTCompressedAlphaBlitter<4, 8, CompressorLATC>, int, int, void* >
 502         (width, height, outputBuffer);
 503 #elif COMPRESS_LATC_SLOW
 504     // TODO (krajcevski)
 505     return NULL;
 506 #endif
 507 }
 508
 509 void DecompressLATC(uint8_t* dst, int dstRowBytes, const uint8_t* src, int width, int height) {
 510     for (int j = 0; j < height; j += 4) {
 511         for (int i = 0; i < width; i += 4) {
 512             decompress_latc_block(dst + i, dstRowBytes, src);
 513             src += 8;
 514         }
 515         dst += 4 * dstRowBytes;
 516     }
 517 }
 518
 519 }  // SkTextureCompressor