src/third_party/skia/src/utils/SkTextureCompressor_ASTC.cpp

   1 /*
   2  * Copyright 2014 Google Inc.
   3  *
   4  * Use of this source code is governed by a BSD-style license that can be
   5  * found in the LICENSE file.
   6  */
   7
#include "SkTextureCompressor_ASTC.h"
#include "SkTextureCompressor_Blitter.h"

#include "SkBlitter.h"
#include "SkEndian.h"
#include "SkMath.h"

#include <string.h>
  14
  15 // This table contains the weight values for each texel. This is used in determining
  16 // how to convert a 12x12 grid of alpha values into a 6x5 grid of index values. Since
  17 // we have a 6x5 grid, that gives 30 values that we have to compute. For each index,
  18 // we store up to 20 different triplets of values. In order the triplets are:
  19 // weight, texel-x, texel-y
  20 // The weight value corresponds to the amount that this index contributes to the final
  21 // index value of the given texel. Hence, we need to reconstruct the 6x5 index grid
  22 // from their relative contribution to the 12x12 texel grid.
  23 //
  24 // The algorithm is something like this:
  25 // foreach index i:
  26 //    total-weight = 0;
  27 //    total-alpha = 0;
//    for w = 0 to 19:
//       weight = table[i][w*3];
//       texel-x = table[i][w*3 + 1];
//       texel-y = table[i][w*3 + 2];
//       if weight <= 0:
//           break;  // padding triplet; every triplet after it is padding too
//       total-weight += weight;
//       total-alpha += weight * alphas[texel-x][texel-y];
  35 //
  36 //    total-alpha /= total-weight;
  37 //    index = top three bits of total-alpha
  38 //
// If the associated index does not contribute to 20 different texels (e.g. it's in
// a corner), then the unused trailing triplets are stored as (-1, 0, 0) in the table.
  41
// One row per entry of the 6x5 index grid. Every row has room for 20
// (weight, texel-x, texel-y) triplets; rows that need fewer are padded at the
// end with (-1, 0, 0). The trailing comment on each row gives the number of
// valid (non-padding) triplets in that row.
static const int8_t k6x5To12x12Table[30][60] = {
{ 16, 0, 0, 9, 1, 0, 1, 2, 0, 10, 0, 1, 6, 1, 1, 1, 2, 1, 4, 0, 2, 2,
  1, 2, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 8 valid triplets
{ 7, 1, 0, 15, 2, 0, 10, 3, 0, 3, 4, 0, 4, 1, 1, 9, 2, 1, 6, 3, 1, 2,
  4, 1, 2, 1, 2, 4, 2, 2, 3, 3, 2, 1, 4, 2, -1, 0, 0, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 valid triplets
{ 6, 3, 0, 13, 4, 0, 12, 5, 0, 4, 6, 0, 4, 3, 1, 8, 4, 1, 8, 5, 1, 3,
  6, 1, 1, 3, 2, 3, 4, 2, 3, 5, 2, 1, 6, 2, -1, 0, 0, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 valid triplets
{ 4, 5, 0, 12, 6, 0, 13, 7, 0, 6, 8, 0, 2, 5, 1, 7, 6, 1, 8, 7, 1, 4,
  8, 1, 1, 5, 2, 3, 6, 2, 3, 7, 2, 2, 8, 2, -1, 0, 0, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 valid triplets
{ 3, 7, 0, 10, 8, 0, 15, 9, 0, 7, 10, 0, 2, 7, 1, 6, 8, 1, 9, 9, 1, 4,
  10, 1, 1, 7, 2, 2, 8, 2, 4, 9, 2, 2, 10, 2, -1, 0, 0, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 valid triplets
{ 1, 9, 0, 9, 10, 0, 16, 11, 0, 1, 9, 1, 6, 10, 1, 10, 11, 1, 2, 10, 2, 4,
  11, 2, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 8 valid triplets
{ 6, 0, 1, 3, 1, 1, 12, 0, 2, 7, 1, 2, 1, 2, 2, 15, 0, 3, 8, 1, 3, 1,
  2, 3, 9, 0, 4, 5, 1, 4, 1, 2, 4, 3, 0, 5, 2, 1, 5, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 13 valid triplets
{ 3, 1, 1, 6, 2, 1, 4, 3, 1, 1, 4, 1, 5, 1, 2, 11, 2, 2, 7, 3, 2, 2,
  4, 2, 7, 1, 3, 14, 2, 3, 9, 3, 3, 3, 4, 3, 4, 1, 4, 8, 2, 4, 6, 3,
  4, 2, 4, 4, 1, 1, 5, 3, 2, 5, 2, 3, 5, 1, 4, 5}, // 20 valid triplets
{ 2, 3, 1, 5, 4, 1, 4, 5, 1, 1, 6, 1, 5, 3, 2, 10, 4, 2, 9, 5, 2, 3,
  6, 2, 6, 3, 3, 12, 4, 3, 11, 5, 3, 4, 6, 3, 3, 3, 4, 7, 4, 4, 7, 5,
  4, 2, 6, 4, 1, 3, 5, 2, 4, 5, 2, 5, 5, 1, 6, 5}, // 20 valid triplets
{ 2, 5, 1, 5, 6, 1, 5, 7, 1, 2, 8, 1, 3, 5, 2, 9, 6, 2, 10, 7, 2, 4,
  8, 2, 4, 5, 3, 11, 6, 3, 12, 7, 3, 6, 8, 3, 2, 5, 4, 7, 6, 4, 7, 7,
  4, 3, 8, 4, 1, 5, 5, 2, 6, 5, 2, 7, 5, 1, 8, 5}, // 20 valid triplets
{ 1, 7, 1, 4, 8, 1, 6, 9, 1, 3, 10, 1, 2, 7, 2, 8, 8, 2, 11, 9, 2, 5,
  10, 2, 3, 7, 3, 9, 8, 3, 14, 9, 3, 7, 10, 3, 2, 7, 4, 6, 8, 4, 8, 9,
  4, 4, 10, 4, 1, 7, 5, 2, 8, 5, 3, 9, 5, 1, 10, 5}, // 20 valid triplets
{ 3, 10, 1, 6, 11, 1, 1, 9, 2, 7, 10, 2, 12, 11, 2, 1, 9, 3, 8, 10, 3, 15,
  11, 3, 1, 9, 4, 5, 10, 4, 9, 11, 4, 2, 10, 5, 3, 11, 5, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 13 valid triplets
{ 1, 0, 3, 1, 1, 3, 7, 0, 4, 4, 1, 4, 13, 0, 5, 7, 1, 5, 1, 2, 5, 13,
  0, 6, 7, 1, 6, 1, 2, 6, 7, 0, 7, 4, 1, 7, 1, 0, 8, 1, 1, 8, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 14 valid triplets
{ 1, 2, 3, 1, 3, 3, 3, 1, 4, 7, 2, 4, 4, 3, 4, 1, 4, 4, 6, 1, 5, 12,
  2, 5, 8, 3, 5, 2, 4, 5, 6, 1, 6, 12, 2, 6, 8, 3, 6, 2, 4, 6, 3, 1,
  7, 7, 2, 7, 4, 3, 7, 1, 4, 7, 1, 2, 8, 1, 3, 8}, // 20 valid triplets
{ 1, 4, 3, 1, 5, 3, 3, 3, 4, 6, 4, 4, 5, 5, 4, 2, 6, 4, 5, 3, 5, 11,
  4, 5, 10, 5, 5, 3, 6, 5, 5, 3, 6, 11, 4, 6, 10, 5, 6, 3, 6, 6, 3, 3,
  7, 6, 4, 7, 5, 5, 7, 2, 6, 7, 1, 4, 8, 1, 5, 8}, // 20 valid triplets
{ 1, 6, 3, 1, 7, 3, 2, 5, 4, 5, 6, 4, 6, 7, 4, 3, 8, 4, 3, 5, 5, 10,
  6, 5, 11, 7, 5, 5, 8, 5, 3, 5, 6, 10, 6, 6, 11, 7, 6, 5, 8, 6, 2, 5,
  7, 5, 6, 7, 6, 7, 7, 3, 8, 7, 1, 6, 8, 1, 7, 8}, // 20 valid triplets
{ 1, 8, 3, 1, 9, 3, 1, 7, 4, 4, 8, 4, 7, 9, 4, 3, 10, 4, 2, 7, 5, 8,
  8, 5, 12, 9, 5, 6, 10, 5, 2, 7, 6, 8, 8, 6, 12, 9, 6, 6, 10, 6, 1, 7,
  7, 4, 8, 7, 7, 9, 7, 3, 10, 7, 1, 8, 8, 1, 9, 8}, // 20 valid triplets
{ 1, 10, 3, 1, 11, 3, 4, 10, 4, 7, 11, 4, 1, 9, 5, 7, 10, 5, 13, 11, 5, 1,
  9, 6, 7, 10, 6, 13, 11, 6, 4, 10, 7, 7, 11, 7, 1, 10, 8, 1, 11, 8, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 14 valid triplets
{ 3, 0, 6, 2, 1, 6, 9, 0, 7, 5, 1, 7, 1, 2, 7, 15, 0, 8, 8, 1, 8, 1,
  2, 8, 12, 0, 9, 7, 1, 9, 1, 2, 9, 6, 0, 10, 3, 1, 10, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 13 valid triplets
{ 1, 1, 6, 3, 2, 6, 2, 3, 6, 1, 4, 6, 4, 1, 7, 8, 2, 7, 6, 3, 7, 2,
  4, 7, 7, 1, 8, 14, 2, 8, 9, 3, 8, 3, 4, 8, 5, 1, 9, 11, 2, 9, 8, 3,
  9, 2, 4, 9, 3, 1, 10, 6, 2, 10, 4, 3, 10, 1, 4, 10}, // 20 valid triplets
{ 1, 3, 6, 2, 4, 6, 2, 5, 6, 1, 6, 6, 3, 3, 7, 7, 4, 7, 7, 5, 7, 2,
  6, 7, 6, 3, 8, 12, 4, 8, 11, 5, 8, 4, 6, 8, 4, 3, 9, 10, 4, 9, 9, 5,
  9, 3, 6, 9, 2, 3, 10, 5, 4, 10, 5, 5, 10, 2, 6, 10}, // 20 valid triplets
{ 1, 5, 6, 2, 6, 6, 2, 7, 6, 1, 8, 6, 2, 5, 7, 7, 6, 7, 7, 7, 7, 3,
  8, 7, 4, 5, 8, 11, 6, 8, 12, 7, 8, 6, 8, 8, 3, 5, 9, 9, 6, 9, 10, 7,
  9, 5, 8, 9, 1, 5, 10, 4, 6, 10, 5, 7, 10, 2, 8, 10}, // 20 valid triplets
{ 1, 7, 6, 2, 8, 6, 3, 9, 6, 1, 10, 6, 2, 7, 7, 6, 8, 7, 8, 9, 7, 4,
  10, 7, 3, 7, 8, 9, 8, 8, 14, 9, 8, 7, 10, 8, 2, 7, 9, 7, 8, 9, 11, 9,
  9, 5, 10, 9, 1, 7, 10, 4, 8, 10, 6, 9, 10, 3, 10, 10}, // 20 valid triplets
{ 2, 10, 6, 3, 11, 6, 1, 9, 7, 5, 10, 7, 9, 11, 7, 1, 9, 8, 8, 10, 8, 15,
  11, 8, 1, 9, 9, 7, 10, 9, 12, 11, 9, 3, 10, 10, 6, 11, 10, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 13 valid triplets
{ 4, 0, 9, 2, 1, 9, 10, 0, 10, 6, 1, 10, 1, 2, 10, 16, 0, 11, 9, 1, 11, 1,
  2, 11, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 8 valid triplets
{ 2, 1, 9, 4, 2, 9, 2, 3, 9, 1, 4, 9, 4, 1, 10, 9, 2, 10, 6, 3, 10, 2,
  4, 10, 7, 1, 11, 15, 2, 11, 10, 3, 11, 3, 4, 11, -1, 0, 0, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 valid triplets
{ 2, 3, 9, 3, 4, 9, 3, 5, 9, 1, 6, 9, 4, 3, 10, 8, 4, 10, 7, 5, 10, 2,
  6, 10, 6, 3, 11, 13, 4, 11, 12, 5, 11, 4, 6, 11, -1, 0, 0, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 valid triplets
{ 1, 5, 9, 3, 6, 9, 3, 7, 9, 1, 8, 9, 3, 5, 10, 8, 6, 10, 8, 7, 10, 4,
  8, 10, 4, 5, 11, 12, 6, 11, 13, 7, 11, 6, 8, 11, -1, 0, 0, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 valid triplets
{ 1, 7, 9, 3, 8, 9, 4, 9, 9, 2, 10, 9, 2, 7, 10, 6, 8, 10, 9, 9, 10, 4,
  10, 10, 3, 7, 11, 10, 8, 11, 15, 9, 11, 7, 10, 11, -1, 0, 0, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0}, // 12 valid triplets
{ 2, 10, 9, 4, 11, 9, 1, 9, 10, 6, 10, 10, 10, 11, 10, 1, 9, 11, 9, 10, 11, 16,
  11, 11, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0,
  0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0, -1, 0, 0} // 8 valid triplets
};
 134
 135 // Returns the alpha value of a texel at position (x, y) from src.
 136 // (x, y) are assumed to be in the range [0, 12).
 137 inline uint8_t GetAlpha(const uint8_t *src, int rowBytes, int x, int y) {
 138     SkASSERT(x >= 0 && x < 12);
 139     SkASSERT(y >= 0 && y < 12);
 140     SkASSERT(rowBytes >= 12);
 141     return *(src + y*rowBytes + x);
 142 }
 143
 144 inline uint8_t GetAlphaTranspose(const uint8_t *src, int rowBytes, int x, int y) {
 145     return GetAlpha(src, rowBytes, y, x);
 146 }
 147
 148 // Output the 16 bytes stored in top and bottom and advance the pointer. The bytes
 149 // are stored as the integers are represented in memory, so they should be swapped
 150 // if necessary.
 151 static inline void send_packing(uint8_t** dst, const uint64_t top, const uint64_t bottom) {
 152     uint64_t* dst64 = reinterpret_cast<uint64_t*>(*dst);
 153     dst64[0] = top;
 154     dst64[1] = bottom;
 155     *dst += 16;
 156 }
 157
 158 // Compresses an ASTC block, by looking up the proper contributions from
 159 // k6x5To12x12Table and computing an index from the associated values.
 160 typedef uint8_t (*GetAlphaProc)(const uint8_t* src, int rowBytes, int x, int y);
 161
 162 template<GetAlphaProc getAlphaProc>
 163 static void compress_a8_astc_block(uint8_t** dst, const uint8_t* src, int rowBytes) {
 164     // Check for single color
 165     bool constant = true;
 166     const uint32_t firstInt = *(reinterpret_cast<const uint32_t*>(src));
 167     for (int i = 0; i < 12; ++i) {
 168         const uint32_t *rowInt = reinterpret_cast<const uint32_t *>(src + i*rowBytes);
 169         constant = constant && (rowInt[0] == firstInt);
 170         constant = constant && (rowInt[1] == firstInt);
 171         constant = constant && (rowInt[2] == firstInt);
 172     }
 173
 174     if (constant) {
 175         if (0 == firstInt) {
 176             // All of the indices are set to zero, and the colors are
 177             // v0 = 0, v1 = 255, so everything will be transparent.
 178             send_packing(dst, SkTEndian_SwapLE64(0x0000000001FE000173ULL), 0);
 179             return;
 180         } else if (0xFFFFFFFF == firstInt) {
 181             // All of the indices are set to zero, and the colors are
 182             // v0 = 255, v1 = 0, so everything will be opaque.
 183             send_packing(dst, SkTEndian_SwapLE64(0x000000000001FE0173ULL), 0);
 184             return;
 185         }
 186     }
 187
 188     uint8_t indices[30]; // 6x5 index grid
 189     for (int idx = 0; idx < 30; ++idx) {
 190         int weightTot = 0;
 191         int alphaTot = 0;
 192         for (int w = 0; w < 20; ++w) {
 193             const int8_t weight = k6x5To12x12Table[idx][w*3];
 194             if (weight > 0) {
 195                 const int x = k6x5To12x12Table[idx][w*3 + 1];
 196                 const int y = k6x5To12x12Table[idx][w*3 + 2];
 197                 weightTot += weight;
 198                 alphaTot += weight * getAlphaProc(src, rowBytes, x, y);
 199             } else {
 200                 // In our table, not every entry has 20 weights, and all
 201                 // of them are nonzero. Once we hit a negative weight, we
 202                 // know that all of the other weights are not valid either.
 203                 break;
 204             }
 205         }
 206
 207         indices[idx] = (alphaTot / weightTot) >> 5;
 208     }
 209
 210     // Pack indices... The ASTC block layout is fairly complicated. An extensive
 211     // description can be found here:
 212     // https://www.opengl.org/registry/specs/KHR/texture_compression_astc_hdr.txt
 213     //
 214     // Here is a summary of the options that we've chosen:
 215     // 1. Block mode: 0b00101110011
 216     //     - 6x5 texel grid
 217     //     - Single plane
 218     //     - Low-precision index values
 219     //     - Index range 0-7 (three bits per index)
 220     // 2. Partitions: 0b00
 221     //     - One partition
 222     // 3. Color Endpoint Mode: 0b0000
 223     //     - Direct luminance -- e0=(v0,v0,v0,0xFF); e1=(v1,v1,v1,0xFF);
 224     // 4. 8-bit endpoints:
 225     //     v0 = 0, v1 = 255
 226     //
 227     // The rest of the block contains the 30 index values from before, which
 228     // are currently stored in the indices variable.
 229
 230     uint64_t top = 0x0000000001FE000173ULL;
 231     uint64_t bottom = 0;
 232
 233     for (int idx = 0; idx <= 20; ++idx) {
 234         const uint8_t index = indices[idx];
 235         bottom |= static_cast<uint64_t>(index) << (61-(idx*3));
 236     }
 237
 238     // index 21 straddles top and bottom
 239     {
 240         const uint8_t index = indices[21];
 241         bottom |= index & 1;
 242         top |= static_cast<uint64_t>((index >> 2) | (index & 2)) << 62;
 243     }
 244
 245     for (int idx = 22; idx < 30; ++idx) {
 246         const uint8_t index = indices[idx];
 247         top |= static_cast<uint64_t>(index) << (59-(idx-22)*3);
 248     }
 249
 250     // Reverse each 3-bit index since indices are read in reverse order...
 251     uint64_t t = (bottom ^ (bottom >> 2)) & 0x2492492492492492ULL;
 252     bottom = bottom ^ t ^ (t << 2);
 253
 254     t = (top ^ (top >> 2)) & 0x0924924000000000ULL;
 255     top = top ^ t ^ (t << 2);
 256
 257     send_packing(dst, SkEndian_SwapLE64(top), SkEndian_SwapLE64(bottom));
 258 }
 259
 260 inline void CompressA8ASTCBlockVertical(uint8_t* dst, const uint8_t* src) {
 261     compress_a8_astc_block<GetAlphaTranspose>(&dst, src, 12);
 262 }
 263
 264 ////////////////////////////////////////////////////////////////////////////////
 265 //
 266 // ASTC Decoder
 267 //
 268 // Full details available in the spec:
 269 // http://www.khronos.org/registry/gles/extensions/OES/OES_texture_compression_astc.txt
 270 //
 271 ////////////////////////////////////////////////////////////////////////////////
 272
// Enable this to assert whenever a decoded block has invalid ASTC values. Otherwise,
// each invalid block will result in a disgusting magenta color.
// When this is nonzero, write_error_color() calls SkDEBUGFAIL before it fills
// the block with the magenta error color.
#define ASSERT_ASTC_DECODE_ERROR 0
 276
 277 // Reverse 64-bit integer taken from TAOCP 4a, although it's better
 278 // documented at this site:
 279 // http://matthewarcus.wordpress.com/2012/11/18/reversing-a-64-bit-word/
 280
 281 template <typename T, T m, int k>
 282 static inline T swap_bits(T p) {
 283     T q = ((p>>k)^p) & m;
 284     return p^q^(q<<k);
 285 }
 286
 287 static inline uint64_t reverse64(uint64_t n) {
 288     static const uint64_t m0 = 0x5555555555555555ULL;
 289     static const uint64_t m1 = 0x0300c0303030c303ULL;
 290     static const uint64_t m2 = 0x00c0300c03f0003fULL;
 291     static const uint64_t m3 = 0x00000ffc00003fffULL;
 292     n = ((n>>1)&m0) | (n&m0)<<1;
 293     n = swap_bits<uint64_t, m1, 4>(n);
 294     n = swap_bits<uint64_t, m2, 8>(n);
 295     n = swap_bits<uint64_t, m3, 20>(n);
 296     n = (n >> 34) | (n << 30);
 297     return n;
 298 }
 299
 300 // An ASTC block is 128 bits. We represent it as two 64-bit integers in order
 301 // to efficiently operate on the block using bitwise operations.
 302 struct ASTCBlock {
 303     uint64_t fLow;
 304     uint64_t fHigh;
 305
 306     // Reverses the bits of an ASTC block, making the LSB of the
 307     // 128 bit block the MSB.
 308     inline void reverse() {
 309         const uint64_t newLow = reverse64(this->fHigh);
 310         this->fHigh = reverse64(this->fLow);
 311         this->fLow = newLow;
 312     }
 313 };
 314
 315 // Writes the given color to every pixel in the block. This is used by void-extent
 316 // blocks (a special constant-color encoding of a block) and by the error function.
 317 static inline void write_constant_color(uint8_t* dst, int blockDimX, int blockDimY,
 318                                         int dstRowBytes, SkColor color) {
 319     for (int y = 0; y < blockDimY; ++y) {
 320         SkColor *dstColors = reinterpret_cast<SkColor*>(dst);
 321         for (int x = 0; x < blockDimX; ++x) {
 322             dstColors[x] = color;
 323         }
 324         dst += dstRowBytes;
 325     }
 326 }
 327
 328 // Sets the entire block to the ASTC "error" color, a disgusting magenta
 329 // that's not supposed to appear in natural images.
 330 static inline void write_error_color(uint8_t* dst, int blockDimX, int blockDimY,
 331                                      int dstRowBytes) {
 332     static const SkColor kASTCErrorColor = SkColorSetRGB(0xFF, 0, 0xFF);
 333
 334 #if ASSERT_ASTC_DECODE_ERROR
 335     SkDEBUGFAIL("ASTC decoding error!\n");
 336 #endif
 337
 338     write_constant_color(dst, blockDimX, blockDimY, dstRowBytes, kASTCErrorColor);
 339 }
 340
 341 // Reads up to 64 bits of the ASTC block starting from bit
 342 // 'from' and going up to but not including bit 'to'. 'from' starts
 343 // counting from the LSB, counting up to the MSB. Returns -1 on
 344 // error.
 345 static uint64_t read_astc_bits(const ASTCBlock &block, int from, int to) {
 346     SkASSERT(0 <= from && from <= 128);
 347     SkASSERT(0 <= to && to <= 128);
 348
 349     const int nBits = to - from;
 350     if (0 == nBits) {
 351         return 0;
 352     }
 353
 354     if (nBits < 0 || 64 <= nBits) {
 355         SkDEBUGFAIL("ASTC -- shouldn't read more than 64 bits");
 356         return -1;
 357     }
 358
 359     // Remember, the 'to' bit isn't read.
 360     uint64_t result = 0;
 361     if (to <= 64) {
 362         // All desired bits are in the low 64-bits.
 363         result = (block.fLow >> from) & ((1ULL << nBits) - 1);
 364     } else if (from >= 64) {
 365         // All desired bits are in the high 64-bits.
 366         result = (block.fHigh >> (from - 64)) & ((1ULL << nBits) - 1);
 367     } else {
 368         // from < 64 && to > 64
 369         SkASSERT(nBits > (64 - from));
 370         const int nLow = 64 - from;
 371         const int nHigh = nBits - nLow;
 372         result =
 373             ((block.fLow >> from) & ((1ULL << nLow) - 1)) |
 374             ((block.fHigh & ((1ULL << nHigh) - 1)) << nLow);
 375     }
 376
 377     return result;
 378 }
 379
 380 // Returns the number of bits needed to represent a number
 381 // in the given power-of-two range (excluding the power of two itself).
 382 static inline int bits_for_range(int x) {
 383     SkASSERT(SkIsPow2(x));
 384     SkASSERT(0 != x);
 385     // Since we know it's a power of two, there should only be one bit set,
 386     // meaning the number of trailing zeros is 31 minus the number of leading
 387     // zeros.
 388     return 31 - SkCLZ(x);
 389 }
 390
 391 // Clamps an integer to the range [0, 255]
 392 static inline int clamp_byte(int x) {
 393     return SkClampMax(x, 255);
 394 }
 395
 396 // Helper function defined in the ASTC spec, section C.2.14
 397 // It transfers a few bits of precision from one value to another.
 398 static inline void bit_transfer_signed(int *a, int *b) {
 399     *b >>= 1;
 400     *b |= *a & 0x80;
 401     *a >>= 1;
 402     *a &= 0x3F;
 403     if ( (*a & 0x20) != 0 ) {
 404         *a -= 0x40;
 405     }
 406 }
 407
 408 // Helper function defined in the ASTC spec, section C.2.14
 409 // It uses the value in the blue channel to tint the red and green
 410 static inline SkColor blue_contract(int a, int r, int g, int b) {
 411     return SkColorSetARGB(a, (r + b) >> 1, (g + b) >> 1, b);
 412 }
 413
 414 // Helper function that decodes two colors from eight values. If isRGB is true,
 415 // then the pointer 'v' contains six values and the last two are considered to be
 416 // 0xFF. If isRGB is false, then all eight values come from the pointer 'v'. This
 417 // corresponds to the decode procedure for the following endpoint modes:
 418 //   kLDR_RGB_Direct_ColorEndpointMode
 419 //   kLDR_RGBA_Direct_ColorEndpointMode
 420 static inline void decode_rgba_direct(const int *v, SkColor *endpoints, bool isRGB) {
 421
 422     int v6 = 0xFF;
 423     int v7 = 0xFF;
 424     if (!isRGB) {
 425         v6 = v[6];
 426         v7 = v[7];
 427     }
 428
 429     const int s0 = v[0] + v[2] + v[4];
 430     const int s1 = v[1] + v[3] + v[5];
 431
 432     if (s1 >= s0) {
 433         endpoints[0] = SkColorSetARGB(v6, v[0], v[2], v[4]);
 434         endpoints[1] = SkColorSetARGB(v7, v[1], v[3], v[5]);
 435     } else {
 436         endpoints[0] = blue_contract(v7, v[1], v[3], v[5]);
 437         endpoints[1] = blue_contract(v6, v[0], v[2], v[4]);
 438     }
 439 }
 440
 441 // Helper function that decodes two colors from six values. If isRGB is true,
 442 // then the pointer 'v' contains four values and the last two are considered to be
 443 // 0xFF. If isRGB is false, then all six values come from the pointer 'v'. This
 444 // corresponds to the decode procedure for the following endpoint modes:
 445 //   kLDR_RGB_BaseScale_ColorEndpointMode
 446 //   kLDR_RGB_BaseScaleWithAlpha_ColorEndpointMode
 447 static inline void decode_rgba_basescale(const int *v, SkColor *endpoints, bool isRGB) {
 448
 449     int v4 = 0xFF;
 450     int v5 = 0xFF;
 451     if (!isRGB) {
 452         v4 = v[4];
 453         v5 = v[5];
 454     }
 455
 456     endpoints[0] = SkColorSetARGB(v4,
 457                                   (v[0]*v[3]) >> 8,
 458                                   (v[1]*v[3]) >> 8,
 459                                   (v[2]*v[3]) >> 8);
 460     endpoints[1] = SkColorSetARGB(v5, v[0], v[1], v[2]);
 461 }
 462
 463 // Helper function that decodes two colors from eight values. If isRGB is true,
 464 // then the pointer 'v' contains six values and the last two are considered to be
 465 // 0xFF. If isRGB is false, then all eight values come from the pointer 'v'. This
 466 // corresponds to the decode procedure for the following endpoint modes:
 467 //   kLDR_RGB_BaseOffset_ColorEndpointMode
 468 //   kLDR_RGBA_BaseOffset_ColorEndpointMode
 469 //
 470 // If isRGB is true, then treat this as if v6 and v7 are meant to encode full alpha values.
 471 static inline void decode_rgba_baseoffset(const int *v, SkColor *endpoints, bool isRGB) {
 472     int v0 = v[0];
 473     int v1 = v[1];
 474     int v2 = v[2];
 475     int v3 = v[3];
 476     int v4 = v[4];
 477     int v5 = v[5];
 478     int v6 = isRGB ? 0xFF : v[6];
 479     // The 0 is here because this is an offset, not a direct value
 480     int v7 = isRGB ? 0 : v[7];
 481
 482     bit_transfer_signed(&v1, &v0);
 483     bit_transfer_signed(&v3, &v2);
 484     bit_transfer_signed(&v5, &v4);
 485     if (!isRGB) {
 486         bit_transfer_signed(&v7, &v6);
 487     }
 488
 489     int c[2][4];
 490     if ((v1 + v3 + v5) >= 0) {
 491         c[0][0] = v6;
 492         c[0][1] = v0;
 493         c[0][2] = v2;
 494         c[0][3] = v4;
 495
 496         c[1][0] = v6 + v7;
 497         c[1][1] = v0 + v1;
 498         c[1][2] = v2 + v3;
 499         c[1][3] = v4 + v5;
 500     } else {
 501         c[0][0] = v6 + v7;
 502         c[0][1] = (v0 + v1 + v4 + v5) >> 1;
 503         c[0][2] = (v2 + v3 + v4 + v5) >> 1;
 504         c[0][3] = v4 + v5;
 505
 506         c[1][0] = v6;
 507         c[1][1] = (v0 + v4) >> 1;
 508         c[1][2] = (v2 + v4) >> 1;
 509         c[1][3] = v4;
 510     }
 511
 512     endpoints[0] = SkColorSetARGB(clamp_byte(c[0][0]),
 513                                   clamp_byte(c[0][1]),
 514                                   clamp_byte(c[0][2]),
 515                                   clamp_byte(c[0][3]));
 516
 517     endpoints[1] = SkColorSetARGB(clamp_byte(c[1][0]),
 518                                   clamp_byte(c[1][1]),
 519                                   clamp_byte(c[1][2]),
 520                                   clamp_byte(c[1][3]));
 521 }
 522
 523
 524 // A helper class used to decode bit values from standard integer values.
 525 // We can't use this class with ASTCBlock because then it would need to
 526 // handle multi-value ranges, and it's non-trivial to lookup a range of bits
 527 // that splits across two different ints.
 528 template <typename T>
 529 class SkTBits {
 530 public:
 531     SkTBits(const T val) : fVal(val) { }
 532
 533     // Returns the bit at the given position
 534     T operator [](const int idx) const {
 535         return (fVal >> idx) & 1;
 536     }
 537
 538     // Returns the bits in the given range, inclusive
 539     T operator ()(const int end, const int start) const {
 540         SkASSERT(end >= start);
 541         return (fVal >> start) & ((1ULL << ((end - start) + 1)) - 1);
 542     }
 543
 544 private:
 545     const T fVal;
 546 };
 547
 548 // This algorithm matches the trit block decoding in the spec (Table C.2.14)
 549 static void decode_trit_block(int* dst, int nBits, const uint64_t &block) {
 550
 551     SkTBits<uint64_t> blockBits(block);
 552
 553     // According to the spec, a trit block, which contains five values,
 554     // has the following layout:
 555     //
 556     // 27  26  25  24  23  22  21  20  19  18  17  16
 557     //  -----------------------------------------------
 558     // |T7 |     m4        |T6  T5 |     m3        |T4 |
 559     //  -----------------------------------------------
 560     //
 561     // 15  14  13  12  11  10  9   8   7   6   5   4   3   2   1   0
 562     //  --------------------------------------------------------------
 563     // |    m2        |T3  T2 |      m1       |T1  T0 |      m0       |
 564     //  --------------------------------------------------------------
 565     //
 566     // Where the m's are variable width depending on the number of bits used
 567     // to encode the values (anywhere from 0 to 6). Since 3^5 = 243, the extra
 568     // byte labeled T (whose bits are interleaved where 0 is the LSB and 7 is
 569     // the MSB), contains five trit values. To decode the trit values, the spec
 570     // says that we need to follow the following algorithm:
 571     //
 572     // if T[4:2] = 111
 573     //     C = { T[7:5], T[1:0] }; t4 = t3 = 2
 574     // else
 575     //     C = T[4:0]
 576     //
 577     // if T[6:5] = 11
 578     //     t4 = 2; t3 = T[7]
 579     // else
 580     //     t4 = T[7]; t3 = T[6:5]
 581     //
 582     // if C[1:0] = 11
 583     //     t2 = 2; t1 = C[4]; t0 = { C[3], C[2]&~C[3] }
 584     // else if C[3:2] = 11
 585     //     t2 = 2; t1 = 2; t0 = C[1:0]
 586     // else
 587     //     t2 = C[4]; t1 = C[3:2]; t0 = { C[1], C[0]&~C[1] }
 588     //
 589     // The following C++ code is meant to mirror this layout and algorithm as
 590     // closely as possible.
 591
 592     int m[5];
 593     if (0 == nBits) {
 594         memset(m, 0, sizeof(m));
 595     } else {
 596         SkASSERT(nBits < 8);
 597         m[0] = static_cast<int>(blockBits(nBits - 1, 0));
 598         m[1] = static_cast<int>(blockBits(2*nBits - 1 + 2, nBits + 2));
 599         m[2] = static_cast<int>(blockBits(3*nBits - 1 + 4, 2*nBits + 4));
 600         m[3] = static_cast<int>(blockBits(4*nBits - 1 + 5, 3*nBits + 5));
 601         m[4] = static_cast<int>(blockBits(5*nBits - 1 + 7, 4*nBits + 7));
 602     }
 603
 604     int T =
 605         static_cast<int>(blockBits(nBits + 1, nBits)) |
 606         (static_cast<int>(blockBits(2*nBits + 2 + 1, 2*nBits + 2)) << 2) |
 607         (static_cast<int>(blockBits[3*nBits + 4] << 4)) |
 608         (static_cast<int>(blockBits(4*nBits + 5 + 1, 4*nBits + 5)) << 5) |
 609         (static_cast<int>(blockBits[5*nBits + 7] << 7));
 610
 611     int t[5];
 612
 613     int C;
 614     SkTBits<int> Tbits(T);
 615     if (0x7 == Tbits(4, 2)) {
 616         C = (Tbits(7, 5) << 2) | Tbits(1, 0);
 617         t[3] = t[4] = 2;
 618     } else {
 619         C = Tbits(4, 0);
 620         if (Tbits(6, 5) == 0x3) {
 621             t[4] = 2; t[3] = Tbits[7];
 622         } else {
 623             t[4] = Tbits[7]; t[3] = Tbits(6, 5);
 624         }
 625     }
 626
 627     SkTBits<int> Cbits(C);
 628     if (Cbits(1, 0) == 0x3) {
 629         t[2] = 2;
 630         t[1] = Cbits[4];
 631         t[0] = (Cbits[3] << 1) | (Cbits[2] & (0x1 & ~(Cbits[3])));
 632     } else if (Cbits(3, 2) == 0x3) {
 633         t[2] = 2;
 634         t[1] = 2;
 635         t[0] = Cbits(1, 0);
 636     } else {
 637         t[2] = Cbits[4];
 638         t[1] = Cbits(3, 2);
 639         t[0] = (Cbits[1] << 1) | (Cbits[0] & (0x1 & ~(Cbits[1])));
 640     }
 641
 642 #ifdef SK_DEBUG
 643     // Make sure all of the decoded values have a trit less than three
 644     // and a bit value within the range of the allocated bits.
 645     for (int i = 0; i < 5; ++i) {
 646         SkASSERT(t[i] < 3);
 647         SkASSERT(m[i] < (1 << nBits));
 648     }
 649 #endif
 650
 651     for (int i = 0; i < 5; ++i) {
 652         *dst = (t[i] << nBits) + m[i];
 653         ++dst;
 654     }
 655 }
 656
 657 // This algorithm matches the quint block decoding in the spec (Table C.2.15)
 658 static void decode_quint_block(int* dst, int nBits, const uint64_t &block) {
 659     SkTBits<uint64_t> blockBits(block);
 660
 661     // According to the spec, a quint block, which contains three values,
 662     // has the following layout:
 663     //
 664     //
 665     // 18  17  16  15  14  13  12  11  10  9   8   7   6   5   4   3   2   1   0
 666     //  --------------------------------------------------------------------------
 667     // |Q6  Q5 |     m2       |Q4  Q3 |     m1        |Q2  Q1  Q0 |      m0       |
 668     //  --------------------------------------------------------------------------
 669     //
 670     // Where the m's are variable width depending on the number of bits used
 671     // to encode the values (anywhere from 0 to 4). Since 5^3 = 125, the extra
 672     // 7-bit value labeled Q (whose bits are interleaved where 0 is the LSB and 6 is
 673     // the MSB), contains three quint values. To decode the quint values, the spec
 674     // says that we need to follow the following algorithm:
 675     //
 676     // if Q[2:1] = 11 and Q[6:5] = 00
 677     //     q2 = { Q[0], Q[4]&~Q[0], Q[3]&~Q[0] }; q1 = q0 = 4
 678     // else
 679     //     if Q[2:1] = 11
 680     //         q2 = 4; C = { Q[4:3], ~Q[6:5], Q[0] }
 681     //     else
 682     //         q2 = T[6:5]; C = Q[4:0]
 683     //
 684     //     if C[2:0] = 101
 685     //         q1 = 4; q0 = C[4:3]
 686     //     else
 687     //         q1 = C[4:3]; q0 = C[2:0]
 688     //
 689     // The following C++ code is meant to mirror this layout and algorithm as
 690     // closely as possible.
 691
 692     int m[3];
 693     if (0 == nBits) {
 694         memset(m, 0, sizeof(m));
 695     } else {
 696         SkASSERT(nBits < 8);
 697         m[0] = static_cast<int>(blockBits(nBits - 1, 0));
 698         m[1] = static_cast<int>(blockBits(2*nBits - 1 + 3, nBits + 3));
 699         m[2] = static_cast<int>(blockBits(3*nBits - 1 + 5, 2*nBits + 5));
 700     }
 701
 702     int Q =
 703         static_cast<int>(blockBits(nBits + 2, nBits)) |
 704         (static_cast<int>(blockBits(2*nBits + 3 + 1, 2*nBits + 3)) << 3) |
 705         (static_cast<int>(blockBits(3*nBits + 5 + 1, 3*nBits + 5)) << 5);
 706
 707     int q[3];
 708     SkTBits<int> Qbits(Q); // quantum?
 709
 710     if (Qbits(2, 1) == 0x3 && Qbits(6, 5) == 0) {
 711         const int notBitZero = (0x1 & ~(Qbits[0]));
 712         q[2] = (Qbits[0] << 2) | ((Qbits[4] & notBitZero) << 1) | (Qbits[3] & notBitZero);
 713         q[1] = 4;
 714         q[0] = 4;
 715     } else {
 716         int C;
 717         if (Qbits(2, 1) == 0x3) {
 718             q[2] = 4;
 719             C = (Qbits(4, 3) << 3) | ((0x3 & ~(Qbits(6, 5))) << 1) | Qbits[0];
 720         } else {
 721             q[2] = Qbits(6, 5);
 722             C = Qbits(4, 0);
 723         }
 724
 725         SkTBits<int> Cbits(C);
 726         if (Cbits(2, 0) == 0x5) {
 727             q[1] = 4;
 728             q[0] = Cbits(4, 3);
 729         } else {
 730             q[1] = Cbits(4, 3);
 731             q[0] = Cbits(2, 0);
 732         }
 733     }
 734
 735 #ifdef SK_DEBUG
 736     for (int i = 0; i < 3; ++i) {
 737         SkASSERT(q[i] < 5);
 738         SkASSERT(m[i] < (1 << nBits));
 739     }
 740 #endif
 741
 742     for (int i = 0; i < 3; ++i) {
 743         *dst = (q[i] << nBits) + m[i];
 744         ++dst;
 745     }
 746 }
 747
 748 // Function that decodes a sequence of integers stored as an ISE (Integer
 749 // Sequence Encoding) bit stream. The full details of this function are outlined
 750 // in section C.2.12 of the ASTC spec. A brief overview is as follows:
 751 //
 752 // - Each integer in the sequence is bounded by a specific range r.
 753 // - The range of each value determines the way the bit stream is interpreted,
 754 // - If the range is a power of two, then the sequence is a sequence of bits
 755 // - If the range is of the form 3*2^n, then the sequence is stored as a
 756 //   sequence of blocks, each block contains 5 trits and 5 bit sequences, which
 757 //   decodes into 5 values.
 758 // - Similarly, if the range is of the form 5*2^n, then the sequence is stored as a
 759 //   sequence of blocks, each block contains 3 quints and 3 bit sequences, which
 760 //   decodes into 3 values.
 761 static bool decode_integer_sequence(
 762     int* dst,                 // The array holding the destination bits
 763     int dstSize,              // The maximum size of the array
 764     int nVals,                // The number of values that we'd like to decode
 765     const ASTCBlock &block,   // The block that we're decoding from
 766     int startBit,             // The bit from which we're going to do the reading
 767     int endBit,               // The bit at which we stop reading (not inclusive)
 768     bool bReadForward,        // If true, then read LSB -> MSB, else read MSB -> LSB
 769     int nBits,                // The number of bits representing this encoding
 770     int nTrits,               // The number of trits representing this encoding
 771     int nQuints               // The number of quints representing this encoding
 772 ) {
 773     // If we want more values than we have, then fail.
 774     if (nVals > dstSize) {
 775         return false;
 776     }
 777
 778     ASTCBlock src = block;
 779
 780     if (!bReadForward) {
 781         src.reverse();
 782         startBit = 128 - startBit;
 783         endBit = 128 - endBit;
 784     }
 785
 786     while (nVals > 0) {
 787
 788         if (nTrits > 0) {
 789             SkASSERT(0 == nQuints);
 790
 791             int endBlockBit = startBit + 8 + 5*nBits;
 792             if (endBlockBit > endBit) {
 793                 endBlockBit = endBit;
 794             }
 795
 796             // Trit blocks are three values large.
 797             int trits[5];
 798             decode_trit_block(trits, nBits, read_astc_bits(src, startBit, endBlockBit));
 799             memcpy(dst, trits, SkMin32(nVals, 5)*sizeof(int));
 800
 801             dst += 5;
 802             nVals -= 5;
 803             startBit = endBlockBit;
 804
 805         } else if (nQuints > 0) {
 806             SkASSERT(0 == nTrits);
 807
 808             int endBlockBit = startBit + 7 + 3*nBits;
 809             if (endBlockBit > endBit) {
 810                 endBlockBit = endBit;
 811             }
 812
 813             // Quint blocks are three values large
 814             int quints[3];
 815             decode_quint_block(quints, nBits, read_astc_bits(src, startBit, endBlockBit));
 816             memcpy(dst, quints, SkMin32(nVals, 3)*sizeof(int));
 817
 818             dst += 3;
 819             nVals -= 3;
 820             startBit = endBlockBit;
 821
 822         } else {
 823             // Just read the bits, but don't read more than we have...
 824             int endValBit = startBit + nBits;
 825             if (endValBit > endBit) {
 826                 endValBit = endBit;
 827             }
 828
 829             SkASSERT(endValBit - startBit < 31);
 830             *dst = static_cast<int>(read_astc_bits(src, startBit, endValBit));
 831             ++dst;
 832             --nVals;
 833             startBit = endValBit;
 834         }
 835     }
 836
 837     return true;
 838 }
 839
 840 // Helper function that unquantizes some (seemingly random) generated
 841 // numbers... meant to match the ASTC hardware. This function is used
 842 // to unquantize both colors (Table C.2.16) and weights (Table C.2.26)
 843 static inline int unquantize_value(unsigned mask, int A, int B, int C, int D) {
 844     int T = D * C + B;
 845     T = T ^ A;
 846     T = (A & mask) | (T >> 2);
 847     SkASSERT(T < 256);
 848     return T;
 849 }
 850
 851 // Helper function to replicate the bits in x that represents an oldPrec
 852 // precision integer into a prec precision integer. For example:
 853 //   255 == replicate_bits(7, 3, 8);
 854 static inline int replicate_bits(int x, int oldPrec, int prec) {
 855     while (oldPrec < prec) {
 856         const int toShift = SkMin32(prec-oldPrec, oldPrec);
 857         x = (x << toShift) | (x >> (oldPrec - toShift));
 858         oldPrec += toShift;
 859     }
 860
 861     // Make sure that no bits are set outside the desired precision.
 862     SkASSERT((-(1 << prec) & x) == 0);
 863     return x;
 864 }
 865
 866 // Returns the unquantized value of a color that's represented only as
 867 // a set of bits.
 868 static inline int unquantize_bits_color(int val, int nBits) {
 869     return replicate_bits(val, nBits, 8);
 870 }
 871
 872 // Returns the unquantized value of a color that's represented as a
 873 // trit followed by nBits bits. This algorithm follows the sequence
 874 // defined in section C.2.13 of the ASTC spec.
 875 static inline int unquantize_trit_color(int val, int nBits) {
 876     SkASSERT(nBits > 0);
 877     SkASSERT(nBits < 7);
 878
 879     const int D = (val >> nBits) & 0x3;
 880     SkASSERT(D < 3);
 881
 882     const int A = -(val & 0x1) & 0x1FF;
 883
 884     static const int Cvals[6] = { 204, 93, 44, 22, 11, 5 };
 885     const int C = Cvals[nBits - 1];
 886
 887     int B = 0;
 888     const SkTBits<int> valBits(val);
 889     switch (nBits) {
 890         case 1:
 891             B = 0;
 892             break;
 893
 894         case 2: {
 895             const int b = valBits[1];
 896             B = (b << 1) | (b << 2) | (b << 4) | (b << 8);
 897         }
 898         break;
 899
 900         case 3: {
 901             const int cb = valBits(2, 1);
 902             B = cb | (cb << 2) | (cb << 7);
 903         }
 904         break;
 905
 906         case 4: {
 907             const int dcb = valBits(3, 1);
 908             B = dcb | (dcb << 6);
 909         }
 910         break;
 911
 912         case 5: {
 913             const int edcb = valBits(4, 1);
 914             B = (edcb << 5) | (edcb >> 2);
 915         }
 916         break;
 917
 918         case 6: {
 919             const int fedcb = valBits(5, 1);
 920             B = (fedcb << 4) | (fedcb >> 4);
 921         }
 922         break;
 923     }
 924
 925     return unquantize_value(0x80, A, B, C, D);
 926 }
 927
 928 // Returns the unquantized value of a color that's represented as a
 929 // quint followed by nBits bits. This algorithm follows the sequence
 930 // defined in section C.2.13 of the ASTC spec.
 931 static inline int unquantize_quint_color(int val, int nBits) {
 932     const int D = (val >> nBits) & 0x7;
 933     SkASSERT(D < 5);
 934
 935     const int A = -(val & 0x1) & 0x1FF;
 936
 937     static const int Cvals[5] = { 113, 54, 26, 13, 6 };
 938     SkASSERT(nBits > 0);
 939     SkASSERT(nBits < 6);
 940
 941     const int C = Cvals[nBits - 1];
 942
 943     int B = 0;
 944     const SkTBits<int> valBits(val);
 945     switch (nBits) {
 946         case 1:
 947             B = 0;
 948             break;
 949
 950         case 2: {
 951             const int b = valBits[1];
 952             B = (b << 2) | (b << 3) | (b << 8);
 953         }
 954         break;
 955
 956         case 3: {
 957             const int cb = valBits(2, 1);
 958             B = (cb >> 1) | (cb << 1) | (cb << 7);
 959         }
 960         break;
 961
 962         case 4: {
 963             const int dcb = valBits(3, 1);
 964             B = (dcb >> 1) | (dcb << 6);
 965         }
 966         break;
 967
 968         case 5: {
 969             const int edcb = valBits(4, 1);
 970             B = (edcb << 5) | (edcb >> 3);
 971         }
 972         break;
 973     }
 974
 975     return unquantize_value(0x80, A, B, C, D);
 976 }
 977
 978 // This algorithm takes a list of integers, stored in vals, and unquantizes them
 979 // in place. This follows the algorithm laid out in section C.2.13 of the ASTC spec.
 980 static void unquantize_colors(int *vals, int nVals, int nBits, int nTrits, int nQuints) {
 981     for (int i = 0; i < nVals; ++i) {
 982         if (nTrits > 0) {
 983             SkASSERT(nQuints == 0);
 984             vals[i] = unquantize_trit_color(vals[i], nBits);
 985         } else if (nQuints > 0) {
 986             SkASSERT(nTrits == 0);
 987             vals[i] = unquantize_quint_color(vals[i], nBits);
 988         } else {
 989             SkASSERT(nQuints == 0 && nTrits == 0);
 990             vals[i] = unquantize_bits_color(vals[i], nBits);
 991         }
 992     }
 993 }
 994
 995 // Returns an interpolated value between c0 and c1 based on the weight. This
 996 // follows the algorithm laid out in section C.2.19 of the ASTC spec.
 997 static int interpolate_channel(int c0, int c1, int weight) {
 998     SkASSERT(0 <= c0 && c0 < 256);
 999     SkASSERT(0 <= c1 && c1 < 256);
1000
1001     c0 = (c0 << 8) | c0;
1002     c1 = (c1 << 8) | c1;
1003
1004     const int result = ((c0*(64 - weight) + c1*weight + 32) / 64) >> 8;
1005
1006     if (result > 255) {
1007         return 255;
1008     }
1009
1010     SkASSERT(result >= 0);
1011     return result;
1012 }
1013
1014 // Returns an interpolated color between the two endpoints based on the weight.
1015 static SkColor interpolate_endpoints(const SkColor endpoints[2], int weight) {
1016     return SkColorSetARGB(
1017         interpolate_channel(SkColorGetA(endpoints[0]), SkColorGetA(endpoints[1]), weight),
1018         interpolate_channel(SkColorGetR(endpoints[0]), SkColorGetR(endpoints[1]), weight),
1019         interpolate_channel(SkColorGetG(endpoints[0]), SkColorGetG(endpoints[1]), weight),
1020         interpolate_channel(SkColorGetB(endpoints[0]), SkColorGetB(endpoints[1]), weight));
1021 }
1022
1023 // Returns an interpolated color between the two endpoints based on the weight.
1024 // It uses separate weights for the channel depending on the value of the 'plane'
1025 // variable. By default, all channels will use weight 0, and the value of plane
1026 // means that weight1 will be used for:
1027 // 0: red
1028 // 1: green
1029 // 2: blue
1030 // 3: alpha
1031 static SkColor interpolate_dual_endpoints(
1032     const SkColor endpoints[2], int weight0, int weight1, int plane) {
1033     int a = interpolate_channel(SkColorGetA(endpoints[0]), SkColorGetA(endpoints[1]), weight0);
1034     int r = interpolate_channel(SkColorGetR(endpoints[0]), SkColorGetR(endpoints[1]), weight0);
1035     int g = interpolate_channel(SkColorGetG(endpoints[0]), SkColorGetG(endpoints[1]), weight0);
1036     int b = interpolate_channel(SkColorGetB(endpoints[0]), SkColorGetB(endpoints[1]), weight0);
1037
1038     switch (plane) {
1039
1040         case 0:
1041             r = interpolate_channel(
1042                 SkColorGetR(endpoints[0]), SkColorGetR(endpoints[1]), weight1);
1043             break;
1044
1045         case 1:
1046             g = interpolate_channel(
1047                 SkColorGetG(endpoints[0]), SkColorGetG(endpoints[1]), weight1);
1048             break;
1049
1050         case 2:
1051             b = interpolate_channel(
1052                 SkColorGetB(endpoints[0]), SkColorGetB(endpoints[1]), weight1);
1053             break;
1054
1055         case 3:
1056             a = interpolate_channel(
1057                 SkColorGetA(endpoints[0]), SkColorGetA(endpoints[1]), weight1);
1058             break;
1059
1060         default:
1061             SkDEBUGFAIL("Plane should be 0-3");
1062             break;
1063     }
1064
1065     return SkColorSetARGB(a, r, g, b);
1066 }
1067
1068 // A struct of decoded values that we use to carry around information
1069 // about the block. dimX and dimY are the dimension in texels of the block,
1070 // for which there is only a limited subset of valid values:
1071 //
1072 // 4x4, 5x4, 5x5, 6x5, 6x6, 8x5, 8x6, 8x8, 10x5, 10x6, 10x8, 10x10, 12x10, 12x12
1073
1074 struct ASTCDecompressionData {
    // Stores the block dimensions in texels. Only fDimX and fDimY are set
    // explicitly here; the remaining members are presumably filled in by the
    // block decoding code elsewhere in the file -- TODO confirm.
    ASTCDecompressionData(int dimX, int dimY) : fDimX(dimX), fDimY(dimY) { }
1076     const int   fDimX;      // the X dimension of the decompressed block
1077     const int   fDimY;      // the Y dimension of the decompressed block
1078     ASTCBlock   fBlock;     // the block data
1079     int         fBlockMode; // the block header that contains the block mode.
1080
1081     bool fDualPlaneEnabled; // is this block compressing dual weight planes?
1082     int  fDualPlane;        // the independent plane in dual plane mode.
1083
1084     bool fVoidExtent;       // is this block a single color?
1085     bool fError;            // does this block have an error encoding?
1086
1087     int  fWeightDimX;       // the x dimension of the weight grid
1088     int  fWeightDimY;       // the y dimension of the weight grid
1089
1090     int  fWeightBits;       // the number of bits used for each weight value
1091     int  fWeightTrits;      // the number of trits used for each weight value
1092     int  fWeightQuints;     // the number of quints used for each weight value
1093
1094     int  fPartCount;        // the number of partitions in this block
1095     int  fPartIndex;        // the partition index: only relevant if fPartCount > 0
1096
1097     // CEM values can be anything in the range 0-15, and each corresponds to a different
1098     // mode that represents the color data. We only support LDR modes.
    // The modes come in groups of four; numColorValues() counts
    // ((mode >> 2) + 1) * 2 color values per partition, i.e. 2, 4, 6 or 8.
    // colorEndpoints() only handles the kLDR_* entries: every kHDR_* entry
    // lands in its default case, which reports the mode as unsupported.
    enum ColorEndpointMode {
        // Modes 0-3: two color values per partition.
        kLDR_Luminance_Direct_ColorEndpointMode          = 0,
        kLDR_Luminance_BaseOffset_ColorEndpointMode      = 1,
        kHDR_Luminance_LargeRange_ColorEndpointMode      = 2,
        kHDR_Luminance_SmallRange_ColorEndpointMode      = 3,
        // Modes 4-7: four color values per partition.
        kLDR_LuminanceAlpha_Direct_ColorEndpointMode     = 4,
        kLDR_LuminanceAlpha_BaseOffset_ColorEndpointMode = 5,
        kLDR_RGB_BaseScale_ColorEndpointMode             = 6,
        kHDR_RGB_BaseScale_ColorEndpointMode             = 7,
        // Modes 8-11: six color values per partition.
        kLDR_RGB_Direct_ColorEndpointMode                = 8,
        kLDR_RGB_BaseOffset_ColorEndpointMode            = 9,
        kLDR_RGB_BaseScaleWithAlpha_ColorEndpointMode    = 10,
        kHDR_RGB_ColorEndpointMode                       = 11,
        // Modes 12-15: eight color values per partition.
        kLDR_RGBA_Direct_ColorEndpointMode               = 12,
        kLDR_RGBA_BaseOffset_ColorEndpointMode           = 13,
        kHDR_RGB_LDRAlpha_ColorEndpointMode              = 14,
        kHDR_RGB_HDRAlpha_ColorEndpointMode              = 15
    };
1117     static const int kMaxColorEndpointModes = 16;
1118
1119     // the color endpoint modes for this block.
1120     static const int kMaxPartitions = 4;
1121     ColorEndpointMode fCEM[kMaxPartitions];
1122
1123     int  fColorStartBit;    // The bit position of the first bit of the color data
1124     int  fColorEndBit;      // The bit position of the last *possible* bit of the color data
1125
1126     // Returns the number of partitions for this block.
1127     int numPartitions() const {
1128         return fPartCount;
1129     }
1130
1131     // Returns the total number of weight values that are stored in this block
1132     int numWeights() const {
1133         return fWeightDimX * fWeightDimY * (fDualPlaneEnabled ? 2 : 1);
1134     }
1135
1136 #ifdef SK_DEBUG
1137     // Returns the maximum value that any weight can take. We really only use
1138     // this function for debugging.
1139     int maxWeightValue() const {
1140         int maxVal = (1 << fWeightBits);
1141         if (fWeightTrits > 0) {
1142             SkASSERT(0 == fWeightQuints);
1143             maxVal *= 3;
1144         } else if (fWeightQuints > 0) {
1145             SkASSERT(0 == fWeightTrits);
1146             maxVal *= 5;
1147         }
1148         return maxVal - 1;
1149     }
1150 #endif
1151
1152     // The number of bits needed to represent the texel weight data. This
1153     // comes from the 'data size determination' section of the ASTC spec (C.2.22)
1154     int numWeightBits() const {
1155         const int nWeights = this->numWeights();
1156         return
1157             ((nWeights*8*fWeightTrits + 4) / 5) +
1158             ((nWeights*7*fWeightQuints + 2) / 3) +
1159             (nWeights*fWeightBits);
1160     }
1161
1162     // Returns the number of color values stored in this block. The number of
1163     // values stored is directly a function of the color endpoint modes.
1164     int numColorValues() const {
1165         int numValues = 0;
1166         for (int i = 0; i < this->numPartitions(); ++i) {
1167             int cemInt = static_cast<int>(fCEM[i]);
1168             numValues += ((cemInt >> 2) + 1) * 2;
1169         }
1170
1171         return numValues;
1172     }
1173
1174     // Figures out the number of bits available for color values, and fills
1175     // in the maximum encoding that will fit the number of color values that
1176     // we need. Returns false on error. (See section C.2.22 of the spec)
1177     bool getColorValueEncoding(int *nBits, int *nTrits, int *nQuints) const {
1178         if (NULL == nBits || NULL == nTrits || NULL == nQuints) {
1179             return false;
1180         }
1181
1182         const int nColorVals = this->numColorValues();
1183         if (nColorVals <= 0) {
1184             return false;
1185         }
1186
1187         const int colorBits = fColorEndBit - fColorStartBit;
1188         SkASSERT(colorBits > 0);
1189
1190         // This is the minimum amount of accuracy required by the spec.
1191         if (colorBits < ((13 * nColorVals + 4) / 5)) {
1192             return false;
1193         }
1194
1195         // Values can be represented as at most 8-bit values.
1196         // !SPEED! place this in a lookup table based on colorBits and nColorVals
1197         for (int i = 255; i > 0; --i) {
1198             int range = i + 1;
1199             int bits = 0, trits = 0, quints = 0;
1200             bool valid = false;
1201             if (SkIsPow2(range)) {
1202                 bits = bits_for_range(range);
1203                 valid = true;
1204             } else if ((range % 3) == 0 && SkIsPow2(range/3)) {
1205                 trits = 1;
1206                 bits = bits_for_range(range/3);
1207                 valid = true;
1208             } else if ((range % 5) == 0 && SkIsPow2(range/5)) {
1209                 quints = 1;
1210                 bits = bits_for_range(range/5);
1211                 valid = true;
1212             }
1213
1214             if (valid) {
1215                 const int actualColorBits =
1216                     ((nColorVals*8*trits + 4) / 5) +
1217                     ((nColorVals*7*quints + 2) / 3) +
1218                     (nColorVals*bits);
1219                 if (actualColorBits <= colorBits) {
1220                     *nTrits = trits;
1221                     *nQuints = quints;
1222                     *nBits = bits;
1223                     return true;
1224                 }
1225             }
1226         }
1227
1228         return false;
1229     }
1230
1231     // Converts the sequence of color values into endpoints. The algorithm here
1232     // corresponds to the values determined by section C.2.14 of the ASTC spec
1233     void colorEndpoints(SkColor endpoints[4][2], const int* colorValues) const {
1234         for (int i = 0; i < this->numPartitions(); ++i) {
1235             switch (fCEM[i]) {
1236                 case kLDR_Luminance_Direct_ColorEndpointMode: {
1237                     const int* v = colorValues;
1238                     endpoints[i][0] = SkColorSetARGB(0xFF, v[0], v[0], v[0]);
1239                     endpoints[i][1] = SkColorSetARGB(0xFF, v[1], v[1], v[1]);
1240
1241                     colorValues += 2;
1242                 }
1243                 break;
1244
1245                 case kLDR_Luminance_BaseOffset_ColorEndpointMode: {
1246                     const int* v = colorValues;
1247                     const int L0 = (v[0] >> 2) | (v[1] & 0xC0);
1248                     const int L1 = clamp_byte(L0 + (v[1] & 0x3F));
1249
1250                     endpoints[i][0] = SkColorSetARGB(0xFF, L0, L0, L0);
1251                     endpoints[i][1] = SkColorSetARGB(0xFF, L1, L1, L1);
1252
1253                     colorValues += 2;
1254                 }
1255                 break;
1256
1257                 case kLDR_LuminanceAlpha_Direct_ColorEndpointMode: {
1258                     const int* v = colorValues;
1259
1260                     endpoints[i][0] = SkColorSetARGB(v[2], v[0], v[0], v[0]);
1261                     endpoints[i][1] = SkColorSetARGB(v[3], v[1], v[1], v[1]);
1262
1263                     colorValues += 4;
1264                 }
1265                 break;
1266
1267                 case kLDR_LuminanceAlpha_BaseOffset_ColorEndpointMode: {
1268                     int v0 = colorValues[0];
1269                     int v1 = colorValues[1];
1270                     int v2 = colorValues[2];
1271                     int v3 = colorValues[3];
1272
1273                     bit_transfer_signed(&v1, &v0);
1274                     bit_transfer_signed(&v3, &v2);
1275
1276                     endpoints[i][0] = SkColorSetARGB(v2, v0, v0, v0);
1277                     endpoints[i][1] = SkColorSetARGB(
1278                         clamp_byte(v3+v2),
1279                         clamp_byte(v1+v0),
1280                         clamp_byte(v1+v0),
1281                         clamp_byte(v1+v0));
1282
1283                     colorValues += 4;
1284                 }
1285                 break;
1286
1287                 case kLDR_RGB_BaseScale_ColorEndpointMode: {
1288                     decode_rgba_basescale(colorValues, endpoints[i], true);
1289                     colorValues += 4;
1290                 }
1291                 break;
1292
1293                 case kLDR_RGB_Direct_ColorEndpointMode: {
1294                     decode_rgba_direct(colorValues, endpoints[i], true);
1295                     colorValues += 6;
1296                 }
1297                 break;
1298
1299                 case kLDR_RGB_BaseOffset_ColorEndpointMode: {
1300                     decode_rgba_baseoffset(colorValues, endpoints[i], true);
1301                     colorValues += 6;
1302                 }
1303                 break;
1304
1305                 case kLDR_RGB_BaseScaleWithAlpha_ColorEndpointMode: {
1306                     decode_rgba_basescale(colorValues, endpoints[i], false);
1307                     colorValues += 6;
1308                 }
1309                 break;
1310
1311                 case kLDR_RGBA_Direct_ColorEndpointMode: {
1312                     decode_rgba_direct(colorValues, endpoints[i], false);
1313                     colorValues += 8;
1314                 }
1315                 break;
1316
1317                 case kLDR_RGBA_BaseOffset_ColorEndpointMode: {
1318                     decode_rgba_baseoffset(colorValues, endpoints[i], false);
1319                     colorValues += 8;
1320                 }
1321                 break;
1322
1323                 default:
1324                     SkDEBUGFAIL("HDR mode unsupported! This should be caught sooner.");
1325                     break;
1326             }
1327         }
1328     }
1329
1330     // Follows the procedure from section C.2.17 of the ASTC specification
1331     int unquantizeWeight(int x) const {
1332         SkASSERT(x <= this->maxWeightValue());
1333
1334         const int D = (x >> fWeightBits) & 0x7;
1335         const int A = -(x & 0x1) & 0x7F;
1336
1337         SkTBits<int> xbits(x);
1338
1339         int T = 0;
1340         if (fWeightTrits > 0) {
1341             SkASSERT(0 == fWeightQuints);
1342             switch (fWeightBits) {
1343                 case 0: {
1344                     // x is a single trit
1345                     SkASSERT(x < 3);
1346
1347                     static const int kUnquantizationTable[3] = { 0, 32, 63 };
1348                     T = kUnquantizationTable[x];
1349                 }
1350                 break;
1351
1352                 case 1: {
1353                     const int B = 0;
1354                     const int C = 50;
1355                     T = unquantize_value(0x20, A, B, C, D);
1356                 }
1357                 break;
1358
1359                 case 2: {
1360                     const int b = xbits[1];
1361                     const int B = b | (b << 2) | (b << 6);
1362                     const int C = 23;
1363                     T = unquantize_value(0x20, A, B, C, D);
1364                 }
1365                 break;
1366
1367                 case 3: {
1368                     const int cb = xbits(2, 1);
1369                     const int B = cb | (cb << 5);
1370                     const int C = 11;
1371                     T = unquantize_value(0x20, A, B, C, D);
1372                 }
1373                 break;
1374
1375                 default:
1376                     SkDEBUGFAIL("Too many bits for trit encoding");
1377                     break;
1378             }
1379
1380         } else if (fWeightQuints > 0) {
1381             SkASSERT(0 == fWeightTrits);
1382             switch (fWeightBits) {
1383                 case 0: {
1384                     // x is a single quint
1385                     SkASSERT(x < 5);
1386
1387                     static const int kUnquantizationTable[5] = { 0, 16, 32, 47, 63 };
1388                     T = kUnquantizationTable[x];
1389                 }
1390                 break;
1391
1392                 case 1: {
1393                     const int B = 0;
1394                     const int C = 28;
1395                     T = unquantize_value(0x20, A, B, C, D);
1396                 }
1397                 break;
1398
1399                 case 2: {
1400                     const int b = xbits[1];
1401                     const int B = (b << 1) | (b << 6);
1402                     const int C = 13;
1403                     T = unquantize_value(0x20, A, B, C, D);
1404                 }
1405                 break;
1406
1407                 default:
1408                     SkDEBUGFAIL("Too many bits for quint encoding");
1409                     break;
1410             }
1411         } else {
1412             SkASSERT(0 == fWeightTrits);
1413             SkASSERT(0 == fWeightQuints);
1414
1415             T = replicate_bits(x, fWeightBits, 6);
1416         }
1417
1418         // This should bring the value within [0, 63]..
1419         SkASSERT(T <= 63);
1420
1421         if (T > 32) {
1422             T += 1;
1423         }
1424
1425         SkASSERT(T <= 64);
1426
1427         return T;
1428     }
1429
1430     // Returns the weight at the associated index. If the index is out of bounds, it
1431     // returns zero. It also chooses the weight appropriately based on the given dual
1432     // plane.
1433     int getWeight(const int* unquantizedWeights, int idx, bool dualPlane) const {
1434         const int maxIdx = (fDualPlaneEnabled ? 2 : 1) * fWeightDimX * fWeightDimY - 1;
1435         if (fDualPlaneEnabled) {
1436             const int effectiveIdx = 2*idx + (dualPlane ? 1 : 0);
1437             if (effectiveIdx > maxIdx) {
1438                 return 0;
1439             }
1440             return unquantizedWeights[effectiveIdx];
1441         }
1442
1443         SkASSERT(!dualPlane);
1444
1445         if (idx > maxIdx) {
1446             return 0;
1447         } else {
1448             return unquantizedWeights[idx];
1449         }
1450     }
1451
1452     // This computes the effective weight at location (s, t) of the block. This
1453     // weight is computed by sampling the texel weight grid (it's usually not 1-1), and
1454     // then applying a bilerp. The algorithm outlined here follows the algorithm
1455     // defined in section C.2.18 of the ASTC spec.
1456     int infillWeight(const int* unquantizedValues, int s, int t, bool dualPlane) const {
1457         const int Ds = (1024 + fDimX/2) / (fDimX - 1);
1458         const int Dt = (1024 + fDimY/2) / (fDimY - 1);
1459
1460         const int cs = Ds * s;
1461         const int ct = Dt * t;
1462
1463         const int gs = (cs*(fWeightDimX - 1) + 32) >> 6;
1464         const int gt = (ct*(fWeightDimY - 1) + 32) >> 6;
1465
1466         const int js = gs >> 4;
1467         const int jt = gt >> 4;
1468
1469         const int fs = gs & 0xF;
1470         const int ft = gt & 0xF;
1471
1472         const int idx = js + jt*fWeightDimX;
1473         const int p00 = this->getWeight(unquantizedValues, idx, dualPlane);
1474         const int p01 = this->getWeight(unquantizedValues, idx + 1, dualPlane);
1475         const int p10 = this->getWeight(unquantizedValues, idx + fWeightDimX, dualPlane);
1476         const int p11 = this->getWeight(unquantizedValues, idx + fWeightDimX + 1, dualPlane);
1477
1478         const int w11 = (fs*ft + 8) >> 4;
1479         const int w10 = ft - w11;
1480         const int w01 = fs - w11;
1481         const int w00 = 16 - fs - ft + w11;
1482
1483         const int weight = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1484         SkASSERT(weight <= 64);
1485         return weight;
1486     }
1487
1488     // Unquantizes the decoded texel weights as described in section C.2.17 of
1489     // the ASTC specification. Additionally, it populates texelWeights with
1490     // the expanded weight grid, which is computed according to section C.2.18
1491     void texelWeights(int texelWeights[2][12][12], const int* texelValues) const {
1492         // Unquantized texel weights...
1493         int unquantizedValues[144*2]; // 12x12 blocks with dual plane decoding...
1494         SkASSERT(this->numWeights() <= 144*2);
1495
1496         // Unquantize the weights and cache them
1497         for (int j = 0; j < this->numWeights(); ++j) {
1498             unquantizedValues[j] = this->unquantizeWeight(texelValues[j]);
1499         }
1500
1501         // Do weight infill...
1502         for (int y = 0; y < fDimY; ++y) {
1503             for (int x = 0; x < fDimX; ++x) {
1504                 texelWeights[0][x][y] = this->infillWeight(unquantizedValues, x, y, false);
1505                 if (fDualPlaneEnabled) {
1506                     texelWeights[1][x][y] = this->infillWeight(unquantizedValues, x, y, true);
1507                 }
1508             }
1509         }
1510     }
1511
1512     // Returns the partition for the texel located at position (x, y).
1513     // Adapted from C.2.21 of the ASTC specification
1514     int getPartition(int x, int y) const {
1515         const int partitionCount = this->numPartitions();
1516         int seed = fPartIndex;
1517         if ((fDimX * fDimY) < 31) {
1518             x <<= 1;
1519             y <<= 1;
1520         }
1521
1522         seed += (partitionCount - 1) * 1024;
1523
1524         uint32_t p = seed;
1525         p ^= p >> 15;  p -= p << 17;  p += p << 7; p += p <<  4;
1526         p ^= p >>  5;  p += p << 16;  p ^= p >> 7; p ^= p >> 3;
1527         p ^= p <<  6;  p ^= p >> 17;
1528
1529         uint32_t rnum = p;
1530         uint8_t seed1  =  rnum        & 0xF;
1531         uint8_t seed2  = (rnum >>  4) & 0xF;
1532         uint8_t seed3  = (rnum >>  8) & 0xF;
1533         uint8_t seed4  = (rnum >> 12) & 0xF;
1534         uint8_t seed5  = (rnum >> 16) & 0xF;
1535         uint8_t seed6  = (rnum >> 20) & 0xF;
1536         uint8_t seed7  = (rnum >> 24) & 0xF;
1537         uint8_t seed8  = (rnum >> 28) & 0xF;
1538         uint8_t seed9  = (rnum >> 18) & 0xF;
1539         uint8_t seed10 = (rnum >> 22) & 0xF;
1540         uint8_t seed11 = (rnum >> 26) & 0xF;
1541         uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
1542
1543         seed1 *= seed1;     seed2 *= seed2;
1544         seed3 *= seed3;     seed4 *= seed4;
1545         seed5 *= seed5;     seed6 *= seed6;
1546         seed7 *= seed7;     seed8 *= seed8;
1547         seed9 *= seed9;     seed10 *= seed10;
1548         seed11 *= seed11;   seed12 *= seed12;
1549
1550         int sh1, sh2, sh3;
1551         if (0 != (seed & 1)) {
1552             sh1 = (0 != (seed & 2))? 4 : 5;
1553             sh2 = (partitionCount == 3)? 6 : 5;
1554         } else {
1555             sh1 = (partitionCount==3)? 6 : 5;
1556             sh2 = (0 != (seed & 2))? 4 : 5;
1557         }
1558         sh3 = (0 != (seed & 0x10))? sh1 : sh2;
1559
1560         seed1 >>= sh1; seed2  >>= sh2; seed3  >>= sh1; seed4  >>= sh2;
1561         seed5 >>= sh1; seed6  >>= sh2; seed7  >>= sh1; seed8  >>= sh2;
1562         seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3;
1563
1564         const int z = 0;
1565         int a = seed1*x + seed2*y + seed11*z + (rnum >> 14);
1566         int b = seed3*x + seed4*y + seed12*z + (rnum >> 10);
1567         int c = seed5*x + seed6*y + seed9 *z + (rnum >>  6);
1568         int d = seed7*x + seed8*y + seed10*z + (rnum >>  2);
1569
1570         a &= 0x3F;
1571         b &= 0x3F;
1572         c &= 0x3F;
1573         d &= 0x3F;
1574
1575         if (partitionCount < 4) {
1576             d = 0;
1577         }
1578
1579         if (partitionCount < 3) {
1580             c = 0;
1581         }
1582
1583         if (a >= b && a >= c && a >= d) {
1584             return 0;
1585         } else if (b >= c && b >= d) {
1586             return 1;
1587         } else if (c >= d) {
1588             return 2;
1589         } else {
1590             return 3;
1591         }
1592     }
1593
1594     // Performs the proper interpolation of the texel based on the
1595     // endpoints and weights.
1596     SkColor getTexel(const SkColor endpoints[4][2],
1597                      const int weights[2][12][12],
1598                      int x, int y) const {
1599         int part = 0;
1600         if (this->numPartitions() > 1) {
1601             part = this->getPartition(x, y);
1602         }
1603
1604         SkColor result;
1605         if (fDualPlaneEnabled) {
1606             result = interpolate_dual_endpoints(
1607                 endpoints[part], weights[0][x][y], weights[1][x][y], fDualPlane);
1608         } else {
1609             result = interpolate_endpoints(endpoints[part], weights[0][x][y]);
1610         }
1611
1612 #if 1
1613         // !FIXME! if we're writing directly to a bitmap, then we don't need
1614         // to swap the red and blue channels, but since we're usually being used
1615         // by the SkImageDecoder_astc module, the results are expected to be in RGBA.
1616         result = SkColorSetARGB(
1617             SkColorGetA(result), SkColorGetB(result), SkColorGetG(result), SkColorGetR(result));
1618 #endif
1619
1620         return result;
1621     }
1622
1623     void decode() {
1624         // First decode the block mode.
1625         this->decodeBlockMode();
1626
1627         // Now we can decode the partition information.
1628         fPartIndex = static_cast<int>(read_astc_bits(fBlock, 11, 23));
1629         fPartCount = (fPartIndex & 0x3) + 1;
1630         fPartIndex >>= 2;
1631
1632         // This is illegal
1633         if (fDualPlaneEnabled && this->numPartitions() == 4) {
1634             fError = true;
1635             return;
1636         }
1637
1638         // Based on the partition info, we can decode the color information.
1639         this->decodeColorData();
1640     }
1641
1642     // Decodes the dual plane based on the given bit location. The final
1643     // location, if the dual plane is enabled, is also the end of our color data.
1644     // This function is only meant to be used from this->decodeColorData()
1645     void decodeDualPlane(int bitLoc) {
1646         if (fDualPlaneEnabled) {
1647             fDualPlane = static_cast<int>(read_astc_bits(fBlock, bitLoc - 2, bitLoc));
1648             fColorEndBit = bitLoc - 2;
1649         } else {
1650             fColorEndBit = bitLoc;
1651         }
1652     }
1653
1654     // Decodes the color information based on the ASTC spec.
1655     void decodeColorData() {
1656
1657         // By default, the last color bit is at the end of the texel weights
1658         const int lastWeight = 128 - this->numWeightBits();
1659
1660         // If we have a dual plane then it will be at this location, too.
1661         int dualPlaneBitLoc = lastWeight;
1662
1663         // If there's only one partition, then our job is (relatively) easy.
1664         if (this->numPartitions() == 1) {
1665             fCEM[0] = static_cast<ColorEndpointMode>(read_astc_bits(fBlock, 13, 17));
1666             fColorStartBit = 17;
1667
1668             // Handle dual plane mode...
1669             this->decodeDualPlane(dualPlaneBitLoc);
1670
1671             return;
1672         }
1673
1674         // If we have more than one partition, then we need to make
1675         // room for the partition index.
1676         fColorStartBit = 29;
1677
1678         // Read the base CEM. If it's zero, then we have no additional
1679         // CEM data and the endpoints for each partition share the same CEM.
1680         const int baseCEM = static_cast<int>(read_astc_bits(fBlock, 23, 25));
1681         if (0 == baseCEM) {
1682
1683             const ColorEndpointMode sameCEM =
1684                 static_cast<ColorEndpointMode>(read_astc_bits(fBlock, 25, 29));
1685
1686             for (int i = 0; i < kMaxPartitions; ++i) {
1687                 fCEM[i] = sameCEM;
1688             }
1689
1690             // Handle dual plane mode...
1691             this->decodeDualPlane(dualPlaneBitLoc);
1692
1693             return;
1694         }
1695
1696         // Move the dual plane selector bits down based on how many
1697         // partitions the block contains.
1698         switch (this->numPartitions()) {
1699             case 2:
1700                 dualPlaneBitLoc -= 2;
1701                 break;
1702
1703             case 3:
1704                 dualPlaneBitLoc -= 5;
1705                 break;
1706
1707             case 4:
1708                 dualPlaneBitLoc -= 8;
1709                 break;
1710
1711             default:
1712                 SkDEBUGFAIL("Internal ASTC decoding error.");
1713                 break;
1714         }
1715
1716         // The rest of the CEM config will be between the dual plane bit selector
1717         // and the texel weight grid.
1718         const int lowCEM = static_cast<int>(read_astc_bits(fBlock, 23, 29));
1719         SkASSERT(lastWeight >= dualPlaneBitLoc);
1720         SkASSERT(lastWeight - dualPlaneBitLoc < 31);
1721         int fullCEM = static_cast<int>(read_astc_bits(fBlock, dualPlaneBitLoc, lastWeight));
1722
1723         // Attach the config at the end of the weight grid to the CEM values
1724         // in the beginning of the block.
1725         fullCEM = (fullCEM << 6) | lowCEM;
1726
1727         // Ignore the two least significant bits, since those are our baseCEM above.
1728         fullCEM = fullCEM >> 2;
1729
1730         int C[kMaxPartitions]; // Next, decode C and M from the spec (Table C.2.12)
1731         for (int i = 0; i < this->numPartitions(); ++i) {
1732             C[i] = fullCEM & 1;
1733             fullCEM = fullCEM >> 1;
1734         }
1735
1736         int M[kMaxPartitions];
1737         for (int i = 0; i < this->numPartitions(); ++i) {
1738             M[i] = fullCEM & 0x3;
1739             fullCEM = fullCEM >> 2;
1740         }
1741
1742         // Construct our CEMs..
1743         SkASSERT(baseCEM > 0);
1744         for (int i = 0; i < this->numPartitions(); ++i) {
1745             int cem = (baseCEM - 1) * 4;
1746             cem += (0 == C[i])? 0 : 4;
1747             cem += M[i];
1748
1749             SkASSERT(cem < 16);
1750             fCEM[i] = static_cast<ColorEndpointMode>(cem);
1751         }
1752
1753         // Finally, if we have dual plane mode, then read the plane selector.
1754         this->decodeDualPlane(dualPlaneBitLoc);
1755     }
1756
1757     // Decodes the block mode. This function determines whether or not we use
1758     // dual plane encoding, the size of the texel weight grid, and the number of
1759     // bits, trits and quints that are used to encode it. For more information,
1760     // see section C.2.10 of the ASTC spec.
1761     //
1762     // For 2D blocks, the Block Mode field is laid out as follows:
1763     //
1764     // -------------------------------------------------------------------------
1765     // 10  9   8   7   6   5   4   3   2   1   0   Width Height Notes
1766     // -------------------------------------------------------------------------
1767     // D   H     B       A     R0  0   0   R2  R1  B+4   A+2
1768     // D   H     B       A     R0  0   1   R2  R1  B+8   A+2
1769     // D   H     B       A     R0  1   0   R2  R1  A+2   B+8
1770     // D   H   0   B     A     R0  1   1   R2  R1  A+2   B+6
1771     // D   H   1   B     A     R0  1   1   R2  R1  B+2   A+2
1772     // D   H   0   0     A     R0  R2  R1  0   0   12    A+2
1773     // D   H   0   1     A     R0  R2  R1  0   0   A+2   12
1774     // D   H   1   1   0   0   R0  R2  R1  0   0   6     10
1775     // D   H   1   1   0   1   R0  R2  R1  0   0   10    6
1776     //   B     1   0     A     R0  R2  R1  0   0   A+6   B+6   D=0, H=0
1777     // x   x   1   1   1   1   1   1   1   0   0   -     -     Void-extent
1778     // x   x   1   1   1   x   x   x   x   0   0   -     -     Reserved*
1779     // x   x   x   x   x   x   x   0   0   0   0   -     -     Reserved
1780     // -------------------------------------------------------------------------
1781     //
1782     // D - dual plane enabled
1783     // H, R - used to determine the number of bits/trits/quints in texel weight encoding
1784     //        R is a three bit value whose LSB is R0 and MSB is R1
1785     // Width, Height - dimensions of the texel weight grid (determined by A and B)
1786
1787     void decodeBlockMode() {
1788         const int blockMode = static_cast<int>(read_astc_bits(fBlock, 0, 11));
1789
1790         // Check for special void extent encoding
1791         fVoidExtent = (blockMode & 0x1FF) == 0x1FC;
1792
1793         // Check for reserved block modes
1794         fError = ((blockMode & 0x1C3) == 0x1C0) || ((blockMode & 0xF) == 0);
1795
1796         // Neither reserved nor void-extent, decode as usual
1797         // This code corresponds to table C.2.8 of the ASTC spec
1798         bool highPrecision = false;
1799         int R = 0;
1800         if ((blockMode & 0x3) == 0) {
1801             R = ((0xC & blockMode) >> 1) | ((0x10 & blockMode) >> 4);
1802             const int bitsSevenAndEight = (blockMode & 0x180) >> 7;
1803             SkASSERT(0 <= bitsSevenAndEight && bitsSevenAndEight < 4);
1804
1805             const int A = (blockMode >> 5) & 0x3;
1806             const int B = (blockMode >> 9) & 0x3;
1807
1808             fDualPlaneEnabled = (blockMode >> 10) & 0x1;
1809             highPrecision = (blockMode >> 9) & 0x1;
1810
1811             switch (bitsSevenAndEight) {
1812                 default:
1813                 case 0:
1814                     fWeightDimX = 12;
1815                     fWeightDimY = A + 2;
1816                     break;
1817
1818                 case 1:
1819                     fWeightDimX = A + 2;
1820                     fWeightDimY = 12;
1821                     break;
1822
1823                 case 2:
1824                     fWeightDimX = A + 6;
1825                     fWeightDimY = B + 6;
1826                     fDualPlaneEnabled = false;
1827                     highPrecision = false;
1828                     break;
1829
1830                 case 3:
1831                     if (0 == A) {
1832                         fWeightDimX = 6;
1833                         fWeightDimY = 10;
1834                     } else {
1835                         fWeightDimX = 10;
1836                         fWeightDimY = 6;
1837                     }
1838                     break;
1839             }
1840         } else { // (blockMode & 0x3) != 0
1841             R = ((blockMode & 0x3) << 1) | ((blockMode & 0x10) >> 4);
1842
1843             const int bitsTwoAndThree = (blockMode >> 2) & 0x3;
1844             SkASSERT(0 <= bitsTwoAndThree && bitsTwoAndThree < 4);
1845
1846             const int A = (blockMode >> 5) & 0x3;
1847             const int B = (blockMode >> 7) & 0x3;
1848
1849             fDualPlaneEnabled = (blockMode >> 10) & 0x1;
1850             highPrecision = (blockMode >> 9) & 0x1;
1851
1852             switch (bitsTwoAndThree) {
1853                 case 0:
1854                     fWeightDimX = B + 4;
1855                     fWeightDimY = A + 2;
1856                     break;
1857                 case 1:
1858                     fWeightDimX = B + 8;
1859                     fWeightDimY = A + 2;
1860                     break;
1861                 case 2:
1862                     fWeightDimX = A + 2;
1863                     fWeightDimY = B + 8;
1864                     break;
1865                 case 3:
1866                     if ((B & 0x2) == 0) {
1867                         fWeightDimX = A + 2;
1868                         fWeightDimY = (B & 1) + 6;
1869                     } else {
1870                         fWeightDimX = (B & 1) + 2;
1871                         fWeightDimY = A + 2;
1872                     }
1873                     break;
1874             }
1875         }
1876
1877         // We should have set the values of R and highPrecision
1878         // from decoding the block mode, these are used to determine
1879         // the proper dimensions of our weight grid.
1880         if ((R & 0x6) == 0) {
1881             fError = true;
1882         } else {
1883             static const int kBitAllocationTable[2][6][3] = {
1884                 {
1885                     {  1, 0, 0 },
1886                     {  0, 1, 0 },
1887                     {  2, 0, 0 },
1888                     {  0, 0, 1 },
1889                     {  1, 1, 0 },
1890                     {  3, 0, 0 }
1891                 },
1892                 {
1893                     {  1, 0, 1 },
1894                     {  2, 1, 0 },
1895                     {  4, 0, 0 },
1896                     {  2, 0, 1 },
1897                     {  3, 1, 0 },
1898                     {  5, 0, 0 }
1899                 }
1900             };
1901
1902             fWeightBits = kBitAllocationTable[highPrecision][R - 2][0];
1903             fWeightTrits = kBitAllocationTable[highPrecision][R - 2][1];
1904             fWeightQuints = kBitAllocationTable[highPrecision][R - 2][2];
1905         }
1906     }
1907 };
1908
1909 // Reads an ASTC block from the given pointer.
1910 static inline void read_astc_block(ASTCDecompressionData *dst, const uint8_t* src) {
1911     const uint64_t* qword = reinterpret_cast<const uint64_t*>(src);
1912     dst->fBlock.fLow = SkEndian_SwapLE64(qword[0]);
1913     dst->fBlock.fHigh = SkEndian_SwapLE64(qword[1]);
1914     dst->decode();
1915 }
1916
1917 // Take a known void-extent block, and write out the values as a constant color.
1918 static void decompress_void_extent(uint8_t* dst, int dstRowBytes,
1919                                    const ASTCDecompressionData &data) {
1920     // The top 64 bits contain 4 16-bit RGBA values.
1921     int a = (static_cast<int>(read_astc_bits(data.fBlock, 112, 128)) + 255) >> 8;
1922     int b = (static_cast<int>(read_astc_bits(data.fBlock, 96, 112)) + 255) >> 8;
1923     int g = (static_cast<int>(read_astc_bits(data.fBlock, 80, 96)) + 255) >> 8;
1924     int r = (static_cast<int>(read_astc_bits(data.fBlock, 64, 80)) + 255) >> 8;
1925
1926     write_constant_color(dst, data.fDimX, data.fDimY, dstRowBytes, SkColorSetARGB(a, r, g, b));
1927 }
1928
1929 // Decompresses a single ASTC block. It's assumed that data.fDimX and data.fDimY are
1930 // set and that the block has already been decoded (i.e. data.decode() has been called)
1931 static void decompress_astc_block(uint8_t* dst, int dstRowBytes,
1932                                   const ASTCDecompressionData &data) {
1933     if (data.fError) {
1934         write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
1935         return;
1936     }
1937
1938     if (data.fVoidExtent) {
1939         decompress_void_extent(dst, dstRowBytes, data);
1940         return;
1941     }
1942
1943     // According to the spec, any more than 64 values is illegal. (C.2.24)
1944     static const int kMaxTexelValues = 64;
1945
1946     // Decode the texel weights.
1947     int texelValues[kMaxTexelValues];
1948     bool success = decode_integer_sequence(
1949         texelValues, kMaxTexelValues, data.numWeights(),
1950         // texel data goes to the end of the 128 bit block.
1951         data.fBlock, 128, 128 - data.numWeightBits(), false,
1952         data.fWeightBits, data.fWeightTrits, data.fWeightQuints);
1953
1954     if (!success) {
1955         write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
1956         return;
1957     }
1958
1959     // Decode the color endpoints
1960     int colorBits, colorTrits, colorQuints;
1961     if (!data.getColorValueEncoding(&colorBits, &colorTrits, &colorQuints)) {
1962         write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
1963         return;
1964     }
1965
1966     // According to the spec, any more than 18 color values is illegal. (C.2.24)
1967     static const int kMaxColorValues = 18;
1968
1969     int colorValues[kMaxColorValues];
1970     success = decode_integer_sequence(
1971         colorValues, kMaxColorValues, data.numColorValues(),
1972         data.fBlock, data.fColorStartBit, data.fColorEndBit, true,
1973         colorBits, colorTrits, colorQuints);
1974
1975     if (!success) {
1976         write_error_color(dst, data.fDimX, data.fDimY, dstRowBytes);
1977         return;
1978     }
1979
1980     // Unquantize the color values after they've been decoded.
1981     unquantize_colors(colorValues, data.numColorValues(), colorBits, colorTrits, colorQuints);
1982
1983     // Decode the colors into the appropriate endpoints.
1984     SkColor endpoints[4][2];
1985     data.colorEndpoints(endpoints, colorValues);
1986
1987     // Do texel infill and decode the texel values.
1988     int texelWeights[2][12][12];
1989     data.texelWeights(texelWeights, texelValues);
1990
1991     // Write the texels by interpolating them based on the information
1992     // stored in the block.
1993     dst += data.fDimY * dstRowBytes;
1994     for (int y = 0; y < data.fDimY; ++y) {
1995         dst -= dstRowBytes;
1996         SkColor* colorPtr = reinterpret_cast<SkColor*>(dst);
1997         for (int x = 0; x < data.fDimX; ++x) {
1998             colorPtr[x] = data.getTexel(endpoints, texelWeights, x, y);
1999         }
2000     }
2001 }
2002
2003 ////////////////////////////////////////////////////////////////////////////////
2004 //
// ASTC Compression Struct
2006 //
2007 ////////////////////////////////////////////////////////////////////////////////
2008
2009 // This is the type passed as the CompressorType argument of the compressed
2010 // blitter for the ASTC format. The static functions required to be in this
2011 // struct are documented in SkTextureCompressor_Blitter.h
2012 struct CompressorASTC {
2013     static inline void CompressA8Vertical(uint8_t* dst, const uint8_t* src) {
2014         compress_a8_astc_block<GetAlphaTranspose>(&dst, src, 12);
2015     }
2016
2017     static inline void CompressA8Horizontal(uint8_t* dst, const uint8_t* src,
2018                                             int srcRowBytes) {
2019         compress_a8_astc_block<GetAlpha>(&dst, src, srcRowBytes);
2020     }
2021
2022 #if PEDANTIC_BLIT_RECT
2023     static inline void UpdateBlock(uint8_t* dst, const uint8_t* src, int srcRowBytes,
2024                                    const uint8_t* mask) {
2025         // TODO: krajcevski
2026         // This is kind of difficult for ASTC because the weight values are calculated
2027         // as an average of the actual weights. The best we can do is decompress the
2028         // weights and recalculate them based on the new texel values. This should
2029         // be "not too bad" since we know that anytime we hit this function, we're
2030         // compressing 12x12 block dimension alpha-only, and we know the layout
2031         // of the block
2032         SkFAIL("Implement me!");
2033     }
2034 #endif
2035 };
2036
2037 ////////////////////////////////////////////////////////////////////////////////
2038
2039 namespace SkTextureCompressor {
2040
2041 bool CompressA8To12x12ASTC(uint8_t* dst, const uint8_t* src,
2042                            int width, int height, int rowBytes) {
2043     if (width < 0 || ((width % 12) != 0) || height < 0 || ((height % 12) != 0)) {
2044         return false;
2045     }
2046
2047     uint8_t** dstPtr = &dst;
2048     for (int y = 0; y < height; y += 12) {
2049         for (int x = 0; x < width; x += 12) {
2050             compress_a8_astc_block<GetAlpha>(dstPtr, src + y*rowBytes + x, rowBytes);
2051         }
2052     }
2053
2054     return true;
2055 }
2056
2057 SkBlitter* CreateASTCBlitter(int width, int height, void* outputBuffer,
2058                              SkTBlitterAllocator* allocator) {
2059     if ((width % 12) != 0 || (height % 12) != 0) {
2060         return NULL;
2061     }
2062
2063     // Memset the output buffer to an encoding that decodes to zero. We must do this
2064     // in order to avoid having uninitialized values in the buffer if the blitter
2065     // decides not to write certain scanlines (and skip entire rows of blocks).
2066     // In the case of ASTC, if everything index is zero, then the interpolated value
2067     // will decode to zero provided we have the right header. We use the encoding
2068     // from recognizing all zero blocks from above.
2069     const int nBlocks = (width * height / 144);
2070     uint8_t *dst = reinterpret_cast<uint8_t *>(outputBuffer);
2071     for (int i = 0; i < nBlocks; ++i) {
2072         send_packing(&dst, SkTEndian_SwapLE64(0x0000000001FE000173ULL), 0);
2073     }
2074
2075     return allocator->createT<
2076         SkTCompressedAlphaBlitter<12, 16, CompressorASTC>, int, int, void* >
2077         (width, height, outputBuffer);
2078 }
2079
2080 void DecompressASTC(uint8_t* dst, int dstRowBytes, const uint8_t* src,
2081                     int width, int height, int blockDimX, int blockDimY) {
2082     // ASTC is encoded in what they call "raster order", so that the first
2083     // block is the bottom-left block in the image, and the first pixel
2084     // is the bottom-left pixel of the image
2085     dst += height * dstRowBytes;
2086
2087     ASTCDecompressionData data(blockDimX, blockDimY);
2088     for (int y = 0; y < height; y += blockDimY) {
2089         dst -= blockDimY * dstRowBytes;
2090         SkColor *colorPtr = reinterpret_cast<SkColor*>(dst);
2091         for (int x = 0; x < width; x += blockDimX) {
2092             read_astc_block(&data, src);
2093             decompress_astc_block(reinterpret_cast<uint8_t*>(colorPtr + x), dstRowBytes, data);
2094
2095             // ASTC encoded blocks are 16 bytes (128 bits) large.
2096             src += 16;
2097         }
2098     }
2099 }
2100
2101 }  // SkTextureCompressor