framework/common/tcuAstcUtil.cpp

   1 /*-------------------------------------------------------------------------
   2  * drawElements Quality Program Tester Core
   3  * ----------------------------------------
   4  *
   5  * Copyright 2016 The Android Open Source Project
   6  *
   7  * Licensed under the Apache License, Version 2.0 (the "License");
   8  * you may not use this file except in compliance with the License.
   9  * You may obtain a copy of the License at
  10  *
  11  *      http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  *
  19  *//*!
  20  * \file
  21  * \brief ASTC Utilities.
  22  *//*--------------------------------------------------------------------*/
  23
  24 #include "tcuAstcUtil.hpp"
  25 #include "deFloat16.h"
  26 #include "deRandom.hpp"
  27 #include "deMeta.hpp"
  28
  29 #include <algorithm>
  30
  31 namespace tcu
  32 {
  33 namespace astc
  34 {
  35
  36 using std::vector;
  37
  38 namespace
  39 {
  40
  41 // Common utilities
  42
  43 enum
  44 {
  45         MAX_BLOCK_WIDTH         = 12,
  46         MAX_BLOCK_HEIGHT        = 12
  47 };
  48
  49 inline deUint32 getBit (deUint32 src, int ndx)
  50 {
  51         DE_ASSERT(de::inBounds(ndx, 0, 32));
  52         return (src >> ndx) & 1;
  53 }
  54
  55 inline deUint32 getBits (deUint32 src, int low, int high)
  56 {
  57         const int numBits = (high-low) + 1;
  58
  59         DE_ASSERT(de::inRange(numBits, 1, 32));
  60
  61         if (numBits < 32)
  62                 return (deUint32)((src >> low) & ((1u<<numBits)-1));
  63         else
  64                 return (deUint32)((src >> low) & 0xFFFFFFFFu);
  65 }
  66
  67 inline bool isBitSet (deUint32 src, int ndx)
  68 {
  69         return getBit(src, ndx) != 0;
  70 }
  71
  72 inline deUint32 reverseBits (deUint32 src, int numBits)
  73 {
  74         DE_ASSERT(de::inRange(numBits, 0, 32));
  75         deUint32 result = 0;
  76         for (int i = 0; i < numBits; i++)
  77                 result |= ((src >> i) & 1) << (numBits-1-i);
  78         return result;
  79 }
  80
  81 inline deUint32 bitReplicationScale (deUint32 src, int numSrcBits, int numDstBits)
  82 {
  83         DE_ASSERT(numSrcBits <= numDstBits);
  84         DE_ASSERT((src & ((1<<numSrcBits)-1)) == src);
  85         deUint32 dst = 0;
  86         for (int shift = numDstBits-numSrcBits; shift > -numSrcBits; shift -= numSrcBits)
  87                 dst |= shift >= 0 ? src << shift : src >> -shift;
  88         return dst;
  89 }
  90
  91 inline deInt32 signExtend (deInt32 src, int numSrcBits)
  92 {
  93         DE_ASSERT(de::inRange(numSrcBits, 2, 31));
  94         const bool negative = (src & (1 << (numSrcBits-1))) != 0;
  95         return src | (negative ? ~((1 << numSrcBits) - 1) : 0);
  96 }
  97
  98 inline bool isFloat16InfOrNan (deFloat16 v)
  99 {
 100         return getBits(v, 10, 14) == 31;
 101 }
 102
 103 enum ISEMode
 104 {
 105         ISEMODE_TRIT = 0,
 106         ISEMODE_QUINT,
 107         ISEMODE_PLAIN_BIT,
 108
 109         ISEMODE_LAST
 110 };
 111
 112 struct ISEParams
 113 {
 114         ISEMode         mode;
 115         int                     numBits;
 116
 117         ISEParams (ISEMode mode_, int numBits_) : mode(mode_), numBits(numBits_) {}
 118 };
 119
 120 inline int computeNumRequiredBits (const ISEParams& iseParams, int numValues)
 121 {
 122         switch (iseParams.mode)
 123         {
 124                 case ISEMODE_TRIT:                      return deDivRoundUp32(numValues*8, 5) + numValues*iseParams.numBits;
 125                 case ISEMODE_QUINT:                     return deDivRoundUp32(numValues*7, 3) + numValues*iseParams.numBits;
 126                 case ISEMODE_PLAIN_BIT:         return numValues*iseParams.numBits;
 127                 default:
 128                         DE_ASSERT(false);
 129                         return -1;
 130         }
 131 }
 132
 133 ISEParams computeMaximumRangeISEParams (int numAvailableBits, int numValuesInSequence)
 134 {
 135         int curBitsForTritMode          = 6;
 136         int curBitsForQuintMode         = 5;
 137         int curBitsForPlainBitMode      = 8;
 138
 139         while (true)
 140         {
 141                 DE_ASSERT(curBitsForTritMode > 0 || curBitsForQuintMode > 0 || curBitsForPlainBitMode > 0);
 142
 143                 const int tritRange                     = curBitsForTritMode > 0                ? (3 << curBitsForTritMode) - 1                 : -1;
 144                 const int quintRange            = curBitsForQuintMode > 0               ? (5 << curBitsForQuintMode) - 1                : -1;
 145                 const int plainBitRange         = curBitsForPlainBitMode > 0    ? (1 << curBitsForPlainBitMode) - 1             : -1;
 146                 const int maxRange                      = de::max(de::max(tritRange, quintRange), plainBitRange);
 147
 148                 if (maxRange == tritRange)
 149                 {
 150                         const ISEParams params(ISEMODE_TRIT, curBitsForTritMode);
 151                         if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
 152                                 return ISEParams(ISEMODE_TRIT, curBitsForTritMode);
 153                         curBitsForTritMode--;
 154                 }
 155                 else if (maxRange == quintRange)
 156                 {
 157                         const ISEParams params(ISEMODE_QUINT, curBitsForQuintMode);
 158                         if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
 159                                 return ISEParams(ISEMODE_QUINT, curBitsForQuintMode);
 160                         curBitsForQuintMode--;
 161                 }
 162                 else
 163                 {
 164                         const ISEParams params(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
 165                         DE_ASSERT(maxRange == plainBitRange);
 166                         if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
 167                                 return ISEParams(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
 168                         curBitsForPlainBitMode--;
 169                 }
 170         }
 171 }
 172
 173 inline int computeNumColorEndpointValues (deUint32 endpointMode)
 174 {
 175         DE_ASSERT(endpointMode < 16);
 176         return (endpointMode/4 + 1) * 2;
 177 }
 178
 179 // Decompression utilities
 180
 181 enum DecompressResult
 182 {
 183         DECOMPRESS_RESULT_VALID_BLOCK   = 0,    //!< Decompressed valid block
 184         DECOMPRESS_RESULT_ERROR,                                //!< Encountered error while decompressing, error color written
 185
 186         DECOMPRESS_RESULT_LAST
 187 };
 188
 189 // A helper for getting bits from a 128-bit block.
 190 class Block128
 191 {
 192 private:
 193         typedef deUint64 Word;
 194
 195         enum
 196         {
 197                 WORD_BYTES      = sizeof(Word),
 198                 WORD_BITS       = 8*WORD_BYTES,
 199                 NUM_WORDS       = 128 / WORD_BITS
 200         };
 201
 202         DE_STATIC_ASSERT(128 % WORD_BITS == 0);
 203
 204 public:
 205         Block128 (const deUint8* src)
 206         {
 207                 for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++)
 208                 {
 209                         m_words[wordNdx] = 0;
 210                         for (int byteNdx = 0; byteNdx < WORD_BYTES; byteNdx++)
 211                                 m_words[wordNdx] |= (Word)src[wordNdx*WORD_BYTES + byteNdx] << (8*byteNdx);
 212                 }
 213         }
 214
 215         deUint32 getBit (int ndx) const
 216         {
 217                 DE_ASSERT(de::inBounds(ndx, 0, 128));
 218                 return (m_words[ndx / WORD_BITS] >> (ndx % WORD_BITS)) & 1;
 219         }
 220
 221         deUint32 getBits (int low, int high) const
 222         {
 223                 DE_ASSERT(de::inBounds(low, 0, 128));
 224                 DE_ASSERT(de::inBounds(high, 0, 128));
 225                 DE_ASSERT(de::inRange(high-low+1, 0, 32));
 226
 227                 if (high-low+1 == 0)
 228                         return 0;
 229
 230                 const int word0Ndx = low / WORD_BITS;
 231                 const int word1Ndx = high / WORD_BITS;
 232
 233                 // \note "foo << bar << 1" done instead of "foo << (bar+1)" to avoid overflow, i.e. shift amount being too big.
 234
 235                 if (word0Ndx == word1Ndx)
 236                         return (deUint32)((m_words[word0Ndx] & ((((Word)1 << high%WORD_BITS << 1) - 1))) >> ((Word)low % WORD_BITS));
 237                 else
 238                 {
 239                         DE_ASSERT(word1Ndx == word0Ndx + 1);
 240
 241                         return (deUint32)(m_words[word0Ndx] >> (low%WORD_BITS)) |
 242                                    (deUint32)((m_words[word1Ndx] & (((Word)1 << high%WORD_BITS << 1) - 1)) << (high-low - high%WORD_BITS));
 243                 }
 244         }
 245
 246         bool isBitSet (int ndx) const
 247         {
 248                 DE_ASSERT(de::inBounds(ndx, 0, 128));
 249                 return getBit(ndx) != 0;
 250         }
 251
 252 private:
 253         Word m_words[NUM_WORDS];
 254 };
 255
 256 // A helper for sequential access into a Block128.
 257 class BitAccessStream
 258 {
 259 public:
 260         BitAccessStream (const Block128& src, int startNdxInSrc, int length, bool forward)
 261                 : m_src                         (src)
 262                 , m_startNdxInSrc       (startNdxInSrc)
 263                 , m_length                      (length)
 264                 , m_forward                     (forward)
 265                 , m_ndx                         (0)
 266         {
 267         }
 268
 269         // Get the next num bits. Bits at positions greater than or equal to m_length are zeros.
 270         deUint32 getNext (int num)
 271         {
 272                 if (num == 0 || m_ndx >= m_length)
 273                         return 0;
 274
 275                 const int end                           = m_ndx + num;
 276                 const int numBitsFromSrc        = de::max(0, de::min(m_length, end) - m_ndx);
 277                 const int low                           = m_ndx;
 278                 const int high                          = m_ndx + numBitsFromSrc - 1;
 279
 280                 m_ndx += num;
 281
 282                 return m_forward ?                         m_src.getBits(m_startNdxInSrc + low,  m_startNdxInSrc + high)
 283                                                  : reverseBits(m_src.getBits(m_startNdxInSrc - high, m_startNdxInSrc - low), numBitsFromSrc);
 284         }
 285
 286 private:
 287         const Block128&         m_src;
 288         const int                       m_startNdxInSrc;
 289         const int                       m_length;
 290         const bool                      m_forward;
 291
 292         int                                     m_ndx;
 293 };
 294
 295 struct ISEDecodedResult
 296 {
 297         deUint32 m;
 298         deUint32 tq; //!< Trit or quint value, depending on ISE mode.
 299         deUint32 v;
 300 };
 301
 302 // Data from an ASTC block's "block mode" part (i.e. bits [0,10]).
 303 struct ASTCBlockMode
 304 {
 305         bool            isError;
 306         // \note Following fields only relevant if !isError.
 307         bool            isVoidExtent;
 308         // \note Following fields only relevant if !isVoidExtent.
 309         bool            isDualPlane;
 310         int                     weightGridWidth;
 311         int                     weightGridHeight;
 312         ISEParams       weightISEParams;
 313
 314         ASTCBlockMode (void)
 315                 : isError                       (true)
 316                 , isVoidExtent          (true)
 317                 , isDualPlane           (true)
 318                 , weightGridWidth       (-1)
 319                 , weightGridHeight      (-1)
 320                 , weightISEParams       (ISEMODE_LAST, -1)
 321         {
 322         }
 323 };
 324
 325 inline int computeNumWeights (const ASTCBlockMode& mode)
 326 {
 327         return mode.weightGridWidth * mode.weightGridHeight * (mode.isDualPlane ? 2 : 1);
 328 }
 329
 330 struct ColorEndpointPair
 331 {
 332         UVec4 e0;
 333         UVec4 e1;
 334 };
 335
 336 struct TexelWeightPair
 337 {
 338         deUint32 w[2];
 339 };
 340
 341 ASTCBlockMode getASTCBlockMode (deUint32 blockModeData)
 342 {
 343         ASTCBlockMode blockMode;
 344         blockMode.isError = true; // \note Set to false later, if not error.
 345
 346         blockMode.isVoidExtent = getBits(blockModeData, 0, 8) == 0x1fc;
 347
 348         if (!blockMode.isVoidExtent)
 349         {
 350                 if ((getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 6, 8) == 7) || getBits(blockModeData, 0, 3) == 0)
 351                         return blockMode; // Invalid ("reserved").
 352
 353                 deUint32 r = (deUint32)-1; // \note Set in the following branches.
 354
 355                 if (getBits(blockModeData, 0, 1) == 0)
 356                 {
 357                         const deUint32 r0       = getBit(blockModeData, 4);
 358                         const deUint32 r1       = getBit(blockModeData, 2);
 359                         const deUint32 r2       = getBit(blockModeData, 3);
 360                         const deUint32 i78      = getBits(blockModeData, 7, 8);
 361
 362                         r = (r2 << 2) | (r1 << 1) | (r0 << 0);
 363
 364                         if (i78 == 3)
 365                         {
 366                                 const bool i5 = isBitSet(blockModeData, 5);
 367                                 blockMode.weightGridWidth       = i5 ? 10 : 6;
 368                                 blockMode.weightGridHeight      = i5 ? 6  : 10;
 369                         }
 370                         else
 371                         {
 372                                 const deUint32 a = getBits(blockModeData, 5, 6);
 373                                 switch (i78)
 374                                 {
 375                                         case 0:         blockMode.weightGridWidth = 12;         blockMode.weightGridHeight = a + 2;                                                                     break;
 376                                         case 1:         blockMode.weightGridWidth = a + 2;      blockMode.weightGridHeight = 12;                                                                        break;
 377                                         case 2:         blockMode.weightGridWidth = a + 6;      blockMode.weightGridHeight = getBits(blockModeData, 9, 10) + 6;         break;
 378                                         default: DE_ASSERT(false);
 379                                 }
 380                         }
 381                 }
 382                 else
 383                 {
 384                         const deUint32 r0       = getBit(blockModeData, 4);
 385                         const deUint32 r1       = getBit(blockModeData, 0);
 386                         const deUint32 r2       = getBit(blockModeData, 1);
 387                         const deUint32 i23      = getBits(blockModeData, 2, 3);
 388                         const deUint32 a        = getBits(blockModeData, 5, 6);
 389
 390                         r = (r2 << 2) | (r1 << 1) | (r0 << 0);
 391
 392                         if (i23 == 3)
 393                         {
 394                                 const deUint32  b       = getBit(blockModeData, 7);
 395                                 const bool              i8      = isBitSet(blockModeData, 8);
 396                                 blockMode.weightGridWidth       = i8 ? b+2 : a+2;
 397                                 blockMode.weightGridHeight      = i8 ? a+2 : b+6;
 398                         }
 399                         else
 400                         {
 401                                 const deUint32 b = getBits(blockModeData, 7, 8);
 402
 403                                 switch (i23)
 404                                 {
 405                                         case 0:         blockMode.weightGridWidth = b + 4;      blockMode.weightGridHeight = a + 2;     break;
 406                                         case 1:         blockMode.weightGridWidth = b + 8;      blockMode.weightGridHeight = a + 2;     break;
 407                                         case 2:         blockMode.weightGridWidth = a + 2;      blockMode.weightGridHeight = b + 8;     break;
 408                                         default: DE_ASSERT(false);
 409                                 }
 410                         }
 411                 }
 412
 413                 const bool      zeroDH          = getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 7, 8) == 2;
 414                 const bool      h                       = zeroDH ? 0 : isBitSet(blockModeData, 9);
 415                 blockMode.isDualPlane   = zeroDH ? 0 : isBitSet(blockModeData, 10);
 416
 417                 {
 418                         ISEMode&        m       = blockMode.weightISEParams.mode;
 419                         int&            b       = blockMode.weightISEParams.numBits;
 420                         m = ISEMODE_PLAIN_BIT;
 421                         b = 0;
 422
 423                         if (h)
 424                         {
 425                                 switch (r)
 426                                 {
 427                                         case 2:                                                 m = ISEMODE_QUINT;      b = 1;  break;
 428                                         case 3:         m = ISEMODE_TRIT;                                               b = 2;  break;
 429                                         case 4:                                                                                         b = 4;  break;
 430                                         case 5:                                                 m = ISEMODE_QUINT;      b = 2;  break;
 431                                         case 6:         m = ISEMODE_TRIT;                                               b = 3;  break;
 432                                         case 7:                                                                                         b = 5;  break;
 433                                         default:        DE_ASSERT(false);
 434                                 }
 435                         }
 436                         else
 437                         {
 438                                 switch (r)
 439                                 {
 440                                         case 2:                                                                                         b = 1;  break;
 441                                         case 3:         m = ISEMODE_TRIT;                                                               break;
 442                                         case 4:                                                                                         b = 2;  break;
 443                                         case 5:                                                 m = ISEMODE_QUINT;                      break;
 444                                         case 6:         m = ISEMODE_TRIT;                                               b = 1;  break;
 445                                         case 7:                                                                                         b = 3;  break;
 446                                         default:        DE_ASSERT(false);
 447                                 }
 448                         }
 449                 }
 450         }
 451
 452         blockMode.isError = false;
 453         return blockMode;
 454 }
 455
 456 inline void setASTCErrorColorBlock (void* dst, int blockWidth, int blockHeight, bool isSRGB)
 457 {
 458         if (isSRGB)
 459         {
 460                 deUint8* const dstU = (deUint8*)dst;
 461
 462                 for (int i = 0; i < blockWidth*blockHeight; i++)
 463                 {
 464                         dstU[4*i + 0] = 0xff;
 465                         dstU[4*i + 1] = 0;
 466                         dstU[4*i + 2] = 0xff;
 467                         dstU[4*i + 3] = 0xff;
 468                 }
 469         }
 470         else
 471         {
 472                 float* const dstF = (float*)dst;
 473
 474                 for (int i = 0; i < blockWidth*blockHeight; i++)
 475                 {
 476                         dstF[4*i + 0] = 1.0f;
 477                         dstF[4*i + 1] = 0.0f;
 478                         dstF[4*i + 2] = 1.0f;
 479                         dstF[4*i + 3] = 1.0f;
 480                 }
 481         }
 482 }
 483
 484 DecompressResult decodeVoidExtentBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode)
 485 {
 486         const deUint32  minSExtent                      = blockData.getBits(12, 24);
 487         const deUint32  maxSExtent                      = blockData.getBits(25, 37);
 488         const deUint32  minTExtent                      = blockData.getBits(38, 50);
 489         const deUint32  maxTExtent                      = blockData.getBits(51, 63);
 490         const bool              allExtentsAllOnes       = minSExtent == 0x1fff && maxSExtent == 0x1fff && minTExtent == 0x1fff && maxTExtent == 0x1fff;
 491         const bool              isHDRBlock                      = blockData.isBitSet(9);
 492
 493         if ((isLDRMode && isHDRBlock) || (!allExtentsAllOnes && (minSExtent >= maxSExtent || minTExtent >= maxTExtent)))
 494         {
 495                 setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
 496                 return DECOMPRESS_RESULT_ERROR;
 497         }
 498
 499         const deUint32 rgba[4] =
 500         {
 501                 blockData.getBits(64,  79),
 502                 blockData.getBits(80,  95),
 503                 blockData.getBits(96,  111),
 504                 blockData.getBits(112, 127)
 505         };
 506
 507         if (isSRGB)
 508         {
 509                 deUint8* const dstU = (deUint8*)dst;
 510                 for (int i = 0; i < blockWidth*blockHeight; i++)
 511                 for (int c = 0; c < 4; c++)
 512                         dstU[i*4 + c] = (deUint8)((rgba[c] & 0xff00) >> 8);
 513         }
 514         else
 515         {
 516                 float* const dstF = (float*)dst;
 517
 518                 if (isHDRBlock)
 519                 {
 520                         for (int c = 0; c < 4; c++)
 521                         {
 522                                 if (isFloat16InfOrNan((deFloat16)rgba[c]))
 523                                         throw InternalError("Infinity or NaN color component in HDR void extent block in ASTC texture (behavior undefined by ASTC specification)");
 524                         }
 525
 526                         for (int i = 0; i < blockWidth*blockHeight; i++)
 527                         for (int c = 0; c < 4; c++)
 528                                 dstF[i*4 + c] = deFloat16To32((deFloat16)rgba[c]);
 529                 }
 530                 else
 531                 {
 532                         for (int i = 0; i < blockWidth*blockHeight; i++)
 533                         for (int c = 0; c < 4; c++)
 534                                 dstF[i*4 + c] = rgba[c] == 65535 ? 1.0f : (float)rgba[c] / 65536.0f;
 535                 }
 536         }
 537
 538         return DECOMPRESS_RESULT_VALID_BLOCK;
 539 }
 540
 541 void decodeColorEndpointModes (deUint32* endpointModesDst, const Block128& blockData, int numPartitions, int extraCemBitsStart)
 542 {
 543         if (numPartitions == 1)
 544                 endpointModesDst[0] = blockData.getBits(13, 16);
 545         else
 546         {
 547                 const deUint32 highLevelSelector = blockData.getBits(23, 24);
 548
 549                 if (highLevelSelector == 0)
 550                 {
 551                         const deUint32 mode = blockData.getBits(25, 28);
 552                         for (int i = 0; i < numPartitions; i++)
 553                                 endpointModesDst[i] = mode;
 554                 }
 555                 else
 556                 {
 557                         for (int partNdx = 0; partNdx < numPartitions; partNdx++)
 558                         {
 559                                 const deUint32 cemClass         = highLevelSelector - (blockData.isBitSet(25 + partNdx) ? 0 : 1);
 560                                 const deUint32 lowBit0Ndx       = numPartitions + 2*partNdx;
 561                                 const deUint32 lowBit1Ndx       = numPartitions + 2*partNdx + 1;
 562                                 const deUint32 lowBit0          = blockData.getBit(lowBit0Ndx < 4 ? 25+lowBit0Ndx : extraCemBitsStart+lowBit0Ndx-4);
 563                                 const deUint32 lowBit1          = blockData.getBit(lowBit1Ndx < 4 ? 25+lowBit1Ndx : extraCemBitsStart+lowBit1Ndx-4);
 564
 565                                 endpointModesDst[partNdx] = (cemClass << 2) | (lowBit1 << 1) | lowBit0;
 566                         }
 567                 }
 568         }
 569 }
 570
 571 int computeNumColorEndpointValues (const deUint32* endpointModes, int numPartitions)
 572 {
 573         int result = 0;
 574         for (int i = 0; i < numPartitions; i++)
 575                 result += computeNumColorEndpointValues(endpointModes[i]);
 576         return result;
 577 }
 578
 579 void decodeISETritBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits)
 580 {
 581         DE_ASSERT(de::inRange(numValues, 1, 5));
 582
 583         deUint32 m[5];
 584
 585         m[0]                    = data.getNext(numBits);
 586         deUint32 T01    = data.getNext(2);
 587         m[1]                    = data.getNext(numBits);
 588         deUint32 T23    = data.getNext(2);
 589         m[2]                    = data.getNext(numBits);
 590         deUint32 T4             = data.getNext(1);
 591         m[3]                    = data.getNext(numBits);
 592         deUint32 T56    = data.getNext(2);
 593         m[4]                    = data.getNext(numBits);
 594         deUint32 T7             = data.getNext(1);
 595
 596         switch (numValues)
 597         {
 598                 case 1:
 599                         T23     = 0;
 600                 // Fallthrough
 601                 case 2:
 602                         T4      = 0;
 603                 // Fallthrough
 604                 case 3:
 605                         T56     = 0;
 606                 // Fallthrough
 607                 case 4:
 608                         T7      = 0;
 609                 // Fallthrough
 610                 case 5:
 611                         break;
 612                 default:
 613                         DE_ASSERT(false);
 614         }
 615
 616         const deUint32 T = (T7 << 7) | (T56 << 5) | (T4 << 4) | (T23 << 2) | (T01 << 0);
 617
 618         static const deUint32 tritsFromT[256][5] =
 619         {
 620                 { 0,0,0,0,0 }, { 1,0,0,0,0 }, { 2,0,0,0,0 }, { 0,0,2,0,0 }, { 0,1,0,0,0 }, { 1,1,0,0,0 }, { 2,1,0,0,0 }, { 1,0,2,0,0 }, { 0,2,0,0,0 }, { 1,2,0,0,0 }, { 2,2,0,0,0 }, { 2,0,2,0,0 }, { 0,2,2,0,0 }, { 1,2,2,0,0 }, { 2,2,2,0,0 }, { 2,0,2,0,0 },
 621                 { 0,0,1,0,0 }, { 1,0,1,0,0 }, { 2,0,1,0,0 }, { 0,1,2,0,0 }, { 0,1,1,0,0 }, { 1,1,1,0,0 }, { 2,1,1,0,0 }, { 1,1,2,0,0 }, { 0,2,1,0,0 }, { 1,2,1,0,0 }, { 2,2,1,0,0 }, { 2,1,2,0,0 }, { 0,0,0,2,2 }, { 1,0,0,2,2 }, { 2,0,0,2,2 }, { 0,0,2,2,2 },
 622                 { 0,0,0,1,0 }, { 1,0,0,1,0 }, { 2,0,0,1,0 }, { 0,0,2,1,0 }, { 0,1,0,1,0 }, { 1,1,0,1,0 }, { 2,1,0,1,0 }, { 1,0,2,1,0 }, { 0,2,0,1,0 }, { 1,2,0,1,0 }, { 2,2,0,1,0 }, { 2,0,2,1,0 }, { 0,2,2,1,0 }, { 1,2,2,1,0 }, { 2,2,2,1,0 }, { 2,0,2,1,0 },
 623                 { 0,0,1,1,0 }, { 1,0,1,1,0 }, { 2,0,1,1,0 }, { 0,1,2,1,0 }, { 0,1,1,1,0 }, { 1,1,1,1,0 }, { 2,1,1,1,0 }, { 1,1,2,1,0 }, { 0,2,1,1,0 }, { 1,2,1,1,0 }, { 2,2,1,1,0 }, { 2,1,2,1,0 }, { 0,1,0,2,2 }, { 1,1,0,2,2 }, { 2,1,0,2,2 }, { 1,0,2,2,2 },
 624                 { 0,0,0,2,0 }, { 1,0,0,2,0 }, { 2,0,0,2,0 }, { 0,0,2,2,0 }, { 0,1,0,2,0 }, { 1,1,0,2,0 }, { 2,1,0,2,0 }, { 1,0,2,2,0 }, { 0,2,0,2,0 }, { 1,2,0,2,0 }, { 2,2,0,2,0 }, { 2,0,2,2,0 }, { 0,2,2,2,0 }, { 1,2,2,2,0 }, { 2,2,2,2,0 }, { 2,0,2,2,0 },
 625                 { 0,0,1,2,0 }, { 1,0,1,2,0 }, { 2,0,1,2,0 }, { 0,1,2,2,0 }, { 0,1,1,2,0 }, { 1,1,1,2,0 }, { 2,1,1,2,0 }, { 1,1,2,2,0 }, { 0,2,1,2,0 }, { 1,2,1,2,0 }, { 2,2,1,2,0 }, { 2,1,2,2,0 }, { 0,2,0,2,2 }, { 1,2,0,2,2 }, { 2,2,0,2,2 }, { 2,0,2,2,2 },
 626                 { 0,0,0,0,2 }, { 1,0,0,0,2 }, { 2,0,0,0,2 }, { 0,0,2,0,2 }, { 0,1,0,0,2 }, { 1,1,0,0,2 }, { 2,1,0,0,2 }, { 1,0,2,0,2 }, { 0,2,0,0,2 }, { 1,2,0,0,2 }, { 2,2,0,0,2 }, { 2,0,2,0,2 }, { 0,2,2,0,2 }, { 1,2,2,0,2 }, { 2,2,2,0,2 }, { 2,0,2,0,2 },
 627                 { 0,0,1,0,2 }, { 1,0,1,0,2 }, { 2,0,1,0,2 }, { 0,1,2,0,2 }, { 0,1,1,0,2 }, { 1,1,1,0,2 }, { 2,1,1,0,2 }, { 1,1,2,0,2 }, { 0,2,1,0,2 }, { 1,2,1,0,2 }, { 2,2,1,0,2 }, { 2,1,2,0,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,0,2,2,2 },
 628                 { 0,0,0,0,1 }, { 1,0,0,0,1 }, { 2,0,0,0,1 }, { 0,0,2,0,1 }, { 0,1,0,0,1 }, { 1,1,0,0,1 }, { 2,1,0,0,1 }, { 1,0,2,0,1 }, { 0,2,0,0,1 }, { 1,2,0,0,1 }, { 2,2,0,0,1 }, { 2,0,2,0,1 }, { 0,2,2,0,1 }, { 1,2,2,0,1 }, { 2,2,2,0,1 }, { 2,0,2,0,1 },
 629                 { 0,0,1,0,1 }, { 1,0,1,0,1 }, { 2,0,1,0,1 }, { 0,1,2,0,1 }, { 0,1,1,0,1 }, { 1,1,1,0,1 }, { 2,1,1,0,1 }, { 1,1,2,0,1 }, { 0,2,1,0,1 }, { 1,2,1,0,1 }, { 2,2,1,0,1 }, { 2,1,2,0,1 }, { 0,0,1,2,2 }, { 1,0,1,2,2 }, { 2,0,1,2,2 }, { 0,1,2,2,2 },
 630                 { 0,0,0,1,1 }, { 1,0,0,1,1 }, { 2,0,0,1,1 }, { 0,0,2,1,1 }, { 0,1,0,1,1 }, { 1,1,0,1,1 }, { 2,1,0,1,1 }, { 1,0,2,1,1 }, { 0,2,0,1,1 }, { 1,2,0,1,1 }, { 2,2,0,1,1 }, { 2,0,2,1,1 }, { 0,2,2,1,1 }, { 1,2,2,1,1 }, { 2,2,2,1,1 }, { 2,0,2,1,1 },
 631                 { 0,0,1,1,1 }, { 1,0,1,1,1 }, { 2,0,1,1,1 }, { 0,1,2,1,1 }, { 0,1,1,1,1 }, { 1,1,1,1,1 }, { 2,1,1,1,1 }, { 1,1,2,1,1 }, { 0,2,1,1,1 }, { 1,2,1,1,1 }, { 2,2,1,1,1 }, { 2,1,2,1,1 }, { 0,1,1,2,2 }, { 1,1,1,2,2 }, { 2,1,1,2,2 }, { 1,1,2,2,2 },
 632                 { 0,0,0,2,1 }, { 1,0,0,2,1 }, { 2,0,0,2,1 }, { 0,0,2,2,1 }, { 0,1,0,2,1 }, { 1,1,0,2,1 }, { 2,1,0,2,1 }, { 1,0,2,2,1 }, { 0,2,0,2,1 }, { 1,2,0,2,1 }, { 2,2,0,2,1 }, { 2,0,2,2,1 }, { 0,2,2,2,1 }, { 1,2,2,2,1 }, { 2,2,2,2,1 }, { 2,0,2,2,1 },
 633                 { 0,0,1,2,1 }, { 1,0,1,2,1 }, { 2,0,1,2,1 }, { 0,1,2,2,1 }, { 0,1,1,2,1 }, { 1,1,1,2,1 }, { 2,1,1,2,1 }, { 1,1,2,2,1 }, { 0,2,1,2,1 }, { 1,2,1,2,1 }, { 2,2,1,2,1 }, { 2,1,2,2,1 }, { 0,2,1,2,2 }, { 1,2,1,2,2 }, { 2,2,1,2,2 }, { 2,1,2,2,2 },
 634                 { 0,0,0,1,2 }, { 1,0,0,1,2 }, { 2,0,0,1,2 }, { 0,0,2,1,2 }, { 0,1,0,1,2 }, { 1,1,0,1,2 }, { 2,1,0,1,2 }, { 1,0,2,1,2 }, { 0,2,0,1,2 }, { 1,2,0,1,2 }, { 2,2,0,1,2 }, { 2,0,2,1,2 }, { 0,2,2,1,2 }, { 1,2,2,1,2 }, { 2,2,2,1,2 }, { 2,0,2,1,2 },
 635                 { 0,0,1,1,2 }, { 1,0,1,1,2 }, { 2,0,1,1,2 }, { 0,1,2,1,2 }, { 0,1,1,1,2 }, { 1,1,1,1,2 }, { 2,1,1,1,2 }, { 1,1,2,1,2 }, { 0,2,1,1,2 }, { 1,2,1,1,2 }, { 2,2,1,1,2 }, { 2,1,2,1,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,1,2,2,2 }
 636         };
 637
 638         const deUint32 (& trits)[5] = tritsFromT[T];
 639
 640         for (int i = 0; i < numValues; i++)
 641         {
 642                 dst[i].m        = m[i];
 643                 dst[i].tq       = trits[i];
 644                 dst[i].v        = (trits[i] << numBits) + m[i];
 645         }
 646 }
 647
 648 void decodeISEQuintBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits)
 649 {
 650         DE_ASSERT(de::inRange(numValues, 1, 3));
 651
 652         deUint32 m[3];
 653
 654         m[0]                    = data.getNext(numBits);
 655         deUint32 Q012   = data.getNext(3);
 656         m[1]                    = data.getNext(numBits);
 657         deUint32 Q34    = data.getNext(2);
 658         m[2]                    = data.getNext(numBits);
 659         deUint32 Q56    = data.getNext(2);
 660
 661         switch (numValues)
 662         {
 663                 case 1:
 664                         Q34     = 0;
 665                 // Fallthrough
 666                 case 2:
 667                         Q56     = 0;
 668                 // Fallthrough
 669                 case 3:
 670                         break;
 671                 default:
 672                         DE_ASSERT(false);
 673         }
 674
 675         const deUint32 Q = (Q56 << 5) | (Q34 << 3) | (Q012 << 0);
 676
 677         static const deUint32 quintsFromQ[256][3] =
 678         {
 679                 { 0,0,0 }, { 1,0,0 }, { 2,0,0 }, { 3,0,0 }, { 4,0,0 }, { 0,4,0 }, { 4,4,0 }, { 4,4,4 }, { 0,1,0 }, { 1,1,0 }, { 2,1,0 }, { 3,1,0 }, { 4,1,0 }, { 1,4,0 }, { 4,4,1 }, { 4,4,4 },
 680                 { 0,2,0 }, { 1,2,0 }, { 2,2,0 }, { 3,2,0 }, { 4,2,0 }, { 2,4,0 }, { 4,4,2 }, { 4,4,4 }, { 0,3,0 }, { 1,3,0 }, { 2,3,0 }, { 3,3,0 }, { 4,3,0 }, { 3,4,0 }, { 4,4,3 }, { 4,4,4 },
 681                 { 0,0,1 }, { 1,0,1 }, { 2,0,1 }, { 3,0,1 }, { 4,0,1 }, { 0,4,1 }, { 4,0,4 }, { 0,4,4 }, { 0,1,1 }, { 1,1,1 }, { 2,1,1 }, { 3,1,1 }, { 4,1,1 }, { 1,4,1 }, { 4,1,4 }, { 1,4,4 },
 682                 { 0,2,1 }, { 1,2,1 }, { 2,2,1 }, { 3,2,1 }, { 4,2,1 }, { 2,4,1 }, { 4,2,4 }, { 2,4,4 }, { 0,3,1 }, { 1,3,1 }, { 2,3,1 }, { 3,3,1 }, { 4,3,1 }, { 3,4,1 }, { 4,3,4 }, { 3,4,4 },
 683                 { 0,0,2 }, { 1,0,2 }, { 2,0,2 }, { 3,0,2 }, { 4,0,2 }, { 0,4,2 }, { 2,0,4 }, { 3,0,4 }, { 0,1,2 }, { 1,1,2 }, { 2,1,2 }, { 3,1,2 }, { 4,1,2 }, { 1,4,2 }, { 2,1,4 }, { 3,1,4 },
 684                 { 0,2,2 }, { 1,2,2 }, { 2,2,2 }, { 3,2,2 }, { 4,2,2 }, { 2,4,2 }, { 2,2,4 }, { 3,2,4 }, { 0,3,2 }, { 1,3,2 }, { 2,3,2 }, { 3,3,2 }, { 4,3,2 }, { 3,4,2 }, { 2,3,4 }, { 3,3,4 },
 685                 { 0,0,3 }, { 1,0,3 }, { 2,0,3 }, { 3,0,3 }, { 4,0,3 }, { 0,4,3 }, { 0,0,4 }, { 1,0,4 }, { 0,1,3 }, { 1,1,3 }, { 2,1,3 }, { 3,1,3 }, { 4,1,3 }, { 1,4,3 }, { 0,1,4 }, { 1,1,4 },
 686                 { 0,2,3 }, { 1,2,3 }, { 2,2,3 }, { 3,2,3 }, { 4,2,3 }, { 2,4,3 }, { 0,2,4 }, { 1,2,4 }, { 0,3,3 }, { 1,3,3 }, { 2,3,3 }, { 3,3,3 }, { 4,3,3 }, { 3,4,3 }, { 0,3,4 }, { 1,3,4 }
 687         };
 688
 689         const deUint32 (& quints)[3] = quintsFromQ[Q];
 690
 691         for (int i = 0; i < numValues; i++)
 692         {
 693                 dst[i].m        = m[i];
 694                 dst[i].tq       = quints[i];
 695                 dst[i].v        = (quints[i] << numBits) + m[i];
 696         }
 697 }
 698
 699 inline void decodeISEBitBlock (ISEDecodedResult* dst, BitAccessStream& data, int numBits)
 700 {
 701         dst[0].m = data.getNext(numBits);
 702         dst[0].v = dst[0].m;
 703 }
 704
 705 void decodeISE (ISEDecodedResult* dst, int numValues, BitAccessStream& data, const ISEParams& params)
 706 {
 707         if (params.mode == ISEMODE_TRIT)
 708         {
 709                 const int numBlocks = deDivRoundUp32(numValues, 5);
 710                 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
 711                 {
 712                         const int numValuesInBlock = blockNdx == numBlocks-1 ? numValues - 5*(numBlocks-1) : 5;
 713                         decodeISETritBlock(&dst[5*blockNdx], numValuesInBlock, data, params.numBits);
 714                 }
 715         }
 716         else if (params.mode == ISEMODE_QUINT)
 717         {
 718                 const int numBlocks = deDivRoundUp32(numValues, 3);
 719                 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
 720                 {
 721                         const int numValuesInBlock = blockNdx == numBlocks-1 ? numValues - 3*(numBlocks-1) : 3;
 722                         decodeISEQuintBlock(&dst[3*blockNdx], numValuesInBlock, data, params.numBits);
 723                 }
 724         }
 725         else
 726         {
 727                 DE_ASSERT(params.mode == ISEMODE_PLAIN_BIT);
 728                 for (int i = 0; i < numValues; i++)
 729                         decodeISEBitBlock(&dst[i], data, params.numBits);
 730         }
 731 }
 732
 733 void unquantizeColorEndpoints (deUint32* dst, const ISEDecodedResult* iseResults, int numEndpoints, const ISEParams& iseParams)
 734 {
 735         if (iseParams.mode == ISEMODE_TRIT || iseParams.mode == ISEMODE_QUINT)
 736         {
 737                 const int rangeCase                             = iseParams.numBits*2 - (iseParams.mode == ISEMODE_TRIT ? 2 : 1);
 738                 DE_ASSERT(de::inRange(rangeCase, 0, 10));
 739                 static const deUint32   Ca[11]  = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 };
 740                 const deUint32                  C               = Ca[rangeCase];
 741
 742                 for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++)
 743                 {
 744                         const deUint32 a = getBit(iseResults[endpointNdx].m, 0);
 745                         const deUint32 b = getBit(iseResults[endpointNdx].m, 1);
 746                         const deUint32 c = getBit(iseResults[endpointNdx].m, 2);
 747                         const deUint32 d = getBit(iseResults[endpointNdx].m, 3);
 748                         const deUint32 e = getBit(iseResults[endpointNdx].m, 4);
 749                         const deUint32 f = getBit(iseResults[endpointNdx].m, 5);
 750
 751                         const deUint32 A = a == 0 ? 0 : (1<<9)-1;
 752                         const deUint32 B = rangeCase == 0       ? 0
 753                                                          : rangeCase == 1       ? 0
 754                                                          : rangeCase == 2       ? (b << 8) |                                                                    (b << 4) |                              (b << 2) |      (b << 1)
 755                                                          : rangeCase == 3       ? (b << 8) |                                                                                            (b << 3) |      (b << 2)
 756                                                          : rangeCase == 4       ? (c << 8) | (b << 7) |                                                                         (c << 3) |      (b << 2) |      (c << 1) |      (b << 0)
 757                                                          : rangeCase == 5       ? (c << 8) | (b << 7) |                                                                                                 (c << 2) |      (b << 1) |      (c << 0)
 758                                                          : rangeCase == 6       ? (d << 8) | (c << 7) | (b << 6) |                                                                              (d << 2) |      (c << 1) |      (b << 0)
 759                                                          : rangeCase == 7       ? (d << 8) | (c << 7) | (b << 6) |                                                                                                      (d << 1) |      (c << 0)
 760                                                          : rangeCase == 8       ? (e << 8) | (d << 7) | (c << 6) | (b << 5) |                                                                           (e << 1) |      (d << 0)
 761                                                          : rangeCase == 9       ? (e << 8) | (d << 7) | (c << 6) | (b << 5) |                                                                                                   (e << 0)
 762                                                          : rangeCase == 10      ? (f << 8) | (e << 7) | (d << 6) | (c << 5) |   (b << 4) |                                                                              (f << 0)
 763                                                          : (deUint32)-1;
 764                         DE_ASSERT(B != (deUint32)-1);
 765
 766                         dst[endpointNdx] = (((iseResults[endpointNdx].tq*C + B) ^ A) >> 2) | (A & 0x80);
 767                 }
 768         }
 769         else
 770         {
 771                 DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);
 772
 773                 for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++)
 774                         dst[endpointNdx] = bitReplicationScale(iseResults[endpointNdx].v, iseParams.numBits, 8);
 775         }
 776 }
 777
 778 inline void bitTransferSigned (deInt32& a, deInt32& b)
 779 {
 780         b >>= 1;
 781         b |= a & 0x80;
 782         a >>= 1;
 783         a &= 0x3f;
 784         if (isBitSet(a, 5))
 785                 a -= 0x40;
 786 }
 787
 788 inline UVec4 clampedRGBA (const IVec4& rgba)
 789 {
 790         return UVec4(de::clamp(rgba.x(), 0, 0xff),
 791                                  de::clamp(rgba.y(), 0, 0xff),
 792                                  de::clamp(rgba.z(), 0, 0xff),
 793                                  de::clamp(rgba.w(), 0, 0xff));
 794 }
 795
 796 inline IVec4 blueContract (int r, int g, int b, int a)
 797 {
 798         return IVec4((r+b)>>1, (g+b)>>1, b, a);
 799 }
 800
 801 inline bool isColorEndpointModeHDR (deUint32 mode)
 802 {
 803         return mode == 2        ||
 804                    mode == 3    ||
 805                    mode == 7    ||
 806                    mode == 11   ||
 807                    mode == 14   ||
 808                    mode == 15;
 809 }
 810
 811 void decodeHDREndpointMode7 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3)
 812 {
 813         const deUint32 m10              = getBit(v1, 7) | (getBit(v2, 7) << 1);
 814         const deUint32 m23              = getBits(v0, 6, 7);
 815         const deUint32 majComp  = m10 != 3      ? m10
 816                                                         : m23 != 3      ? m23
 817                                                         :                         0;
 818         const deUint32 mode             = m10 != 3      ? m23
 819                                                         : m23 != 3      ? 4
 820                                                         :                         5;
 821
 822         deInt32                 red             = (deInt32)getBits(v0, 0, 5);
 823         deInt32                 green   = (deInt32)getBits(v1, 0, 4);
 824         deInt32                 blue    = (deInt32)getBits(v2, 0, 4);
 825         deInt32                 scale   = (deInt32)getBits(v3, 0, 4);
 826
 827         {
 828 #define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
 829 #define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5, V6,S6) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); SHOR(V6,S6,x6); } while (false)
 830
 831                 const deUint32  x0      = getBit(v1, 6);
 832                 const deUint32  x1      = getBit(v1, 5);
 833                 const deUint32  x2      = getBit(v2, 6);
 834                 const deUint32  x3      = getBit(v2, 5);
 835                 const deUint32  x4      = getBit(v3, 7);
 836                 const deUint32  x5      = getBit(v3, 6);
 837                 const deUint32  x6      = getBit(v3, 5);
 838
 839                 deInt32&                R       = red;
 840                 deInt32&                G       = green;
 841                 deInt32&                B       = blue;
 842                 deInt32&                S       = scale;
 843
 844                 switch (mode)
 845                 {
 846                         case 0: ASSIGN_X_BITS(R,9,  R,8,  R,7,  R,10,  R,6,  S,6,   S,5); break;
 847                         case 1: ASSIGN_X_BITS(R,8,  G,5,  R,7,  B,5,   R,6,  R,10,  R,9); break;
 848                         case 2: ASSIGN_X_BITS(R,9,  R,8,  R,7,  R,6,   S,7,  S,6,   S,5); break;
 849                         case 3: ASSIGN_X_BITS(R,8,  G,5,  R,7,  B,5,   R,6,  S,6,   S,5); break;
 850                         case 4: ASSIGN_X_BITS(G,6,  G,5,  B,6,  B,5,   R,6,  R,7,   S,5); break;
 851                         case 5: ASSIGN_X_BITS(G,6,  G,5,  B,6,  B,5,   R,6,  S,6,   S,5); break;
 852                         default:
 853                                 DE_ASSERT(false);
 854                 }
 855
 856 #undef ASSIGN_X_BITS
 857 #undef SHOR
 858         }
 859
 860         static const int shiftAmounts[] = { 1, 1, 2, 3, 4, 5 };
 861         DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(shiftAmounts));
 862
 863         red             <<= shiftAmounts[mode];
 864         green   <<= shiftAmounts[mode];
 865         blue    <<= shiftAmounts[mode];
 866         scale   <<= shiftAmounts[mode];
 867
 868         if (mode != 5)
 869         {
 870                 green   = red - green;
 871                 blue    = red - blue;
 872         }
 873
 874         if (majComp == 1)
 875                 std::swap(red, green);
 876         else if (majComp == 2)
 877                 std::swap(red, blue);
 878
 879         e0 = UVec4(de::clamp(red        - scale,        0, 0xfff),
 880                            de::clamp(green      - scale,        0, 0xfff),
 881                            de::clamp(blue       - scale,        0, 0xfff),
 882                            0x780);
 883
 884         e1 = UVec4(de::clamp(red,                               0, 0xfff),
 885                            de::clamp(green,                             0, 0xfff),
 886                            de::clamp(blue,                              0, 0xfff),
 887                            0x780);
 888 }
 889
 890 void decodeHDREndpointMode11 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5)
 891 {
 892         const deUint32 major = (getBit(v5, 7) << 1) | getBit(v4, 7);
 893
 894         if (major == 3)
 895         {
 896                 e0 = UVec4(v0<<4, v2<<4, getBits(v4,0,6)<<5, 0x780);
 897                 e1 = UVec4(v1<<4, v3<<4, getBits(v5,0,6)<<5, 0x780);
 898         }
 899         else
 900         {
 901                 const deUint32 mode = (getBit(v3, 7) << 2) | (getBit(v2, 7) << 1) | getBit(v1, 7);
 902
 903                 deInt32 a       = (deInt32)((getBit(v1, 6) << 8) | v0);
 904                 deInt32 c       = (deInt32)(getBits(v1, 0, 5));
 905                 deInt32 b0      = (deInt32)(getBits(v2, 0, 5));
 906                 deInt32 b1      = (deInt32)(getBits(v3, 0, 5));
 907                 deInt32 d0      = (deInt32)(getBits(v4, 0, 4));
 908                 deInt32 d1      = (deInt32)(getBits(v5, 0, 4));
 909
 910                 {
 911 #define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
 912 #define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); } while (false)
 913
 914                         const deUint32 x0 = getBit(v2, 6);
 915                         const deUint32 x1 = getBit(v3, 6);
 916                         const deUint32 x2 = getBit(v4, 6);
 917                         const deUint32 x3 = getBit(v5, 6);
 918                         const deUint32 x4 = getBit(v4, 5);
 919                         const deUint32 x5 = getBit(v5, 5);
 920
 921                         switch (mode)
 922                         {
 923                                 case 0: ASSIGN_X_BITS(b0,6,  b1,6,   d0,6,  d1,6,  d0,5,  d1,5); break;
 924                                 case 1: ASSIGN_X_BITS(b0,6,  b1,6,   b0,7,  b1,7,  d0,5,  d1,5); break;
 925                                 case 2: ASSIGN_X_BITS(a,9,   c,6,    d0,6,  d1,6,  d0,5,  d1,5); break;
 926                                 case 3: ASSIGN_X_BITS(b0,6,  b1,6,   a,9,   c,6,   d0,5,  d1,5); break;
 927                                 case 4: ASSIGN_X_BITS(b0,6,  b1,6,   b0,7,  b1,7,  a,9,   a,10); break;
 928                                 case 5: ASSIGN_X_BITS(a,9,   a,10,   c,7,   c,6,   d0,5,  d1,5); break;
 929                                 case 6: ASSIGN_X_BITS(b0,6,  b1,6,   a,11,  c,6,   a,9,   a,10); break;
 930                                 case 7: ASSIGN_X_BITS(a,9,   a,10,   a,11,  c,6,   d0,5,  d1,5); break;
 931                                 default:
 932                                         DE_ASSERT(false);
 933                         }
 934
 935 #undef ASSIGN_X_BITS
 936 #undef SHOR
 937                 }
 938
 939                 static const int numDBits[] = { 7, 6, 7, 6, 5, 6, 5, 6 };
 940                 DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(numDBits));
 941
 942                 d0 = signExtend(d0, numDBits[mode]);
 943                 d1 = signExtend(d1, numDBits[mode]);
 944
 945                 const int shiftAmount = (mode >> 1) ^ 3;
 946                 a       <<= shiftAmount;
 947                 c       <<= shiftAmount;
 948                 b0      <<= shiftAmount;
 949                 b1      <<= shiftAmount;
 950                 d0      <<= shiftAmount;
 951                 d1      <<= shiftAmount;
 952
 953                 e0 = UVec4(de::clamp(a-c,                       0, 0xfff),
 954                                    de::clamp(a-b0-c-d0,         0, 0xfff),
 955                                    de::clamp(a-b1-c-d1,         0, 0xfff),
 956                                    0x780);
 957
 958                 e1 = UVec4(de::clamp(a,                         0, 0xfff),
 959                                    de::clamp(a-b0,                      0, 0xfff),
 960                                    de::clamp(a-b1,                      0, 0xfff),
 961                                    0x780);
 962
 963                 if (major == 1)
 964                 {
 965                         std::swap(e0.x(), e0.y());
 966                         std::swap(e1.x(), e1.y());
 967                 }
 968                 else if (major == 2)
 969                 {
 970                         std::swap(e0.x(), e0.z());
 971                         std::swap(e1.x(), e1.z());
 972                 }
 973         }
 974 }
 975
 976 void decodeHDREndpointMode15(UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5, deUint32 v6In, deUint32 v7In)
 977 {
 978         decodeHDREndpointMode11(e0, e1, v0, v1, v2, v3, v4, v5);
 979
 980         const deUint32  mode    = (getBit(v7In, 7) << 1) | getBit(v6In, 7);
 981         deInt32                 v6              = (deInt32)getBits(v6In, 0, 6);
 982         deInt32                 v7              = (deInt32)getBits(v7In, 0, 6);
 983
 984         if (mode == 3)
 985         {
 986                 e0.w() = v6 << 5;
 987                 e1.w() = v7 << 5;
 988         }
 989         else
 990         {
 991                 v6 |= (v7 << (mode+1)) & 0x780;
 992                 v7 &= (0x3f >> mode);
 993                 v7 ^= 0x20 >> mode;
 994                 v7 -= 0x20 >> mode;
 995                 v6 <<= 4-mode;
 996                 v7 <<= 4-mode;
 997
 998                 v7 += v6;
 999                 v7 = de::clamp(v7, 0, 0xfff);
1000                 e0.w() = v6;
1001                 e1.w() = v7;
1002         }
1003 }
1004
1005 void decodeColorEndpoints (ColorEndpointPair* dst, const deUint32* unquantizedEndpoints, const deUint32* endpointModes, int numPartitions)
1006 {
1007         int unquantizedNdx = 0;
1008
1009         for (int partitionNdx = 0; partitionNdx < numPartitions; partitionNdx++)
1010         {
1011                 const deUint32          endpointMode    = endpointModes[partitionNdx];
1012                 const deUint32*         v                               = &unquantizedEndpoints[unquantizedNdx];
1013                 UVec4&                          e0                              = dst[partitionNdx].e0;
1014                 UVec4&                          e1                              = dst[partitionNdx].e1;
1015
1016                 unquantizedNdx += computeNumColorEndpointValues(endpointMode);
1017
1018                 switch (endpointMode)
1019                 {
1020                         case 0:
1021                                 e0 = UVec4(v[0], v[0], v[0], 0xff);
1022                                 e1 = UVec4(v[1], v[1], v[1], 0xff);
1023                                 break;
1024
1025                         case 1:
1026                         {
1027                                 const deUint32 L0 = (v[0] >> 2) | (getBits(v[1], 6, 7) << 6);
1028                                 const deUint32 L1 = de::min(0xffu, L0 + getBits(v[1], 0, 5));
1029                                 e0 = UVec4(L0, L0, L0, 0xff);
1030                                 e1 = UVec4(L1, L1, L1, 0xff);
1031                                 break;
1032                         }
1033
1034                         case 2:
1035                         {
1036                                 const deUint32 v1Gr             = v[1] >= v[0];
1037                                 const deUint32 y0               = v1Gr ? v[0]<<4 : (v[1]<<4) + 8;
1038                                 const deUint32 y1               = v1Gr ? v[1]<<4 : (v[0]<<4) - 8;
1039
1040                                 e0 = UVec4(y0, y0, y0, 0x780);
1041                                 e1 = UVec4(y1, y1, y1, 0x780);
1042                                 break;
1043                         }
1044
1045                         case 3:
1046                         {
1047                                 const bool              m       = isBitSet(v[0], 7);
1048                                 const deUint32  y0      = m ? (getBits(v[1], 5, 7) << 9) | (getBits(v[0], 0, 6) << 2)
1049                                                                                 : (getBits(v[1], 4, 7) << 8) | (getBits(v[0], 0, 6) << 1);
1050                                 const deUint32  d       = m ? getBits(v[1], 0, 4) << 2
1051                                                                                 : getBits(v[1], 0, 3) << 1;
1052                                 const deUint32  y1      = de::min(0xfffu, y0+d);
1053
1054                                 e0 = UVec4(y0, y0, y0, 0x780);
1055                                 e1 = UVec4(y1, y1, y1, 0x780);
1056                                 break;
1057                         }
1058
1059                         case 4:
1060                                 e0 = UVec4(v[0], v[0], v[0], v[2]);
1061                                 e1 = UVec4(v[1], v[1], v[1], v[3]);
1062                                 break;
1063
1064                         case 5:
1065                         {
1066                                 deInt32 v0 = (deInt32)v[0];
1067                                 deInt32 v1 = (deInt32)v[1];
1068                                 deInt32 v2 = (deInt32)v[2];
1069                                 deInt32 v3 = (deInt32)v[3];
1070                                 bitTransferSigned(v1, v0);
1071                                 bitTransferSigned(v3, v2);
1072
1073                                 e0 = clampedRGBA(IVec4(v0,              v0,             v0,             v2));
1074                                 e1 = clampedRGBA(IVec4(v0+v1,   v0+v1,  v0+v1,  v2+v3));
1075                                 break;
1076                         }
1077
1078                         case 6:
1079                                 e0 = UVec4((v[0]*v[3]) >> 8,    (v[1]*v[3]) >> 8,       (v[2]*v[3]) >> 8,       0xff);
1080                                 e1 = UVec4(v[0],                                v[1],                           v[2],                           0xff);
1081                                 break;
1082
1083                         case 7:
1084                                 decodeHDREndpointMode7(e0, e1, v[0], v[1], v[2], v[3]);
1085                                 break;
1086
1087                         case 8:
1088                                 if (v[1]+v[3]+v[5] >= v[0]+v[2]+v[4])
1089                                 {
1090                                         e0 = UVec4(v[0], v[2], v[4], 0xff);
1091                                         e1 = UVec4(v[1], v[3], v[5], 0xff);
1092                                 }
1093                                 else
1094                                 {
1095                                         e0 = blueContract(v[1], v[3], v[5], 0xff).asUint();
1096                                         e1 = blueContract(v[0], v[2], v[4], 0xff).asUint();
1097                                 }
1098                                 break;
1099
1100                         case 9:
1101                         {
1102                                 deInt32 v0 = (deInt32)v[0];
1103                                 deInt32 v1 = (deInt32)v[1];
1104                                 deInt32 v2 = (deInt32)v[2];
1105                                 deInt32 v3 = (deInt32)v[3];
1106                                 deInt32 v4 = (deInt32)v[4];
1107                                 deInt32 v5 = (deInt32)v[5];
1108                                 bitTransferSigned(v1, v0);
1109                                 bitTransferSigned(v3, v2);
1110                                 bitTransferSigned(v5, v4);
1111
1112                                 if (v1+v3+v5 >= 0)
1113                                 {
1114                                         e0 = clampedRGBA(IVec4(v0,              v2,             v4,             0xff));
1115                                         e1 = clampedRGBA(IVec4(v0+v1,   v2+v3,  v4+v5,  0xff));
1116                                 }
1117                                 else
1118                                 {
1119                                         e0 = clampedRGBA(blueContract(v0+v1,    v2+v3,  v4+v5,  0xff));
1120                                         e1 = clampedRGBA(blueContract(v0,               v2,             v4,             0xff));
1121                                 }
1122                                 break;
1123                         }
1124
1125                         case 10:
1126                                 e0 = UVec4((v[0]*v[3]) >> 8,    (v[1]*v[3]) >> 8,       (v[2]*v[3]) >> 8,       v[4]);
1127                                 e1 = UVec4(v[0],                                v[1],                           v[2],                           v[5]);
1128                                 break;
1129
1130                         case 11:
1131                                 decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
1132                                 break;
1133
1134                         case 12:
1135                                 if (v[1]+v[3]+v[5] >= v[0]+v[2]+v[4])
1136                                 {
1137                                         e0 = UVec4(v[0], v[2], v[4], v[6]);
1138                                         e1 = UVec4(v[1], v[3], v[5], v[7]);
1139                                 }
1140                                 else
1141                                 {
1142                                         e0 = clampedRGBA(blueContract(v[1], v[3], v[5], v[7]));
1143                                         e1 = clampedRGBA(blueContract(v[0], v[2], v[4], v[6]));
1144                                 }
1145                                 break;
1146
1147                         case 13:
1148                         {
1149                                 deInt32 v0 = (deInt32)v[0];
1150                                 deInt32 v1 = (deInt32)v[1];
1151                                 deInt32 v2 = (deInt32)v[2];
1152                                 deInt32 v3 = (deInt32)v[3];
1153                                 deInt32 v4 = (deInt32)v[4];
1154                                 deInt32 v5 = (deInt32)v[5];
1155                                 deInt32 v6 = (deInt32)v[6];
1156                                 deInt32 v7 = (deInt32)v[7];
1157                                 bitTransferSigned(v1, v0);
1158                                 bitTransferSigned(v3, v2);
1159                                 bitTransferSigned(v5, v4);
1160                                 bitTransferSigned(v7, v6);
1161
1162                                 if (v1+v3+v5 >= 0)
1163                                 {
1164                                         e0 = clampedRGBA(IVec4(v0,              v2,             v4,             v6));
1165                                         e1 = clampedRGBA(IVec4(v0+v1,   v2+v3,  v4+v5,  v6+v7));
1166                                 }
1167                                 else
1168                                 {
1169                                         e0 = clampedRGBA(blueContract(v0+v1,    v2+v3,  v4+v5,  v6+v7));
1170                                         e1 = clampedRGBA(blueContract(v0,               v2,             v4,             v6));
1171                                 }
1172
1173                                 break;
1174                         }
1175
1176                         case 14:
1177                                 decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
1178                                 e0.w() = v[6];
1179                                 e1.w() = v[7];
1180                                 break;
1181
1182                         case 15:
1183                                 decodeHDREndpointMode15(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
1184                                 break;
1185
1186                         default:
1187                                 DE_ASSERT(false);
1188                 }
1189         }
1190 }
1191
1192 void computeColorEndpoints (ColorEndpointPair* dst, const Block128& blockData, const deUint32* endpointModes, int numPartitions, int numColorEndpointValues, const ISEParams& iseParams, int numBitsAvailable)
1193 {
1194         const int                       colorEndpointDataStart = numPartitions == 1 ? 17 : 29;
1195         ISEDecodedResult        colorEndpointData[18];
1196
1197         {
1198                 BitAccessStream dataStream(blockData, colorEndpointDataStart, numBitsAvailable, true);
1199                 decodeISE(&colorEndpointData[0], numColorEndpointValues, dataStream, iseParams);
1200         }
1201
1202         {
1203                 deUint32 unquantizedEndpoints[18];
1204                 unquantizeColorEndpoints(&unquantizedEndpoints[0], &colorEndpointData[0], numColorEndpointValues, iseParams);
1205                 decodeColorEndpoints(dst, &unquantizedEndpoints[0], &endpointModes[0], numPartitions);
1206         }
1207 }
1208
1209 void unquantizeWeights (deUint32 dst[64], const ISEDecodedResult* weightGrid, const ASTCBlockMode& blockMode)
1210 {
1211         const int                       numWeights      = computeNumWeights(blockMode);
1212         const ISEParams&        iseParams       = blockMode.weightISEParams;
1213
1214         if (iseParams.mode == ISEMODE_TRIT || iseParams.mode == ISEMODE_QUINT)
1215         {
1216                 const int rangeCase = iseParams.numBits*2 + (iseParams.mode == ISEMODE_QUINT ? 1 : 0);
1217
1218                 if (rangeCase == 0 || rangeCase == 1)
1219                 {
1220                         static const deUint32 map0[3]   = { 0, 32, 63 };
1221                         static const deUint32 map1[5]   = { 0, 16, 32, 47, 63 };
1222                         const deUint32* const map               = rangeCase == 0 ? &map0[0] : &map1[0];
1223                         for (int i = 0; i < numWeights; i++)
1224                         {
1225                                 DE_ASSERT(weightGrid[i].v < (rangeCase == 0 ? 3u : 5u));
1226                                 dst[i] = map[weightGrid[i].v];
1227                         }
1228                 }
1229                 else
1230                 {
1231                         DE_ASSERT(rangeCase <= 6);
1232                         static const deUint32   Ca[5]   = { 50, 28, 23, 13, 11 };
1233                         const deUint32                  C               = Ca[rangeCase-2];
1234
1235                         for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
1236                         {
1237                                 const deUint32 a = getBit(weightGrid[weightNdx].m, 0);
1238                                 const deUint32 b = getBit(weightGrid[weightNdx].m, 1);
1239                                 const deUint32 c = getBit(weightGrid[weightNdx].m, 2);
1240
1241                                 const deUint32 A = a == 0 ? 0 : (1<<7)-1;
1242                                 const deUint32 B = rangeCase == 2 ? 0
1243                                                                  : rangeCase == 3 ? 0
1244                                                                  : rangeCase == 4 ? (b << 6) |                                  (b << 2) |                              (b << 0)
1245                                                                  : rangeCase == 5 ? (b << 6) |                                                          (b << 1)
1246                                                                  : rangeCase == 6 ? (c << 6) | (b << 5) |                                       (c << 1) |      (b << 0)
1247                                                                  : (deUint32)-1;
1248
1249                                 dst[weightNdx] = (((weightGrid[weightNdx].tq*C + B) ^ A) >> 2) | (A & 0x20);
1250                         }
1251                 }
1252         }
1253         else
1254         {
1255                 DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);
1256
1257                 for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
1258                         dst[weightNdx] = bitReplicationScale(weightGrid[weightNdx].v, iseParams.numBits, 6);
1259         }
1260
1261         for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
1262                 dst[weightNdx] += dst[weightNdx] > 32 ? 1 : 0;
1263
1264         // Initialize nonexistent weights to poison values
1265         for (int weightNdx = numWeights; weightNdx < 64; weightNdx++)
1266                 dst[weightNdx] = ~0u;
1267
1268 }
1269
1270 void interpolateWeights (TexelWeightPair* dst, const deUint32 (&unquantizedWeights) [64], int blockWidth, int blockHeight, const ASTCBlockMode& blockMode)
1271 {
1272         const int               numWeightsPerTexel      = blockMode.isDualPlane ? 2 : 1;
1273         const deUint32  scaleX                          = (1024 + blockWidth/2) / (blockWidth-1);
1274         const deUint32  scaleY                          = (1024 + blockHeight/2) / (blockHeight-1);
1275
1276         DE_ASSERT(blockMode.weightGridWidth*blockMode.weightGridHeight*numWeightsPerTexel <= DE_LENGTH_OF_ARRAY(unquantizedWeights));
1277
1278         for (int texelY = 0; texelY < blockHeight; texelY++)
1279         {
1280                 for (int texelX = 0; texelX < blockWidth; texelX++)
1281                 {
1282                         const deUint32 gX       = (scaleX*texelX*(blockMode.weightGridWidth-1) + 32) >> 6;
1283                         const deUint32 gY       = (scaleY*texelY*(blockMode.weightGridHeight-1) + 32) >> 6;
1284                         const deUint32 jX       = gX >> 4;
1285                         const deUint32 jY       = gY >> 4;
1286                         const deUint32 fX       = gX & 0xf;
1287                         const deUint32 fY       = gY & 0xf;
1288
1289                         const deUint32 w11      = (fX*fY + 8) >> 4;
1290                         const deUint32 w10      = fY - w11;
1291                         const deUint32 w01      = fX - w11;
1292                         const deUint32 w00      = 16 - fX - fY + w11;
1293
1294                         const deUint32 i00      = jY*blockMode.weightGridWidth + jX;
1295                         const deUint32 i01      = i00 + 1;
1296                         const deUint32 i10      = i00 + blockMode.weightGridWidth;
1297                         const deUint32 i11      = i00 + blockMode.weightGridWidth + 1;
1298
1299                         // These addresses can be out of bounds, but respective weights will be 0 then.
1300                         DE_ASSERT(deInBounds32(i00, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w00 == 0);
1301                         DE_ASSERT(deInBounds32(i01, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w01 == 0);
1302                         DE_ASSERT(deInBounds32(i10, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w10 == 0);
1303                         DE_ASSERT(deInBounds32(i11, 0, blockMode.weightGridWidth*blockMode.weightGridHeight) || w11 == 0);
1304
1305                         for (int texelWeightNdx = 0; texelWeightNdx < numWeightsPerTexel; texelWeightNdx++)
1306                         {
1307                                 // & 0x3f clamps address to bounds of unquantizedWeights
1308                                 const deUint32 p00      = unquantizedWeights[(i00 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
1309                                 const deUint32 p01      = unquantizedWeights[(i01 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
1310                                 const deUint32 p10      = unquantizedWeights[(i10 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
1311                                 const deUint32 p11      = unquantizedWeights[(i11 * numWeightsPerTexel + texelWeightNdx) & 0x3f];
1312
1313                                 dst[texelY*blockWidth + texelX].w[texelWeightNdx] = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1314                         }
1315                 }
1316         }
1317 }
1318
1319 void computeTexelWeights (TexelWeightPair* dst, const Block128& blockData, int blockWidth, int blockHeight, const ASTCBlockMode& blockMode)
1320 {
1321         ISEDecodedResult weightGrid[64];
1322
1323         {
1324                 BitAccessStream dataStream(blockData, 127, computeNumRequiredBits(blockMode.weightISEParams, computeNumWeights(blockMode)), false);
1325                 decodeISE(&weightGrid[0], computeNumWeights(blockMode), dataStream, blockMode.weightISEParams);
1326         }
1327
1328         {
1329                 deUint32 unquantizedWeights[64];
1330                 unquantizeWeights(&unquantizedWeights[0], &weightGrid[0], blockMode);
1331                 interpolateWeights(dst, unquantizedWeights, blockWidth, blockHeight, blockMode);
1332         }
1333 }
1334
1335 inline deUint32 hash52 (deUint32 v)
1336 {
1337         deUint32 p = v;
1338         p ^= p >> 15;   p -= p << 17;   p += p << 7;    p += p << 4;
1339         p ^= p >>  5;   p += p << 16;   p ^= p >> 7;    p ^= p >> 3;
1340         p ^= p <<  6;   p ^= p >> 17;
1341         return p;
1342 }
1343
1344 int computeTexelPartition (deUint32 seedIn, deUint32 xIn, deUint32 yIn, deUint32 zIn, int numPartitions, bool smallBlock)
1345 {
1346         DE_ASSERT(zIn == 0);
1347         const deUint32  x               = smallBlock ? xIn << 1 : xIn;
1348         const deUint32  y               = smallBlock ? yIn << 1 : yIn;
1349         const deUint32  z               = smallBlock ? zIn << 1 : zIn;
1350         const deUint32  seed    = seedIn + 1024*(numPartitions-1);
1351         const deUint32  rnum    = hash52(seed);
1352         deUint8                 seed1   = (deUint8)( rnum                                                       & 0xf);
1353         deUint8                 seed2   = (deUint8)((rnum >>  4)                                        & 0xf);
1354         deUint8                 seed3   = (deUint8)((rnum >>  8)                                        & 0xf);
1355         deUint8                 seed4   = (deUint8)((rnum >> 12)                                        & 0xf);
1356         deUint8                 seed5   = (deUint8)((rnum >> 16)                                        & 0xf);
1357         deUint8                 seed6   = (deUint8)((rnum >> 20)                                        & 0xf);
1358         deUint8                 seed7   = (deUint8)((rnum >> 24)                                        & 0xf);
1359         deUint8                 seed8   = (deUint8)((rnum >> 28)                                        & 0xf);
1360         deUint8                 seed9   = (deUint8)((rnum >> 18)                                        & 0xf);
1361         deUint8                 seed10  = (deUint8)((rnum >> 22)                                        & 0xf);
1362         deUint8                 seed11  = (deUint8)((rnum >> 26)                                        & 0xf);
1363         deUint8                 seed12  = (deUint8)(((rnum >> 30) | (rnum << 2))        & 0xf);
1364
1365         seed1  = (deUint8)(seed1  * seed1 );
1366         seed2  = (deUint8)(seed2  * seed2 );
1367         seed3  = (deUint8)(seed3  * seed3 );
1368         seed4  = (deUint8)(seed4  * seed4 );
1369         seed5  = (deUint8)(seed5  * seed5 );
1370         seed6  = (deUint8)(seed6  * seed6 );
1371         seed7  = (deUint8)(seed7  * seed7 );
1372         seed8  = (deUint8)(seed8  * seed8 );
1373         seed9  = (deUint8)(seed9  * seed9 );
1374         seed10 = (deUint8)(seed10 * seed10);
1375         seed11 = (deUint8)(seed11 * seed11);
1376         seed12 = (deUint8)(seed12 * seed12);
1377
1378         const int shA = (seed & 2) != 0         ? 4             : 5;
1379         const int shB = numPartitions == 3      ? 6             : 5;
1380         const int sh1 = (seed & 1) != 0         ? shA   : shB;
1381         const int sh2 = (seed & 1) != 0         ? shB   : shA;
1382         const int sh3 = (seed & 0x10) != 0      ? sh1   : sh2;
1383
1384         seed1  = (deUint8)(seed1  >> sh1);
1385         seed2  = (deUint8)(seed2  >> sh2);
1386         seed3  = (deUint8)(seed3  >> sh1);
1387         seed4  = (deUint8)(seed4  >> sh2);
1388         seed5  = (deUint8)(seed5  >> sh1);
1389         seed6  = (deUint8)(seed6  >> sh2);
1390         seed7  = (deUint8)(seed7  >> sh1);
1391         seed8  = (deUint8)(seed8  >> sh2);
1392         seed9  = (deUint8)(seed9  >> sh3);
1393         seed10 = (deUint8)(seed10 >> sh3);
1394         seed11 = (deUint8)(seed11 >> sh3);
1395         seed12 = (deUint8)(seed12 >> sh3);
1396
1397         const int a =                                           0x3f & (seed1*x + seed2*y + seed11*z + (rnum >> 14));
1398         const int b =                                           0x3f & (seed3*x + seed4*y + seed12*z + (rnum >> 10));
1399         const int c = numPartitions >= 3 ?      0x3f & (seed5*x + seed6*y + seed9*z  + (rnum >>  6))    : 0;
1400         const int d = numPartitions >= 4 ?      0x3f & (seed7*x + seed8*y + seed10*z + (rnum >>  2))    : 0;
1401
1402         return a >= b && a >= c && a >= d       ? 0
1403                  : b >= c && b >= d                             ? 1
1404                  : c >= d                                               ? 2
1405                  :                                                                3;
1406 }
1407
1408 DecompressResult setTexelColors (void* dst, ColorEndpointPair* colorEndpoints, TexelWeightPair* texelWeights, int ccs, deUint32 partitionIndexSeed,
1409                                                                  int numPartitions, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode, const deUint32* colorEndpointModes)
1410 {
1411         const bool                      smallBlock      = blockWidth*blockHeight < 31;
1412         DecompressResult        result          = DECOMPRESS_RESULT_VALID_BLOCK;
1413         bool                            isHDREndpoint[4];
1414
1415         for (int i = 0; i < numPartitions; i++)
1416                 isHDREndpoint[i] = isColorEndpointModeHDR(colorEndpointModes[i]);
1417
1418         for (int texelY = 0; texelY < blockHeight; texelY++)
1419         for (int texelX = 0; texelX < blockWidth; texelX++)
1420         {
1421                 const int                               texelNdx                        = texelY*blockWidth + texelX;
1422                 const int                               colorEndpointNdx        = numPartitions == 1 ? 0 : computeTexelPartition(partitionIndexSeed, texelX, texelY, 0, numPartitions, smallBlock);
1423                 DE_ASSERT(colorEndpointNdx < numPartitions);
1424                 const UVec4&                    e0                                      = colorEndpoints[colorEndpointNdx].e0;
1425                 const UVec4&                    e1                                      = colorEndpoints[colorEndpointNdx].e1;
1426                 const TexelWeightPair&  weight                          = texelWeights[texelNdx];
1427
1428                 if (isLDRMode && isHDREndpoint[colorEndpointNdx])
1429                 {
1430                         if (isSRGB)
1431                         {
1432                                 ((deUint8*)dst)[texelNdx*4 + 0] = 0xff;
1433                                 ((deUint8*)dst)[texelNdx*4 + 1] = 0;
1434                                 ((deUint8*)dst)[texelNdx*4 + 2] = 0xff;
1435                                 ((deUint8*)dst)[texelNdx*4 + 3] = 0xff;
1436                         }
1437                         else
1438                         {
1439                                 ((float*)dst)[texelNdx*4 + 0] = 1.0f;
1440                                 ((float*)dst)[texelNdx*4 + 1] = 0;
1441                                 ((float*)dst)[texelNdx*4 + 2] = 1.0f;
1442                                 ((float*)dst)[texelNdx*4 + 3] = 1.0f;
1443                         }
1444
1445                         result = DECOMPRESS_RESULT_ERROR;
1446                 }
1447                 else
1448                 {
1449                         for (int channelNdx = 0; channelNdx < 4; channelNdx++)
1450                         {
1451                                 if (!isHDREndpoint[colorEndpointNdx] || (channelNdx == 3 && colorEndpointModes[colorEndpointNdx] == 14)) // \note Alpha for mode 14 is treated the same as LDR.
1452                                 {
1453                                         const deUint32 c0       = (e0[channelNdx] << 8) | (isSRGB ? 0x80 : e0[channelNdx]);
1454                                         const deUint32 c1       = (e1[channelNdx] << 8) | (isSRGB ? 0x80 : e1[channelNdx]);
1455                                         const deUint32 w        = weight.w[ccs == channelNdx ? 1 : 0];
1456                                         const deUint32 c        = (c0*(64-w) + c1*w + 32) / 64;
1457
1458                                         if (isSRGB)
1459                                                 ((deUint8*)dst)[texelNdx*4 + channelNdx] = (deUint8)((c & 0xff00) >> 8);
1460                                         else
1461                                                 ((float*)dst)[texelNdx*4 + channelNdx] = c == 65535 ? 1.0f : (float)c / 65536.0f;
1462                                 }
1463                                 else
1464                                 {
1465                                         DE_STATIC_ASSERT((de::meta::TypesSame<deFloat16, deUint16>::Value));
1466                                         const deUint32          c0      = e0[channelNdx] << 4;
1467                                         const deUint32          c1      = e1[channelNdx] << 4;
1468                                         const deUint32          w       = weight.w[ccs == channelNdx ? 1 : 0];
1469                                         const deUint32          c       = (c0*(64-w) + c1*w + 32) / 64;
1470                                         const deUint32          e       = getBits(c, 11, 15);
1471                                         const deUint32          m       = getBits(c, 0, 10);
1472                                         const deUint32          mt      = m < 512               ? 3*m
1473                                                                                         : m >= 1536             ? 5*m - 2048
1474                                                                                         :                                 4*m - 512;
1475                                         const deFloat16         cf      = (deFloat16)((e << 10) + (mt >> 3));
1476
1477                                         ((float*)dst)[texelNdx*4 + channelNdx] = deFloat16To32(isFloat16InfOrNan(cf) ? 0x7bff : cf);
1478                                 }
1479                         }
1480                 }
1481         }
1482
1483         return result;
1484 }
1485
1486 DecompressResult decompressBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDR)
1487 {
1488         DE_ASSERT(isLDR || !isSRGB);
1489
1490         // Decode block mode.
1491
1492         const ASTCBlockMode blockMode = getASTCBlockMode(blockData.getBits(0, 10));
1493
1494         // Check for block mode errors.
1495
1496         if (blockMode.isError)
1497         {
1498                 setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
1499                 return DECOMPRESS_RESULT_ERROR;
1500         }
1501
1502         // Separate path for void-extent.
1503
1504         if (blockMode.isVoidExtent)
1505                 return decodeVoidExtentBlock(dst, blockData, blockWidth, blockHeight, isSRGB, isLDR);
1506
1507         // Compute weight grid values.
1508
1509         const int numWeights                    = computeNumWeights(blockMode);
1510         const int numWeightDataBits             = computeNumRequiredBits(blockMode.weightISEParams, numWeights);
1511         const int numPartitions                 = (int)blockData.getBits(11, 12) + 1;
1512
1513         // Check for errors in weight grid, partition and dual-plane parameters.
1514
1515         if (numWeights > 64                                                             ||
1516                 numWeightDataBits > 96                                          ||
1517                 numWeightDataBits < 24                                          ||
1518                 blockMode.weightGridWidth > blockWidth          ||
1519                 blockMode.weightGridHeight > blockHeight        ||
1520                 (numPartitions == 4 && blockMode.isDualPlane))
1521         {
1522                 setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
1523                 return DECOMPRESS_RESULT_ERROR;
1524         }
1525
1526         // Compute number of bits available for color endpoint data.
1527
1528         const bool      isSingleUniqueCem                       = numPartitions == 1 || blockData.getBits(23, 24) == 0;
1529         const int       numConfigDataBits                       = (numPartitions == 1 ? 17 : isSingleUniqueCem ? 29 : 25 + 3*numPartitions) +
1530                                                                                           (blockMode.isDualPlane ? 2 : 0);
1531         const int       numBitsForColorEndpoints        = 128 - numWeightDataBits - numConfigDataBits;
1532         const int       extraCemBitsStart                       = 127 - numWeightDataBits - (isSingleUniqueCem          ? -1
1533                                                                                                                                                 : numPartitions == 4    ? 7
1534                                                                                                                                                 : numPartitions == 3    ? 4
1535                                                                                                                                                 : numPartitions == 2    ? 1
1536                                                                                                                                                 : 0);
1537         // Decode color endpoint modes.
1538
1539         deUint32 colorEndpointModes[4];
1540         decodeColorEndpointModes(&colorEndpointModes[0], blockData, numPartitions, extraCemBitsStart);
1541
1542         const int numColorEndpointValues = computeNumColorEndpointValues(colorEndpointModes, numPartitions);
1543
1544         // Check for errors in color endpoint value count.
1545
1546         if (numColorEndpointValues > 18 || numBitsForColorEndpoints < deDivRoundUp32(13*numColorEndpointValues, 5))
1547         {
1548                 setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
1549                 return DECOMPRESS_RESULT_ERROR;
1550         }
1551
1552         // Compute color endpoints.
1553
1554         ColorEndpointPair colorEndpoints[4];
1555         computeColorEndpoints(&colorEndpoints[0], blockData, &colorEndpointModes[0], numPartitions, numColorEndpointValues,
1556                                                   computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues), numBitsForColorEndpoints);
1557
1558         // Compute texel weights.
1559
1560         TexelWeightPair texelWeights[MAX_BLOCK_WIDTH*MAX_BLOCK_HEIGHT];
1561         computeTexelWeights(&texelWeights[0], blockData, blockWidth, blockHeight, blockMode);
1562
1563         // Set texel colors.
1564
1565         const int               ccs                                             = blockMode.isDualPlane ? (int)blockData.getBits(extraCemBitsStart-2, extraCemBitsStart-1) : -1;
1566         const deUint32  partitionIndexSeed              = numPartitions > 1 ? blockData.getBits(13, 22) : (deUint32)-1;
1567
1568         return setTexelColors(dst, &colorEndpoints[0], &texelWeights[0], ccs, partitionIndexSeed, numPartitions, blockWidth, blockHeight, isSRGB, isLDR, &colorEndpointModes[0]);
1569 }
1570
1571 void decompress (const PixelBufferAccess& dst, const deUint8* data, bool isSRGB, bool isLDR)
1572 {
1573         DE_ASSERT(isLDR || !isSRGB);
1574
1575         const int blockWidth = dst.getWidth();
1576         const int blockHeight = dst.getHeight();
1577
1578         union
1579         {
1580                 deUint8         sRGB[MAX_BLOCK_WIDTH*MAX_BLOCK_HEIGHT*4];
1581                 float           linear[MAX_BLOCK_WIDTH*MAX_BLOCK_HEIGHT*4];
1582         } decompressedBuffer;
1583
1584         const Block128 blockData(data);
1585         decompressBlock(isSRGB ? (void*)&decompressedBuffer.sRGB[0] : (void*)&decompressedBuffer.linear[0],
1586                                         blockData, dst.getWidth(), dst.getHeight(), isSRGB, isLDR);
1587
1588         if (isSRGB)
1589         {
1590                 for (int i = 0; i < blockHeight; i++)
1591                 for (int j = 0; j < blockWidth; j++)
1592                 {
1593                         dst.setPixel(IVec4(decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 0],
1594                                                            decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 1],
1595                                                            decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 2],
1596                                                            decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 3]), j, i);
1597                 }
1598         }
1599         else
1600         {
1601                 for (int i = 0; i < blockHeight; i++)
1602                 for (int j = 0; j < blockWidth; j++)
1603                 {
1604                         dst.setPixel(Vec4(decompressedBuffer.linear[(i*blockWidth + j) * 4 + 0],
1605                                                           decompressedBuffer.linear[(i*blockWidth + j) * 4 + 1],
1606                                                           decompressedBuffer.linear[(i*blockWidth + j) * 4 + 2],
1607                                                           decompressedBuffer.linear[(i*blockWidth + j) * 4 + 3]), j, i);
1608                 }
1609         }
1610 }
1611
1612 // Helper class for setting bits in a 128-bit block.
1613 class AssignBlock128
1614 {
1615 private:
1616         typedef deUint64 Word;
1617
1618         enum
1619         {
1620                 WORD_BYTES      = sizeof(Word),
1621                 WORD_BITS       = 8*WORD_BYTES,
1622                 NUM_WORDS       = 128 / WORD_BITS
1623         };
1624
1625         DE_STATIC_ASSERT(128 % WORD_BITS == 0);
1626
1627 public:
1628         AssignBlock128 (void)
1629         {
1630                 for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++)
1631                         m_words[wordNdx] = 0;
1632         }
1633
1634         void setBit (int ndx, deUint32 val)
1635         {
1636                 DE_ASSERT(de::inBounds(ndx, 0, 128));
1637                 DE_ASSERT((val & 1) == val);
1638                 const int wordNdx       = ndx / WORD_BITS;
1639                 const int bitNdx        = ndx % WORD_BITS;
1640                 m_words[wordNdx] = (m_words[wordNdx] & ~((Word)1 << bitNdx)) | ((Word)val << bitNdx);
1641         }
1642
1643         void setBits (int low, int high, deUint32 bits)
1644         {
1645                 DE_ASSERT(de::inBounds(low, 0, 128));
1646                 DE_ASSERT(de::inBounds(high, 0, 128));
1647                 DE_ASSERT(de::inRange(high-low+1, 0, 32));
1648                 DE_ASSERT((bits & (((Word)1 << (high-low+1)) - 1)) == bits);
1649
1650                 if (high-low+1 == 0)
1651                         return;
1652
1653                 const int word0Ndx              = low / WORD_BITS;
1654                 const int word1Ndx              = high / WORD_BITS;
1655                 const int lowNdxInW0    = low % WORD_BITS;
1656
1657                 if (word0Ndx == word1Ndx)
1658                         m_words[word0Ndx] = (m_words[word0Ndx] & ~((((Word)1 << (high-low+1)) - 1) << lowNdxInW0)) | ((Word)bits << lowNdxInW0);
1659                 else
1660                 {
1661                         DE_ASSERT(word1Ndx == word0Ndx + 1);
1662
1663                         const int       highNdxInW1                     = high % WORD_BITS;
1664                         const int       numBitsToSetInW0        = WORD_BITS - lowNdxInW0;
1665                         const Word      bitsLowMask                     = ((Word)1 << numBitsToSetInW0) - 1;
1666
1667                         m_words[word0Ndx] = (m_words[word0Ndx] & (((Word)1 << lowNdxInW0) - 1))                 | (((Word)bits & bitsLowMask) << lowNdxInW0);
1668                         m_words[word1Ndx] = (m_words[word1Ndx] & ~(((Word)1 << (highNdxInW1+1)) - 1))   | (((Word)bits & ~bitsLowMask) >> numBitsToSetInW0);
1669                 }
1670         }
1671
1672         void assignToMemory (deUint8* dst) const
1673         {
1674                 for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++)
1675                 {
1676                         for (int byteNdx = 0; byteNdx < WORD_BYTES; byteNdx++)
1677                                 dst[wordNdx*WORD_BYTES + byteNdx] = (deUint8)((m_words[wordNdx] >> (8*byteNdx)) & 0xff);
1678                 }
1679         }
1680
1681         void pushBytesToVector (vector<deUint8>& dst) const
1682         {
1683                 const int assignStartIndex = (int)dst.size();
1684                 dst.resize(dst.size() + BLOCK_SIZE_BYTES);
1685                 assignToMemory(&dst[assignStartIndex]);
1686         }
1687
1688 private:
1689         Word m_words[NUM_WORDS];
1690 };
1691
1692 // A helper for sequential access into a AssignBlock128.
1693 class BitAssignAccessStream
1694 {
1695 public:
1696         BitAssignAccessStream (AssignBlock128& dst, int startNdxInSrc, int length, bool forward)
1697                 : m_dst                         (dst)
1698                 , m_startNdxInSrc       (startNdxInSrc)
1699                 , m_length                      (length)
1700                 , m_forward                     (forward)
1701                 , m_ndx                         (0)
1702         {
1703         }
1704
1705         // Set the next num bits. Bits at positions greater than or equal to m_length are not touched.
1706         void setNext (int num, deUint32 bits)
1707         {
1708                 DE_ASSERT((bits & (((deUint64)1 << num) - 1)) == bits);
1709
1710                 if (num == 0 || m_ndx >= m_length)
1711                         return;
1712
1713                 const int               end                             = m_ndx + num;
1714                 const int               numBitsToDst    = de::max(0, de::min(m_length, end) - m_ndx);
1715                 const int               low                             = m_ndx;
1716                 const int               high                    = m_ndx + numBitsToDst - 1;
1717                 const deUint32  actualBits              = getBits(bits, 0, numBitsToDst-1);
1718
1719                 m_ndx += num;
1720
1721                 return m_forward ? m_dst.setBits(m_startNdxInSrc + low,  m_startNdxInSrc + high, actualBits)
1722                                                  : m_dst.setBits(m_startNdxInSrc - high, m_startNdxInSrc - low, reverseBits(actualBits, numBitsToDst));
1723         }
1724
1725 private:
1726         AssignBlock128&         m_dst;
1727         const int                       m_startNdxInSrc;
1728         const int                       m_length;
1729         const bool                      m_forward;
1730
1731         int                                     m_ndx;
1732 };
1733
1734 struct VoidExtentParams
1735 {
1736         DE_STATIC_ASSERT((de::meta::TypesSame<deFloat16, deUint16>::Value));
1737         bool            isHDR;
1738         deUint16        r;
1739         deUint16        g;
1740         deUint16        b;
1741         deUint16        a;
1742         // \note Currently extent coordinates are all set to all-ones.
1743
1744         VoidExtentParams (bool isHDR_, deUint16 r_, deUint16 g_, deUint16 b_, deUint16 a_) : isHDR(isHDR_), r(r_), g(g_), b(b_), a(a_) {}
1745 };
1746
1747 static AssignBlock128 generateVoidExtentBlock (const VoidExtentParams& params)
1748 {
1749         AssignBlock128 block;
1750
1751         block.setBits(0, 8, 0x1fc); // \note Marks void-extent block.
1752         block.setBit(9, params.isHDR);
1753         block.setBits(10, 11, 3); // \note Spec shows that these bits are both set, although they serve no purpose.
1754
1755         // Extent coordinates - currently all-ones.
1756         block.setBits(12, 24, 0x1fff);
1757         block.setBits(25, 37, 0x1fff);
1758         block.setBits(38, 50, 0x1fff);
1759         block.setBits(51, 63, 0x1fff);
1760
1761         DE_ASSERT(!params.isHDR || (!isFloat16InfOrNan(params.r) &&
1762                                                                 !isFloat16InfOrNan(params.g) &&
1763                                                                 !isFloat16InfOrNan(params.b) &&
1764                                                                 !isFloat16InfOrNan(params.a)));
1765
1766         block.setBits(64,  79,  params.r);
1767         block.setBits(80,  95,  params.g);
1768         block.setBits(96,  111, params.b);
1769         block.setBits(112, 127, params.a);
1770
1771         return block;
1772 }
1773
1774 // An input array of ISE inputs for an entire ASTC block. Can be given as either single values in the
1775 // range [0, maximumValueOfISERange] or as explicit block value specifications. The latter is needed
1776 // so we can test all possible values of T and Q in a block, since multiple T or Q values may map
1777 // to the same set of decoded values.
1778 struct ISEInput
1779 {
1780         struct Block
1781         {
1782                 deUint32 tOrQValue; //!< The 8-bit T or 7-bit Q in a trit or quint ISE block.
1783                 deUint32 bitValues[5];
1784         };
1785
1786         bool isGivenInBlockForm;
1787         union
1788         {
1789                 //!< \note 64 comes from the maximum number of weight values in an ASTC block.
1790                 deUint32        plain[64];
1791                 Block           block[64];
1792         } value;
1793
1794         ISEInput (void)
1795                 : isGivenInBlockForm (false)
1796         {
1797         }
1798 };
1799
1800 static inline deUint32 computeISERangeMax (const ISEParams& iseParams)
1801 {
1802         switch (iseParams.mode)
1803         {
1804                 case ISEMODE_TRIT:                      return (1u << iseParams.numBits) * 3 - 1;
1805                 case ISEMODE_QUINT:                     return (1u << iseParams.numBits) * 5 - 1;
1806                 case ISEMODE_PLAIN_BIT:         return (1u << iseParams.numBits)     - 1;
1807                 default:
1808                         DE_ASSERT(false);
1809                         return -1;
1810         }
1811 }
1812
1813 struct NormalBlockParams
1814 {
1815         int                                     weightGridWidth;
1816         int                                     weightGridHeight;
1817         ISEParams                       weightISEParams;
1818         bool                            isDualPlane;
1819         deUint32                        ccs; //! \note Irrelevant if !isDualPlane.
1820         int                                     numPartitions;
1821         deUint32                        colorEndpointModes[4];
1822         // \note Below members are irrelevant if numPartitions == 1.
1823         bool                            isMultiPartSingleCemMode; //! \note If true, the single CEM is at colorEndpointModes[0].
1824         deUint32                        partitionSeed;
1825
1826         NormalBlockParams (void)
1827                 : weightGridWidth                       (-1)
1828                 , weightGridHeight                      (-1)
1829                 , weightISEParams                       (ISEMODE_LAST, -1)
1830                 , isDualPlane                           (true)
1831                 , ccs                                           ((deUint32)-1)
1832                 , numPartitions                         (-1)
1833                 , isMultiPartSingleCemMode      (false)
1834                 , partitionSeed                         ((deUint32)-1)
1835         {
1836                 colorEndpointModes[0] = 0;
1837                 colorEndpointModes[1] = 0;
1838                 colorEndpointModes[2] = 0;
1839                 colorEndpointModes[3] = 0;
1840         }
1841 };
1842
1843 struct NormalBlockISEInputs
1844 {
1845         ISEInput weight;
1846         ISEInput endpoint;
1847
1848         NormalBlockISEInputs (void)
1849                 : weight        ()
1850                 , endpoint      ()
1851         {
1852         }
1853 };
1854
1855 static inline int computeNumWeights (const NormalBlockParams& params)
1856 {
1857         return params.weightGridWidth * params.weightGridHeight * (params.isDualPlane ? 2 : 1);
1858 }
1859
1860 static inline int computeNumBitsForColorEndpoints (const NormalBlockParams& params)
1861 {
1862         const int numWeightBits                 = computeNumRequiredBits(params.weightISEParams, computeNumWeights(params));
1863         const int numConfigDataBits             = (params.numPartitions == 1 ? 17 : params.isMultiPartSingleCemMode ? 29 : 25 + 3*params.numPartitions) +
1864                                                                           (params.isDualPlane ? 2 : 0);
1865
1866         return 128 - numWeightBits - numConfigDataBits;
1867 }
1868
1869 static inline int computeNumColorEndpointValues (const deUint32* endpointModes, int numPartitions, bool isMultiPartSingleCemMode)
1870 {
1871         if (isMultiPartSingleCemMode)
1872                 return numPartitions * computeNumColorEndpointValues(endpointModes[0]);
1873         else
1874         {
1875                 int result = 0;
1876                 for (int i = 0; i < numPartitions; i++)
1877                         result += computeNumColorEndpointValues(endpointModes[i]);
1878                 return result;
1879         }
1880 }
1881
1882 static inline bool isValidBlockParams (const NormalBlockParams& params, int blockWidth, int blockHeight)
1883 {
1884         const int numWeights                            = computeNumWeights(params);
1885         const int numWeightBits                         = computeNumRequiredBits(params.weightISEParams, numWeights);
1886         const int numColorEndpointValues        = computeNumColorEndpointValues(&params.colorEndpointModes[0], params.numPartitions, params.isMultiPartSingleCemMode);
1887         const int numBitsForColorEndpoints      = computeNumBitsForColorEndpoints(params);
1888
1889         return numWeights <= 64                                                                         &&
1890                    de::inRange(numWeightBits, 24, 96)                                   &&
1891                    params.weightGridWidth <= blockWidth                                 &&
1892                    params.weightGridHeight <= blockHeight                               &&
1893                    !(params.numPartitions == 4 && params.isDualPlane)   &&
1894                    numColorEndpointValues <= 18                                                 &&
1895                    numBitsForColorEndpoints >= deDivRoundUp32(13*numColorEndpointValues, 5);
1896 }
1897
1898 // Write bits 0 to 10 of an ASTC block.
1899 static void writeBlockMode (AssignBlock128& dst, const NormalBlockParams& blockParams)
1900 {
1901         const deUint32  d = blockParams.isDualPlane != 0;
1902         // r and h initialized in switch below.
1903         deUint32                r;
1904         deUint32                h;
1905         // a, b and blockModeLayoutNdx initialized in block mode layout index detecting loop below.
1906         deUint32                a = (deUint32)-1;
1907         deUint32                b = (deUint32)-1;
1908         int                             blockModeLayoutNdx;
1909
1910         // Find the values of r and h (ISE range).
1911         switch (computeISERangeMax(blockParams.weightISEParams))
1912         {
1913                 case 1:         r = 2; h = 0;   break;
1914                 case 2:         r = 3; h = 0;   break;
1915                 case 3:         r = 4; h = 0;   break;
1916                 case 4:         r = 5; h = 0;   break;
1917                 case 5:         r = 6; h = 0;   break;
1918                 case 7:         r = 7; h = 0;   break;
1919
1920                 case 9:         r = 2; h = 1;   break;
1921                 case 11:        r = 3; h = 1;   break;
1922                 case 15:        r = 4; h = 1;   break;
1923                 case 19:        r = 5; h = 1;   break;
1924                 case 23:        r = 6; h = 1;   break;
1925                 case 31:        r = 7; h = 1;   break;
1926
1927                 default:
1928                         DE_ASSERT(false);
1929                         r = (deUint32)-1;
1930                         h = (deUint32)-1;
1931         }
1932
1933         // Find block mode layout index, i.e. appropriate row in the "2d block mode layout" table in ASTC spec.
1934
1935         {
1936                 enum BlockModeLayoutABVariable { Z=0, A=1, B=2 };
1937
1938                 static const struct BlockModeLayout
1939                 {
1940                         int                                                     aNumBits;
1941                         int                                                     bNumBits;
1942                         BlockModeLayoutABVariable       gridWidthVariableTerm;
1943                         int                                                     gridWidthConstantTerm;
1944                         BlockModeLayoutABVariable       gridHeightVariableTerm;
1945                         int                                                     gridHeightConstantTerm;
1946                 } blockModeLayouts[] =
1947                 {
1948                         { 2, 2,   B,  4,   A,  2},
1949                         { 2, 2,   B,  8,   A,  2},
1950                         { 2, 2,   A,  2,   B,  8},
1951                         { 2, 1,   A,  2,   B,  6},
1952                         { 2, 1,   B,  2,   A,  2},
1953                         { 2, 0,   Z, 12,   A,  2},
1954                         { 2, 0,   A,  2,   Z, 12},
1955                         { 0, 0,   Z,  6,   Z, 10},
1956                         { 0, 0,   Z, 10,   Z,  6},
1957                         { 2, 2,   A,  6,   B,  6}
1958                 };
1959
1960                 for (blockModeLayoutNdx = 0; blockModeLayoutNdx < DE_LENGTH_OF_ARRAY(blockModeLayouts); blockModeLayoutNdx++)
1961                 {
1962                         const BlockModeLayout&  layout                                  = blockModeLayouts[blockModeLayoutNdx];
1963                         const int                               aMax                                    = (1 << layout.aNumBits) - 1;
1964                         const int                               bMax                                    = (1 << layout.bNumBits) - 1;
1965                         const int                               variableOffsetsMax[3]   = { 0, aMax, bMax };
1966                         const int                               widthMin                                = layout.gridWidthConstantTerm;
1967                         const int                               heightMin                               = layout.gridHeightConstantTerm;
1968                         const int                               widthMax                                = widthMin  + variableOffsetsMax[layout.gridWidthVariableTerm];
1969                         const int                               heightMax                               = heightMin + variableOffsetsMax[layout.gridHeightVariableTerm];
1970
1971                         DE_ASSERT(layout.gridWidthVariableTerm != layout.gridHeightVariableTerm || layout.gridWidthVariableTerm == Z);
1972
1973                         if (de::inRange(blockParams.weightGridWidth, widthMin, widthMax) &&
1974                                 de::inRange(blockParams.weightGridHeight, heightMin, heightMax))
1975                         {
1976                                 deUint32        defaultvalue    = 0;
1977                                 deUint32&       widthVariable   = layout.gridWidthVariableTerm == A  ? a : layout.gridWidthVariableTerm == B  ? b : defaultvalue;
1978                                 deUint32&       heightVariable  = layout.gridHeightVariableTerm == A ? a : layout.gridHeightVariableTerm == B ? b : defaultvalue;
1979
1980                                 widthVariable   = blockParams.weightGridWidth  - layout.gridWidthConstantTerm;
1981                                 heightVariable  = blockParams.weightGridHeight - layout.gridHeightConstantTerm;
1982
1983                                 break;
1984                         }
1985                 }
1986         }
1987
1988         // Set block mode bits.
1989
1990         const deUint32 a0 = getBit(a, 0);
1991         const deUint32 a1 = getBit(a, 1);
1992         const deUint32 b0 = getBit(b, 0);
1993         const deUint32 b1 = getBit(b, 1);
1994         const deUint32 r0 = getBit(r, 0);
1995         const deUint32 r1 = getBit(r, 1);
1996         const deUint32 r2 = getBit(r, 2);
1997
1998 #define SB(NDX, VAL) dst.setBit((NDX), (VAL))
1999 #define ASSIGN_BITS(B10, B9, B8, B7, B6, B5, B4, B3, B2, B1, B0) do { SB(10,(B10)); SB(9,(B9)); SB(8,(B8)); SB(7,(B7)); SB(6,(B6)); SB(5,(B5)); SB(4,(B4)); SB(3,(B3)); SB(2,(B2)); SB(1,(B1)); SB(0,(B0)); } while (false)
2000
2001         switch (blockModeLayoutNdx)
2002         {
2003                 case 0: ASSIGN_BITS(d,  h,  b1, b0, a1, a0, r0, 0,  0,  r2, r1);                                                                        break;
2004                 case 1: ASSIGN_BITS(d,  h,  b1, b0, a1, a0, r0, 0,  1,  r2, r1);                                                                        break;
2005                 case 2: ASSIGN_BITS(d,  h,  b1, b0, a1, a0, r0, 1,  0,  r2, r1);                                                                        break;
2006                 case 3: ASSIGN_BITS(d,  h,   0,  b, a1, a0, r0, 1,  1,  r2, r1);                                                                        break;
2007                 case 4: ASSIGN_BITS(d,  h,   1,  b, a1, a0, r0, 1,  1,  r2, r1);                                                                        break;
2008                 case 5: ASSIGN_BITS(d,  h,   0,  0, a1, a0, r0, r2, r1,  0,  0);                                                                        break;
2009                 case 6: ASSIGN_BITS(d,  h,   0,  1, a1, a0, r0, r2, r1,  0,  0);                                                                        break;
2010                 case 7: ASSIGN_BITS(d,  h,   1,  1,  0,  0, r0, r2, r1,  0,  0);                                                                        break;
2011                 case 8: ASSIGN_BITS(d,  h,   1,  1,  0,  1, r0, r2, r1,  0,  0);                                                                        break;
2012                 case 9: ASSIGN_BITS(b1, b0,  1,  0, a1, a0, r0, r2, r1,  0,  0); DE_ASSERT(d == 0 && h == 0);           break;
2013                 default:
2014                         DE_ASSERT(false);
2015         }
2016
2017 #undef ASSIGN_BITS
2018 #undef SB
2019 }
2020
2021 // Write color endpoint mode data of an ASTC block.
2022 static void writeColorEndpointModes (AssignBlock128& dst, const deUint32* colorEndpointModes, bool isMultiPartSingleCemMode, int numPartitions, int extraCemBitsStart)
2023 {
2024         if (numPartitions == 1)
2025                 dst.setBits(13, 16, colorEndpointModes[0]);
2026         else
2027         {
2028                 if (isMultiPartSingleCemMode)
2029                 {
2030                         dst.setBits(23, 24, 0);
2031                         dst.setBits(25, 28, colorEndpointModes[0]);
2032                 }
2033                 else
2034                 {
2035                         DE_ASSERT(numPartitions > 0);
2036                         const deUint32 minCem                           = *std::min_element(&colorEndpointModes[0], &colorEndpointModes[numPartitions]);
2037                         const deUint32 maxCem                           = *std::max_element(&colorEndpointModes[0], &colorEndpointModes[numPartitions]);
2038                         const deUint32 minCemClass                      = minCem/4;
2039                         const deUint32 maxCemClass                      = maxCem/4;
2040                         DE_ASSERT(maxCemClass - minCemClass <= 1);
2041                         DE_UNREF(minCemClass); // \note For non-debug builds.
2042                         const deUint32 highLevelSelector        = de::max(1u, maxCemClass);
2043
2044                         dst.setBits(23, 24, highLevelSelector);
2045
2046                         for (int partNdx = 0; partNdx < numPartitions; partNdx++)
2047                         {
2048                                 const deUint32 c                        = colorEndpointModes[partNdx] / 4 == highLevelSelector ? 1 : 0;
2049                                 const deUint32 m                        = colorEndpointModes[partNdx] % 4;
2050                                 const deUint32 lowMBit0Ndx      = numPartitions + 2*partNdx;
2051                                 const deUint32 lowMBit1Ndx      = numPartitions + 2*partNdx + 1;
2052                                 dst.setBit(25 + partNdx, c);
2053                                 dst.setBit(lowMBit0Ndx < 4 ? 25+lowMBit0Ndx : extraCemBitsStart+lowMBit0Ndx-4, getBit(m, 0));
2054                                 dst.setBit(lowMBit1Ndx < 4 ? 25+lowMBit1Ndx : extraCemBitsStart+lowMBit1Ndx-4, getBit(m, 1));
2055                         }
2056                 }
2057         }
2058 }
2059
2060 static void encodeISETritBlock (BitAssignAccessStream& dst, int numBits, bool fromExplicitInputBlock, const ISEInput::Block& blockInput, const deUint32* nonBlockInput, int numValues)
2061 {
2062         // tritBlockTValue[t0][t1][t2][t3][t4] is a value of T (not necessarily the only one) that will yield the given trits when decoded.
2063         static const deUint32 tritBlockTValue[3][3][3][3][3] =
2064         {
2065                 {
2066                         {{{0, 128, 96}, {32, 160, 224}, {64, 192, 28}}, {{16, 144, 112}, {48, 176, 240}, {80, 208, 156}}, {{3, 131, 99}, {35, 163, 227}, {67, 195, 31}}},
2067                         {{{4, 132, 100}, {36, 164, 228}, {68, 196, 60}}, {{20, 148, 116}, {52, 180, 244}, {84, 212, 188}}, {{19, 147, 115}, {51, 179, 243}, {83, 211, 159}}},
2068                         {{{8, 136, 104}, {40, 168, 232}, {72, 200, 92}}, {{24, 152, 120}, {56, 184, 248}, {88, 216, 220}}, {{12, 140, 108}, {44, 172, 236}, {76, 204, 124}}}
2069                 },
2070                 {
2071                         {{{1, 129, 97}, {33, 161, 225}, {65, 193, 29}}, {{17, 145, 113}, {49, 177, 241}, {81, 209, 157}}, {{7, 135, 103}, {39, 167, 231}, {71, 199, 63}}},
2072                         {{{5, 133, 101}, {37, 165, 229}, {69, 197, 61}}, {{21, 149, 117}, {53, 181, 245}, {85, 213, 189}}, {{23, 151, 119}, {55, 183, 247}, {87, 215, 191}}},
2073                         {{{9, 137, 105}, {41, 169, 233}, {73, 201, 93}}, {{25, 153, 121}, {57, 185, 249}, {89, 217, 221}}, {{13, 141, 109}, {45, 173, 237}, {77, 205, 125}}}
2074                 },
2075                 {
2076                         {{{2, 130, 98}, {34, 162, 226}, {66, 194, 30}}, {{18, 146, 114}, {50, 178, 242}, {82, 210, 158}}, {{11, 139, 107}, {43, 171, 235}, {75, 203, 95}}},
2077                         {{{6, 134, 102}, {38, 166, 230}, {70, 198, 62}}, {{22, 150, 118}, {54, 182, 246}, {86, 214, 190}}, {{27, 155, 123}, {59, 187, 251}, {91, 219, 223}}},
2078                         {{{10, 138, 106}, {42, 170, 234}, {74, 202, 94}}, {{26, 154, 122}, {58, 186, 250}, {90, 218, 222}}, {{14, 142, 110}, {46, 174, 238}, {78, 206, 126}}}
2079                 }
2080         };
2081
2082         DE_ASSERT(de::inRange(numValues, 1, 5));
2083
2084         deUint32 tritParts[5];
2085         deUint32 bitParts[5];
2086
2087         for (int i = 0; i < 5; i++)
2088         {
2089                 if (i < numValues)
2090                 {
2091                         if (fromExplicitInputBlock)
2092                         {
2093                                 bitParts[i]             = blockInput.bitValues[i];
2094                                 tritParts[i]    = -1; // \note Won't be used, but silences warning.
2095                         }
2096                         else
2097                         {
2098                                 // \todo [2016-01-20 pyry] numBits = 0 doesn't make sense
2099                                 bitParts[i]             = numBits > 0 ? getBits(nonBlockInput[i], 0, numBits-1) : 0;
2100                                 tritParts[i]    = nonBlockInput[i] >> numBits;
2101                         }
2102                 }
2103                 else
2104                 {
2105                         bitParts[i]             = 0;
2106                         tritParts[i]    = 0;
2107                 }
2108         }
2109
2110         const deUint32 T = fromExplicitInputBlock ? blockInput.tOrQValue : tritBlockTValue[tritParts[0]]
2111                                                                                                                                                                           [tritParts[1]]
2112                                                                                                                                                                           [tritParts[2]]
2113                                                                                                                                                                           [tritParts[3]]
2114                                                                                                                                                                           [tritParts[4]];
2115
2116         dst.setNext(numBits,    bitParts[0]);
2117         dst.setNext(2,                  getBits(T, 0, 1));
2118         dst.setNext(numBits,    bitParts[1]);
2119         dst.setNext(2,                  getBits(T, 2, 3));
2120         dst.setNext(numBits,    bitParts[2]);
2121         dst.setNext(1,                  getBit(T, 4));
2122         dst.setNext(numBits,    bitParts[3]);
2123         dst.setNext(2,                  getBits(T, 5, 6));
2124         dst.setNext(numBits,    bitParts[4]);
2125         dst.setNext(1,                  getBit(T, 7));
2126 }
2127
2128 static void encodeISEQuintBlock (BitAssignAccessStream& dst, int numBits, bool fromExplicitInputBlock, const ISEInput::Block& blockInput, const deUint32* nonBlockInput, int numValues)
2129 {
2130         // quintBlockQValue[q0][q1][q2] is a value of Q (not necessarily the only one) that will yield the given quints when decoded.
2131         static const deUint32 quintBlockQValue[5][5][5] =
2132         {
2133                 {{0, 32, 64, 96, 102}, {8, 40, 72, 104, 110}, {16, 48, 80, 112, 118}, {24, 56, 88, 120, 126}, {5, 37, 69, 101, 39}},
2134                 {{1, 33, 65, 97, 103}, {9, 41, 73, 105, 111}, {17, 49, 81, 113, 119}, {25, 57, 89, 121, 127}, {13, 45, 77, 109, 47}},
2135                 {{2, 34, 66, 98, 70}, {10, 42, 74, 106, 78}, {18, 50, 82, 114, 86}, {26, 58, 90, 122, 94}, {21, 53, 85, 117, 55}},
2136                 {{3, 35, 67, 99, 71}, {11, 43, 75, 107, 79}, {19, 51, 83, 115, 87}, {27, 59, 91, 123, 95}, {29, 61, 93, 125, 63}},
2137                 {{4, 36, 68, 100, 38}, {12, 44, 76, 108, 46}, {20, 52, 84, 116, 54}, {28, 60, 92, 124, 62}, {6, 14, 22, 30, 7}}
2138         };
2139
2140         DE_ASSERT(de::inRange(numValues, 1, 3));
2141
2142         deUint32 quintParts[3];
2143         deUint32 bitParts[3];
2144
2145         for (int i = 0; i < 3; i++)
2146         {
2147                 if (i < numValues)
2148                 {
2149                         if (fromExplicitInputBlock)
2150                         {
2151                                 bitParts[i]             = blockInput.bitValues[i];
2152                                 quintParts[i]   = -1; // \note Won't be used, but silences warning.
2153                         }
2154                         else
2155                         {
2156                                 // \todo [2016-01-20 pyry] numBits = 0 doesn't make sense
2157                                 bitParts[i]             = numBits > 0 ? getBits(nonBlockInput[i], 0, numBits-1) : 0;
2158                                 quintParts[i]   = nonBlockInput[i] >> numBits;
2159                         }
2160                 }
2161                 else
2162                 {
2163                         bitParts[i]             = 0;
2164                         quintParts[i]   = 0;
2165                 }
2166         }
2167
2168         const deUint32 Q = fromExplicitInputBlock ? blockInput.tOrQValue : quintBlockQValue[quintParts[0]]
2169                                                                                                                                                                            [quintParts[1]]
2170                                                                                                                                                                            [quintParts[2]];
2171
2172         dst.setNext(numBits,    bitParts[0]);
2173         dst.setNext(3,                  getBits(Q, 0, 2));
2174         dst.setNext(numBits,    bitParts[1]);
2175         dst.setNext(2,                  getBits(Q, 3, 4));
2176         dst.setNext(numBits,    bitParts[2]);
2177         dst.setNext(2,                  getBits(Q, 5, 6));
2178 }
2179
2180 static void encodeISEBitBlock (BitAssignAccessStream& dst, int numBits, deUint32 value)
2181 {
2182         DE_ASSERT(de::inRange(value, 0u, (1u<<numBits)-1));
2183         dst.setNext(numBits, value);
2184 }
2185
2186 static void encodeISE (BitAssignAccessStream& dst, const ISEParams& params, const ISEInput& input, int numValues)
2187 {
2188         if (params.mode == ISEMODE_TRIT)
2189         {
2190                 const int numBlocks = deDivRoundUp32(numValues, 5);
2191                 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2192                 {
2193                         const int numValuesInBlock = blockNdx == numBlocks-1 ? numValues - 5*(numBlocks-1) : 5;
2194                         encodeISETritBlock(dst, params.numBits, input.isGivenInBlockForm,
2195                                                            input.isGivenInBlockForm ? input.value.block[blockNdx]       : ISEInput::Block(),
2196                                                            input.isGivenInBlockForm ? DE_NULL                                           : &input.value.plain[5*blockNdx],
2197                                                            numValuesInBlock);
2198                 }
2199         }
2200         else if (params.mode == ISEMODE_QUINT)
2201         {
2202                 const int numBlocks = deDivRoundUp32(numValues, 3);
2203                 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2204                 {
2205                         const int numValuesInBlock = blockNdx == numBlocks-1 ? numValues - 3*(numBlocks-1) : 3;
2206                         encodeISEQuintBlock(dst, params.numBits, input.isGivenInBlockForm,
2207                                                                 input.isGivenInBlockForm ? input.value.block[blockNdx]  : ISEInput::Block(),
2208                                                                 input.isGivenInBlockForm ? DE_NULL                                              : &input.value.plain[3*blockNdx],
2209                                                                 numValuesInBlock);
2210                 }
2211         }
2212         else
2213         {
2214                 DE_ASSERT(params.mode == ISEMODE_PLAIN_BIT);
2215                 for (int i = 0; i < numValues; i++)
2216                         encodeISEBitBlock(dst, params.numBits, input.isGivenInBlockForm ? input.value.block[i].bitValues[0] : input.value.plain[i]);
2217         }
2218 }
2219
2220 static void writeWeightData (AssignBlock128& dst, const ISEParams& iseParams, const ISEInput& input, int numWeights)
2221 {
2222         const int                               numWeightBits   = computeNumRequiredBits(iseParams, numWeights);
2223         BitAssignAccessStream   access                  (dst, 127, numWeightBits, false);
2224         encodeISE(access, iseParams, input, numWeights);
2225 }
2226
2227 static void writeColorEndpointData (AssignBlock128& dst, const ISEParams& iseParams, const ISEInput& input, int numEndpoints, int numBitsForColorEndpoints, int colorEndpointDataStartNdx)
2228 {
2229         BitAssignAccessStream access(dst, colorEndpointDataStartNdx, numBitsForColorEndpoints, true);
2230         encodeISE(access, iseParams, input, numEndpoints);
2231 }
2232
2233 static AssignBlock128 generateNormalBlock (const NormalBlockParams& blockParams, int blockWidth, int blockHeight, const NormalBlockISEInputs& iseInputs)
2234 {
2235         DE_ASSERT(isValidBlockParams(blockParams, blockWidth, blockHeight));
2236         DE_UNREF(blockWidth);   // \note For non-debug builds.
2237         DE_UNREF(blockHeight);  // \note For non-debug builds.
2238
2239         AssignBlock128  block;
2240         const int               numWeights              = computeNumWeights(blockParams);
2241         const int               numWeightBits   = computeNumRequiredBits(blockParams.weightISEParams, numWeights);
2242
2243         writeBlockMode(block, blockParams);
2244
2245         block.setBits(11, 12, blockParams.numPartitions - 1);
2246         if (blockParams.numPartitions > 1)
2247                 block.setBits(13, 22, blockParams.partitionSeed);
2248
2249         {
2250                 const int extraCemBitsStart = 127 - numWeightBits - (blockParams.numPartitions == 1 || blockParams.isMultiPartSingleCemMode             ? -1
2251                                                                                                                         : blockParams.numPartitions == 4                                                                                        ? 7
2252                                                                                                                         : blockParams.numPartitions == 3                                                                                        ? 4
2253                                                                                                                         : blockParams.numPartitions == 2                                                                                        ? 1
2254                                                                                                                         : 0);
2255
2256                 writeColorEndpointModes(block, &blockParams.colorEndpointModes[0], blockParams.isMultiPartSingleCemMode, blockParams.numPartitions, extraCemBitsStart);
2257
2258                 if (blockParams.isDualPlane)
2259                         block.setBits(extraCemBitsStart-2, extraCemBitsStart-1, blockParams.ccs);
2260         }
2261
2262         writeWeightData(block, blockParams.weightISEParams, iseInputs.weight, numWeights);
2263
2264         {
2265                 const int                       numColorEndpointValues          = computeNumColorEndpointValues(&blockParams.colorEndpointModes[0], blockParams.numPartitions, blockParams.isMultiPartSingleCemMode);
2266                 const int                       numBitsForColorEndpoints        = computeNumBitsForColorEndpoints(blockParams);
2267                 const int                       colorEndpointDataStartNdx       = blockParams.numPartitions == 1 ? 17 : 29;
2268                 const ISEParams&        colorEndpointISEParams          = computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues);
2269
2270                 writeColorEndpointData(block, colorEndpointISEParams, iseInputs.endpoint, numColorEndpointValues, numBitsForColorEndpoints, colorEndpointDataStartNdx);
2271         }
2272
2273         return block;
2274 }
2275
2276 // Generate default ISE inputs for weight and endpoint data - gradient-ish values.
2277 static NormalBlockISEInputs generateDefaultISEInputs (const NormalBlockParams& blockParams)
2278 {
2279         NormalBlockISEInputs result;
2280
2281         {
2282                 result.weight.isGivenInBlockForm = false;
2283
2284                 const int numWeights            = computeNumWeights(blockParams);
2285                 const int weightRangeMax        = computeISERangeMax(blockParams.weightISEParams);
2286
2287                 if (blockParams.isDualPlane)
2288                 {
2289                         for (int i = 0; i < numWeights; i += 2)
2290                                 result.weight.value.plain[i] = (i*weightRangeMax + (numWeights-1)/2) / (numWeights-1);
2291
2292                         for (int i = 1; i < numWeights; i += 2)
2293                                 result.weight.value.plain[i] = weightRangeMax - (i*weightRangeMax + (numWeights-1)/2) / (numWeights-1);
2294                 }
2295                 else
2296                 {
2297                         for (int i = 0; i < numWeights; i++)
2298                                 result.weight.value.plain[i] = (i*weightRangeMax + (numWeights-1)/2) / (numWeights-1);
2299                 }
2300         }
2301
2302         {
2303                 result.endpoint.isGivenInBlockForm = false;
2304
2305                 const int                       numColorEndpointValues          = computeNumColorEndpointValues(&blockParams.colorEndpointModes[0], blockParams.numPartitions, blockParams.isMultiPartSingleCemMode);
2306                 const int                       numBitsForColorEndpoints        = computeNumBitsForColorEndpoints(blockParams);
2307                 const ISEParams&        colorEndpointISEParams          = computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues);
2308                 const int                       colorEndpointRangeMax           = computeISERangeMax(colorEndpointISEParams);
2309
2310                 for (int i = 0; i < numColorEndpointValues; i++)
2311                         result.endpoint.value.plain[i] = (i*colorEndpointRangeMax + (numColorEndpointValues-1)/2) / (numColorEndpointValues-1);
2312         }
2313
2314         return result;
2315 }
2316
2317 static const ISEParams s_weightISEParamsCandidates[] =
2318 {
2319         ISEParams(ISEMODE_PLAIN_BIT,    1),
2320         ISEParams(ISEMODE_TRIT,                 0),
2321         ISEParams(ISEMODE_PLAIN_BIT,    2),
2322         ISEParams(ISEMODE_QUINT,                0),
2323         ISEParams(ISEMODE_TRIT,                 1),
2324         ISEParams(ISEMODE_PLAIN_BIT,    3),
2325         ISEParams(ISEMODE_QUINT,                1),
2326         ISEParams(ISEMODE_TRIT,                 2),
2327         ISEParams(ISEMODE_PLAIN_BIT,    4),
2328         ISEParams(ISEMODE_QUINT,                2),
2329         ISEParams(ISEMODE_TRIT,                 3),
2330         ISEParams(ISEMODE_PLAIN_BIT,    5)
2331 };
2332
2333 void generateRandomBlock (deUint8* dst, const IVec3& blockSize, de::Random& rnd)
2334 {
2335         DE_ASSERT(blockSize.z() == 1);
2336
2337         if (rnd.getFloat() < 0.1f)
2338         {
2339                 // Void extent block.
2340                 const bool              isVoidExtentHDR         = rnd.getBool();
2341                 const deUint16  r                                       = isVoidExtentHDR ? deFloat32To16(rnd.getFloat(0.0f, 1.0f)) : (deUint16)rnd.getInt(0, 0xffff);
2342                 const deUint16  g                                       = isVoidExtentHDR ? deFloat32To16(rnd.getFloat(0.0f, 1.0f)) : (deUint16)rnd.getInt(0, 0xffff);
2343                 const deUint16  b                                       = isVoidExtentHDR ? deFloat32To16(rnd.getFloat(0.0f, 1.0f)) : (deUint16)rnd.getInt(0, 0xffff);
2344                 const deUint16  a                                       = isVoidExtentHDR ? deFloat32To16(rnd.getFloat(0.0f, 1.0f)) : (deUint16)rnd.getInt(0, 0xffff);
2345                 generateVoidExtentBlock(VoidExtentParams(isVoidExtentHDR, r, g, b, a)).assignToMemory(dst);
2346         }
2347         else
2348         {
2349                 // Not void extent block.
2350
2351                 // Generate block params.
2352
2353                 NormalBlockParams blockParams;
2354
2355                 do
2356                 {
2357                         blockParams.weightGridWidth                             = rnd.getInt(2, blockSize.x());
2358                         blockParams.weightGridHeight                    = rnd.getInt(2, blockSize.y());
2359                         blockParams.weightISEParams                             = s_weightISEParamsCandidates[rnd.getInt(0, DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates)-1)];
2360                         blockParams.numPartitions                               = rnd.getInt(1, 4);
2361                         blockParams.isMultiPartSingleCemMode    = rnd.getFloat() < 0.25f;
2362                         blockParams.isDualPlane                                 = blockParams.numPartitions != 4 && rnd.getBool();
2363                         blockParams.ccs                                                 = rnd.getInt(0, 3);
2364                         blockParams.partitionSeed                               = rnd.getInt(0, 1023);
2365
2366                         blockParams.colorEndpointModes[0] = rnd.getInt(0, 15);
2367
2368                         {
2369                                 const int cemDiff = blockParams.isMultiPartSingleCemMode                ? 0
2370                                                                         : blockParams.colorEndpointModes[0] == 0        ? 1
2371                                                                         : blockParams.colorEndpointModes[0] == 15       ? -1
2372                                                                         : rnd.getBool()                                                         ? 1 : -1;
2373
2374                                 for (int i = 1; i < blockParams.numPartitions; i++)
2375                                         blockParams.colorEndpointModes[i] = blockParams.colorEndpointModes[0] + (cemDiff == -1 ? rnd.getInt(-1, 0) : cemDiff == 1 ? rnd.getInt(0, 1) : 0);
2376                         }
2377                 } while (!isValidBlockParams(blockParams, blockSize.x(), blockSize.y()));
2378
2379                 // Generate ISE inputs for both weight and endpoint data.
2380
2381                 NormalBlockISEInputs iseInputs;
2382
2383                 for (int weightOrEndpoints = 0; weightOrEndpoints <= 1; weightOrEndpoints++)
2384                 {
2385                         const bool                      setWeights      = weightOrEndpoints == 0;
2386                         const int                       numValues       = setWeights ? computeNumWeights(blockParams) :
2387                                                                                                 computeNumColorEndpointValues(&blockParams.colorEndpointModes[0], blockParams.numPartitions, blockParams.isMultiPartSingleCemMode);
2388                         const ISEParams         iseParams       = setWeights ? blockParams.weightISEParams : computeMaximumRangeISEParams(computeNumBitsForColorEndpoints(blockParams), numValues);
2389                         ISEInput&                       iseInput        = setWeights ? iseInputs.weight : iseInputs.endpoint;
2390
2391                         iseInput.isGivenInBlockForm = rnd.getBool();
2392
2393                         if (iseInput.isGivenInBlockForm)
2394                         {
2395                                 const int numValuesPerISEBlock  = iseParams.mode == ISEMODE_TRIT        ? 5
2396                                                                                                 : iseParams.mode == ISEMODE_QUINT       ? 3
2397                                                                                                 :                                                                         1;
2398                                 const int iseBitMax                             = (1 << iseParams.numBits) - 1;
2399                                 const int numISEBlocks                  = deDivRoundUp32(numValues, numValuesPerISEBlock);
2400
2401                                 for (int iseBlockNdx = 0; iseBlockNdx < numISEBlocks; iseBlockNdx++)
2402                                 {
2403                                         iseInput.value.block[iseBlockNdx].tOrQValue = rnd.getInt(0, 255);
2404                                         for (int i = 0; i < numValuesPerISEBlock; i++)
2405                                                 iseInput.value.block[iseBlockNdx].bitValues[i] = rnd.getInt(0, iseBitMax);
2406                                 }
2407                         }
2408                         else
2409                         {
2410                                 const int rangeMax = computeISERangeMax(iseParams);
2411
2412                                 for (int valueNdx = 0; valueNdx < numValues; valueNdx++)
2413                                         iseInput.value.plain[valueNdx] = rnd.getInt(0, rangeMax);
2414                         }
2415                 }
2416
2417                 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), iseInputs).assignToMemory(dst);
2418         }
2419 }
2420
2421 } // anonymous
2422
2423 // Generate block data for a given BlockTestType and format.
2424 void generateBlockCaseTestData (vector<deUint8>& dst, CompressedTexFormat format, BlockTestType testType)
2425 {
2426         DE_ASSERT(isAstcFormat(format));
2427         DE_ASSERT(!(isAstcSRGBFormat(format) && isBlockTestTypeHDROnly(testType)));
2428
2429         const IVec3 blockSize = getBlockPixelSize(format);
2430         DE_ASSERT(blockSize.z() == 1);
2431
2432         switch (testType)
2433         {
2434                 case BLOCK_TEST_TYPE_VOID_EXTENT_LDR:
2435                 // Generate a gradient-like set of LDR void-extent blocks.
2436                 {
2437                         const int                       numBlocks       = 1<<13;
2438                         const deUint32          numValues       = 1<<16;
2439                         dst.reserve(numBlocks*BLOCK_SIZE_BYTES);
2440
2441                         for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2442                         {
2443                                 const deUint32 baseValue        = blockNdx*(numValues-1) / (numBlocks-1);
2444                                 const deUint16 r                        = (deUint16)((baseValue + numValues*0/4) % numValues);
2445                                 const deUint16 g                        = (deUint16)((baseValue + numValues*1/4) % numValues);
2446                                 const deUint16 b                        = (deUint16)((baseValue + numValues*2/4) % numValues);
2447                                 const deUint16 a                        = (deUint16)((baseValue + numValues*3/4) % numValues);
2448                                 AssignBlock128 block;
2449
2450                                 generateVoidExtentBlock(VoidExtentParams(false, r, g, b, a)).pushBytesToVector(dst);
2451                         }
2452
2453                         break;
2454                 }
2455
2456                 case BLOCK_TEST_TYPE_VOID_EXTENT_HDR:
2457                 // Generate a gradient-like set of HDR void-extent blocks, with values ranging from the largest finite negative to largest finite positive of fp16.
2458                 {
2459                         const float             minValue        = -65504.0f;
2460                         const float             maxValue        = +65504.0f;
2461                         const int               numBlocks       = 1<<13;
2462                         dst.reserve(numBlocks*BLOCK_SIZE_BYTES);
2463
2464                         for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2465                         {
2466                                 const int                       rNdx    = (blockNdx + numBlocks*0/4) % numBlocks;
2467                                 const int                       gNdx    = (blockNdx + numBlocks*1/4) % numBlocks;
2468                                 const int                       bNdx    = (blockNdx + numBlocks*2/4) % numBlocks;
2469                                 const int                       aNdx    = (blockNdx + numBlocks*3/4) % numBlocks;
2470                                 const deFloat16         r               = deFloat32To16(minValue + (float)rNdx * (maxValue - minValue) / (float)(numBlocks-1));
2471                                 const deFloat16         g               = deFloat32To16(minValue + (float)gNdx * (maxValue - minValue) / (float)(numBlocks-1));
2472                                 const deFloat16         b               = deFloat32To16(minValue + (float)bNdx * (maxValue - minValue) / (float)(numBlocks-1));
2473                                 const deFloat16         a               = deFloat32To16(minValue + (float)aNdx * (maxValue - minValue) / (float)(numBlocks-1));
2474
2475                                 generateVoidExtentBlock(VoidExtentParams(true, r, g, b, a)).pushBytesToVector(dst);
2476                         }
2477
2478                         break;
2479                 }
2480
2481                 case BLOCK_TEST_TYPE_WEIGHT_GRID:
2482                 // Generate different combinations of plane count, weight ISE params, and grid size.
2483                 {
2484                         for (int isDualPlane = 0;               isDualPlane <= 1;                                                                                               isDualPlane++)
2485                         for (int iseParamsNdx = 0;              iseParamsNdx < DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates); iseParamsNdx++)
2486                         for (int weightGridWidth = 2;   weightGridWidth <= 12;                                                                                  weightGridWidth++)
2487                         for (int weightGridHeight = 2;  weightGridHeight <= 12;                                                                                 weightGridHeight++)
2488                         {
2489                                 NormalBlockParams               blockParams;
2490                                 NormalBlockISEInputs    iseInputs;
2491
2492                                 blockParams.weightGridWidth                     = weightGridWidth;
2493                                 blockParams.weightGridHeight            = weightGridHeight;
2494                                 blockParams.isDualPlane                         = isDualPlane != 0;
2495                                 blockParams.weightISEParams                     = s_weightISEParamsCandidates[iseParamsNdx];
2496                                 blockParams.ccs                                         = 0;
2497                                 blockParams.numPartitions                       = 1;
2498                                 blockParams.colorEndpointModes[0]       = 0;
2499
2500                                 if (isValidBlockParams(blockParams, blockSize.x(), blockSize.y()))
2501                                         generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), generateDefaultISEInputs(blockParams)).pushBytesToVector(dst);
2502                         }
2503
2504                         break;
2505                 }
2506
2507                 case BLOCK_TEST_TYPE_WEIGHT_ISE:
2508                 // For each weight ISE param set, generate blocks that cover:
2509                 // - each single value of the ISE's range, at each position inside an ISE block
2510                 // - for trit and quint ISEs, each single T or Q value of an ISE block
2511                 {
2512                         for (int iseParamsNdx = 0;      iseParamsNdx < DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates); iseParamsNdx++)
2513                         {
2514                                 const ISEParams&        iseParams = s_weightISEParamsCandidates[iseParamsNdx];
2515                                 NormalBlockParams       blockParams;
2516
2517                                 blockParams.weightGridWidth                     = 4;
2518                                 blockParams.weightGridHeight            = 4;
2519                                 blockParams.weightISEParams                     = iseParams;
2520                                 blockParams.numPartitions                       = 1;
2521                                 blockParams.isDualPlane                         = blockParams.weightGridWidth * blockParams.weightGridHeight < 24 ? true : false;
2522                                 blockParams.ccs                                         = 0;
2523                                 blockParams.colorEndpointModes[0]       = 0;
2524
2525                                 while (!isValidBlockParams(blockParams, blockSize.x(), blockSize.y()))
2526                                 {
2527                                         blockParams.weightGridWidth--;
2528                                         blockParams.weightGridHeight--;
2529                                 }
2530
2531                                 const int numValuesInISEBlock   = iseParams.mode == ISEMODE_TRIT ? 5 : iseParams.mode == ISEMODE_QUINT ? 3 : 1;
2532                                 const int numWeights                    = computeNumWeights(blockParams);
2533
2534                                 {
2535                                         const int                               numWeightValues         = (int)computeISERangeMax(iseParams) + 1;
2536                                         const int                               numBlocks                       = deDivRoundUp32(numWeightValues, numWeights);
2537                                         NormalBlockISEInputs    iseInputs                       = generateDefaultISEInputs(blockParams);
2538                                         iseInputs.weight.isGivenInBlockForm = false;
2539
2540                                         for (int offset = 0;    offset < numValuesInISEBlock;   offset++)
2541                                         for (int blockNdx = 0;  blockNdx < numBlocks;                   blockNdx++)
2542                                         {
2543                                                 for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
2544                                                         iseInputs.weight.value.plain[weightNdx] = (blockNdx*numWeights + weightNdx + offset) % numWeightValues;
2545
2546                                                 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), iseInputs).pushBytesToVector(dst);
2547                                         }
2548                                 }
2549
2550                                 if (iseParams.mode == ISEMODE_TRIT || iseParams.mode == ISEMODE_QUINT)
2551                                 {
2552                                         NormalBlockISEInputs iseInputs = generateDefaultISEInputs(blockParams);
2553                                         iseInputs.weight.isGivenInBlockForm = true;
2554
2555                                         const int numTQValues                   = 1 << (iseParams.mode == ISEMODE_TRIT ? 8 : 7);
2556                                         const int numISEBlocksPerBlock  = deDivRoundUp32(numWeights, numValuesInISEBlock);
2557                                         const int numBlocks                             = deDivRoundUp32(numTQValues, numISEBlocksPerBlock);
2558
2559                                         for (int offset = 0;    offset < numValuesInISEBlock;   offset++)
2560                                         for (int blockNdx = 0;  blockNdx < numBlocks;                   blockNdx++)
2561                                         {
2562                                                 for (int iseBlockNdx = 0; iseBlockNdx < numISEBlocksPerBlock; iseBlockNdx++)
2563                                                 {
2564                                                         for (int i = 0; i < numValuesInISEBlock; i++)
2565                                                                 iseInputs.weight.value.block[iseBlockNdx].bitValues[i] = 0;
2566                                                         iseInputs.weight.value.block[iseBlockNdx].tOrQValue = (blockNdx*numISEBlocksPerBlock + iseBlockNdx + offset) % numTQValues;
2567                                                 }
2568
2569                                                 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), iseInputs).pushBytesToVector(dst);
2570                                         }
2571                                 }
2572                         }
2573
2574                         break;
2575                 }
2576
2577                 case BLOCK_TEST_TYPE_CEMS:
2578                 // For each plane count & partition count combination, generate all color endpoint mode combinations.
2579                 {
2580                         for (int isDualPlane = 0;               isDualPlane <= 1;                                                               isDualPlane++)
2581                         for (int numPartitions = 1;             numPartitions <= (isDualPlane != 0 ? 3 : 4);    numPartitions++)
2582                         {
2583                                 // Multi-partition, single-CEM mode.
2584                                 if (numPartitions > 1)
2585                                 {
2586                                         for (deUint32 singleCem = 0; singleCem < 16; singleCem++)
2587                                         {
2588                                                 NormalBlockParams blockParams;
2589                                                 blockParams.weightGridWidth                             = 4;
2590                                                 blockParams.weightGridHeight                    = 4;
2591                                                 blockParams.isDualPlane                                 = isDualPlane != 0;
2592                                                 blockParams.ccs                                                 = 0;
2593                                                 blockParams.numPartitions                               = numPartitions;
2594                                                 blockParams.isMultiPartSingleCemMode    = true;
2595                                                 blockParams.colorEndpointModes[0]               = singleCem;
2596                                                 blockParams.partitionSeed                               = 634;
2597
2598                                                 for (int iseParamsNdx = 0; iseParamsNdx < DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates); iseParamsNdx++)
2599                                                 {
2600                                                         blockParams.weightISEParams = s_weightISEParamsCandidates[iseParamsNdx];
2601                                                         if (isValidBlockParams(blockParams, blockSize.x(), blockSize.y()))
2602                                                         {
2603                                                                 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), generateDefaultISEInputs(blockParams)).pushBytesToVector(dst);
2604                                                                 break;
2605                                                         }
2606                                                 }
2607                                         }
2608                                 }
2609
2610                                 // Separate-CEM mode.
2611                                 for (deUint32 cem0 = 0; cem0 < 16; cem0++)
2612                                 for (deUint32 cem1 = 0; cem1 < (numPartitions >= 2 ? 16u : 1u); cem1++)
2613                                 for (deUint32 cem2 = 0; cem2 < (numPartitions >= 3 ? 16u : 1u); cem2++)
2614                                 for (deUint32 cem3 = 0; cem3 < (numPartitions >= 4 ? 16u : 1u); cem3++)
2615                                 {
2616                                         NormalBlockParams blockParams;
2617                                         blockParams.weightGridWidth                             = 4;
2618                                         blockParams.weightGridHeight                    = 4;
2619                                         blockParams.isDualPlane                                 = isDualPlane != 0;
2620                                         blockParams.ccs                                                 = 0;
2621                                         blockParams.numPartitions                               = numPartitions;
2622                                         blockParams.isMultiPartSingleCemMode    = false;
2623                                         blockParams.colorEndpointModes[0]               = cem0;
2624                                         blockParams.colorEndpointModes[1]               = cem1;
2625                                         blockParams.colorEndpointModes[2]               = cem2;
2626                                         blockParams.colorEndpointModes[3]               = cem3;
2627                                         blockParams.partitionSeed                               = 634;
2628
2629                                         {
2630                                                 const deUint32 minCem           = *std::min_element(&blockParams.colorEndpointModes[0], &blockParams.colorEndpointModes[numPartitions]);
2631                                                 const deUint32 maxCem           = *std::max_element(&blockParams.colorEndpointModes[0], &blockParams.colorEndpointModes[numPartitions]);
2632                                                 const deUint32 minCemClass      = minCem/4;
2633                                                 const deUint32 maxCemClass      = maxCem/4;
2634
2635                                                 if (maxCemClass - minCemClass > 1)
2636                                                         continue;
2637                                         }
2638
2639                                         for (int iseParamsNdx = 0; iseParamsNdx < DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates); iseParamsNdx++)
2640                                         {
2641                                                 blockParams.weightISEParams = s_weightISEParamsCandidates[iseParamsNdx];
2642                                                 if (isValidBlockParams(blockParams, blockSize.x(), blockSize.y()))
2643                                                 {
2644                                                         generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), generateDefaultISEInputs(blockParams)).pushBytesToVector(dst);
2645                                                         break;
2646                                                 }
2647                                         }
2648                                 }
2649                         }
2650
2651                         break;
2652                 }
2653
2654                 case BLOCK_TEST_TYPE_PARTITION_SEED:
2655                 // Test all partition seeds ("partition pattern indices").
2656                 {
2657                         for (int                numPartitions = 2;      numPartitions <= 4;             numPartitions++)
2658                         for (deUint32   partitionSeed = 0;      partitionSeed < 1<<10;  partitionSeed++)
2659                         {
2660                                 NormalBlockParams blockParams;
2661                                 blockParams.weightGridWidth                             = 4;
2662                                 blockParams.weightGridHeight                    = 4;
2663                                 blockParams.weightISEParams                             = ISEParams(ISEMODE_PLAIN_BIT, 2);
2664                                 blockParams.isDualPlane                                 = false;
2665                                 blockParams.numPartitions                               = numPartitions;
2666                                 blockParams.isMultiPartSingleCemMode    = true;
2667                                 blockParams.colorEndpointModes[0]               = 0;
2668                                 blockParams.partitionSeed                               = partitionSeed;
2669
2670                                 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), generateDefaultISEInputs(blockParams)).pushBytesToVector(dst);
2671                         }
2672
2673                         break;
2674                 }
2675
2676                 // \note Fall-through.
2677                 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_LDR:
2678                 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15:
2679                 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15:
2680                 // For each endpoint mode, for each pair of components in the endpoint value, test 10x10 combinations of values for that pair.
2681                 // \note Separate modes for HDR and mode 15 due to different color scales and biases.
2682                 {
2683                         for (deUint32 cem = 0; cem < 16; cem++)
2684                         {
2685                                 const bool isHDRCem = cem == 2          ||
2686                                                                           cem == 3              ||
2687                                                                           cem == 7              ||
2688                                                                           cem == 11             ||
2689                                                                           cem == 14             ||
2690                                                                           cem == 15;
2691
2692                                 if ((testType == BLOCK_TEST_TYPE_ENDPOINT_VALUE_LDR                     && isHDRCem)                                    ||
2693                                         (testType == BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15           && (!isHDRCem || cem == 15))    ||
2694                                         (testType == BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15              && cem != 15))
2695                                         continue;
2696
2697                                 NormalBlockParams blockParams;
2698                                 blockParams.weightGridWidth                     = 3;
2699                                 blockParams.weightGridHeight            = 4;
2700                                 blockParams.weightISEParams                     = ISEParams(ISEMODE_PLAIN_BIT, 2);
2701                                 blockParams.isDualPlane                         = false;
2702                                 blockParams.numPartitions                       = 1;
2703                                 blockParams.colorEndpointModes[0]       = cem;
2704
2705                                 {
2706                                         const int                       numBitsForEndpoints             = computeNumBitsForColorEndpoints(blockParams);
2707                                         const int                       numEndpointParts                = computeNumColorEndpointValues(cem);
2708                                         const ISEParams         endpointISE                             = computeMaximumRangeISEParams(numBitsForEndpoints, numEndpointParts);
2709                                         const int                       endpointISERangeMax             = computeISERangeMax(endpointISE);
2710
2711                                         for (int endpointPartNdx0 = 0;                                          endpointPartNdx0 < numEndpointParts; endpointPartNdx0++)
2712                                         for (int endpointPartNdx1 = endpointPartNdx0+1;         endpointPartNdx1 < numEndpointParts; endpointPartNdx1++)
2713                                         {
2714                                                 NormalBlockISEInputs    iseInputs                       = generateDefaultISEInputs(blockParams);
2715                                                 const int                               numEndpointValues       = de::min(10, endpointISERangeMax+1);
2716
2717                                                 for (int endpointValueNdx0 = 0; endpointValueNdx0 < numEndpointValues; endpointValueNdx0++)
2718                                                 for (int endpointValueNdx1 = 0; endpointValueNdx1 < numEndpointValues; endpointValueNdx1++)
2719                                                 {
2720                                                         const int endpointValue0 = endpointValueNdx0 * endpointISERangeMax / (numEndpointValues-1);
2721                                                         const int endpointValue1 = endpointValueNdx1 * endpointISERangeMax / (numEndpointValues-1);
2722
2723                                                         iseInputs.endpoint.value.plain[endpointPartNdx0] = endpointValue0;
2724                                                         iseInputs.endpoint.value.plain[endpointPartNdx1] = endpointValue1;
2725
2726                                                         generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), iseInputs).pushBytesToVector(dst);
2727                                                 }
2728                                         }
2729                                 }
2730                         }
2731
2732                         break;
2733                 }
2734
2735                 case BLOCK_TEST_TYPE_ENDPOINT_ISE:
2736                 // Similar to BLOCK_TEST_TYPE_WEIGHT_ISE, see above.
2737                 {
2738                         static const deUint32 endpointRangeMaximums[] = { 5, 9, 11, 19, 23, 39, 47, 79, 95, 159, 191 };
2739
2740                         for (int endpointRangeNdx = 0; endpointRangeNdx < DE_LENGTH_OF_ARRAY(endpointRangeMaximums); endpointRangeNdx++)
2741                         {
2742                                 bool validCaseGenerated = false;
2743
2744                                 for (int numPartitions = 1;                     !validCaseGenerated && numPartitions <= 4;                                                                                                              numPartitions++)
2745                                 for (int isDual = 0;                            !validCaseGenerated && isDual <= 1;                                                                                                                             isDual++)
2746                                 for (int weightISEParamsNdx = 0;        !validCaseGenerated && weightISEParamsNdx < DE_LENGTH_OF_ARRAY(s_weightISEParamsCandidates);    weightISEParamsNdx++)
2747                                 for (int weightGridWidth = 2;           !validCaseGenerated && weightGridWidth <= 12;                                                                                                   weightGridWidth++)
2748                                 for (int weightGridHeight = 2;          !validCaseGenerated && weightGridHeight <= 12;                                                                                                  weightGridHeight++)
2749                                 {
2750                                         NormalBlockParams blockParams;
2751                                         blockParams.weightGridWidth                             = weightGridWidth;
2752                                         blockParams.weightGridHeight                    = weightGridHeight;
2753                                         blockParams.weightISEParams                             = s_weightISEParamsCandidates[weightISEParamsNdx];
2754                                         blockParams.isDualPlane                                 = isDual != 0;
2755                                         blockParams.ccs                                                 = 0;
2756                                         blockParams.numPartitions                               = numPartitions;
2757                                         blockParams.isMultiPartSingleCemMode    = true;
2758                                         blockParams.colorEndpointModes[0]               = 12;
2759                                         blockParams.partitionSeed                               = 634;
2760
2761                                         if (isValidBlockParams(blockParams, blockSize.x(), blockSize.y()))
2762                                         {
2763                                                 const ISEParams endpointISEParams = computeMaximumRangeISEParams(computeNumBitsForColorEndpoints(blockParams),
2764                                                                                                                                                                                  computeNumColorEndpointValues(&blockParams.colorEndpointModes[0], numPartitions, true));
2765
2766                                                 if (computeISERangeMax(endpointISEParams) == endpointRangeMaximums[endpointRangeNdx])
2767                                                 {
2768                                                         validCaseGenerated = true;
2769
2770                                                         const int numColorEndpoints             = computeNumColorEndpointValues(&blockParams.colorEndpointModes[0], numPartitions, blockParams.isMultiPartSingleCemMode);
2771                                                         const int numValuesInISEBlock   = endpointISEParams.mode == ISEMODE_TRIT ? 5 : endpointISEParams.mode == ISEMODE_QUINT ? 3 : 1;
2772
2773                                                         {
2774                                                                 const int                               numColorEndpointValues  = (int)computeISERangeMax(endpointISEParams) + 1;
2775                                                                 const int                               numBlocks                               = deDivRoundUp32(numColorEndpointValues, numColorEndpoints);
2776                                                                 NormalBlockISEInputs    iseInputs                               = generateDefaultISEInputs(blockParams);
2777                                                                 iseInputs.endpoint.isGivenInBlockForm = false;
2778
2779                                                                 for (int offset = 0;    offset < numValuesInISEBlock;   offset++)
2780                                                                 for (int blockNdx = 0;  blockNdx < numBlocks;                   blockNdx++)
2781                                                                 {
2782                                                                         for (int endpointNdx = 0; endpointNdx < numColorEndpoints; endpointNdx++)
2783                                                                                 iseInputs.endpoint.value.plain[endpointNdx] = (blockNdx*numColorEndpoints + endpointNdx + offset) % numColorEndpointValues;
2784
2785                                                                         generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), iseInputs).pushBytesToVector(dst);
2786                                                                 }
2787                                                         }
2788
2789                                                         if (endpointISEParams.mode == ISEMODE_TRIT || endpointISEParams.mode == ISEMODE_QUINT)
2790                                                         {
2791                                                                 NormalBlockISEInputs iseInputs = generateDefaultISEInputs(blockParams);
2792                                                                 iseInputs.endpoint.isGivenInBlockForm = true;
2793
2794                                                                 const int numTQValues                   = 1 << (endpointISEParams.mode == ISEMODE_TRIT ? 8 : 7);
2795                                                                 const int numISEBlocksPerBlock  = deDivRoundUp32(numColorEndpoints, numValuesInISEBlock);
2796                                                                 const int numBlocks                             = deDivRoundUp32(numTQValues, numISEBlocksPerBlock);
2797
2798                                                                 for (int offset = 0;    offset < numValuesInISEBlock;   offset++)
2799                                                                 for (int blockNdx = 0;  blockNdx < numBlocks;                   blockNdx++)
2800                                                                 {
2801                                                                         for (int iseBlockNdx = 0; iseBlockNdx < numISEBlocksPerBlock; iseBlockNdx++)
2802                                                                         {
2803                                                                                 for (int i = 0; i < numValuesInISEBlock; i++)
2804                                                                                         iseInputs.endpoint.value.block[iseBlockNdx].bitValues[i] = 0;
2805                                                                                 iseInputs.endpoint.value.block[iseBlockNdx].tOrQValue = (blockNdx*numISEBlocksPerBlock + iseBlockNdx + offset) % numTQValues;
2806                                                                         }
2807
2808                                                                         generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), iseInputs).pushBytesToVector(dst);
2809                                                                 }
2810                                                         }
2811                                                 }
2812                                         }
2813                                 }
2814
2815                                 DE_ASSERT(validCaseGenerated);
2816                         }
2817
2818                         break;
2819                 }
2820
2821                 case BLOCK_TEST_TYPE_CCS:
2822                 // For all partition counts, test all values of the CCS (color component selector).
2823                 {
2824                         for (int                numPartitions = 1;              numPartitions <= 3;             numPartitions++)
2825                         for (deUint32   ccs = 0;                                ccs < 4;                                ccs++)
2826                         {
2827                                 NormalBlockParams blockParams;
2828                                 blockParams.weightGridWidth                             = 3;
2829                                 blockParams.weightGridHeight                    = 3;
2830                                 blockParams.weightISEParams                             = ISEParams(ISEMODE_PLAIN_BIT, 2);
2831                                 blockParams.isDualPlane                                 = true;
2832                                 blockParams.ccs                                                 = ccs;
2833                                 blockParams.numPartitions                               = numPartitions;
2834                                 blockParams.isMultiPartSingleCemMode    = true;
2835                                 blockParams.colorEndpointModes[0]               = 8;
2836                                 blockParams.partitionSeed                               = 634;
2837
2838                                 generateNormalBlock(blockParams, blockSize.x(), blockSize.y(), generateDefaultISEInputs(blockParams)).pushBytesToVector(dst);
2839                         }
2840
2841                         break;
2842                 }
2843
2844                 case BLOCK_TEST_TYPE_RANDOM:
2845                 // Generate a number of random (including invalid) blocks.
2846                 {
2847                         const int               numBlocks       = 16384;
2848                         const deUint32  seed            = 1;
2849
2850                         dst.resize(numBlocks*BLOCK_SIZE_BYTES);
2851
2852                         generateRandomBlocks(&dst[0], numBlocks, format, seed);
2853
2854                         break;
2855                 }
2856
2857                 default:
2858                         DE_ASSERT(false);
2859         }
2860 }
2861
2862 void generateRandomBlocks (deUint8* dst, size_t numBlocks, CompressedTexFormat format, deUint32 seed)
2863 {
2864         const IVec3             blockSize                       = getBlockPixelSize(format);
2865         de::Random              rnd                                     (seed);
2866         size_t                  numBlocksGenerated      = 0;
2867
2868         DE_ASSERT(isAstcFormat(format));
2869         DE_ASSERT(blockSize.z() == 1);
2870
2871         for (numBlocksGenerated = 0; numBlocksGenerated < numBlocks; numBlocksGenerated++)
2872         {
2873                 deUint8* const  curBlockPtr             = dst + numBlocksGenerated*BLOCK_SIZE_BYTES;
2874
2875                 generateRandomBlock(curBlockPtr, blockSize, rnd);
2876         }
2877 }
2878
2879 void generateRandomValidBlocks (deUint8* dst, size_t numBlocks, CompressedTexFormat format, TexDecompressionParams::AstcMode mode, deUint32 seed)
2880 {
2881         const IVec3             blockSize                       = getBlockPixelSize(format);
2882         de::Random              rnd                                     (seed);
2883         size_t                  numBlocksGenerated      = 0;
2884
2885         DE_ASSERT(isAstcFormat(format));
2886         DE_ASSERT(blockSize.z() == 1);
2887
2888         for (numBlocksGenerated = 0; numBlocksGenerated < numBlocks; numBlocksGenerated++)
2889         {
2890                 deUint8* const  curBlockPtr             = dst + numBlocksGenerated*BLOCK_SIZE_BYTES;
2891
2892                 do
2893                 {
2894                         generateRandomBlock(curBlockPtr, blockSize, rnd);
2895                 } while (!isValidBlock(curBlockPtr, format, mode));
2896         }
2897 }
2898
2899 // Generate a number of trivial blocks to fill unneeded space in a texture.
2900 void generateDefaultVoidExtentBlocks (deUint8* dst, size_t numBlocks)
2901 {
2902         AssignBlock128 block = generateVoidExtentBlock(VoidExtentParams(false, 0, 0, 0, 0));
2903         for (size_t ndx = 0; ndx < numBlocks; ndx++)
2904                 block.assignToMemory(&dst[ndx * BLOCK_SIZE_BYTES]);
2905 }
2906
2907 void generateDefaultNormalBlocks (deUint8* dst, size_t numBlocks, int blockWidth, int blockHeight)
2908 {
2909         NormalBlockParams blockParams;
2910
2911         blockParams.weightGridWidth                     = 3;
2912         blockParams.weightGridHeight            = 3;
2913         blockParams.weightISEParams                     = ISEParams(ISEMODE_PLAIN_BIT, 5);
2914         blockParams.isDualPlane                         = false;
2915         blockParams.numPartitions                       = 1;
2916         blockParams.colorEndpointModes[0]       = 8;
2917
2918         NormalBlockISEInputs iseInputs = generateDefaultISEInputs(blockParams);
2919         iseInputs.weight.isGivenInBlockForm = false;
2920
2921         const int numWeights            = computeNumWeights(blockParams);
2922         const int weightRangeMax        = computeISERangeMax(blockParams.weightISEParams);
2923
2924         for (size_t blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2925         {
2926                 for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
2927                         iseInputs.weight.value.plain[weightNdx] = (deUint32)((blockNdx*numWeights + weightNdx) * weightRangeMax / (numBlocks*numWeights-1));
2928
2929                 generateNormalBlock(blockParams, blockWidth, blockHeight, iseInputs).assignToMemory(dst + blockNdx*BLOCK_SIZE_BYTES);
2930         }
2931 }
2932
2933 bool isValidBlock (const deUint8* data, CompressedTexFormat format, TexDecompressionParams::AstcMode mode)
2934 {
2935         const tcu::IVec3                blockPixelSize  = getBlockPixelSize(format);
2936         const bool                              isSRGB                  = isAstcSRGBFormat(format);
2937         const bool                              isLDR                   = isSRGB || mode == TexDecompressionParams::ASTCMODE_LDR;
2938
2939         // sRGB is not supported in HDR mode
2940         DE_ASSERT(!(mode == TexDecompressionParams::ASTCMODE_HDR && isSRGB));
2941
2942         union
2943         {
2944                 deUint8         sRGB[MAX_BLOCK_WIDTH*MAX_BLOCK_HEIGHT*4];
2945                 float           linear[MAX_BLOCK_WIDTH*MAX_BLOCK_HEIGHT*4];
2946         } tmpBuffer;
2947         const Block128                  blockData               (data);
2948         const DecompressResult  result                  = decompressBlock((isSRGB ? (void*)&tmpBuffer.sRGB[0] : (void*)&tmpBuffer.linear[0]),
2949                                                                                                                           blockData, blockPixelSize.x(), blockPixelSize.y(), isSRGB, isLDR);
2950
2951         return result == DECOMPRESS_RESULT_VALID_BLOCK;
2952 }
2953
2954 void decompress (const PixelBufferAccess& dst, const deUint8* data, CompressedTexFormat format, TexDecompressionParams::AstcMode mode)
2955 {
2956         const bool                      isSRGBFormat    = isAstcSRGBFormat(format);
2957
2958 #if defined(DE_DEBUG)
2959         const tcu::IVec3        blockPixelSize  = getBlockPixelSize(format);
2960
2961         DE_ASSERT(dst.getWidth()        == blockPixelSize.x() &&
2962                           dst.getHeight()       == blockPixelSize.y() &&
2963                           dst.getDepth()        == blockPixelSize.z());
2964         DE_ASSERT(mode == TexDecompressionParams::ASTCMODE_LDR || mode == TexDecompressionParams::ASTCMODE_HDR);
2965 #endif
2966
2967         // sRGB is not supported in HDR mode
2968         DE_ASSERT(!(mode == TexDecompressionParams::ASTCMODE_HDR && isSRGBFormat));
2969
2970         decompress(dst, data, isSRGBFormat, isSRGBFormat || mode == TexDecompressionParams::ASTCMODE_LDR);
2971 }
2972
2973 const char* getBlockTestTypeName (BlockTestType testType)
2974 {
2975         switch (testType)
2976         {
2977                 case BLOCK_TEST_TYPE_VOID_EXTENT_LDR:                   return "void_extent_ldr";
2978                 case BLOCK_TEST_TYPE_VOID_EXTENT_HDR:                   return "void_extent_hdr";
2979                 case BLOCK_TEST_TYPE_WEIGHT_GRID:                               return "weight_grid";
2980                 case BLOCK_TEST_TYPE_WEIGHT_ISE:                                return "weight_ise";
2981                 case BLOCK_TEST_TYPE_CEMS:                                              return "color_endpoint_modes";
2982                 case BLOCK_TEST_TYPE_PARTITION_SEED:                    return "partition_pattern_index";
2983                 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_LDR:                return "endpoint_value_ldr";
2984                 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15:  return "endpoint_value_hdr_cem_not_15";
2985                 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15:             return "endpoint_value_hdr_cem_15";
2986                 case BLOCK_TEST_TYPE_ENDPOINT_ISE:                              return "endpoint_ise";
2987                 case BLOCK_TEST_TYPE_CCS:                                               return "color_component_selector";
2988                 case BLOCK_TEST_TYPE_RANDOM:                                    return "random";
2989                 default:
2990                         DE_ASSERT(false);
2991                         return DE_NULL;
2992         }
2993 }
2994
2995 const char* getBlockTestTypeDescription (BlockTestType testType)
2996 {
2997         switch (testType)
2998         {
2999                 case BLOCK_TEST_TYPE_VOID_EXTENT_LDR:                   return "Test void extent block, LDR mode";
3000                 case BLOCK_TEST_TYPE_VOID_EXTENT_HDR:                   return "Test void extent block, HDR mode";
3001                 case BLOCK_TEST_TYPE_WEIGHT_GRID:                               return "Test combinations of plane count, weight integer sequence encoding parameters, and weight grid size";
3002                 case BLOCK_TEST_TYPE_WEIGHT_ISE:                                return "Test different integer sequence encoding block values for weight grid";
3003                 case BLOCK_TEST_TYPE_CEMS:                                              return "Test different color endpoint mode combinations, combined with different plane and partition counts";
3004                 case BLOCK_TEST_TYPE_PARTITION_SEED:                    return "Test different partition pattern indices";
3005                 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_LDR:                return "Test various combinations of each pair of color endpoint values, for each LDR color endpoint mode";
3006                 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15:  return "Test various combinations of each pair of color endpoint values, for each HDR color endpoint mode other than mode 15";
3007                 case BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15:             return "Test various combinations of each pair of color endpoint values, HDR color endpoint mode 15";
3008                 case BLOCK_TEST_TYPE_ENDPOINT_ISE:                              return "Test different integer sequence encoding block values for color endpoints";
3009                 case BLOCK_TEST_TYPE_CCS:                                               return "Test color component selector, for different partition counts";
3010                 case BLOCK_TEST_TYPE_RANDOM:                                    return "Random block test";
3011                 default:
3012                         DE_ASSERT(false);
3013                         return DE_NULL;
3014         }
3015 }
3016
3017 bool isBlockTestTypeHDROnly (BlockTestType testType)
3018 {
3019         return testType == BLOCK_TEST_TYPE_VOID_EXTENT_HDR                      ||
3020                    testType == BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15 ||
3021                    testType == BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15;
3022 }
3023
3024 Vec4 getBlockTestTypeColorScale (BlockTestType testType)
3025 {
3026         switch (testType)
3027         {
3028                 case tcu::astc::BLOCK_TEST_TYPE_VOID_EXTENT_HDR:                        return Vec4(0.5f/65504.0f);
3029                 case tcu::astc::BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_NO_15:       return Vec4(1.0f/65504.0f, 1.0f/65504.0f, 1.0f/65504.0f, 1.0f);
3030                 case tcu::astc::BLOCK_TEST_TYPE_ENDPOINT_VALUE_HDR_15:          return Vec4(1.0f/65504.0f);
3031                 default:                                                                                                        return Vec4(1.0f);
3032         }
3033 }
3034
3035 Vec4 getBlockTestTypeColorBias (BlockTestType testType)
3036 {
3037         switch (testType)
3038         {
3039                 case tcu::astc::BLOCK_TEST_TYPE_VOID_EXTENT_HDR:        return Vec4(0.5f);
3040                 default:                                                                                        return Vec4(0.0f);
3041         }
3042 }
3043
3044 } // astc
3045 } // tcu