Fix a couple of trivial warnings reported by MSVC
[platform/upstream/VK-GL-CTS.git] / framework / common / tcuCompressedTexture.cpp
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program Tester Core
3  * ----------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Compressed Texture Utilities.
22  *//*--------------------------------------------------------------------*/
23
24 #include "tcuCompressedTexture.hpp"
25 #include "tcuTextureUtil.hpp"
26
27 #include "deStringUtil.hpp"
28 #include "deFloat16.h"
29
30 #include <algorithm>
31
32 namespace tcu
33 {
34
namespace
{

// Size of a single ASTC block in bytes (128 bits).
enum
{
	ASTC_BLOCK_SIZE_BYTES = 128 / 8
};

// Compile-time type comparison: V is 1 when both template arguments name
// the same type and 0 otherwise.
template <typename First, typename Second>
struct isSameType
{
	enum { V = 0 };
};

// Specialization picked when the two arguments are the same type.
template <typename Same>
struct isSameType<Same, Same>
{
	enum { V = 1 };
};

} // anonymous
46
47 int getBlockSize (CompressedTexFormat format)
48 {
49         if (isAstcFormat(format))
50         {
51                 return ASTC_BLOCK_SIZE_BYTES;
52         }
53         else if (isEtcFormat(format))
54         {
55                 switch (format)
56                 {
57                         case COMPRESSEDTEXFORMAT_ETC1_RGB8:                                                     return 8;
58                         case COMPRESSEDTEXFORMAT_EAC_R11:                                                       return 8;
59                         case COMPRESSEDTEXFORMAT_EAC_SIGNED_R11:                                        return 8;
60                         case COMPRESSEDTEXFORMAT_EAC_RG11:                                                      return 16;
61                         case COMPRESSEDTEXFORMAT_EAC_SIGNED_RG11:                                       return 16;
62                         case COMPRESSEDTEXFORMAT_ETC2_RGB8:                                                     return 8;
63                         case COMPRESSEDTEXFORMAT_ETC2_SRGB8:                                            return 8;
64                         case COMPRESSEDTEXFORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:         return 8;
65                         case COMPRESSEDTEXFORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:        return 8;
66                         case COMPRESSEDTEXFORMAT_ETC2_EAC_RGBA8:                                        return 16;
67                         case COMPRESSEDTEXFORMAT_ETC2_EAC_SRGB8_ALPHA8:                         return 16;
68
69                         default:
70                                 DE_ASSERT(false);
71                                 return -1;
72                 }
73         }
74         else
75         {
76                 DE_ASSERT(false);
77                 return -1;
78         }
79 }
80
81 IVec3 getBlockPixelSize (CompressedTexFormat format)
82 {
83         if (isEtcFormat(format))
84         {
85                 return IVec3(4, 4, 1);
86         }
87         else if (isAstcFormat(format))
88         {
89                 switch (format)
90                 {
91                         case COMPRESSEDTEXFORMAT_ASTC_4x4_RGBA:                         return IVec3(4,  4,  1);
92                         case COMPRESSEDTEXFORMAT_ASTC_5x4_RGBA:                         return IVec3(5,  4,  1);
93                         case COMPRESSEDTEXFORMAT_ASTC_5x5_RGBA:                         return IVec3(5,  5,  1);
94                         case COMPRESSEDTEXFORMAT_ASTC_6x5_RGBA:                         return IVec3(6,  5,  1);
95                         case COMPRESSEDTEXFORMAT_ASTC_6x6_RGBA:                         return IVec3(6,  6,  1);
96                         case COMPRESSEDTEXFORMAT_ASTC_8x5_RGBA:                         return IVec3(8,  5,  1);
97                         case COMPRESSEDTEXFORMAT_ASTC_8x6_RGBA:                         return IVec3(8,  6,  1);
98                         case COMPRESSEDTEXFORMAT_ASTC_8x8_RGBA:                         return IVec3(8,  8,  1);
99                         case COMPRESSEDTEXFORMAT_ASTC_10x5_RGBA:                        return IVec3(10, 5,  1);
100                         case COMPRESSEDTEXFORMAT_ASTC_10x6_RGBA:                        return IVec3(10, 6,  1);
101                         case COMPRESSEDTEXFORMAT_ASTC_10x8_RGBA:                        return IVec3(10, 8,  1);
102                         case COMPRESSEDTEXFORMAT_ASTC_10x10_RGBA:                       return IVec3(10, 10, 1);
103                         case COMPRESSEDTEXFORMAT_ASTC_12x10_RGBA:                       return IVec3(12, 10, 1);
104                         case COMPRESSEDTEXFORMAT_ASTC_12x12_RGBA:                       return IVec3(12, 12, 1);
105                         case COMPRESSEDTEXFORMAT_ASTC_4x4_SRGB8_ALPHA8:         return IVec3(4,  4,  1);
106                         case COMPRESSEDTEXFORMAT_ASTC_5x4_SRGB8_ALPHA8:         return IVec3(5,  4,  1);
107                         case COMPRESSEDTEXFORMAT_ASTC_5x5_SRGB8_ALPHA8:         return IVec3(5,  5,  1);
108                         case COMPRESSEDTEXFORMAT_ASTC_6x5_SRGB8_ALPHA8:         return IVec3(6,  5,  1);
109                         case COMPRESSEDTEXFORMAT_ASTC_6x6_SRGB8_ALPHA8:         return IVec3(6,  6,  1);
110                         case COMPRESSEDTEXFORMAT_ASTC_8x5_SRGB8_ALPHA8:         return IVec3(8,  5,  1);
111                         case COMPRESSEDTEXFORMAT_ASTC_8x6_SRGB8_ALPHA8:         return IVec3(8,  6,  1);
112                         case COMPRESSEDTEXFORMAT_ASTC_8x8_SRGB8_ALPHA8:         return IVec3(8,  8,  1);
113                         case COMPRESSEDTEXFORMAT_ASTC_10x5_SRGB8_ALPHA8:        return IVec3(10, 5,  1);
114                         case COMPRESSEDTEXFORMAT_ASTC_10x6_SRGB8_ALPHA8:        return IVec3(10, 6,  1);
115                         case COMPRESSEDTEXFORMAT_ASTC_10x8_SRGB8_ALPHA8:        return IVec3(10, 8,  1);
116                         case COMPRESSEDTEXFORMAT_ASTC_10x10_SRGB8_ALPHA8:       return IVec3(10, 10, 1);
117                         case COMPRESSEDTEXFORMAT_ASTC_12x10_SRGB8_ALPHA8:       return IVec3(12, 10, 1);
118                         case COMPRESSEDTEXFORMAT_ASTC_12x12_SRGB8_ALPHA8:       return IVec3(12, 12, 1);
119
120                         default:
121                                 DE_ASSERT(false);
122                                 return IVec3();
123                 }
124         }
125         else
126         {
127                 DE_ASSERT(false);
128                 return IVec3(-1);
129         }
130 }
131
132 bool isEtcFormat (CompressedTexFormat format)
133 {
134         switch (format)
135         {
136                 case COMPRESSEDTEXFORMAT_ETC1_RGB8:
137                 case COMPRESSEDTEXFORMAT_EAC_R11:
138                 case COMPRESSEDTEXFORMAT_EAC_SIGNED_R11:
139                 case COMPRESSEDTEXFORMAT_EAC_RG11:
140                 case COMPRESSEDTEXFORMAT_EAC_SIGNED_RG11:
141                 case COMPRESSEDTEXFORMAT_ETC2_RGB8:
142                 case COMPRESSEDTEXFORMAT_ETC2_SRGB8:
143                 case COMPRESSEDTEXFORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
144                 case COMPRESSEDTEXFORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
145                 case COMPRESSEDTEXFORMAT_ETC2_EAC_RGBA8:
146                 case COMPRESSEDTEXFORMAT_ETC2_EAC_SRGB8_ALPHA8:
147                         return true;
148
149                 default:
150                         return false;
151         }
152 }
153
154 bool isAstcFormat (CompressedTexFormat format)
155 {
156         switch (format)
157         {
158                 case COMPRESSEDTEXFORMAT_ASTC_4x4_RGBA:
159                 case COMPRESSEDTEXFORMAT_ASTC_5x4_RGBA:
160                 case COMPRESSEDTEXFORMAT_ASTC_5x5_RGBA:
161                 case COMPRESSEDTEXFORMAT_ASTC_6x5_RGBA:
162                 case COMPRESSEDTEXFORMAT_ASTC_6x6_RGBA:
163                 case COMPRESSEDTEXFORMAT_ASTC_8x5_RGBA:
164                 case COMPRESSEDTEXFORMAT_ASTC_8x6_RGBA:
165                 case COMPRESSEDTEXFORMAT_ASTC_8x8_RGBA:
166                 case COMPRESSEDTEXFORMAT_ASTC_10x5_RGBA:
167                 case COMPRESSEDTEXFORMAT_ASTC_10x6_RGBA:
168                 case COMPRESSEDTEXFORMAT_ASTC_10x8_RGBA:
169                 case COMPRESSEDTEXFORMAT_ASTC_10x10_RGBA:
170                 case COMPRESSEDTEXFORMAT_ASTC_12x10_RGBA:
171                 case COMPRESSEDTEXFORMAT_ASTC_12x12_RGBA:
172                 case COMPRESSEDTEXFORMAT_ASTC_4x4_SRGB8_ALPHA8:
173                 case COMPRESSEDTEXFORMAT_ASTC_5x4_SRGB8_ALPHA8:
174                 case COMPRESSEDTEXFORMAT_ASTC_5x5_SRGB8_ALPHA8:
175                 case COMPRESSEDTEXFORMAT_ASTC_6x5_SRGB8_ALPHA8:
176                 case COMPRESSEDTEXFORMAT_ASTC_6x6_SRGB8_ALPHA8:
177                 case COMPRESSEDTEXFORMAT_ASTC_8x5_SRGB8_ALPHA8:
178                 case COMPRESSEDTEXFORMAT_ASTC_8x6_SRGB8_ALPHA8:
179                 case COMPRESSEDTEXFORMAT_ASTC_8x8_SRGB8_ALPHA8:
180                 case COMPRESSEDTEXFORMAT_ASTC_10x5_SRGB8_ALPHA8:
181                 case COMPRESSEDTEXFORMAT_ASTC_10x6_SRGB8_ALPHA8:
182                 case COMPRESSEDTEXFORMAT_ASTC_10x8_SRGB8_ALPHA8:
183                 case COMPRESSEDTEXFORMAT_ASTC_10x10_SRGB8_ALPHA8:
184                 case COMPRESSEDTEXFORMAT_ASTC_12x10_SRGB8_ALPHA8:
185                 case COMPRESSEDTEXFORMAT_ASTC_12x12_SRGB8_ALPHA8:
186                         return true;
187
188                 default:
189                         return false;
190         }
191 }
192
193 bool isAstcSRGBFormat (CompressedTexFormat format)
194 {
195         switch (format)
196         {
197                 case COMPRESSEDTEXFORMAT_ASTC_4x4_SRGB8_ALPHA8:
198                 case COMPRESSEDTEXFORMAT_ASTC_5x4_SRGB8_ALPHA8:
199                 case COMPRESSEDTEXFORMAT_ASTC_5x5_SRGB8_ALPHA8:
200                 case COMPRESSEDTEXFORMAT_ASTC_6x5_SRGB8_ALPHA8:
201                 case COMPRESSEDTEXFORMAT_ASTC_6x6_SRGB8_ALPHA8:
202                 case COMPRESSEDTEXFORMAT_ASTC_8x5_SRGB8_ALPHA8:
203                 case COMPRESSEDTEXFORMAT_ASTC_8x6_SRGB8_ALPHA8:
204                 case COMPRESSEDTEXFORMAT_ASTC_8x8_SRGB8_ALPHA8:
205                 case COMPRESSEDTEXFORMAT_ASTC_10x5_SRGB8_ALPHA8:
206                 case COMPRESSEDTEXFORMAT_ASTC_10x6_SRGB8_ALPHA8:
207                 case COMPRESSEDTEXFORMAT_ASTC_10x8_SRGB8_ALPHA8:
208                 case COMPRESSEDTEXFORMAT_ASTC_10x10_SRGB8_ALPHA8:
209                 case COMPRESSEDTEXFORMAT_ASTC_12x10_SRGB8_ALPHA8:
210                 case COMPRESSEDTEXFORMAT_ASTC_12x12_SRGB8_ALPHA8:
211                         return true;
212
213                 default:
214                         return false;
215         }
216 }
217
218 TextureFormat getUncompressedFormat (CompressedTexFormat format)
219 {
220         if (isEtcFormat(format))
221         {
222                 switch (format)
223                 {
224                         case COMPRESSEDTEXFORMAT_ETC1_RGB8:                                                     return TextureFormat(TextureFormat::RGB,        TextureFormat::UNORM_INT8);
225                         case COMPRESSEDTEXFORMAT_EAC_R11:                                                       return TextureFormat(TextureFormat::R,          TextureFormat::UNORM_INT16);
226                         case COMPRESSEDTEXFORMAT_EAC_SIGNED_R11:                                        return TextureFormat(TextureFormat::R,          TextureFormat::SNORM_INT16);
227                         case COMPRESSEDTEXFORMAT_EAC_RG11:                                                      return TextureFormat(TextureFormat::RG,         TextureFormat::UNORM_INT16);
228                         case COMPRESSEDTEXFORMAT_EAC_SIGNED_RG11:                                       return TextureFormat(TextureFormat::RG,         TextureFormat::SNORM_INT16);
229                         case COMPRESSEDTEXFORMAT_ETC2_RGB8:                                                     return TextureFormat(TextureFormat::RGB,        TextureFormat::UNORM_INT8);
230                         case COMPRESSEDTEXFORMAT_ETC2_SRGB8:                                            return TextureFormat(TextureFormat::sRGB,       TextureFormat::UNORM_INT8);
231                         case COMPRESSEDTEXFORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:         return TextureFormat(TextureFormat::RGBA,       TextureFormat::UNORM_INT8);
232                         case COMPRESSEDTEXFORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:        return TextureFormat(TextureFormat::sRGBA,      TextureFormat::UNORM_INT8);
233                         case COMPRESSEDTEXFORMAT_ETC2_EAC_RGBA8:                                        return TextureFormat(TextureFormat::RGBA,       TextureFormat::UNORM_INT8);
234                         case COMPRESSEDTEXFORMAT_ETC2_EAC_SRGB8_ALPHA8:                         return TextureFormat(TextureFormat::sRGBA,      TextureFormat::UNORM_INT8);
235
236                         default:
237                                 DE_ASSERT(false);
238                                 return TextureFormat();
239                 }
240         }
241         else if (isAstcFormat(format))
242         {
243                 if (isAstcSRGBFormat(format))
244                         return TextureFormat(TextureFormat::sRGBA, TextureFormat::UNORM_INT8);
245                 else
246                         return TextureFormat(TextureFormat::RGBA, TextureFormat::HALF_FLOAT);
247         }
248         else
249         {
250                 DE_ASSERT(false);
251                 return TextureFormat();
252         }
253 }
254
255 CompressedTexFormat getAstcFormatByBlockSize (const IVec3& size, bool isSRGB)
256 {
257         if (size.z() > 1)
258                 throw InternalError("3D ASTC textures not currently supported");
259
260         for (int fmtI = 0; fmtI < COMPRESSEDTEXFORMAT_LAST; fmtI++)
261         {
262                 const CompressedTexFormat fmt = (CompressedTexFormat)fmtI;
263
264                 if (isAstcFormat(fmt) && getBlockPixelSize(fmt) == size && isAstcSRGBFormat(fmt) == isSRGB)
265                         return fmt;
266         }
267
268         throw InternalError("Invalid ASTC block size " + de::toString(size.x()) + "x" + de::toString(size.y()) + "x" + de::toString(size.z()));
269 }
270
271 namespace
272 {
273
// Divide a by b, adding one to the truncated quotient whenever the
// remainder is non-zero.
inline int divRoundUp (int a, int b)
{
	const int quotient	= a / b;
	const int remainder	= a % b;

	return (remainder != 0) ? (quotient + 1) : quotient;
}
278
279 // \todo [2013-08-06 nuutti] ETC and ASTC decompression codes are rather unrelated, and are already in their own "private" namespaces - should this be split to multiple files?
280
281 namespace EtcDecompressInternal
282 {
283
enum
{
	// Block dimensions in pixels.
	ETC2_BLOCK_WIDTH	= 4,
	ETC2_BLOCK_HEIGHT	= 4,

	// Bytes taken by one decompressed pixel of each kind.
	ETC2_UNCOMPRESSED_PIXEL_SIZE_A8		= 1,
	ETC2_UNCOMPRESSED_PIXEL_SIZE_R11	= 2,
	ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11	= 4,
	ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8	= 3,
	ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8	= 4,

	// Bytes taken by one whole decompressed block: pixel size times pixel count.
	ETC2_UNCOMPRESSED_BLOCK_SIZE_A8		= ETC2_UNCOMPRESSED_PIXEL_SIZE_A8    * ETC2_BLOCK_WIDTH * ETC2_BLOCK_HEIGHT,
	ETC2_UNCOMPRESSED_BLOCK_SIZE_R11	= ETC2_UNCOMPRESSED_PIXEL_SIZE_R11   * ETC2_BLOCK_WIDTH * ETC2_BLOCK_HEIGHT,
	ETC2_UNCOMPRESSED_BLOCK_SIZE_RG11	= ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11  * ETC2_BLOCK_WIDTH * ETC2_BLOCK_HEIGHT,
	ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8	= ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8  * ETC2_BLOCK_WIDTH * ETC2_BLOCK_HEIGHT,
	ETC2_UNCOMPRESSED_BLOCK_SIZE_RGBA8	= ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8 * ETC2_BLOCK_WIDTH * ETC2_BLOCK_HEIGHT
};
299
300 inline deUint64 get64BitBlock (const deUint8* src, int blockNdx)
301 {
302         // Stored in big-endian form.
303         deUint64 block = 0;
304
305         for (int i = 0; i < 8; i++)
306                 block = (block << 8ull) | (deUint64)(src[blockNdx*8+i]);
307
308         return block;
309 }
310
311 // Return the first 64 bits of a 128 bit block.
312 inline deUint64 get128BitBlockStart (const deUint8* src, int blockNdx)
313 {
314         return get64BitBlock(src, 2*blockNdx);
315 }
316
317 // Return the last 64 bits of a 128 bit block.
318 inline deUint64 get128BitBlockEnd (const deUint8* src, int blockNdx)
319 {
320         return get64BitBlock(src, 2*blockNdx + 1);
321 }
322
323 inline deUint32 getBit (deUint64 src, int bit)
324 {
325         return (src >> bit) & 1;
326 }
327
328 inline deUint32 getBits (deUint64 src, int low, int high)
329 {
330         const int numBits = (high-low) + 1;
331         DE_ASSERT(de::inRange(numBits, 1, 32));
332         return (src >> low) & ((1<<numBits)-1);
333 }
334
335 inline deUint8 extend4To8 (deUint8 src)
336 {
337         DE_ASSERT((src & ~((1<<4)-1)) == 0);
338         return (src << 4) | src;
339 }
340
341 inline deUint8 extend5To8 (deUint8 src)
342 {
343         DE_ASSERT((src & ~((1<<5)-1)) == 0);
344         return (src << 3) | (src >> 2);
345 }
346
347 inline deUint8 extend6To8 (deUint8 src)
348 {
349         DE_ASSERT((src & ~((1<<6)-1)) == 0);
350         return (src << 2) | (src >> 4);
351 }
352
353 inline deUint8 extend7To8 (deUint8 src)
354 {
355         DE_ASSERT((src & ~((1<<7)-1)) == 0);
356         return (src << 1) | (src >> 6);
357 }
358
359 inline deInt8 extendSigned3To8 (deUint8 src)
360 {
361         const bool isNeg = (src & (1<<2)) != 0;
362         return (deInt8)((isNeg ? ~((1<<3)-1) : 0) | src);
363 }
364
365 inline deUint8 extend5Delta3To8 (deUint8 base5, deUint8 delta3)
366 {
367         const deUint8 t = (deUint8)((deInt8)base5 + extendSigned3To8(delta3));
368         return extend5To8(t);
369 }
370
371 inline deUint16 extend11To16 (deUint16 src)
372 {
373         DE_ASSERT((src & ~((1<<11)-1)) == 0);
374         return (src << 5) | (src >> 6);
375 }
376
377 inline deInt16 extend11To16WithSign (deInt16 src)
378 {
379         if (src < 0)
380                 return -(deInt16)extend11To16(-src);
381         else
382                 return (deInt16)extend11To16(src);
383 }
384
385 void decompressETC1Block (deUint8 dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8], deUint64 src)
386 {
387         const int               diffBit         = (int)getBit(src, 33);
388         const int               flipBit         = (int)getBit(src, 32);
389         const deUint32  table[2]        = { getBits(src, 37, 39), getBits(src, 34, 36) };
390         deUint8                 baseR[2];
391         deUint8                 baseG[2];
392         deUint8                 baseB[2];
393
394         if (diffBit == 0)
395         {
396                 // Individual mode.
397                 baseR[0] = extend4To8((deUint8)getBits(src, 60, 63));
398                 baseR[1] = extend4To8((deUint8)getBits(src, 56, 59));
399                 baseG[0] = extend4To8((deUint8)getBits(src, 52, 55));
400                 baseG[1] = extend4To8((deUint8)getBits(src, 48, 51));
401                 baseB[0] = extend4To8((deUint8)getBits(src, 44, 47));
402                 baseB[1] = extend4To8((deUint8)getBits(src, 40, 43));
403         }
404         else
405         {
406                 // Differential mode (diffBit == 1).
407                 deUint8 bR = (deUint8)getBits(src, 59, 63); // 5b
408                 deUint8 dR = (deUint8)getBits(src, 56, 58); // 3b
409                 deUint8 bG = (deUint8)getBits(src, 51, 55);
410                 deUint8 dG = (deUint8)getBits(src, 48, 50);
411                 deUint8 bB = (deUint8)getBits(src, 43, 47);
412                 deUint8 dB = (deUint8)getBits(src, 40, 42);
413
414                 baseR[0] = extend5To8(bR);
415                 baseG[0] = extend5To8(bG);
416                 baseB[0] = extend5To8(bB);
417
418                 baseR[1] = extend5Delta3To8(bR, dR);
419                 baseG[1] = extend5Delta3To8(bG, dG);
420                 baseB[1] = extend5Delta3To8(bB, dB);
421         }
422
423         static const int modifierTable[8][4] =
424         {
425         //        00   01   10    11
426                 {  2,   8,  -2,   -8 },
427                 {  5,  17,  -5,  -17 },
428                 {  9,  29,  -9,  -29 },
429                 { 13,  42, -13,  -42 },
430                 { 18,  60, -18,  -60 },
431                 { 24,  80, -24,  -80 },
432                 { 33, 106, -33, -106 },
433                 { 47, 183, -47, -183 }
434         };
435
436         // Write final pixels.
437         for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT*ETC2_BLOCK_WIDTH; pixelNdx++)
438         {
439                 const int               x                               = pixelNdx / ETC2_BLOCK_HEIGHT;
440                 const int               y                               = pixelNdx % ETC2_BLOCK_HEIGHT;
441                 const int               dstOffset               = (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
442                 const int               subBlock                = ((flipBit ? y : x) >= 2) ? 1 : 0;
443                 const deUint32  tableNdx                = table[subBlock];
444                 const deUint32  modifierNdx             = (getBit(src, 16+pixelNdx) << 1) | getBit(src, pixelNdx);
445                 const int               modifier                = modifierTable[tableNdx][modifierNdx];
446
447                 dst[dstOffset+0] = (deUint8)deClamp32((int)baseR[subBlock] + modifier, 0, 255);
448                 dst[dstOffset+1] = (deUint8)deClamp32((int)baseG[subBlock] + modifier, 0, 255);
449                 dst[dstOffset+2] = (deUint8)deClamp32((int)baseB[subBlock] + modifier, 0, 255);
450         }
451 }
452
453 // if alphaMode is true, do PUNCHTHROUGH and store alpha to alphaDst; otherwise do ordinary ETC2 RGB8.
454 void decompressETC2Block (deUint8 dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8], deUint64 src, deUint8 alphaDst[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8], bool alphaMode)
455 {
456         enum Etc2Mode
457         {
458                 MODE_INDIVIDUAL = 0,
459                 MODE_DIFFERENTIAL,
460                 MODE_T,
461                 MODE_H,
462                 MODE_PLANAR,
463
464                 MODE_LAST
465         };
466
467         const int               diffOpaqueBit   = (int)getBit(src, 33);
468         const deInt8    selBR                   = (deInt8)getBits(src, 59, 63); // 5 bits.
469         const deInt8    selBG                   = (deInt8)getBits(src, 51, 55);
470         const deInt8    selBB                   = (deInt8)getBits(src, 43, 47);
471         const deInt8    selDR                   = extendSigned3To8((deUint8)getBits(src, 56, 58)); // 3 bits.
472         const deInt8    selDG                   = extendSigned3To8((deUint8)getBits(src, 48, 50));
473         const deInt8    selDB                   = extendSigned3To8((deUint8)getBits(src, 40, 42));
474         Etc2Mode                mode;
475
476         if (!alphaMode && diffOpaqueBit == 0)
477                 mode = MODE_INDIVIDUAL;
478         else if (!de::inRange(selBR + selDR, 0, 31))
479                 mode = MODE_T;
480         else if (!de::inRange(selBG + selDG, 0, 31))
481                 mode = MODE_H;
482         else if (!de::inRange(selBB + selDB, 0, 31))
483                 mode = MODE_PLANAR;
484         else
485                 mode = MODE_DIFFERENTIAL;
486
487         if (mode == MODE_INDIVIDUAL || mode == MODE_DIFFERENTIAL)
488         {
489                 // Individual and differential modes have some steps in common, handle them here.
490                 static const int modifierTable[8][4] =
491                 {
492                 //        00   01   10    11
493                         {  2,   8,  -2,   -8 },
494                         {  5,  17,  -5,  -17 },
495                         {  9,  29,  -9,  -29 },
496                         { 13,  42, -13,  -42 },
497                         { 18,  60, -18,  -60 },
498                         { 24,  80, -24,  -80 },
499                         { 33, 106, -33, -106 },
500                         { 47, 183, -47, -183 }
501                 };
502
503                 const int               flipBit         = (int)getBit(src, 32);
504                 const deUint32  table[2]        = { getBits(src, 37, 39), getBits(src, 34, 36) };
505                 deUint8                 baseR[2];
506                 deUint8                 baseG[2];
507                 deUint8                 baseB[2];
508
509                 if (mode == MODE_INDIVIDUAL)
510                 {
511                         // Individual mode, initial values.
512                         baseR[0] = extend4To8((deUint8)getBits(src, 60, 63));
513                         baseR[1] = extend4To8((deUint8)getBits(src, 56, 59));
514                         baseG[0] = extend4To8((deUint8)getBits(src, 52, 55));
515                         baseG[1] = extend4To8((deUint8)getBits(src, 48, 51));
516                         baseB[0] = extend4To8((deUint8)getBits(src, 44, 47));
517                         baseB[1] = extend4To8((deUint8)getBits(src, 40, 43));
518                 }
519                 else
520                 {
521                         // Differential mode, initial values.
522                         baseR[0] = extend5To8(selBR);
523                         baseG[0] = extend5To8(selBG);
524                         baseB[0] = extend5To8(selBB);
525
526                         baseR[1] = extend5To8((deUint8)(selBR + selDR));
527                         baseG[1] = extend5To8((deUint8)(selBG + selDG));
528                         baseB[1] = extend5To8((deUint8)(selBB + selDB));
529                 }
530
531                 // Write final pixels for individual or differential mode.
532                 for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT*ETC2_BLOCK_WIDTH; pixelNdx++)
533                 {
534                         const int               x                               = pixelNdx / ETC2_BLOCK_HEIGHT;
535                         const int               y                               = pixelNdx % ETC2_BLOCK_HEIGHT;
536                         const int               dstOffset               = (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
537                         const int               subBlock                = ((flipBit ? y : x) >= 2) ? 1 : 0;
538                         const deUint32  tableNdx                = table[subBlock];
539                         const deUint32  modifierNdx             = (getBit(src, 16+pixelNdx) << 1) | getBit(src, pixelNdx);
540                         const int               alphaDstOffset  = (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8; // Only needed for PUNCHTHROUGH version.
541
542                         // If doing PUNCHTHROUGH version (alphaMode), opaque bit may affect colors.
543                         if (alphaMode && diffOpaqueBit == 0 && modifierNdx == 2)
544                         {
545                                 dst[dstOffset+0]                        = 0;
546                                 dst[dstOffset+1]                        = 0;
547                                 dst[dstOffset+2]                        = 0;
548                                 alphaDst[alphaDstOffset]        = 0;
549                         }
550                         else
551                         {
552                                 int modifier;
553
554                                 // PUNCHTHROUGH version and opaque bit may also affect modifiers.
555                                 if (alphaMode && diffOpaqueBit == 0 && (modifierNdx == 0 || modifierNdx == 2))
556                                         modifier = 0;
557                                 else
558                                         modifier = modifierTable[tableNdx][modifierNdx];
559
560                                 dst[dstOffset+0] = (deUint8)deClamp32((int)baseR[subBlock] + modifier, 0, 255);
561                                 dst[dstOffset+1] = (deUint8)deClamp32((int)baseG[subBlock] + modifier, 0, 255);
562                                 dst[dstOffset+2] = (deUint8)deClamp32((int)baseB[subBlock] + modifier, 0, 255);
563
564                                 if (alphaMode)
565                                         alphaDst[alphaDstOffset] = 255;
566                         }
567                 }
568         }
569         else if (mode == MODE_T || mode == MODE_H)
570         {
571                 // T and H modes have some steps in common, handle them here.
572                 static const int distTable[8] = { 3, 6, 11, 16, 23, 32, 41, 64 };
573
574                 deUint8 paintR[4];
575                 deUint8 paintG[4];
576                 deUint8 paintB[4];
577
578                 if (mode == MODE_T)
579                 {
580                         // T mode, calculate paint values.
581                         const deUint8   R1a                     = (deUint8)getBits(src, 59, 60);
582                         const deUint8   R1b                     = (deUint8)getBits(src, 56, 57);
583                         const deUint8   G1                      = (deUint8)getBits(src, 52, 55);
584                         const deUint8   B1                      = (deUint8)getBits(src, 48, 51);
585                         const deUint8   R2                      = (deUint8)getBits(src, 44, 47);
586                         const deUint8   G2                      = (deUint8)getBits(src, 40, 43);
587                         const deUint8   B2                      = (deUint8)getBits(src, 36, 39);
588                         const deUint32  distNdx         = (getBits(src, 34, 35) << 1) | getBit(src, 32);
589                         const int               dist            = distTable[distNdx];
590
591                         paintR[0] = extend4To8((R1a << 2) | R1b);
592                         paintG[0] = extend4To8(G1);
593                         paintB[0] = extend4To8(B1);
594                         paintR[2] = extend4To8(R2);
595                         paintG[2] = extend4To8(G2);
596                         paintB[2] = extend4To8(B2);
597                         paintR[1] = (deUint8)deClamp32((int)paintR[2] + dist, 0, 255);
598                         paintG[1] = (deUint8)deClamp32((int)paintG[2] + dist, 0, 255);
599                         paintB[1] = (deUint8)deClamp32((int)paintB[2] + dist, 0, 255);
600                         paintR[3] = (deUint8)deClamp32((int)paintR[2] - dist, 0, 255);
601                         paintG[3] = (deUint8)deClamp32((int)paintG[2] - dist, 0, 255);
602                         paintB[3] = (deUint8)deClamp32((int)paintB[2] - dist, 0, 255);
603                 }
604                 else
605                 {
606                         // H mode, calculate paint values.
607                         const deUint8   R1              = (deUint8)getBits(src, 59, 62);
608                         const deUint8   G1a             = (deUint8)getBits(src, 56, 58);
609                         const deUint8   G1b             = (deUint8)getBit(src, 52);
610                         const deUint8   B1a             = (deUint8)getBit(src, 51);
611                         const deUint8   B1b             = (deUint8)getBits(src, 47, 49);
612                         const deUint8   R2              = (deUint8)getBits(src, 43, 46);
613                         const deUint8   G2              = (deUint8)getBits(src, 39, 42);
614                         const deUint8   B2              = (deUint8)getBits(src, 35, 38);
615                         deUint8                 baseR[2];
616                         deUint8                 baseG[2];
617                         deUint8                 baseB[2];
618                         deUint32                baseValue[2];
619                         deUint32                distNdx;
620                         int                             dist;
621
622                         baseR[0]                = extend4To8(R1);
623                         baseG[0]                = extend4To8((G1a << 1) | G1b);
624                         baseB[0]                = extend4To8((B1a << 3) | B1b);
625                         baseR[1]                = extend4To8(R2);
626                         baseG[1]                = extend4To8(G2);
627                         baseB[1]                = extend4To8(B2);
628                         baseValue[0]    = (((deUint32)baseR[0]) << 16) | (((deUint32)baseG[0]) << 8) | baseB[0];
629                         baseValue[1]    = (((deUint32)baseR[1]) << 16) | (((deUint32)baseG[1]) << 8) | baseB[1];
630                         distNdx                 = (getBit(src, 34) << 2) | (getBit(src, 32) << 1) | (deUint32)(baseValue[0] >= baseValue[1]);
631                         dist                    = distTable[distNdx];
632
633                         paintR[0]               = (deUint8)deClamp32((int)baseR[0] + dist, 0, 255);
634                         paintG[0]               = (deUint8)deClamp32((int)baseG[0] + dist, 0, 255);
635                         paintB[0]               = (deUint8)deClamp32((int)baseB[0] + dist, 0, 255);
636                         paintR[1]               = (deUint8)deClamp32((int)baseR[0] - dist, 0, 255);
637                         paintG[1]               = (deUint8)deClamp32((int)baseG[0] - dist, 0, 255);
638                         paintB[1]               = (deUint8)deClamp32((int)baseB[0] - dist, 0, 255);
639                         paintR[2]               = (deUint8)deClamp32((int)baseR[1] + dist, 0, 255);
640                         paintG[2]               = (deUint8)deClamp32((int)baseG[1] + dist, 0, 255);
641                         paintB[2]               = (deUint8)deClamp32((int)baseB[1] + dist, 0, 255);
642                         paintR[3]               = (deUint8)deClamp32((int)baseR[1] - dist, 0, 255);
643                         paintG[3]               = (deUint8)deClamp32((int)baseG[1] - dist, 0, 255);
644                         paintB[3]               = (deUint8)deClamp32((int)baseB[1] - dist, 0, 255);
645                 }
646
647                 // Write final pixels for T or H mode.
648                 for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT*ETC2_BLOCK_WIDTH; pixelNdx++)
649                 {
650                         const int               x                               = pixelNdx / ETC2_BLOCK_HEIGHT;
651                         const int               y                               = pixelNdx % ETC2_BLOCK_HEIGHT;
652                         const int               dstOffset               = (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
653                         const deUint32  paintNdx                = (getBit(src, 16+pixelNdx) << 1) | getBit(src, pixelNdx);
654                         const int               alphaDstOffset  = (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8; // Only needed for PUNCHTHROUGH version.
655
656                         if (alphaMode && diffOpaqueBit == 0 && paintNdx == 2)
657                         {
658                                 dst[dstOffset+0]                        = 0;
659                                 dst[dstOffset+1]                        = 0;
660                                 dst[dstOffset+2]                        = 0;
661                                 alphaDst[alphaDstOffset]        = 0;
662                         }
663                         else
664                         {
665                                 dst[dstOffset+0] = (deUint8)deClamp32((int)paintR[paintNdx], 0, 255);
666                                 dst[dstOffset+1] = (deUint8)deClamp32((int)paintG[paintNdx], 0, 255);
667                                 dst[dstOffset+2] = (deUint8)deClamp32((int)paintB[paintNdx], 0, 255);
668
669                                 if (alphaMode)
670                                         alphaDst[alphaDstOffset] = 255;
671                         }
672                 }
673         }
674         else
675         {
676                 // Planar mode.
677                 const deUint8 GO1       = (deUint8)getBit(src, 56);
678                 const deUint8 GO2       = (deUint8)getBits(src, 49, 54);
679                 const deUint8 BO1       = (deUint8)getBit(src, 48);
680                 const deUint8 BO2       = (deUint8)getBits(src, 43, 44);
681                 const deUint8 BO3       = (deUint8)getBits(src, 39, 41);
682                 const deUint8 RH1       = (deUint8)getBits(src, 34, 38);
683                 const deUint8 RH2       = (deUint8)getBit(src, 32);
684                 const deUint8 RO        = extend6To8((deUint8)getBits(src, 57, 62));
685                 const deUint8 GO        = extend7To8((GO1 << 6) | GO2);
686                 const deUint8 BO        = extend6To8((BO1 << 5) | (BO2 << 3) | BO3);
687                 const deUint8 RH        = extend6To8((RH1 << 1) | RH2);
688                 const deUint8 GH        = extend7To8((deUint8)getBits(src, 25, 31));
689                 const deUint8 BH        = extend6To8((deUint8)getBits(src, 19, 24));
690                 const deUint8 RV        = extend6To8((deUint8)getBits(src, 13, 18));
691                 const deUint8 GV        = extend7To8((deUint8)getBits(src, 6, 12));
692                 const deUint8 BV        = extend6To8((deUint8)getBits(src, 0, 5));
693
694                 // Write final pixels for planar mode.
695                 for (int y = 0; y < 4; y++)
696                 {
697                         for (int x = 0; x < 4; x++)
698                         {
699                                 const int dstOffset                     = (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
700                                 const int unclampedR            = (x * ((int)RH-(int)RO) + y * ((int)RV-(int)RO) + 4*(int)RO + 2) >> 2;
701                                 const int unclampedG            = (x * ((int)GH-(int)GO) + y * ((int)GV-(int)GO) + 4*(int)GO + 2) >> 2;
702                                 const int unclampedB            = (x * ((int)BH-(int)BO) + y * ((int)BV-(int)BO) + 4*(int)BO + 2) >> 2;
703                                 const int alphaDstOffset        = (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8; // Only needed for PUNCHTHROUGH version.
704
705                                 dst[dstOffset+0] = (deUint8)deClamp32(unclampedR, 0, 255);
706                                 dst[dstOffset+1] = (deUint8)deClamp32(unclampedG, 0, 255);
707                                 dst[dstOffset+2] = (deUint8)deClamp32(unclampedB, 0, 255);
708
709                                 if (alphaMode)
710                                         alphaDst[alphaDstOffset] = 255;
711                         }
712                 }
713         }
714 }
715
716 void decompressEAC8Block (deUint8 dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8], deUint64 src)
717 {
718         static const int modifierTable[16][8] =
719         {
720                 {-3,  -6,  -9, -15,  2,  5,  8, 14},
721                 {-3,  -7, -10, -13,  2,  6,  9, 12},
722                 {-2,  -5,  -8, -13,  1,  4,  7, 12},
723                 {-2,  -4,  -6, -13,  1,  3,  5, 12},
724                 {-3,  -6,  -8, -12,  2,  5,  7, 11},
725                 {-3,  -7,  -9, -11,  2,  6,  8, 10},
726                 {-4,  -7,  -8, -11,  3,  6,  7, 10},
727                 {-3,  -5,  -8, -11,  2,  4,  7, 10},
728                 {-2,  -6,  -8, -10,  1,  5,  7,  9},
729                 {-2,  -5,  -8, -10,  1,  4,  7,  9},
730                 {-2,  -4,  -8, -10,  1,  3,  7,  9},
731                 {-2,  -5,  -7, -10,  1,  4,  6,  9},
732                 {-3,  -4,  -7, -10,  2,  3,  6,  9},
733                 {-1,  -2,  -3, -10,  0,  1,  2,  9},
734                 {-4,  -6,  -8,  -9,  3,  5,  7,  8},
735                 {-3,  -5,  -7,  -9,  2,  4,  6,  8}
736         };
737
738         const deUint8   baseCodeword    = (deUint8)getBits(src, 56, 63);
739         const deUint8   multiplier              = (deUint8)getBits(src, 52, 55);
740         const deUint32  tableNdx                = getBits(src, 48, 51);
741
742         for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT*ETC2_BLOCK_WIDTH; pixelNdx++)
743         {
744                 const int               x                               = pixelNdx / ETC2_BLOCK_HEIGHT;
745                 const int               y                               = pixelNdx % ETC2_BLOCK_HEIGHT;
746                 const int               dstOffset               = (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8;
747                 const int               pixelBitNdx             = 45 - 3*pixelNdx;
748                 const deUint32  modifierNdx             = (getBit(src, pixelBitNdx + 2) << 2) | (getBit(src, pixelBitNdx + 1) << 1) | getBit(src, pixelBitNdx);
749                 const int               modifier                = modifierTable[tableNdx][modifierNdx];
750
751                 dst[dstOffset] = (deUint8)deClamp32((int)baseCodeword + (int)multiplier*modifier, 0, 255);
752         }
753 }
754
755 void decompressEAC11Block (deUint8 dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11], deUint64 src, bool signedMode)
756 {
757         static const int modifierTable[16][8] =
758         {
759                 {-3,  -6,  -9, -15,  2,  5,  8, 14},
760                 {-3,  -7, -10, -13,  2,  6,  9, 12},
761                 {-2,  -5,  -8, -13,  1,  4,  7, 12},
762                 {-2,  -4,  -6, -13,  1,  3,  5, 12},
763                 {-3,  -6,  -8, -12,  2,  5,  7, 11},
764                 {-3,  -7,  -9, -11,  2,  6,  8, 10},
765                 {-4,  -7,  -8, -11,  3,  6,  7, 10},
766                 {-3,  -5,  -8, -11,  2,  4,  7, 10},
767                 {-2,  -6,  -8, -10,  1,  5,  7,  9},
768                 {-2,  -5,  -8, -10,  1,  4,  7,  9},
769                 {-2,  -4,  -8, -10,  1,  3,  7,  9},
770                 {-2,  -5,  -7, -10,  1,  4,  6,  9},
771                 {-3,  -4,  -7, -10,  2,  3,  6,  9},
772                 {-1,  -2,  -3, -10,  0,  1,  2,  9},
773                 {-4,  -6,  -8,  -9,  3,  5,  7,  8},
774                 {-3,  -5,  -7,  -9,  2,  4,  6,  8}
775         };
776
777         const deInt32 multiplier        = (deInt32)getBits(src, 52, 55);
778         const deInt32 tableNdx          = (deInt32)getBits(src, 48, 51);
779         deInt32 baseCodeword            = (deInt32)getBits(src, 56, 63);
780
781         if (signedMode)
782         {
783                 if (baseCodeword > 127)
784                         baseCodeword -= 256;
785                 if (baseCodeword == -128)
786                         baseCodeword = -127;
787         }
788
789         for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT*ETC2_BLOCK_WIDTH; pixelNdx++)
790         {
791                 const int               x                               = pixelNdx / ETC2_BLOCK_HEIGHT;
792                 const int               y                               = pixelNdx % ETC2_BLOCK_HEIGHT;
793                 const int               dstOffset               = (y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11;
794                 const int               pixelBitNdx             = 45 - 3*pixelNdx;
795                 const deUint32  modifierNdx             = (getBit(src, pixelBitNdx + 2) << 2) | (getBit(src, pixelBitNdx + 1) << 1) | getBit(src, pixelBitNdx);
796                 const int               modifier                = modifierTable[tableNdx][modifierNdx];
797
798                 if (signedMode)
799                 {
800                         deInt16 value;
801
802                         if (multiplier != 0)
803                                 value = (deInt16)deClamp32(baseCodeword*8 + multiplier*modifier*8, -1023, 1023);
804                         else
805                                 value = (deInt16)deClamp32(baseCodeword*8 + modifier, -1023, 1023);
806
807                         *((deInt16*)(dst + dstOffset)) = value;
808                 }
809                 else
810                 {
811                         deUint16 value;
812
813                         if (multiplier != 0)
814                                 value = (deUint16)deClamp32(baseCodeword*8 + 4 + multiplier*modifier*8, 0, 2047);
815                         else
816                                 value= (deUint16)deClamp32(baseCodeword*8 + 4 + modifier, 0, 2047);
817
818                         *((deUint16*)(dst + dstOffset)) = value;
819                 }
820         }
821 }
822
823 } // EtcDecompressInternal
824
825 void decompressETC1 (const PixelBufferAccess& dst, const deUint8* src)
826 {
827         using namespace EtcDecompressInternal;
828
829         deUint8* const  dstPtr                  = (deUint8*)dst.getDataPtr();
830         const deUint64  compressedBlock = get64BitBlock(src, 0);
831
832         decompressETC1Block(dstPtr, compressedBlock);
833 }
834
835 void decompressETC2 (const PixelBufferAccess& dst, const deUint8* src)
836 {
837         using namespace EtcDecompressInternal;
838
839         deUint8* const  dstPtr                  = (deUint8*)dst.getDataPtr();
840         const deUint64  compressedBlock = get64BitBlock(src, 0);
841
842         decompressETC2Block(dstPtr, compressedBlock, NULL, false);
843 }
844
845 void decompressETC2_EAC_RGBA8 (const PixelBufferAccess& dst, const deUint8* src)
846 {
847         using namespace EtcDecompressInternal;
848
849         deUint8* const  dstPtr                  = (deUint8*)dst.getDataPtr();
850         const int               dstRowPitch             = dst.getRowPitch();
851         const int               dstPixelSize    = ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8;
852
853         const deUint64  compressedBlockAlpha    = get128BitBlockStart(src, 0);
854         const deUint64  compressedBlockRGB              = get128BitBlockEnd(src, 0);
855         deUint8                 uncompressedBlockAlpha[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8];
856         deUint8                 uncompressedBlockRGB[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8];
857
858         // Decompress.
859         decompressETC2Block(uncompressedBlockRGB, compressedBlockRGB, NULL, false);
860         decompressEAC8Block(uncompressedBlockAlpha, compressedBlockAlpha);
861
862         // Write to dst.
863         for (int y = 0; y < (int)ETC2_BLOCK_HEIGHT; y++)
864         {
865                 for (int x = 0; x < (int)ETC2_BLOCK_WIDTH; x++)
866                 {
867                         const deUint8* const    srcPixelRGB             = &uncompressedBlockRGB[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8];
868                         const deUint8* const    srcPixelAlpha   = &uncompressedBlockAlpha[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8];
869                         deUint8* const                  dstPixel                = dstPtr + y*dstRowPitch + x*dstPixelSize;
870
871                         DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8 == 4);
872                         dstPixel[0] = srcPixelRGB[0];
873                         dstPixel[1] = srcPixelRGB[1];
874                         dstPixel[2] = srcPixelRGB[2];
875                         dstPixel[3] = srcPixelAlpha[0];
876                 }
877         }
878 }
879
880 void decompressETC2_RGB8_PUNCHTHROUGH_ALPHA1 (const PixelBufferAccess& dst, const deUint8* src)
881 {
882         using namespace EtcDecompressInternal;
883
884         deUint8* const  dstPtr                  = (deUint8*)dst.getDataPtr();
885         const int               dstRowPitch             = dst.getRowPitch();
886         const int               dstPixelSize    = ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8;
887
888         const deUint64  compressedBlockRGBA     = get64BitBlock(src, 0);
889         deUint8                 uncompressedBlockRGB[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8];
890         deUint8                 uncompressedBlockAlpha[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8];
891
892         // Decompress.
893         decompressETC2Block(uncompressedBlockRGB, compressedBlockRGBA, uncompressedBlockAlpha, DE_TRUE);
894
895         // Write to dst.
896         for (int y = 0; y < (int)ETC2_BLOCK_HEIGHT; y++)
897         {
898                 for (int x = 0; x < (int)ETC2_BLOCK_WIDTH; x++)
899                 {
900                         const deUint8* const    srcPixel                = &uncompressedBlockRGB[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8];
901                         const deUint8* const    srcPixelAlpha   = &uncompressedBlockAlpha[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_A8];
902                         deUint8* const                  dstPixel                = dstPtr + y*dstRowPitch + x*dstPixelSize;
903
904                         DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8 == 4);
905                         dstPixel[0] = srcPixel[0];
906                         dstPixel[1] = srcPixel[1];
907                         dstPixel[2] = srcPixel[2];
908                         dstPixel[3] = srcPixelAlpha[0];
909                 }
910         }
911 }
912
913 void decompressEAC_R11 (const PixelBufferAccess& dst, const deUint8* src, bool signedMode)
914 {
915         using namespace EtcDecompressInternal;
916
917         deUint8* const  dstPtr                  = (deUint8*)dst.getDataPtr();
918         const int               dstRowPitch             = dst.getRowPitch();
919         const int               dstPixelSize    = ETC2_UNCOMPRESSED_PIXEL_SIZE_R11;
920
921         const deUint64  compressedBlock = get64BitBlock(src, 0);
922         deUint8                 uncompressedBlock[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11];
923
924         // Decompress.
925         decompressEAC11Block(uncompressedBlock, compressedBlock, signedMode);
926
927         // Write to dst.
928         for (int y = 0; y < (int)ETC2_BLOCK_HEIGHT; y++)
929         {
930                 for (int x = 0; x < (int)ETC2_BLOCK_WIDTH; x++)
931                 {
932                         DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_R11 == 2);
933
934                         if (signedMode)
935                         {
936                                 const deInt16* const    srcPixel = (deInt16*)&uncompressedBlock[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
937                                 deInt16* const                  dstPixel = (deInt16*)(dstPtr + y*dstRowPitch + x*dstPixelSize);
938
939                                 dstPixel[0] = extend11To16WithSign(srcPixel[0]);
940                         }
941                         else
942                         {
943                                 const deUint16* const   srcPixel = (deUint16*)&uncompressedBlock[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
944                                 deUint16* const                 dstPixel = (deUint16*)(dstPtr + y*dstRowPitch + x*dstPixelSize);
945
946                                 dstPixel[0] = extend11To16(srcPixel[0]);
947                         }
948                 }
949         }
950 }
951
952 void decompressEAC_RG11 (const PixelBufferAccess& dst, const deUint8* src, bool signedMode)
953 {
954         using namespace EtcDecompressInternal;
955
956         deUint8* const  dstPtr                  = (deUint8*)dst.getDataPtr();
957         const int               dstRowPitch             = dst.getRowPitch();
958         const int               dstPixelSize    = ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11;
959
960         const deUint64  compressedBlockR = get128BitBlockStart(src, 0);
961         const deUint64  compressedBlockG = get128BitBlockEnd(src, 0);
962         deUint8                 uncompressedBlockR[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11];
963         deUint8                 uncompressedBlockG[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11];
964
965         // Decompress.
966         decompressEAC11Block(uncompressedBlockR, compressedBlockR, signedMode);
967         decompressEAC11Block(uncompressedBlockG, compressedBlockG, signedMode);
968
969         // Write to dst.
970         for (int y = 0; y < (int)ETC2_BLOCK_HEIGHT; y++)
971         {
972                 for (int x = 0; x < (int)ETC2_BLOCK_WIDTH; x++)
973                 {
974                         DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11 == 4);
975
976                         if (signedMode)
977                         {
978                                 const deInt16* const    srcPixelR       = (deInt16*)&uncompressedBlockR[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
979                                 const deInt16* const    srcPixelG       = (deInt16*)&uncompressedBlockG[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
980                                 deInt16* const                  dstPixel        = (deInt16*)(dstPtr + y*dstRowPitch + x*dstPixelSize);
981
982                                 dstPixel[0] = extend11To16WithSign(srcPixelR[0]);
983                                 dstPixel[1] = extend11To16WithSign(srcPixelG[0]);
984                         }
985                         else
986                         {
987                                 const deUint16* const   srcPixelR       = (deUint16*)&uncompressedBlockR[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
988                                 const deUint16* const   srcPixelG       = (deUint16*)&uncompressedBlockG[(y*ETC2_BLOCK_WIDTH + x)*ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
989                                 deUint16* const                 dstPixel        = (deUint16*)(dstPtr + y*dstRowPitch + x*dstPixelSize);
990
991                                 dstPixel[0] = extend11To16(srcPixelR[0]);
992                                 dstPixel[1] = extend11To16(srcPixelG[0]);
993                         }
994                 }
995         }
996 }
997
998 namespace ASTCDecompressInternal
999 {
1000
//! Maximum ASTC block dimensions.
enum
{
	ASTC_MAX_BLOCK_WIDTH	= 12,	//!< Maximum block width.
	ASTC_MAX_BLOCK_HEIGHT	= 12	//!< Maximum block height.
};
1006
1007 inline deUint32 getBit (deUint32 src, int ndx)
1008 {
1009         DE_ASSERT(de::inBounds(ndx, 0, 32));
1010         return (src >> ndx) & 1;
1011 }
1012
1013 inline deUint32 getBits (deUint32 src, int low, int high)
1014 {
1015         const int numBits = (high-low) + 1;
1016         DE_ASSERT(de::inRange(numBits, 1, 32));
1017         return (src >> low) & ((1u<<numBits)-1);
1018 }
1019
1020 inline bool isBitSet (deUint32 src, int ndx)
1021 {
1022         return getBit(src, ndx) != 0;
1023 }
1024
1025 inline deUint32 reverseBits (deUint32 src, int numBits)
1026 {
1027         DE_ASSERT(de::inRange(numBits, 0, 32));
1028         deUint32 result = 0;
1029         for (int i = 0; i < numBits; i++)
1030                 result |= ((src >> i) & 1) << (numBits-1-i);
1031         return result;
1032 }
1033
1034 inline deUint32 bitReplicationScale (deUint32 src, int numSrcBits, int numDstBits)
1035 {
1036         DE_ASSERT(numSrcBits <= numDstBits);
1037         DE_ASSERT((src & ((1<<numSrcBits)-1)) == src);
1038         deUint32 dst = 0;
1039         for (int shift = numDstBits-numSrcBits; shift > -numSrcBits; shift -= numSrcBits)
1040                 dst |= shift >= 0 ? src << shift : src >> -shift;
1041         return dst;
1042 }
1043
1044 inline deInt32 signExtend (deInt32 src, int numSrcBits)
1045 {
1046         DE_ASSERT(de::inRange(numSrcBits, 2, 31));
1047         const bool negative = (src & (1 << (numSrcBits-1))) != 0;
1048         return src | (negative ? ~((1 << numSrcBits) - 1) : 0);
1049 }
1050
1051 inline bool isFloat16InfOrNan (deFloat16 v)
1052 {
1053         return getBits(v, 10, 14) == 31;
1054 }
1055
1056 // A helper for getting bits from a 128-bit block.
1057 class Block128
1058 {
1059 private:
1060         typedef deUint64 Word;
1061
1062         enum
1063         {
1064                 WORD_BYTES      = sizeof(Word),
1065                 WORD_BITS       = 8*WORD_BYTES,
1066                 NUM_WORDS       = 128 / WORD_BITS
1067         };
1068
1069         DE_STATIC_ASSERT(128 % WORD_BITS == 0);
1070
1071 public:
1072         Block128 (const deUint8* src)
1073         {
1074                 for (int wordNdx = 0; wordNdx < NUM_WORDS; wordNdx++)
1075                 {
1076                         m_words[wordNdx] = 0;
1077                         for (int byteNdx = 0; byteNdx < WORD_BYTES; byteNdx++)
1078                                 m_words[wordNdx] |= (Word)src[wordNdx*WORD_BYTES + byteNdx] << (8*byteNdx);
1079                 }
1080         }
1081
1082         deUint32 getBit (int ndx) const
1083         {
1084                 DE_ASSERT(de::inBounds(ndx, 0, 128));
1085                 return (m_words[ndx / WORD_BITS] >> (ndx % WORD_BITS)) & 1;
1086         }
1087
1088         deUint32 getBits (int low, int high) const
1089         {
1090                 DE_ASSERT(de::inBounds(low, 0, 128));
1091                 DE_ASSERT(de::inBounds(high, 0, 128));
1092                 DE_ASSERT(de::inRange(high-low+1, 0, 32));
1093
1094                 if (high-low+1 == 0)
1095                         return 0;
1096
1097                 const int word0Ndx = low / WORD_BITS;
1098                 const int word1Ndx = high / WORD_BITS;
1099
1100                 // \note "foo << bar << 1" done instead of "foo << (bar+1)" to avoid overflow, i.e. shift amount being too big.
1101
1102                 if (word0Ndx == word1Ndx)
1103                         return (m_words[word0Ndx] & ((((Word)1 << high%WORD_BITS << 1) - 1))) >> ((Word)low % WORD_BITS);
1104                 else
1105                 {
1106                         DE_ASSERT(word1Ndx == word0Ndx + 1);
1107
1108                         return (deUint32)(m_words[word0Ndx] >> (low%WORD_BITS)) |
1109                                    (deUint32)((m_words[word1Ndx] & (((Word)1 << high%WORD_BITS << 1) - 1)) << (high-low - high%WORD_BITS));
1110                 }
1111         }
1112
1113         bool isBitSet (int ndx) const
1114         {
1115                 DE_ASSERT(de::inBounds(ndx, 0, 128));
1116                 return getBit(ndx) != 0;
1117         }
1118
1119 private:
1120         Word m_words[NUM_WORDS];
1121 };
1122
1123 // A helper for sequential access into a Block128.
1124 class BitAccessStream
1125 {
1126 public:
1127         BitAccessStream (const Block128& src, int startNdxInSrc, int length, bool forward)
1128                 : m_src                         (src)
1129                 , m_startNdxInSrc       (startNdxInSrc)
1130                 , m_length                      (length)
1131                 , m_forward                     (forward)
1132                 , m_ndx                         (0)
1133         {
1134         }
1135
1136         // Get the next num bits. Bits at positions greater than or equal to m_length are zeros.
1137         deUint32 getNext (int num)
1138         {
1139                 if (num == 0 || m_ndx >= m_length)
1140                         return 0;
1141
1142                 const int end                           = m_ndx + num;
1143                 const int numBitsFromSrc        = de::max(0, de::min(m_length, end) - m_ndx);
1144                 const int low                           = m_ndx;
1145                 const int high                          = m_ndx + numBitsFromSrc - 1;
1146
1147                 m_ndx += num;
1148
1149                 return m_forward ?                         m_src.getBits(m_startNdxInSrc + low,  m_startNdxInSrc + high)
1150                                                  : reverseBits(m_src.getBits(m_startNdxInSrc - high, m_startNdxInSrc - low), numBitsFromSrc);
1151         }
1152
1153 private:
1154         const Block128&         m_src;
1155         const int                       m_startNdxInSrc;
1156         const int                       m_length;
1157         const bool                      m_forward;
1158
1159         int                                     m_ndx;
1160 };
1161
//! Encoding used for a sequence of ISE values: trits, quints or plain bits.
enum ISEMode
{
	ISEMODE_TRIT		= 0,
	ISEMODE_QUINT		= 1,
	ISEMODE_PLAIN_BIT	= 2,

	ISEMODE_LAST		= 3		//!< Number of modes; not a mode itself.
};
1170
1171 struct ISEParams
1172 {
1173         ISEMode         mode;
1174         int                     numBits;
1175
1176         ISEParams (ISEMode mode_, int numBits_) : mode(mode_), numBits(numBits_) {}
1177 };
1178
1179 inline int computeNumRequiredBits (const ISEParams& iseParams, int numValues)
1180 {
1181         switch (iseParams.mode)
1182         {
1183                 case ISEMODE_TRIT:                      return divRoundUp(numValues*8, 5) + numValues*iseParams.numBits;
1184                 case ISEMODE_QUINT:                     return divRoundUp(numValues*7, 3) + numValues*iseParams.numBits;
1185                 case ISEMODE_PLAIN_BIT:         return numValues*iseParams.numBits;
1186                 default:
1187                         DE_ASSERT(false);
1188                         return -1;
1189         }
1190 }
1191
1192 struct ISEDecodedResult
1193 {
1194         deUint32 m;
1195         deUint32 tq; //!< Trit or quint value, depending on ISE mode.
1196         deUint32 v;
1197 };
1198
1199 // Data from an ASTC block's "block mode" part (i.e. bits [0,10]).
1200 struct ASTCBlockMode
1201 {
1202         bool            isError;
1203         // \note Following fields only relevant if !isError.
1204         bool            isVoidExtent;
1205         // \note Following fields only relevant if !isVoidExtent.
1206         bool            isDualPlane;
1207         int                     weightGridWidth;
1208         int                     weightGridHeight;
1209         ISEParams       weightISEParams;
1210
1211         ASTCBlockMode (void)
1212                 : isError                       (true)
1213                 , isVoidExtent          (true)
1214                 , isDualPlane           (true)
1215                 , weightGridWidth       (-1)
1216                 , weightGridHeight      (-1)
1217                 , weightISEParams       (ISEMODE_LAST, -1)
1218         {
1219         }
1220 };
1221
1222 inline int computeNumWeights (const ASTCBlockMode& mode)
1223 {
1224         return mode.weightGridWidth * mode.weightGridHeight * (mode.isDualPlane ? 2 : 1);
1225 }
1226
1227 struct ColorEndpointPair
1228 {
1229         UVec4 e0;
1230         UVec4 e1;
1231 };
1232
1233 struct TexelWeightPair
1234 {
1235         deUint32 w[2];
1236 };
1237
1238 ASTCBlockMode getASTCBlockMode (deUint32 blockModeData)
1239 {
1240         ASTCBlockMode blockMode;
1241         blockMode.isError = true; // \note Set to false later, if not error.
1242
1243         blockMode.isVoidExtent = getBits(blockModeData, 0, 8) == 0x1fc;
1244
1245         if (!blockMode.isVoidExtent)
1246         {
1247                 if ((getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 6, 8) == 7) || getBits(blockModeData, 0, 3) == 0)
1248                         return blockMode; // Invalid ("reserved").
1249
1250                 deUint32 r = (deUint32)-1; // \note Set in the following branches.
1251
1252                 if (getBits(blockModeData, 0, 1) == 0)
1253                 {
1254                         const deUint32 r0       = getBit(blockModeData, 4);
1255                         const deUint32 r1       = getBit(blockModeData, 2);
1256                         const deUint32 r2       = getBit(blockModeData, 3);
1257                         const deUint32 i78      = getBits(blockModeData, 7, 8);
1258
1259                         r = (r2 << 2) | (r1 << 1) | (r0 << 0);
1260
1261                         if (i78 == 3)
1262                         {
1263                                 const bool i5 = isBitSet(blockModeData, 5);
1264                                 blockMode.weightGridWidth       = i5 ? 10 : 6;
1265                                 blockMode.weightGridHeight      = i5 ? 6  : 10;
1266                         }
1267                         else
1268                         {
1269                                 const deUint32 a = getBits(blockModeData, 5, 6);
1270                                 switch (i78)
1271                                 {
1272                                         case 0:         blockMode.weightGridWidth = 12;         blockMode.weightGridHeight = a + 2;                                                                     break;
1273                                         case 1:         blockMode.weightGridWidth = a + 2;      blockMode.weightGridHeight = 12;                                                                        break;
1274                                         case 2:         blockMode.weightGridWidth = a + 6;      blockMode.weightGridHeight = getBits(blockModeData, 9, 10) + 6;         break;
1275                                         default: DE_ASSERT(false);
1276                                 }
1277                         }
1278                 }
1279                 else
1280                 {
1281                         const deUint32 r0       = getBit(blockModeData, 4);
1282                         const deUint32 r1       = getBit(blockModeData, 0);
1283                         const deUint32 r2       = getBit(blockModeData, 1);
1284                         const deUint32 i23      = getBits(blockModeData, 2, 3);
1285                         const deUint32 a        = getBits(blockModeData, 5, 6);
1286
1287                         r = (r2 << 2) | (r1 << 1) | (r0 << 0);
1288
1289                         if (i23 == 3)
1290                         {
1291                                 const deUint32  b       = getBit(blockModeData, 7);
1292                                 const bool              i8      = isBitSet(blockModeData, 8);
1293                                 blockMode.weightGridWidth       = i8 ? b+2 : a+2;
1294                                 blockMode.weightGridHeight      = i8 ? a+2 : b+6;
1295                         }
1296                         else
1297                         {
1298                                 const deUint32 b = getBits(blockModeData, 7, 8);
1299
1300                                 switch (i23)
1301                                 {
1302                                         case 0:         blockMode.weightGridWidth = b + 4;      blockMode.weightGridHeight = a + 2;     break;
1303                                         case 1:         blockMode.weightGridWidth = b + 8;      blockMode.weightGridHeight = a + 2;     break;
1304                                         case 2:         blockMode.weightGridWidth = a + 2;      blockMode.weightGridHeight = b + 8;     break;
1305                                         default: DE_ASSERT(false);
1306                                 }
1307                         }
1308                 }
1309
1310                 const bool      zeroDH          = getBits(blockModeData, 0, 1) == 0 && getBits(blockModeData, 7, 8) == 2;
1311                 const bool      h                       = zeroDH ? 0 : isBitSet(blockModeData, 9);
1312                 blockMode.isDualPlane   = zeroDH ? 0 : isBitSet(blockModeData, 10);
1313
1314                 {
1315                         ISEMode&        m       = blockMode.weightISEParams.mode;
1316                         int&            b       = blockMode.weightISEParams.numBits;
1317                         m = ISEMODE_PLAIN_BIT;
1318                         b = 0;
1319
1320                         if (h)
1321                         {
1322                                 switch (r)
1323                                 {
1324                                         case 2:                                                 m = ISEMODE_QUINT;      b = 1;  break;
1325                                         case 3:         m = ISEMODE_TRIT;                                               b = 2;  break;
1326                                         case 4:                                                                                         b = 4;  break;
1327                                         case 5:                                                 m = ISEMODE_QUINT;      b = 2;  break;
1328                                         case 6:         m = ISEMODE_TRIT;                                               b = 3;  break;
1329                                         case 7:                                                                                         b = 5;  break;
1330                                         default: DE_ASSERT(false);
1331                                 }
1332                         }
1333                         else
1334                         {
1335                                 switch (r)
1336                                 {
1337                                         case 2:                                                                                         b = 1;  break;
1338                                         case 3:         m = ISEMODE_TRIT;                                                               break;
1339                                         case 4:                                                                                         b = 2;  break;
1340                                         case 5:                                                 m = ISEMODE_QUINT;                      break;
1341                                         case 6:         m = ISEMODE_TRIT;                                               b = 1;  break;
1342                                         case 7:                                                                                         b = 3;  break;
1343                                         default: DE_ASSERT(false);
1344                                 }
1345                         }
1346                 }
1347         }
1348
1349         blockMode.isError = false;
1350         return blockMode;
1351 }
1352
1353 inline void setASTCErrorColorBlock (void* dst, int blockWidth, int blockHeight, bool isSRGB)
1354 {
1355         if (isSRGB)
1356         {
1357                 deUint8* const dstU = (deUint8*)dst;
1358
1359                 for (int i = 0; i < blockWidth*blockHeight; i++)
1360                 {
1361                         dstU[4*i + 0] = 0xff;
1362                         dstU[4*i + 1] = 0;
1363                         dstU[4*i + 2] = 0xff;
1364                         dstU[4*i + 3] = 0xff;
1365                 }
1366         }
1367         else
1368         {
1369                 float* const dstF = (float*)dst;
1370
1371                 for (int i = 0; i < blockWidth*blockHeight; i++)
1372                 {
1373                         dstF[4*i + 0] = 1.0f;
1374                         dstF[4*i + 1] = 0.0f;
1375                         dstF[4*i + 2] = 1.0f;
1376                         dstF[4*i + 3] = 1.0f;
1377                 }
1378         }
1379 }
1380
1381 void decodeVoidExtentBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode)
1382 {
1383         const deUint32  minSExtent                      = blockData.getBits(12, 24);
1384         const deUint32  maxSExtent                      = blockData.getBits(25, 37);
1385         const deUint32  minTExtent                      = blockData.getBits(38, 50);
1386         const deUint32  maxTExtent                      = blockData.getBits(51, 63);
1387         const bool              allExtentsAllOnes       = minSExtent == 0x1fff && maxSExtent == 0x1fff && minTExtent == 0x1fff && maxTExtent == 0x1fff;
1388         const bool              isHDRBlock                      = blockData.isBitSet(9);
1389
1390         if ((isLDRMode && isHDRBlock) || (!allExtentsAllOnes && (minSExtent >= maxSExtent || minTExtent >= maxTExtent)))
1391         {
1392                 setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
1393                 return;
1394         }
1395
1396         const deUint32 rgba[4] =
1397         {
1398                 blockData.getBits(64,  79),
1399                 blockData.getBits(80,  95),
1400                 blockData.getBits(96,  111),
1401                 blockData.getBits(112, 127)
1402         };
1403
1404         if (isSRGB)
1405         {
1406                 deUint8* const dstU = (deUint8*)dst;
1407                 for (int i = 0; i < blockWidth*blockHeight; i++)
1408                 for (int c = 0; c < 4; c++)
1409                         dstU[i*4 + c] = (rgba[c] & 0xff00) >> 8;
1410         }
1411         else
1412         {
1413                 float* const dstF = (float*)dst;
1414
1415                 if (isHDRBlock)
1416                 {
1417                         for (int c = 0; c < 4; c++)
1418                         {
1419                                 if (isFloat16InfOrNan(rgba[c]))
1420                                         throw InternalError("Infinity or NaN color component in HDR void extent block in ASTC texture (behavior undefined by ASTC specification)");
1421                         }
1422
1423                         for (int i = 0; i < blockWidth*blockHeight; i++)
1424                         for (int c = 0; c < 4; c++)
1425                                 dstF[i*4 + c] = deFloat16To32((deFloat16)rgba[c]);
1426                 }
1427                 else
1428                 {
1429                         for (int i = 0; i < blockWidth*blockHeight; i++)
1430                         for (int c = 0; c < 4; c++)
1431                                 dstF[i*4 + c] = rgba[c] == 65535 ? 1.0f : (float)rgba[c] / 65536.0f;
1432                 }
1433         }
1434
1435         return;
1436 }
1437
1438 void decodeColorEndpointModes (deUint32* endpointModesDst, const Block128& blockData, int numPartitions, int extraCemBitsStart)
1439 {
1440         if (numPartitions == 1)
1441                 endpointModesDst[0] = blockData.getBits(13, 16);
1442         else
1443         {
1444                 const deUint32 highLevelSelector = blockData.getBits(23, 24);
1445
1446                 if (highLevelSelector == 0)
1447                 {
1448                         const deUint32 mode = blockData.getBits(25, 28);
1449                         for (int i = 0; i < numPartitions; i++)
1450                                 endpointModesDst[i] = mode;
1451                 }
1452                 else
1453                 {
1454                         for (int partNdx = 0; partNdx < numPartitions; partNdx++)
1455                         {
1456                                 const deUint32 cemClass         = highLevelSelector - (blockData.isBitSet(25 + partNdx) ? 0 : 1);
1457                                 const deUint32 lowBit0Ndx       = numPartitions + 2*partNdx;
1458                                 const deUint32 lowBit1Ndx       = numPartitions + 2*partNdx + 1;
1459                                 const deUint32 lowBit0          = blockData.getBit(lowBit0Ndx < 4 ? 25+lowBit0Ndx : extraCemBitsStart+lowBit0Ndx-4);
1460                                 const deUint32 lowBit1          = blockData.getBit(lowBit1Ndx < 4 ? 25+lowBit1Ndx : extraCemBitsStart+lowBit1Ndx-4);
1461
1462                                 endpointModesDst[partNdx] = (cemClass << 2) | (lowBit1 << 1) | lowBit0;
1463                         }
1464                 }
1465         }
1466 }
1467
1468 inline int computeNumColorEndpointValues (deUint32 endpointMode)
1469 {
1470         DE_ASSERT(endpointMode < 16);
1471         return (endpointMode/4 + 1) * 2;
1472 }
1473
1474 int computeNumColorEndpointValues (const deUint32* endpointModes, int numPartitions)
1475 {
1476         int result = 0;
1477         for (int i = 0; i < numPartitions; i++)
1478                 result += computeNumColorEndpointValues(endpointModes[i]);
1479         return result;
1480 }
1481
1482 void decodeISETritBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits)
1483 {
1484         DE_ASSERT(de::inRange(numValues, 1, 5));
1485
1486         deUint32 m[5];
1487
1488         m[0]                    = data.getNext(numBits);
1489         deUint32 T01    = data.getNext(2);
1490         m[1]                    = data.getNext(numBits);
1491         deUint32 T23    = data.getNext(2);
1492         m[2]                    = data.getNext(numBits);
1493         deUint32 T4             = data.getNext(1);
1494         m[3]                    = data.getNext(numBits);
1495         deUint32 T56    = data.getNext(2);
1496         m[4]                    = data.getNext(numBits);
1497         deUint32 T7             = data.getNext(1);
1498
1499         switch (numValues)
1500         {
1501                 // \note Fall-throughs.
1502                 case 1: T23             = 0;
1503                 case 2: T4              = 0;
1504                 case 3: T56             = 0;
1505                 case 4: T7              = 0;
1506                 case 5: break;
1507                 default:
1508                         DE_ASSERT(false);
1509         }
1510
1511         const deUint32 T = (T7 << 7) | (T56 << 5) | (T4 << 4) | (T23 << 2) | (T01 << 0);
1512
1513         static const deUint32 tritsFromT[256][5] =
1514         {
1515                 { 0,0,0,0,0 }, { 1,0,0,0,0 }, { 2,0,0,0,0 }, { 0,0,2,0,0 }, { 0,1,0,0,0 }, { 1,1,0,0,0 }, { 2,1,0,0,0 }, { 1,0,2,0,0 }, { 0,2,0,0,0 }, { 1,2,0,0,0 }, { 2,2,0,0,0 }, { 2,0,2,0,0 }, { 0,2,2,0,0 }, { 1,2,2,0,0 }, { 2,2,2,0,0 }, { 2,0,2,0,0 },
1516                 { 0,0,1,0,0 }, { 1,0,1,0,0 }, { 2,0,1,0,0 }, { 0,1,2,0,0 }, { 0,1,1,0,0 }, { 1,1,1,0,0 }, { 2,1,1,0,0 }, { 1,1,2,0,0 }, { 0,2,1,0,0 }, { 1,2,1,0,0 }, { 2,2,1,0,0 }, { 2,1,2,0,0 }, { 0,0,0,2,2 }, { 1,0,0,2,2 }, { 2,0,0,2,2 }, { 0,0,2,2,2 },
1517                 { 0,0,0,1,0 }, { 1,0,0,1,0 }, { 2,0,0,1,0 }, { 0,0,2,1,0 }, { 0,1,0,1,0 }, { 1,1,0,1,0 }, { 2,1,0,1,0 }, { 1,0,2,1,0 }, { 0,2,0,1,0 }, { 1,2,0,1,0 }, { 2,2,0,1,0 }, { 2,0,2,1,0 }, { 0,2,2,1,0 }, { 1,2,2,1,0 }, { 2,2,2,1,0 }, { 2,0,2,1,0 },
1518                 { 0,0,1,1,0 }, { 1,0,1,1,0 }, { 2,0,1,1,0 }, { 0,1,2,1,0 }, { 0,1,1,1,0 }, { 1,1,1,1,0 }, { 2,1,1,1,0 }, { 1,1,2,1,0 }, { 0,2,1,1,0 }, { 1,2,1,1,0 }, { 2,2,1,1,0 }, { 2,1,2,1,0 }, { 0,1,0,2,2 }, { 1,1,0,2,2 }, { 2,1,0,2,2 }, { 1,0,2,2,2 },
1519                 { 0,0,0,2,0 }, { 1,0,0,2,0 }, { 2,0,0,2,0 }, { 0,0,2,2,0 }, { 0,1,0,2,0 }, { 1,1,0,2,0 }, { 2,1,0,2,0 }, { 1,0,2,2,0 }, { 0,2,0,2,0 }, { 1,2,0,2,0 }, { 2,2,0,2,0 }, { 2,0,2,2,0 }, { 0,2,2,2,0 }, { 1,2,2,2,0 }, { 2,2,2,2,0 }, { 2,0,2,2,0 },
1520                 { 0,0,1,2,0 }, { 1,0,1,2,0 }, { 2,0,1,2,0 }, { 0,1,2,2,0 }, { 0,1,1,2,0 }, { 1,1,1,2,0 }, { 2,1,1,2,0 }, { 1,1,2,2,0 }, { 0,2,1,2,0 }, { 1,2,1,2,0 }, { 2,2,1,2,0 }, { 2,1,2,2,0 }, { 0,2,0,2,2 }, { 1,2,0,2,2 }, { 2,2,0,2,2 }, { 2,0,2,2,2 },
1521                 { 0,0,0,0,2 }, { 1,0,0,0,2 }, { 2,0,0,0,2 }, { 0,0,2,0,2 }, { 0,1,0,0,2 }, { 1,1,0,0,2 }, { 2,1,0,0,2 }, { 1,0,2,0,2 }, { 0,2,0,0,2 }, { 1,2,0,0,2 }, { 2,2,0,0,2 }, { 2,0,2,0,2 }, { 0,2,2,0,2 }, { 1,2,2,0,2 }, { 2,2,2,0,2 }, { 2,0,2,0,2 },
1522                 { 0,0,1,0,2 }, { 1,0,1,0,2 }, { 2,0,1,0,2 }, { 0,1,2,0,2 }, { 0,1,1,0,2 }, { 1,1,1,0,2 }, { 2,1,1,0,2 }, { 1,1,2,0,2 }, { 0,2,1,0,2 }, { 1,2,1,0,2 }, { 2,2,1,0,2 }, { 2,1,2,0,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,0,2,2,2 },
1523                 { 0,0,0,0,1 }, { 1,0,0,0,1 }, { 2,0,0,0,1 }, { 0,0,2,0,1 }, { 0,1,0,0,1 }, { 1,1,0,0,1 }, { 2,1,0,0,1 }, { 1,0,2,0,1 }, { 0,2,0,0,1 }, { 1,2,0,0,1 }, { 2,2,0,0,1 }, { 2,0,2,0,1 }, { 0,2,2,0,1 }, { 1,2,2,0,1 }, { 2,2,2,0,1 }, { 2,0,2,0,1 },
1524                 { 0,0,1,0,1 }, { 1,0,1,0,1 }, { 2,0,1,0,1 }, { 0,1,2,0,1 }, { 0,1,1,0,1 }, { 1,1,1,0,1 }, { 2,1,1,0,1 }, { 1,1,2,0,1 }, { 0,2,1,0,1 }, { 1,2,1,0,1 }, { 2,2,1,0,1 }, { 2,1,2,0,1 }, { 0,0,1,2,2 }, { 1,0,1,2,2 }, { 2,0,1,2,2 }, { 0,1,2,2,2 },
1525                 { 0,0,0,1,1 }, { 1,0,0,1,1 }, { 2,0,0,1,1 }, { 0,0,2,1,1 }, { 0,1,0,1,1 }, { 1,1,0,1,1 }, { 2,1,0,1,1 }, { 1,0,2,1,1 }, { 0,2,0,1,1 }, { 1,2,0,1,1 }, { 2,2,0,1,1 }, { 2,0,2,1,1 }, { 0,2,2,1,1 }, { 1,2,2,1,1 }, { 2,2,2,1,1 }, { 2,0,2,1,1 },
1526                 { 0,0,1,1,1 }, { 1,0,1,1,1 }, { 2,0,1,1,1 }, { 0,1,2,1,1 }, { 0,1,1,1,1 }, { 1,1,1,1,1 }, { 2,1,1,1,1 }, { 1,1,2,1,1 }, { 0,2,1,1,1 }, { 1,2,1,1,1 }, { 2,2,1,1,1 }, { 2,1,2,1,1 }, { 0,1,1,2,2 }, { 1,1,1,2,2 }, { 2,1,1,2,2 }, { 1,1,2,2,2 },
1527                 { 0,0,0,2,1 }, { 1,0,0,2,1 }, { 2,0,0,2,1 }, { 0,0,2,2,1 }, { 0,1,0,2,1 }, { 1,1,0,2,1 }, { 2,1,0,2,1 }, { 1,0,2,2,1 }, { 0,2,0,2,1 }, { 1,2,0,2,1 }, { 2,2,0,2,1 }, { 2,0,2,2,1 }, { 0,2,2,2,1 }, { 1,2,2,2,1 }, { 2,2,2,2,1 }, { 2,0,2,2,1 },
1528                 { 0,0,1,2,1 }, { 1,0,1,2,1 }, { 2,0,1,2,1 }, { 0,1,2,2,1 }, { 0,1,1,2,1 }, { 1,1,1,2,1 }, { 2,1,1,2,1 }, { 1,1,2,2,1 }, { 0,2,1,2,1 }, { 1,2,1,2,1 }, { 2,2,1,2,1 }, { 2,1,2,2,1 }, { 0,2,1,2,2 }, { 1,2,1,2,2 }, { 2,2,1,2,2 }, { 2,1,2,2,2 },
1529                 { 0,0,0,1,2 }, { 1,0,0,1,2 }, { 2,0,0,1,2 }, { 0,0,2,1,2 }, { 0,1,0,1,2 }, { 1,1,0,1,2 }, { 2,1,0,1,2 }, { 1,0,2,1,2 }, { 0,2,0,1,2 }, { 1,2,0,1,2 }, { 2,2,0,1,2 }, { 2,0,2,1,2 }, { 0,2,2,1,2 }, { 1,2,2,1,2 }, { 2,2,2,1,2 }, { 2,0,2,1,2 },
1530                 { 0,0,1,1,2 }, { 1,0,1,1,2 }, { 2,0,1,1,2 }, { 0,1,2,1,2 }, { 0,1,1,1,2 }, { 1,1,1,1,2 }, { 2,1,1,1,2 }, { 1,1,2,1,2 }, { 0,2,1,1,2 }, { 1,2,1,1,2 }, { 2,2,1,1,2 }, { 2,1,2,1,2 }, { 0,2,2,2,2 }, { 1,2,2,2,2 }, { 2,2,2,2,2 }, { 2,1,2,2,2 }
1531         };
1532
1533         const deUint32 (& trits)[5] = tritsFromT[T];
1534
1535         for (int i = 0; i < numValues; i++)
1536         {
1537                 dst[i].m        = m[i];
1538                 dst[i].tq       = trits[i];
1539                 dst[i].v        = (trits[i] << numBits) + m[i];
1540         }
1541 }
1542
1543 void decodeISEQuintBlock (ISEDecodedResult* dst, int numValues, BitAccessStream& data, int numBits)
1544 {
1545         DE_ASSERT(de::inRange(numValues, 1, 3));
1546
1547         deUint32 m[3];
1548
1549         m[0]                    = data.getNext(numBits);
1550         deUint32 Q012   = data.getNext(3);
1551         m[1]                    = data.getNext(numBits);
1552         deUint32 Q34    = data.getNext(2);
1553         m[2]                    = data.getNext(numBits);
1554         deUint32 Q56    = data.getNext(2);
1555
1556         switch (numValues)
1557         {
1558                 // \note Fall-throughs.
1559                 case 1: Q34             = 0;
1560                 case 2: Q56             = 0;
1561                 case 3: break;
1562                 default:
1563                         DE_ASSERT(false);
1564         }
1565
1566         const deUint32 Q = (Q56 << 5) | (Q34 << 3) | (Q012 << 0);
1567
1568         static const deUint32 quintsFromQ[256][3] =
1569         {
1570                 { 0,0,0 }, { 1,0,0 }, { 2,0,0 }, { 3,0,0 }, { 4,0,0 }, { 0,4,0 }, { 4,4,0 }, { 4,4,4 }, { 0,1,0 }, { 1,1,0 }, { 2,1,0 }, { 3,1,0 }, { 4,1,0 }, { 1,4,0 }, { 4,4,1 }, { 4,4,4 },
1571                 { 0,2,0 }, { 1,2,0 }, { 2,2,0 }, { 3,2,0 }, { 4,2,0 }, { 2,4,0 }, { 4,4,2 }, { 4,4,4 }, { 0,3,0 }, { 1,3,0 }, { 2,3,0 }, { 3,3,0 }, { 4,3,0 }, { 3,4,0 }, { 4,4,3 }, { 4,4,4 },
1572                 { 0,0,1 }, { 1,0,1 }, { 2,0,1 }, { 3,0,1 }, { 4,0,1 }, { 0,4,1 }, { 4,0,4 }, { 0,4,4 }, { 0,1,1 }, { 1,1,1 }, { 2,1,1 }, { 3,1,1 }, { 4,1,1 }, { 1,4,1 }, { 4,1,4 }, { 1,4,4 },
1573                 { 0,2,1 }, { 1,2,1 }, { 2,2,1 }, { 3,2,1 }, { 4,2,1 }, { 2,4,1 }, { 4,2,4 }, { 2,4,4 }, { 0,3,1 }, { 1,3,1 }, { 2,3,1 }, { 3,3,1 }, { 4,3,1 }, { 3,4,1 }, { 4,3,4 }, { 3,4,4 },
1574                 { 0,0,2 }, { 1,0,2 }, { 2,0,2 }, { 3,0,2 }, { 4,0,2 }, { 0,4,2 }, { 2,0,4 }, { 3,0,4 }, { 0,1,2 }, { 1,1,2 }, { 2,1,2 }, { 3,1,2 }, { 4,1,2 }, { 1,4,2 }, { 2,1,4 }, { 3,1,4 },
1575                 { 0,2,2 }, { 1,2,2 }, { 2,2,2 }, { 3,2,2 }, { 4,2,2 }, { 2,4,2 }, { 2,2,4 }, { 3,2,4 }, { 0,3,2 }, { 1,3,2 }, { 2,3,2 }, { 3,3,2 }, { 4,3,2 }, { 3,4,2 }, { 2,3,4 }, { 3,3,4 },
1576                 { 0,0,3 }, { 1,0,3 }, { 2,0,3 }, { 3,0,3 }, { 4,0,3 }, { 0,4,3 }, { 0,0,4 }, { 1,0,4 }, { 0,1,3 }, { 1,1,3 }, { 2,1,3 }, { 3,1,3 }, { 4,1,3 }, { 1,4,3 }, { 0,1,4 }, { 1,1,4 },
1577                 { 0,2,3 }, { 1,2,3 }, { 2,2,3 }, { 3,2,3 }, { 4,2,3 }, { 2,4,3 }, { 0,2,4 }, { 1,2,4 }, { 0,3,3 }, { 1,3,3 }, { 2,3,3 }, { 3,3,3 }, { 4,3,3 }, { 3,4,3 }, { 0,3,4 }, { 1,3,4 }
1578         };
1579
1580         const deUint32 (& quints)[3] = quintsFromQ[Q];
1581
1582         for (int i = 0; i < numValues; i++)
1583         {
1584                 dst[i].m        = m[i];
1585                 dst[i].tq       = quints[i];
1586                 dst[i].v        = (quints[i] << numBits) + m[i];
1587         }
1588 }
1589
1590 inline void decodeISEBitBlock (ISEDecodedResult* dst, BitAccessStream& data, int numBits)
1591 {
1592         dst[0].m = data.getNext(numBits);
1593         dst[0].v = dst[0].m;
1594 }
1595
1596 void decodeISE (ISEDecodedResult* dst, int numValues, BitAccessStream& data, const ISEParams& params)
1597 {
1598         if (params.mode == ISEMODE_TRIT)
1599         {
1600                 const int numBlocks = divRoundUp(numValues, 5);
1601                 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
1602                 {
1603                         const int numValuesInBlock = blockNdx == numBlocks-1 ? numValues - 5*(numBlocks-1) : 5;
1604                         decodeISETritBlock(&dst[5*blockNdx], numValuesInBlock, data, params.numBits);
1605                 }
1606         }
1607         else if (params.mode == ISEMODE_QUINT)
1608         {
1609                 const int numBlocks = divRoundUp(numValues, 3);
1610                 for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
1611                 {
1612                         const int numValuesInBlock = blockNdx == numBlocks-1 ? numValues - 3*(numBlocks-1) : 3;
1613                         decodeISEQuintBlock(&dst[3*blockNdx], numValuesInBlock, data, params.numBits);
1614                 }
1615         }
1616         else
1617         {
1618                 DE_ASSERT(params.mode == ISEMODE_PLAIN_BIT);
1619                 for (int i = 0; i < numValues; i++)
1620                         decodeISEBitBlock(&dst[i], data, params.numBits);
1621         }
1622 }
1623
1624 ISEParams computeMaximumRangeISEParams (int numAvailableBits, int numValuesInSequence)
1625 {
1626         int curBitsForTritMode          = 6;
1627         int curBitsForQuintMode         = 5;
1628         int curBitsForPlainBitMode      = 8;
1629
1630         while (true)
1631         {
1632                 DE_ASSERT(curBitsForTritMode > 0 || curBitsForQuintMode > 0 || curBitsForPlainBitMode > 0);
1633
1634                 const int tritRange                     = curBitsForTritMode > 0                ? (3 << curBitsForTritMode) - 1                 : -1;
1635                 const int quintRange            = curBitsForQuintMode > 0               ? (5 << curBitsForQuintMode) - 1                : -1;
1636                 const int plainBitRange         = curBitsForPlainBitMode > 0    ? (1 << curBitsForPlainBitMode) - 1             : -1;
1637                 const int maxRange                      = de::max(de::max(tritRange, quintRange), plainBitRange);
1638
1639                 if (maxRange == tritRange)
1640                 {
1641                         const ISEParams params(ISEMODE_TRIT, curBitsForTritMode);
1642                         if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
1643                                 return ISEParams(ISEMODE_TRIT, curBitsForTritMode);
1644                         curBitsForTritMode--;
1645                 }
1646                 else if (maxRange == quintRange)
1647                 {
1648                         const ISEParams params(ISEMODE_QUINT, curBitsForQuintMode);
1649                         if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
1650                                 return ISEParams(ISEMODE_QUINT, curBitsForQuintMode);
1651                         curBitsForQuintMode--;
1652                 }
1653                 else
1654                 {
1655                         const ISEParams params(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
1656                         DE_ASSERT(maxRange == plainBitRange);
1657                         if (computeNumRequiredBits(params, numValuesInSequence) <= numAvailableBits)
1658                                 return ISEParams(ISEMODE_PLAIN_BIT, curBitsForPlainBitMode);
1659                         curBitsForPlainBitMode--;
1660                 }
1661         }
1662 }
1663
1664 void unquantizeColorEndpoints (deUint32* dst, const ISEDecodedResult* iseResults, int numEndpoints, const ISEParams& iseParams)
1665 {
1666         if (iseParams.mode == ISEMODE_TRIT || iseParams.mode == ISEMODE_QUINT)
1667         {
1668                 const int rangeCase                             = iseParams.numBits*2 - (iseParams.mode == ISEMODE_TRIT ? 2 : 1);
1669                 DE_ASSERT(de::inRange(rangeCase, 0, 10));
1670                 static const deUint32   Ca[11]  = { 204, 113, 93, 54, 44, 26, 22, 13, 11, 6, 5 };
1671                 const deUint32                  C               = Ca[rangeCase];
1672
1673                 for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++)
1674                 {
1675                         const deUint32 a = getBit(iseResults[endpointNdx].m, 0);
1676                         const deUint32 b = getBit(iseResults[endpointNdx].m, 1);
1677                         const deUint32 c = getBit(iseResults[endpointNdx].m, 2);
1678                         const deUint32 d = getBit(iseResults[endpointNdx].m, 3);
1679                         const deUint32 e = getBit(iseResults[endpointNdx].m, 4);
1680                         const deUint32 f = getBit(iseResults[endpointNdx].m, 5);
1681
1682                         const deUint32 A = a == 0 ? 0 : (1<<9)-1;
1683                         const deUint32 B = rangeCase == 0       ? 0
1684                                                          : rangeCase == 1       ? 0
1685                                                          : rangeCase == 2       ? (b << 8) |                                                                    (b << 4) |                              (b << 2) |      (b << 1)
1686                                                          : rangeCase == 3       ? (b << 8) |                                                                                            (b << 3) |      (b << 2)
1687                                                          : rangeCase == 4       ? (c << 8) | (b << 7) |                                                                         (c << 3) |      (b << 2) |      (c << 1) |      (b << 0)
1688                                                          : rangeCase == 5       ? (c << 8) | (b << 7) |                                                                                                 (c << 2) |      (b << 1) |      (c << 0)
1689                                                          : rangeCase == 6       ? (d << 8) | (c << 7) | (b << 6) |                                                                              (d << 2) |      (c << 1) |      (b << 0)
1690                                                          : rangeCase == 7       ? (d << 8) | (c << 7) | (b << 6) |                                                                                                      (d << 1) |      (c << 0)
1691                                                          : rangeCase == 8       ? (e << 8) | (d << 7) | (c << 6) | (b << 5) |                                                                           (e << 1) |      (d << 0)
1692                                                          : rangeCase == 9       ? (e << 8) | (d << 7) | (c << 6) | (b << 5) |                                                                                                   (e << 0)
1693                                                          : rangeCase == 10      ? (f << 8) | (e << 7) | (d << 6) | (c << 5) |   (b << 4) |                                                                              (f << 0)
1694                                                          : (deUint32)-1;
1695                         DE_ASSERT(B != (deUint32)-1);
1696
1697                         dst[endpointNdx] = (((iseResults[endpointNdx].tq*C + B) ^ A) >> 2) | (A & 0x80);
1698                 }
1699         }
1700         else
1701         {
1702                 DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);
1703
1704                 for (int endpointNdx = 0; endpointNdx < numEndpoints; endpointNdx++)
1705                         dst[endpointNdx] = bitReplicationScale(iseResults[endpointNdx].v, iseParams.numBits, 8);
1706         }
1707 }
1708
1709 inline void bitTransferSigned (deInt32& a, deInt32& b)
1710 {
1711         b >>= 1;
1712         b |= a & 0x80;
1713         a >>= 1;
1714         a &= 0x3f;
1715         if (isBitSet(a, 5))
1716                 a -= 0x40;
1717 }
1718
1719 inline UVec4 clampedRGBA (const IVec4& rgba)
1720 {
1721         return UVec4(de::clamp(rgba.x(), 0, 0xff),
1722                                  de::clamp(rgba.y(), 0, 0xff),
1723                                  de::clamp(rgba.z(), 0, 0xff),
1724                                  de::clamp(rgba.w(), 0, 0xff));
1725 }
1726
1727 inline IVec4 blueContract (int r, int g, int b, int a)
1728 {
1729         return IVec4((r+b)>>1, (g+b)>>1, b, a);
1730 }
1731
1732 inline bool isColorEndpointModeHDR (deUint32 mode)
1733 {
1734         return mode == 2        ||
1735                    mode == 3    ||
1736                    mode == 7    ||
1737                    mode == 11   ||
1738                    mode == 14   ||
1739                    mode == 15;
1740 }
1741
1742 void decodeHDREndpointMode7 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3)
1743 {
1744         const deUint32 m10              = getBit(v1, 7) | (getBit(v2, 7) << 1);
1745         const deUint32 m23              = getBits(v0, 6, 7);
1746         const deUint32 majComp  = m10 != 3      ? m10
1747                                                         : m23 != 3      ? m23
1748                                                         :                         0;
1749         const deUint32 mode             = m10 != 3      ? m23
1750                                                         : m23 != 3      ? 4
1751                                                         :                         5;
1752
1753         deInt32                 red             = (deInt32)getBits(v0, 0, 5);
1754         deInt32                 green   = (deInt32)getBits(v1, 0, 4);
1755         deInt32                 blue    = (deInt32)getBits(v2, 0, 4);
1756         deInt32                 scale   = (deInt32)getBits(v3, 0, 4);
1757
1758         {
1759 #define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
1760 #define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5, V6,S6) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); SHOR(V6,S6,x6); } while (false)
1761
1762                 const deUint32  x0      = getBit(v1, 6);
1763                 const deUint32  x1      = getBit(v1, 5);
1764                 const deUint32  x2      = getBit(v2, 6);
1765                 const deUint32  x3      = getBit(v2, 5);
1766                 const deUint32  x4      = getBit(v3, 7);
1767                 const deUint32  x5      = getBit(v3, 6);
1768                 const deUint32  x6      = getBit(v3, 5);
1769
1770                 deInt32&                R       = red;
1771                 deInt32&                G       = green;
1772                 deInt32&                B       = blue;
1773                 deInt32&                S       = scale;
1774
1775                 switch (mode)
1776                 {
1777                         case 0: ASSIGN_X_BITS(R,9,  R,8,  R,7,  R,10,  R,6,  S,6,   S,5); break;
1778                         case 1: ASSIGN_X_BITS(R,8,  G,5,  R,7,  B,5,   R,6,  R,10,  R,9); break;
1779                         case 2: ASSIGN_X_BITS(R,9,  R,8,  R,7,  R,6,   S,7,  S,6,   S,5); break;
1780                         case 3: ASSIGN_X_BITS(R,8,  G,5,  R,7,  B,5,   R,6,  S,6,   S,5); break;
1781                         case 4: ASSIGN_X_BITS(G,6,  G,5,  B,6,  B,5,   R,6,  R,7,   S,5); break;
1782                         case 5: ASSIGN_X_BITS(G,6,  G,5,  B,6,  B,5,   R,6,  S,6,   S,5); break;
1783                         default:
1784                                 DE_ASSERT(false);
1785                 }
1786
1787 #undef ASSIGN_X_BITS
1788 #undef SHOR
1789         }
1790
1791         static const int shiftAmounts[] = { 1, 1, 2, 3, 4, 5 };
1792         DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(shiftAmounts));
1793
1794         red             <<= shiftAmounts[mode];
1795         green   <<= shiftAmounts[mode];
1796         blue    <<= shiftAmounts[mode];
1797         scale   <<= shiftAmounts[mode];
1798
1799         if (mode != 5)
1800         {
1801                 green   = red - green;
1802                 blue    = red - blue;
1803         }
1804
1805         if (majComp == 1)
1806                 std::swap(red, green);
1807         else if (majComp == 2)
1808                 std::swap(red, blue);
1809
1810         e0 = UVec4(de::clamp(red        - scale,        0, 0xfff),
1811                            de::clamp(green      - scale,        0, 0xfff),
1812                            de::clamp(blue       - scale,        0, 0xfff),
1813                            0x780);
1814
1815         e1 = UVec4(de::clamp(red,                               0, 0xfff),
1816                            de::clamp(green,                             0, 0xfff),
1817                            de::clamp(blue,                              0, 0xfff),
1818                            0x780);
1819 }
1820
1821 void decodeHDREndpointMode11 (UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5)
1822 {
1823         const deUint32 major = (getBit(v5, 7) << 1) | getBit(v4, 7);
1824
1825         if (major == 3)
1826         {
1827                 e0 = UVec4(v0<<4, v2<<4, getBits(v4,0,6)<<5, 0x780);
1828                 e1 = UVec4(v1<<4, v3<<4, getBits(v5,0,6)<<5, 0x780);
1829         }
1830         else
1831         {
1832                 const deUint32 mode = (getBit(v3, 7) << 2) | (getBit(v2, 7) << 1) | getBit(v1, 7);
1833
1834                 deInt32 a       = (deInt32)((getBit(v1, 6) << 8) | v0);
1835                 deInt32 c       = (deInt32)(getBits(v1, 0, 5));
1836                 deInt32 b0      = (deInt32)(getBits(v2, 0, 5));
1837                 deInt32 b1      = (deInt32)(getBits(v3, 0, 5));
1838                 deInt32 d0      = (deInt32)(getBits(v4, 0, 4));
1839                 deInt32 d1      = (deInt32)(getBits(v5, 0, 4));
1840
1841                 {
1842 #define SHOR(DST_VAR, SHIFT, BIT_VAR) (DST_VAR) |= (BIT_VAR) << (SHIFT)
1843 #define ASSIGN_X_BITS(V0,S0, V1,S1, V2,S2, V3,S3, V4,S4, V5,S5) do { SHOR(V0,S0,x0); SHOR(V1,S1,x1); SHOR(V2,S2,x2); SHOR(V3,S3,x3); SHOR(V4,S4,x4); SHOR(V5,S5,x5); } while (false)
1844
1845                         const deUint32 x0 = getBit(v2, 6);
1846                         const deUint32 x1 = getBit(v3, 6);
1847                         const deUint32 x2 = getBit(v4, 6);
1848                         const deUint32 x3 = getBit(v5, 6);
1849                         const deUint32 x4 = getBit(v4, 5);
1850                         const deUint32 x5 = getBit(v5, 5);
1851
1852                         switch (mode)
1853                         {
1854                                 case 0: ASSIGN_X_BITS(b0,6,  b1,6,   d0,6,  d1,6,  d0,5,  d1,5); break;
1855                                 case 1: ASSIGN_X_BITS(b0,6,  b1,6,   b0,7,  b1,7,  d0,5,  d1,5); break;
1856                                 case 2: ASSIGN_X_BITS(a,9,   c,6,    d0,6,  d1,6,  d0,5,  d1,5); break;
1857                                 case 3: ASSIGN_X_BITS(b0,6,  b1,6,   a,9,   c,6,   d0,5,  d1,5); break;
1858                                 case 4: ASSIGN_X_BITS(b0,6,  b1,6,   b0,7,  b1,7,  a,9,   a,10); break;
1859                                 case 5: ASSIGN_X_BITS(a,9,   a,10,   c,7,   c,6,   d0,5,  d1,5); break;
1860                                 case 6: ASSIGN_X_BITS(b0,6,  b1,6,   a,11,  c,6,   a,9,   a,10); break;
1861                                 case 7: ASSIGN_X_BITS(a,9,   a,10,   a,11,  c,6,   d0,5,  d1,5); break;
1862                                 default:
1863                                         DE_ASSERT(false);
1864                         }
1865
1866 #undef ASSIGN_X_BITS
1867 #undef SHOR
1868                 }
1869
1870                 static const int numDBits[] = { 7, 6, 7, 6, 5, 6, 5, 6 };
1871                 DE_ASSERT(mode < DE_LENGTH_OF_ARRAY(numDBits));
1872
1873                 d0 = signExtend(d0, numDBits[mode]);
1874                 d1 = signExtend(d1, numDBits[mode]);
1875
1876                 const int shiftAmount = (mode >> 1) ^ 3;
1877                 a       <<= shiftAmount;
1878                 c       <<= shiftAmount;
1879                 b0      <<= shiftAmount;
1880                 b1      <<= shiftAmount;
1881                 d0      <<= shiftAmount;
1882                 d1      <<= shiftAmount;
1883
1884                 e0 = UVec4(de::clamp(a-c,                       0, 0xfff),
1885                                    de::clamp(a-b0-c-d0,         0, 0xfff),
1886                                    de::clamp(a-b1-c-d1,         0, 0xfff),
1887                                    0x780);
1888
1889                 e1 = UVec4(de::clamp(a,                         0, 0xfff),
1890                                    de::clamp(a-b0,                      0, 0xfff),
1891                                    de::clamp(a-b1,                      0, 0xfff),
1892                                    0x780);
1893
1894                 if (major == 1)
1895                 {
1896                         std::swap(e0.x(), e0.y());
1897                         std::swap(e1.x(), e1.y());
1898                 }
1899                 else if (major == 2)
1900                 {
1901                         std::swap(e0.x(), e0.z());
1902                         std::swap(e1.x(), e1.z());
1903                 }
1904         }
1905 }
1906
1907 void decodeHDREndpointMode15(UVec4& e0, UVec4& e1, deUint32 v0, deUint32 v1, deUint32 v2, deUint32 v3, deUint32 v4, deUint32 v5, deUint32 v6In, deUint32 v7In)
1908 {
1909         decodeHDREndpointMode11(e0, e1, v0, v1, v2, v3, v4, v5);
1910
1911         const deUint32  mode    = (getBit(v7In, 7) << 1) | getBit(v6In, 7);
1912         deInt32                 v6              = (deInt32)getBits(v6In, 0, 6);
1913         deInt32                 v7              = (deInt32)getBits(v7In, 0, 6);
1914
1915         if (mode == 3)
1916         {
1917                 e0.w() = v6 << 5;
1918                 e1.w() = v7 << 5;
1919         }
1920         else
1921         {
1922                 v6 |= (v7 << (mode+1)) & 0x780;
1923                 v7 &= (0x3f >> mode);
1924                 v7 ^= 0x20 >> mode;
1925                 v7 -= 0x20 >> mode;
1926                 v6 <<= 4-mode;
1927                 v7 <<= 4-mode;
1928
1929                 v7 += v6;
1930                 v7 = de::clamp(v7, 0, 0xfff);
1931                 e0.w() = v6;
1932                 e1.w() = v7;
1933         }
1934 }
1935
1936 void decodeColorEndpoints (ColorEndpointPair* dst, const deUint32* unquantizedEndpoints, const deUint32* endpointModes, int numPartitions)
1937 {
1938         int unquantizedNdx = 0;
1939
1940         for (int partitionNdx = 0; partitionNdx < numPartitions; partitionNdx++)
1941         {
1942                 const deUint32          endpointMode    = endpointModes[partitionNdx];
1943                 const deUint32*         v                               = &unquantizedEndpoints[unquantizedNdx];
1944                 UVec4&                          e0                              = dst[partitionNdx].e0;
1945                 UVec4&                          e1                              = dst[partitionNdx].e1;
1946
1947                 unquantizedNdx += computeNumColorEndpointValues(endpointMode);
1948
1949                 switch (endpointMode)
1950                 {
1951                         case 0:
1952                                 e0 = UVec4(v[0], v[0], v[0], 0xff);
1953                                 e1 = UVec4(v[1], v[1], v[1], 0xff);
1954                                 break;
1955
1956                         case 1:
1957                         {
1958                                 const deUint32 L0 = (v[0] >> 2) | (getBits(v[1], 6, 7) << 6);
1959                                 const deUint32 L1 = de::min(0xffu, L0 + getBits(v[1], 0, 5));
1960                                 e0 = UVec4(L0, L0, L0, 0xff);
1961                                 e1 = UVec4(L1, L1, L1, 0xff);
1962                                 break;
1963                         }
1964
1965                         case 2:
1966                         {
1967                                 const deUint32 v1Gr             = v[1] >= v[0];
1968                                 const deUint32 y0               = v1Gr ? v[0]<<4 : (v[1]<<4) + 8;
1969                                 const deUint32 y1               = v1Gr ? v[1]<<4 : (v[0]<<4) - 8;
1970
1971                                 e0 = UVec4(y0, y0, y0, 0x780);
1972                                 e1 = UVec4(y1, y1, y1, 0x780);
1973                                 break;
1974                         }
1975
1976                         case 3:
1977                         {
1978                                 const bool              m       = isBitSet(v[0], 7);
1979                                 const deUint32  y0      = m ? (getBits(v[1], 5, 7) << 9) | (getBits(v[0], 0, 6) << 2)
1980                                                                                 : (getBits(v[1], 4, 7) << 8) | (getBits(v[0], 0, 6) << 1);
1981                                 const deUint32  d       = m ? getBits(v[1], 0, 4) << 2
1982                                                                                 : getBits(v[1], 0, 3) << 1;
1983                                 const deUint32  y1      = de::min(0xfffu, y0+d);
1984
1985                                 e0 = UVec4(y0, y0, y0, 0x780);
1986                                 e1 = UVec4(y1, y1, y1, 0x780);
1987                                 break;
1988                         }
1989
1990                         case 4:
1991                                 e0 = UVec4(v[0], v[0], v[0], v[2]);
1992                                 e1 = UVec4(v[1], v[1], v[1], v[3]);
1993                                 break;
1994
1995                         case 5:
1996                         {
1997                                 deInt32 v0 = (deInt32)v[0];
1998                                 deInt32 v1 = (deInt32)v[1];
1999                                 deInt32 v2 = (deInt32)v[2];
2000                                 deInt32 v3 = (deInt32)v[3];
2001                                 bitTransferSigned(v1, v0);
2002                                 bitTransferSigned(v3, v2);
2003
2004                                 e0 = clampedRGBA(IVec4(v0,              v0,             v0,             v2));
2005                                 e1 = clampedRGBA(IVec4(v0+v1,   v0+v1,  v0+v1,  v2+v3));
2006                                 break;
2007                         }
2008
2009                         case 6:
2010                                 e0 = UVec4((v[0]*v[3]) >> 8,    (v[1]*v[3]) >> 8,       (v[2]*v[3]) >> 8,       0xff);
2011                                 e1 = UVec4(v[0],                                v[1],                           v[2],                           0xff);
2012                                 break;
2013
2014                         case 7:
2015                                 decodeHDREndpointMode7(e0, e1, v[0], v[1], v[2], v[3]);
2016                                 break;
2017
2018                         case 8:
2019                                 if (v[1]+v[3]+v[5] >= v[0]+v[2]+v[4])
2020                                 {
2021                                         e0 = UVec4(v[0], v[2], v[4], 0xff);
2022                                         e1 = UVec4(v[1], v[3], v[5], 0xff);
2023                                 }
2024                                 else
2025                                 {
2026                                         e0 = blueContract(v[1], v[3], v[5], 0xff).asUint();
2027                                         e1 = blueContract(v[0], v[2], v[4], 0xff).asUint();
2028                                 }
2029                                 break;
2030
2031                         case 9:
2032                         {
2033                                 deInt32 v0 = (deInt32)v[0];
2034                                 deInt32 v1 = (deInt32)v[1];
2035                                 deInt32 v2 = (deInt32)v[2];
2036                                 deInt32 v3 = (deInt32)v[3];
2037                                 deInt32 v4 = (deInt32)v[4];
2038                                 deInt32 v5 = (deInt32)v[5];
2039                                 bitTransferSigned(v1, v0);
2040                                 bitTransferSigned(v3, v2);
2041                                 bitTransferSigned(v5, v4);
2042
2043                                 if (v1+v3+v5 >= 0)
2044                                 {
2045                                         e0 = clampedRGBA(IVec4(v0,              v2,             v4,             0xff));
2046                                         e1 = clampedRGBA(IVec4(v0+v1,   v2+v3,  v4+v5,  0xff));
2047                                 }
2048                                 else
2049                                 {
2050                                         e0 = clampedRGBA(blueContract(v0+v1,    v2+v3,  v4+v5,  0xff));
2051                                         e1 = clampedRGBA(blueContract(v0,               v2,             v4,             0xff));
2052                                 }
2053                                 break;
2054                         }
2055
2056                         case 10:
2057                                 e0 = UVec4((v[0]*v[3]) >> 8,    (v[1]*v[3]) >> 8,       (v[2]*v[3]) >> 8,       v[4]);
2058                                 e1 = UVec4(v[0],                                v[1],                           v[2],                           v[5]);
2059                                 break;
2060
2061                         case 11:
2062                                 decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
2063                                 break;
2064
2065                         case 12:
2066                                 if (v[1]+v[3]+v[5] >= v[0]+v[2]+v[4])
2067                                 {
2068                                         e0 = UVec4(v[0], v[2], v[4], v[6]);
2069                                         e1 = UVec4(v[1], v[3], v[5], v[7]);
2070                                 }
2071                                 else
2072                                 {
2073                                         e0 = clampedRGBA(blueContract(v[1], v[3], v[5], v[7]));
2074                                         e1 = clampedRGBA(blueContract(v[0], v[2], v[4], v[6]));
2075                                 }
2076                                 break;
2077
2078                         case 13:
2079                         {
2080                                 deInt32 v0 = (deInt32)v[0];
2081                                 deInt32 v1 = (deInt32)v[1];
2082                                 deInt32 v2 = (deInt32)v[2];
2083                                 deInt32 v3 = (deInt32)v[3];
2084                                 deInt32 v4 = (deInt32)v[4];
2085                                 deInt32 v5 = (deInt32)v[5];
2086                                 deInt32 v6 = (deInt32)v[6];
2087                                 deInt32 v7 = (deInt32)v[7];
2088                                 bitTransferSigned(v1, v0);
2089                                 bitTransferSigned(v3, v2);
2090                                 bitTransferSigned(v5, v4);
2091                                 bitTransferSigned(v7, v6);
2092
2093                                 if (v1+v3+v5 >= 0)
2094                                 {
2095                                         e0 = clampedRGBA(IVec4(v0,              v2,             v4,             v6));
2096                                         e1 = clampedRGBA(IVec4(v0+v1,   v2+v3,  v4+v5,  v6+v7));
2097                                 }
2098                                 else
2099                                 {
2100                                         e0 = clampedRGBA(blueContract(v0+v1,    v2+v3,  v4+v5,  v6+v7));
2101                                         e1 = clampedRGBA(blueContract(v0,               v2,             v4,             v6));
2102                                 }
2103
2104                                 break;
2105                         }
2106
2107                         case 14:
2108                                 decodeHDREndpointMode11(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5]);
2109                                 e0.w() = v[6];
2110                                 e1.w() = v[7];
2111                                 break;
2112
2113                         case 15:
2114                                 decodeHDREndpointMode15(e0, e1, v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
2115                                 break;
2116
2117                         default:
2118                                 DE_ASSERT(false);
2119                 }
2120         }
2121 }
2122
2123 void computeColorEndpoints (ColorEndpointPair* dst, const Block128& blockData, const deUint32* endpointModes, int numPartitions, int numColorEndpointValues, const ISEParams& iseParams, int numBitsAvailable)
2124 {
2125         const int                       colorEndpointDataStart = numPartitions == 1 ? 17 : 29;
2126         ISEDecodedResult        colorEndpointData[18];
2127
2128         {
2129                 BitAccessStream dataStream(blockData, colorEndpointDataStart, numBitsAvailable, true);
2130                 decodeISE(&colorEndpointData[0], numColorEndpointValues, dataStream, iseParams);
2131         }
2132
2133         {
2134                 deUint32 unquantizedEndpoints[18];
2135                 unquantizeColorEndpoints(&unquantizedEndpoints[0], &colorEndpointData[0], numColorEndpointValues, iseParams);
2136                 decodeColorEndpoints(dst, &unquantizedEndpoints[0], &endpointModes[0], numPartitions);
2137         }
2138 }
2139
2140 void unquantizeWeights (deUint32* dst, const ISEDecodedResult* weightGrid, const ASTCBlockMode& blockMode)
2141 {
2142         const int                       numWeights      = computeNumWeights(blockMode);
2143         const ISEParams&        iseParams       = blockMode.weightISEParams;
2144
2145         if (iseParams.mode == ISEMODE_TRIT || iseParams.mode == ISEMODE_QUINT)
2146         {
2147                 const int rangeCase = iseParams.numBits*2 + (iseParams.mode == ISEMODE_QUINT ? 1 : 0);
2148
2149                 if (rangeCase == 0 || rangeCase == 1)
2150                 {
2151                         static const deUint32 map0[3]   = { 0, 32, 63 };
2152                         static const deUint32 map1[5]   = { 0, 16, 32, 47, 63 };
2153                         const deUint32* const map               = rangeCase == 0 ? &map0[0] : &map1[0];
2154                         for (int i = 0; i < numWeights; i++)
2155                         {
2156                                 DE_ASSERT(weightGrid[i].v < (rangeCase == 0 ? 3u : 5u));
2157                                 dst[i] = map[weightGrid[i].v];
2158                         }
2159                 }
2160                 else
2161                 {
2162                         DE_ASSERT(rangeCase <= 6);
2163                         static const deUint32   Ca[5]   = { 50, 28, 23, 13, 11 };
2164                         const deUint32                  C               = Ca[rangeCase-2];
2165
2166                         for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
2167                         {
2168                                 const deUint32 a = getBit(weightGrid[weightNdx].m, 0);
2169                                 const deUint32 b = getBit(weightGrid[weightNdx].m, 1);
2170                                 const deUint32 c = getBit(weightGrid[weightNdx].m, 2);
2171
2172                                 const deUint32 A = a == 0 ? 0 : (1<<7)-1;
2173                                 const deUint32 B = rangeCase == 2 ? 0
2174                                                                  : rangeCase == 3 ? 0
2175                                                                  : rangeCase == 4 ? (b << 6) |                                  (b << 2) |                              (b << 0)
2176                                                                  : rangeCase == 5 ? (b << 6) |                                                          (b << 1)
2177                                                                  : rangeCase == 6 ? (c << 6) | (b << 5) |                                       (c << 1) |      (b << 0)
2178                                                                  : (deUint32)-1;
2179
2180                                 dst[weightNdx] = (((weightGrid[weightNdx].tq*C + B) ^ A) >> 2) | (A & 0x20);
2181                         }
2182                 }
2183         }
2184         else
2185         {
2186                 DE_ASSERT(iseParams.mode == ISEMODE_PLAIN_BIT);
2187
2188                 for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
2189                         dst[weightNdx] = bitReplicationScale(weightGrid[weightNdx].v, iseParams.numBits, 6);
2190         }
2191
2192         for (int weightNdx = 0; weightNdx < numWeights; weightNdx++)
2193                 dst[weightNdx] += dst[weightNdx] > 32 ? 1 : 0;
2194 }
2195
2196 void interpolateWeights (TexelWeightPair* dst, const deUint32* unquantizedWeights, int blockWidth, int blockHeight, const ASTCBlockMode& blockMode)
2197 {
2198         const int               numWeightsPerTexel      = blockMode.isDualPlane ? 2 : 1;
2199         const deUint32  scaleX                          = (1024 + blockWidth/2) / (blockWidth-1);
2200         const deUint32  scaleY                          = (1024 + blockHeight/2) / (blockHeight-1);
2201
2202         for (int texelY = 0; texelY < blockHeight; texelY++)
2203         {
2204                 for (int texelX = 0; texelX < blockWidth; texelX++)
2205                 {
2206                         const deUint32 gX       = (scaleX*texelX*(blockMode.weightGridWidth-1) + 32) >> 6;
2207                         const deUint32 gY       = (scaleY*texelY*(blockMode.weightGridHeight-1) + 32) >> 6;
2208                         const deUint32 jX       = gX >> 4;
2209                         const deUint32 jY       = gY >> 4;
2210                         const deUint32 fX       = gX & 0xf;
2211                         const deUint32 fY       = gY & 0xf;
2212                         const deUint32 w11      = (fX*fY + 8) >> 4;
2213                         const deUint32 w10      = fY - w11;
2214                         const deUint32 w01      = fX - w11;
2215                         const deUint32 w00      = 16 - fX - fY + w11;
2216                         const deUint32 v0       = jY*blockMode.weightGridWidth + jX;
2217
2218                         for (int texelWeightNdx = 0; texelWeightNdx < numWeightsPerTexel; texelWeightNdx++)
2219                         {
2220                                 const deUint32 p00      = unquantizedWeights[(v0)                                                                       * numWeightsPerTexel + texelWeightNdx];
2221                                 const deUint32 p01      = unquantizedWeights[(v0 + 1)                                                           * numWeightsPerTexel + texelWeightNdx];
2222                                 const deUint32 p10      = unquantizedWeights[(v0 + blockMode.weightGridWidth)           * numWeightsPerTexel + texelWeightNdx];
2223                                 const deUint32 p11      = unquantizedWeights[(v0 + blockMode.weightGridWidth + 1)       * numWeightsPerTexel + texelWeightNdx];
2224
2225                                 dst[texelY*blockWidth + texelX].w[texelWeightNdx] = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
2226                         }
2227                 }
2228         }
2229 }
2230
2231 void computeTexelWeights (TexelWeightPair* dst, const Block128& blockData, int blockWidth, int blockHeight, const ASTCBlockMode& blockMode)
2232 {
2233         ISEDecodedResult weightGrid[64];
2234
2235         {
2236                 BitAccessStream dataStream(blockData, 127, computeNumRequiredBits(blockMode.weightISEParams, computeNumWeights(blockMode)), false);
2237                 decodeISE(&weightGrid[0], computeNumWeights(blockMode), dataStream, blockMode.weightISEParams);
2238         }
2239
2240         {
2241                 deUint32 unquantizedWeights[64];
2242                 unquantizeWeights(&unquantizedWeights[0], &weightGrid[0], blockMode);
2243                 interpolateWeights(dst, &unquantizedWeights[0], blockWidth, blockHeight, blockMode);
2244         }
2245 }
2246
2247 inline deUint32 hash52 (deUint32 v)
2248 {
2249         deUint32 p = v;
2250         p ^= p >> 15;   p -= p << 17;   p += p << 7;    p += p << 4;
2251         p ^= p >>  5;   p += p << 16;   p ^= p >> 7;    p ^= p >> 3;
2252         p ^= p <<  6;   p ^= p >> 17;
2253         return p;
2254 }
2255
2256 int computeTexelPartition (deUint32 seedIn, deUint32 xIn, deUint32 yIn, deUint32 zIn, int numPartitions, bool smallBlock)
2257 {
2258         DE_ASSERT(zIn == 0);
2259         const deUint32  x               = smallBlock ? xIn << 1 : xIn;
2260         const deUint32  y               = smallBlock ? yIn << 1 : yIn;
2261         const deUint32  z               = smallBlock ? zIn << 1 : zIn;
2262         const deUint32  seed    = seedIn + 1024*(numPartitions-1);
2263         const deUint32  rnum    = hash52(seed);
2264         deUint8                 seed1   =  rnum                                                 & 0xf;
2265         deUint8                 seed2   = (rnum >>  4)                                  & 0xf;
2266         deUint8                 seed3   = (rnum >>  8)                                  & 0xf;
2267         deUint8                 seed4   = (rnum >> 12)                                  & 0xf;
2268         deUint8                 seed5   = (rnum >> 16)                                  & 0xf;
2269         deUint8                 seed6   = (rnum >> 20)                                  & 0xf;
2270         deUint8                 seed7   = (rnum >> 24)                                  & 0xf;
2271         deUint8                 seed8   = (rnum >> 28)                                  & 0xf;
2272         deUint8                 seed9   = (rnum >> 18)                                  & 0xf;
2273         deUint8                 seed10  = (rnum >> 22)                                  & 0xf;
2274         deUint8                 seed11  = (rnum >> 26)                                  & 0xf;
2275         deUint8                 seed12  = ((rnum >> 30) | (rnum << 2))  & 0xf;
2276
2277         seed1 *= seed1;         seed5 *= seed5;         seed9  *= seed9;
2278         seed2 *= seed2;         seed6 *= seed6;         seed10 *= seed10;
2279         seed3 *= seed3;         seed7 *= seed7;         seed11 *= seed11;
2280         seed4 *= seed4;         seed8 *= seed8;         seed12 *= seed12;
2281
2282         const int shA = (seed & 2) != 0         ? 4             : 5;
2283         const int shB = numPartitions == 3      ? 6             : 5;
2284         const int sh1 = (seed & 1) != 0         ? shA   : shB;
2285         const int sh2 = (seed & 1) != 0         ? shB   : shA;
2286         const int sh3 = (seed & 0x10) != 0      ? sh1   : sh2;
2287
2288         seed1 >>= sh1;          seed2  >>= sh2;         seed3  >>= sh1;         seed4  >>= sh2;
2289         seed5 >>= sh1;          seed6  >>= sh2;         seed7  >>= sh1;         seed8  >>= sh2;
2290         seed9 >>= sh3;          seed10 >>= sh3;         seed11 >>= sh3;         seed12 >>= sh3;
2291
2292         const int a =                                           0x3f & (seed1*x + seed2*y + seed11*z + (rnum >> 14));
2293         const int b =                                           0x3f & (seed3*x + seed4*y + seed12*z + (rnum >> 10));
2294         const int c = numPartitions >= 3 ?      0x3f & (seed5*x + seed6*y + seed9*z  + (rnum >>  6))    : 0;
2295         const int d = numPartitions >= 4 ?      0x3f & (seed7*x + seed8*y + seed10*z + (rnum >>  2))    : 0;
2296
2297         return a >= b && a >= c && a >= d       ? 0
2298                  : b >= c && b >= d                             ? 1
2299                  : c >= d                                               ? 2
2300                  :                                                                3;
2301 }
2302
2303 void setTexelColors (void* dst, ColorEndpointPair* colorEndpoints, TexelWeightPair* texelWeights, int ccs, deUint32 partitionIndexSeed,
2304                                                         int numPartitions, int blockWidth, int blockHeight, bool isSRGB, bool isLDRMode, const deUint32* colorEndpointModes)
2305 {
2306         const bool      smallBlock = blockWidth*blockHeight < 31;
2307         bool            isHDREndpoint[4];
2308
2309         for (int i = 0; i < numPartitions; i++)
2310                 isHDREndpoint[i] = isColorEndpointModeHDR(colorEndpointModes[i]);
2311
2312         for (int texelY = 0; texelY < blockHeight; texelY++)
2313         for (int texelX = 0; texelX < blockWidth; texelX++)
2314         {
2315                 const int                               texelNdx                        = texelY*blockWidth + texelX;
2316                 const int                               colorEndpointNdx        = numPartitions == 1 ? 0 : computeTexelPartition(partitionIndexSeed, texelX, texelY, 0, numPartitions, smallBlock);
2317                 DE_ASSERT(colorEndpointNdx < numPartitions);
2318                 const UVec4&                    e0                                      = colorEndpoints[colorEndpointNdx].e0;
2319                 const UVec4&                    e1                                      = colorEndpoints[colorEndpointNdx].e1;
2320                 const TexelWeightPair&  weight                          = texelWeights[texelNdx];
2321
2322                 if (isLDRMode && isHDREndpoint[colorEndpointNdx])
2323                 {
2324                         if (isSRGB)
2325                         {
2326                                 ((deUint8*)dst)[texelNdx*4 + 0] = 0xff;
2327                                 ((deUint8*)dst)[texelNdx*4 + 1] = 0;
2328                                 ((deUint8*)dst)[texelNdx*4 + 2] = 0xff;
2329                                 ((deUint8*)dst)[texelNdx*4 + 3] = 0xff;
2330                         }
2331                         else
2332                         {
2333                                 ((float*)dst)[texelNdx*4 + 0] = 1.0f;
2334                                 ((float*)dst)[texelNdx*4 + 1] = 0;
2335                                 ((float*)dst)[texelNdx*4 + 2] = 1.0f;
2336                                 ((float*)dst)[texelNdx*4 + 3] = 1.0f;
2337                         }
2338                 }
2339                 else
2340                 {
2341                         for (int channelNdx = 0; channelNdx < 4; channelNdx++)
2342                         {
2343                                 if (!isHDREndpoint[colorEndpointNdx] || (channelNdx == 3 && colorEndpointModes[colorEndpointNdx] == 14)) // \note Alpha for mode 14 is treated the same as LDR.
2344                                 {
2345                                         const deUint32 c0       = (e0[channelNdx] << 8) | (isSRGB ? 0x80 : e0[channelNdx]);
2346                                         const deUint32 c1       = (e1[channelNdx] << 8) | (isSRGB ? 0x80 : e1[channelNdx]);
2347                                         const deUint32 w        = weight.w[ccs == channelNdx ? 1 : 0];
2348                                         const deUint32 c        = (c0*(64-w) + c1*w + 32) / 64;
2349
2350                                         if (isSRGB)
2351                                                 ((deUint8*)dst)[texelNdx*4 + channelNdx] = (c & 0xff00) >> 8;
2352                                         else
2353                                                 ((float*)dst)[texelNdx*4 + channelNdx] = c == 65535 ? 1.0f : (float)c / 65536.0f;
2354                                 }
2355                                 else
2356                                 {
2357                                         DE_STATIC_ASSERT((isSameType<deFloat16, deUint16>::V));
2358                                         const deUint32          c0      = e0[channelNdx] << 4;
2359                                         const deUint32          c1      = e1[channelNdx] << 4;
2360                                         const deUint32          w       = weight.w[ccs == channelNdx ? 1 : 0];
2361                                         const deUint32          c       = (c0*(64-w) + c1*w + 32) / 64;
2362                                         const deUint32          e       = getBits(c, 11, 15);
2363                                         const deUint32          m       = getBits(c, 0, 10);
2364                                         const deUint32          mt      = m < 512               ? 3*m
2365                                                                                         : m >= 1536             ? 5*m - 2048
2366                                                                                         :                                 4*m - 512;
2367                                         const deFloat16         cf      = (e << 10) + (mt >> 3);
2368
2369                                         ((float*)dst)[texelNdx*4 + channelNdx] = deFloat16To32(isFloat16InfOrNan(cf) ? 0x7bff : cf);
2370                                 }
2371                         }
2372                 }
2373         }
2374 }
2375
2376 void decompressASTCBlock (void* dst, const Block128& blockData, int blockWidth, int blockHeight, bool isSRGB, bool isLDR)
2377 {
2378         DE_ASSERT(isLDR || !isSRGB);
2379
2380         // Decode block mode.
2381
2382         const ASTCBlockMode blockMode = getASTCBlockMode(blockData.getBits(0, 10));
2383
2384         // Check for block mode errors.
2385
2386         if (blockMode.isError)
2387         {
2388                 setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
2389                 return;
2390         }
2391
2392         // Separate path for void-extent.
2393
2394         if (blockMode.isVoidExtent)
2395         {
2396                 decodeVoidExtentBlock(dst, blockData, blockWidth, blockHeight, isSRGB, isLDR);
2397                 return;
2398         }
2399
2400         // Compute weight grid values.
2401
2402         const int numWeights                    = computeNumWeights(blockMode);
2403         const int numWeightDataBits             = computeNumRequiredBits(blockMode.weightISEParams, numWeights);
2404         const int numPartitions                 = (int)blockData.getBits(11, 12) + 1;
2405
2406         // Check for errors in weight grid, partition and dual-plane parameters.
2407
2408         if (numWeights > 64                                                             ||
2409                 numWeightDataBits > 96                                          ||
2410                 numWeightDataBits < 24                                          ||
2411                 blockMode.weightGridWidth > blockWidth          ||
2412                 blockMode.weightGridHeight > blockHeight        ||
2413                 (numPartitions == 4 && blockMode.isDualPlane))
2414         {
2415                 setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
2416                 return;
2417         }
2418
2419         // Compute number of bits available for color endpoint data.
2420
2421         const bool      isSingleUniqueCem                       = numPartitions == 1 || blockData.getBits(23, 24) == 0;
2422         const int       numConfigDataBits                       = (numPartitions == 1 ? 17 : isSingleUniqueCem ? 29 : 25 + 3*numPartitions) +
2423                                                                                           (blockMode.isDualPlane ? 2 : 0);
2424         const int       numBitsForColorEndpoints        = 128 - numWeightDataBits - numConfigDataBits;
2425         const int       extraCemBitsStart                       = 127 - numWeightDataBits - (isSingleUniqueCem          ? -1
2426                                                                                                                                                 : numPartitions == 4    ? 7
2427                                                                                                                                                 : numPartitions == 3    ? 4
2428                                                                                                                                                 : numPartitions == 2    ? 1
2429                                                                                                                                                 : 0);
2430         // Decode color endpoint modes.
2431
2432         deUint32 colorEndpointModes[4];
2433         decodeColorEndpointModes(&colorEndpointModes[0], blockData, numPartitions, extraCemBitsStart);
2434
2435         const int numColorEndpointValues = computeNumColorEndpointValues(colorEndpointModes, numPartitions);
2436
2437         // Check for errors in color endpoint value count.
2438
2439         if (numColorEndpointValues > 18 || numBitsForColorEndpoints < divRoundUp(13*numColorEndpointValues, 5))
2440         {
2441                 setASTCErrorColorBlock(dst, blockWidth, blockHeight, isSRGB);
2442                 return;
2443         }
2444
2445         // Compute color endpoints.
2446
2447         ColorEndpointPair colorEndpoints[4];
2448         computeColorEndpoints(&colorEndpoints[0], blockData, &colorEndpointModes[0], numPartitions, numColorEndpointValues,
2449                                                   computeMaximumRangeISEParams(numBitsForColorEndpoints, numColorEndpointValues), numBitsForColorEndpoints);
2450
2451         // Compute texel weights.
2452
2453         TexelWeightPair texelWeights[ASTC_MAX_BLOCK_WIDTH*ASTC_MAX_BLOCK_HEIGHT];
2454         computeTexelWeights(&texelWeights[0], blockData, blockWidth, blockHeight, blockMode);
2455
2456         // Set texel colors.
2457
2458         const int               ccs                                             = blockMode.isDualPlane ? (int)blockData.getBits(extraCemBitsStart-2, extraCemBitsStart-1) : -1;
2459         const deUint32  partitionIndexSeed              = numPartitions > 1 ? blockData.getBits(13, 22) : (deUint32)-1;
2460
2461         setTexelColors(dst, &colorEndpoints[0], &texelWeights[0], ccs, partitionIndexSeed, numPartitions, blockWidth, blockHeight, isSRGB, isLDR, &colorEndpointModes[0]);
2462 }
2463
2464 } // ASTCDecompressInternal
2465
2466 void decompressASTC (const PixelBufferAccess& dst, const deUint8* data, bool isSRGB, bool isLDR)
2467 {
2468         using namespace ASTCDecompressInternal;
2469
2470         DE_ASSERT(isLDR || !isSRGB);
2471
2472         const int blockWidth = dst.getWidth();
2473         const int blockHeight = dst.getHeight();
2474
2475         union
2476         {
2477                 deUint8         sRGB[ASTC_MAX_BLOCK_WIDTH*ASTC_MAX_BLOCK_HEIGHT*4];
2478                 float           linear[ASTC_MAX_BLOCK_WIDTH*ASTC_MAX_BLOCK_HEIGHT*4];
2479         } decompressedBuffer;
2480
2481         const Block128 blockData(data);
2482         decompressASTCBlock(isSRGB ? (void*)&decompressedBuffer.sRGB[0] : (void*)&decompressedBuffer.linear[0],
2483                                                 blockData, dst.getWidth(), dst.getHeight(), isSRGB, isLDR);
2484
2485         if (isSRGB)
2486         {
2487                 for (int i = 0; i < blockHeight; i++)
2488                 for (int j = 0; j < blockWidth; j++)
2489                 {
2490                         dst.setPixel(IVec4(decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 0],
2491                                                                         decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 1],
2492                                                                         decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 2],
2493                                                                         decompressedBuffer.sRGB[(i*blockWidth + j) * 4 + 3]), j, i);
2494                 }
2495         }
2496         else
2497         {
2498                 for (int i = 0; i < blockHeight; i++)
2499                 for (int j = 0; j < blockWidth; j++)
2500                 {
2501                         dst.setPixel(Vec4(decompressedBuffer.linear[(i*blockWidth + j) * 4 + 0],
2502                                                                    decompressedBuffer.linear[(i*blockWidth + j) * 4 + 1],
2503                                                                    decompressedBuffer.linear[(i*blockWidth + j) * 4 + 2],
2504                                                                    decompressedBuffer.linear[(i*blockWidth + j) * 4 + 3]), j, i);
2505                 }
2506         }
2507 }
2508
2509 void decompressBlock (CompressedTexFormat format, const PixelBufferAccess& dst, const deUint8* src, const TexDecompressionParams& params)
2510 {
2511         // No 3D blocks supported right now
2512         DE_ASSERT(dst.getDepth() == 1);
2513
2514         switch (format)
2515         {
2516                 case COMPRESSEDTEXFORMAT_ETC1_RGB8:                                                     decompressETC1                                                  (dst, src);                     break;
2517                 case COMPRESSEDTEXFORMAT_EAC_R11:                                                       decompressEAC_R11                                               (dst, src, false);      break;
2518                 case COMPRESSEDTEXFORMAT_EAC_SIGNED_R11:                                        decompressEAC_R11                                               (dst, src, true);       break;
2519                 case COMPRESSEDTEXFORMAT_EAC_RG11:                                                      decompressEAC_RG11                                              (dst, src, false);      break;
2520                 case COMPRESSEDTEXFORMAT_EAC_SIGNED_RG11:                                       decompressEAC_RG11                                              (dst, src, true);       break;
2521                 case COMPRESSEDTEXFORMAT_ETC2_RGB8:                                                     decompressETC2                                                  (dst, src);                     break;
2522                 case COMPRESSEDTEXFORMAT_ETC2_SRGB8:                                            decompressETC2                                                  (dst, src);                     break;
2523                 case COMPRESSEDTEXFORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:         decompressETC2_RGB8_PUNCHTHROUGH_ALPHA1 (dst, src);                     break;
2524                 case COMPRESSEDTEXFORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:        decompressETC2_RGB8_PUNCHTHROUGH_ALPHA1 (dst, src);                     break;
2525                 case COMPRESSEDTEXFORMAT_ETC2_EAC_RGBA8:                                        decompressETC2_EAC_RGBA8                                (dst, src);                     break;
2526                 case COMPRESSEDTEXFORMAT_ETC2_EAC_SRGB8_ALPHA8:                         decompressETC2_EAC_RGBA8                                (dst, src);                     break;
2527
2528                 case COMPRESSEDTEXFORMAT_ASTC_4x4_RGBA:
2529                 case COMPRESSEDTEXFORMAT_ASTC_5x4_RGBA:
2530                 case COMPRESSEDTEXFORMAT_ASTC_5x5_RGBA:
2531                 case COMPRESSEDTEXFORMAT_ASTC_6x5_RGBA:
2532                 case COMPRESSEDTEXFORMAT_ASTC_6x6_RGBA:
2533                 case COMPRESSEDTEXFORMAT_ASTC_8x5_RGBA:
2534                 case COMPRESSEDTEXFORMAT_ASTC_8x6_RGBA:
2535                 case COMPRESSEDTEXFORMAT_ASTC_8x8_RGBA:
2536                 case COMPRESSEDTEXFORMAT_ASTC_10x5_RGBA:
2537                 case COMPRESSEDTEXFORMAT_ASTC_10x6_RGBA:
2538                 case COMPRESSEDTEXFORMAT_ASTC_10x8_RGBA:
2539                 case COMPRESSEDTEXFORMAT_ASTC_10x10_RGBA:
2540                 case COMPRESSEDTEXFORMAT_ASTC_12x10_RGBA:
2541                 case COMPRESSEDTEXFORMAT_ASTC_12x12_RGBA:
2542                 case COMPRESSEDTEXFORMAT_ASTC_4x4_SRGB8_ALPHA8:
2543                 case COMPRESSEDTEXFORMAT_ASTC_5x4_SRGB8_ALPHA8:
2544                 case COMPRESSEDTEXFORMAT_ASTC_5x5_SRGB8_ALPHA8:
2545                 case COMPRESSEDTEXFORMAT_ASTC_6x5_SRGB8_ALPHA8:
2546                 case COMPRESSEDTEXFORMAT_ASTC_6x6_SRGB8_ALPHA8:
2547                 case COMPRESSEDTEXFORMAT_ASTC_8x5_SRGB8_ALPHA8:
2548                 case COMPRESSEDTEXFORMAT_ASTC_8x6_SRGB8_ALPHA8:
2549                 case COMPRESSEDTEXFORMAT_ASTC_8x8_SRGB8_ALPHA8:
2550                 case COMPRESSEDTEXFORMAT_ASTC_10x5_SRGB8_ALPHA8:
2551                 case COMPRESSEDTEXFORMAT_ASTC_10x6_SRGB8_ALPHA8:
2552                 case COMPRESSEDTEXFORMAT_ASTC_10x8_SRGB8_ALPHA8:
2553                 case COMPRESSEDTEXFORMAT_ASTC_10x10_SRGB8_ALPHA8:
2554                 case COMPRESSEDTEXFORMAT_ASTC_12x10_SRGB8_ALPHA8:
2555                 case COMPRESSEDTEXFORMAT_ASTC_12x12_SRGB8_ALPHA8:
2556                 {
2557                         DE_ASSERT(params.astcMode == TexDecompressionParams::ASTCMODE_LDR || params.astcMode == TexDecompressionParams::ASTCMODE_HDR);
2558
2559                         const bool isSRGBFormat = isAstcSRGBFormat(format);
2560                         decompressASTC(dst, src, isSRGBFormat, isSRGBFormat || params.astcMode == TexDecompressionParams::ASTCMODE_LDR);
2561
2562                         break;
2563                 }
2564
2565                 default:
2566                         DE_ASSERT(false);
2567                         break;
2568         }
2569 }
2570
2571 int componentSum (const IVec3& vec)
2572 {
2573         return vec.x() + vec.y() + vec.z();
2574 }
2575
2576 } // anonymous
2577
2578 void decompress (const PixelBufferAccess& dst, CompressedTexFormat fmt, const deUint8* src, const TexDecompressionParams& params)
2579 {
2580         const int                               blockSize                       = getBlockSize(fmt);
2581         const IVec3                             blockPixelSize          (getBlockPixelSize(fmt));
2582         const IVec3                             blockCount                      (divRoundUp(dst.getWidth(),             blockPixelSize.x()),
2583                                                                                                  divRoundUp(dst.getHeight(),    blockPixelSize.y()),
2584                                                                                                  divRoundUp(dst.getDepth(),             blockPixelSize.z()));
2585         const IVec3                             blockPitches            (blockSize, blockSize * blockCount.x(), blockSize * blockCount.x() * blockCount.y());
2586
2587         std::vector<deUint8>    uncompressedBlock       (dst.getFormat().getPixelSize() * blockPixelSize.x() * blockPixelSize.y() * blockPixelSize.z());
2588         const PixelBufferAccess blockAccess                     (getUncompressedFormat(fmt), blockPixelSize.x(), blockPixelSize.y(), blockPixelSize.z(), &uncompressedBlock[0]);
2589
2590         DE_ASSERT(dst.getFormat() == getUncompressedFormat(fmt));
2591
2592         for (int blockZ = 0; blockZ < blockCount.z(); blockZ++)
2593         for (int blockY = 0; blockY < blockCount.y(); blockY++)
2594         for (int blockX = 0; blockX < blockCount.x(); blockX++)
2595         {
2596                 const IVec3                             blockPos        (blockX, blockY, blockZ);
2597                 const deUint8* const    blockPtr        = src + componentSum(blockPos * blockPitches);
2598                 const IVec3                             copySize        (de::min(blockPixelSize.x(), dst.getWidth()             - blockPos.x() * blockPixelSize.x()),
2599                                                                                          de::min(blockPixelSize.y(), dst.getHeight()    - blockPos.y() * blockPixelSize.y()),
2600                                                                                          de::min(blockPixelSize.z(), dst.getDepth()             - blockPos.z() * blockPixelSize.z()));
2601                 const IVec3                             dstPixelPos     = blockPos * blockPixelSize;
2602
2603                 decompressBlock(fmt, blockAccess, blockPtr, params);
2604
2605                 copyRawPixels(getSubregion(dst, dstPixelPos.x(), dstPixelPos.y(), dstPixelPos.z(), copySize.x(), copySize.y(), copySize.z()), getSubregion(blockAccess, 0, 0, 0, copySize.x(), copySize.y(), copySize.z()));
2606         }
2607 }
2608
2609 CompressedTexture::CompressedTexture (void)
2610         : m_format      (COMPRESSEDTEXFORMAT_LAST)
2611         , m_width       (0)
2612         , m_height      (0)
2613         , m_depth       (0)
2614 {
2615 }
2616
2617 CompressedTexture::CompressedTexture (CompressedTexFormat format, int width, int height, int depth)
2618         : m_format      (COMPRESSEDTEXFORMAT_LAST)
2619         , m_width       (0)
2620         , m_height      (0)
2621         , m_depth       (0)
2622 {
2623         setStorage(format, width, height, depth);
2624 }
2625
2626 CompressedTexture::~CompressedTexture (void)
2627 {
2628 }
2629
2630 void CompressedTexture::setStorage (CompressedTexFormat format, int width, int height, int depth)
2631 {
2632         m_format        = format;
2633         m_width         = width;
2634         m_height        = height;
2635         m_depth         = depth;
2636
2637         if (isAstcFormat(m_format) && m_depth > 1)
2638                 throw InternalError("3D ASTC textures not currently supported");
2639
2640         if (m_format != COMPRESSEDTEXFORMAT_LAST)
2641         {
2642                 const IVec3     blockPixelSize  = getBlockPixelSize(m_format);
2643                 const int       blockSize               = getBlockSize(m_format);
2644
2645                 m_data.resize(divRoundUp(m_width, blockPixelSize.x()) * divRoundUp(m_height, blockPixelSize.y()) * divRoundUp(m_depth, blockPixelSize.z()) * blockSize);
2646         }
2647         else
2648         {
2649                 DE_ASSERT(m_format == COMPRESSEDTEXFORMAT_LAST);
2650                 DE_ASSERT(m_width == 0 && m_height == 0 && m_depth == 0);
2651                 m_data.resize(0);
2652         }
2653 }
2654
2655 /*--------------------------------------------------------------------*//*!
2656  * \brief Decode to uncompressed pixel data
2657  * \param dst Destination buffer
2658  *//*--------------------------------------------------------------------*/
2659 void CompressedTexture::decompress (const PixelBufferAccess& dst, const TexDecompressionParams& params) const
2660 {
2661         DE_ASSERT(dst.getWidth() == m_width && dst.getHeight() == m_height && dst.getDepth() == m_depth);
2662         DE_ASSERT(dst.getFormat() == getUncompressedFormat(m_format));
2663
2664         tcu::decompress(dst, m_format, &m_data[0], params);
2665 }
2666
2667 } // tcu