Skip OOB SSBO fragment tests for ES3.1 GPUs am: 66241e9dbb
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / shaderexecutor / vktShaderPackingFunctionTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 The Khronos Group Inc.
6  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7  * Copyright (c) 2016 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Floating-point packing and unpacking function tests.
24  *//*--------------------------------------------------------------------*/
25
26 #include "vktShaderPackingFunctionTests.hpp"
27 #include "vktShaderExecutor.hpp"
28 #include "tcuTestLog.hpp"
29 #include "tcuFormatUtil.hpp"
30 #include "tcuFloat.hpp"
31 #include "deRandom.hpp"
32 #include "deMath.h"
33 #include "deString.h"
34 #include "deSharedPtr.hpp"
35
36 namespace vkt
37 {
38 namespace shaderexecutor
39 {
40
41 using namespace shaderexecutor;
42
43 using std::string;
44 using tcu::TestLog;
45
46 namespace
47 {
48
49 inline deUint32 getUlpDiff (float a, float b)
50 {
51         const deUint32  aBits   = tcu::Float32(a).bits();
52         const deUint32  bBits   = tcu::Float32(b).bits();
53         return aBits > bBits ? aBits - bBits : bBits - aBits;
54 }
55
// Thin wrapper around a float. Its stream operator prints the decimal value
// followed by the raw bit pattern in hex.
struct HexFloat
{
	const float value;	// Wrapped value; fixed at construction.

	// Intentionally not explicit: a plain float converts to HexFloat implicitly.
	HexFloat (const float wrapped)
		: value(wrapped)
	{
	}
};
61
62 std::ostream& operator<< (std::ostream& str, const HexFloat& v)
63 {
64         return str << v.value << " / " << tcu::toHex(tcu::Float32(v.value).bits());
65 }
66
67 static const char* getPrecisionPostfix (glu::Precision precision)
68 {
69         static const char* s_postfix[] =
70         {
71                 "_lowp",
72                 "_mediump",
73                 "_highp"
74         };
75         DE_STATIC_ASSERT(DE_LENGTH_OF_ARRAY(s_postfix) == glu::PRECISION_LAST);
76         DE_ASSERT(de::inBounds<int>(precision, 0, DE_LENGTH_OF_ARRAY(s_postfix)));
77         return s_postfix[precision];
78 }
79
80 static const char* getShaderTypePostfix (glu::ShaderType shaderType)
81 {
82         static const char* s_postfix[] =
83         {
84                 "_vertex",
85                 "_fragment",
86                 "_geometry",
87                 "_tess_control",
88                 "_tess_eval",
89                 "_compute"
90         };
91         DE_ASSERT(de::inBounds<int>(shaderType, 0, DE_LENGTH_OF_ARRAY(s_postfix)));
92         return s_postfix[shaderType];
93 }
94
95 } // anonymous
96
97 // ShaderPackingFunctionCase
98
99 class ShaderPackingFunctionCase : public TestCase
100 {
101 public:
102                                                                                 ShaderPackingFunctionCase                       (tcu::TestContext& testCtx, const char* name, const char* description, glu::ShaderType shaderType);
103                                                                                 ~ShaderPackingFunctionCase                      (void);
104
105         virtual void                                            initPrograms                                            (vk::SourceCollections& programCollection) const
106                                                                                 {
107                                                                                         generateSources(m_shaderType, m_spec, programCollection);
108                                                                                 }
109
110 protected:
111         const glu::ShaderType                           m_shaderType;
112         ShaderSpec                                                      m_spec;
113
114 private:
115                                                                                 ShaderPackingFunctionCase                       (const ShaderPackingFunctionCase& other);
116         ShaderPackingFunctionCase&                      operator=                                                       (const ShaderPackingFunctionCase& other);
117 };
118
119 ShaderPackingFunctionCase::ShaderPackingFunctionCase (tcu::TestContext& testCtx, const char* name, const char* description, glu::ShaderType shaderType)
120         : TestCase              (testCtx, name, description)
121         , m_shaderType  (shaderType)
122 {
123 }
124
125 ShaderPackingFunctionCase::~ShaderPackingFunctionCase (void)
126 {
127 }
128
129 // ShaderPackingFunctionTestInstance
130
131 class ShaderPackingFunctionTestInstance : public TestInstance
132 {
133 public:
134                                                                                 ShaderPackingFunctionTestInstance       (Context& context, glu::ShaderType shaderType, const ShaderSpec& spec, const char* name)
135                                                                                         : TestInstance  (context)
136                                                                                         , m_testCtx             (context.getTestContext())
137                                                                                         , m_shaderType  (shaderType)
138                                                                                         , m_spec                (spec)
139                                                                                         , m_name                (name)
140                                                                                         , m_executor    (createExecutor(context, m_shaderType, m_spec))
141                                                                                 {
142                                                                                 }
143         virtual tcu::TestStatus                         iterate                                                         (void) = 0;
144 protected:
145         tcu::TestContext&                                       m_testCtx;
146         const glu::ShaderType                           m_shaderType;
147         ShaderSpec                                                      m_spec;
148         const char*                                                     m_name;
149         de::UniquePtr<ShaderExecutor>           m_executor;
150 };
151
152 // Test cases
153
154 class PackSnorm2x16CaseInstance: public ShaderPackingFunctionTestInstance
155 {
156 public:
157         PackSnorm2x16CaseInstance (Context& context, glu::ShaderType shaderType, const ShaderSpec& spec, glu::Precision precision, const char* name)
158                 : ShaderPackingFunctionTestInstance     (context, shaderType, spec, name)
159                 , m_precision                                           (precision)
160         {
161         }
162
163         tcu::TestStatus iterate (void)
164         {
165                 de::Random                                      rnd                     (deStringHash(m_name) ^ 0x776002);
166                 std::vector<tcu::Vec2>          inputs;
167                 std::vector<deUint32>           outputs;
168                 const int                                       maxDiff         = m_precision == glu::PRECISION_HIGHP   ? 1             :               // Rounding only.
169                                                                                                   m_precision == glu::PRECISION_MEDIUMP ? 33    :               // (2^-10) * (2^15) + 1
170                                                                                                   m_precision == glu::PRECISION_LOWP    ? 129   : 0;    // (2^-8) * (2^15) + 1
171
172                 // Special values to check.
173                 inputs.push_back(tcu::Vec2(0.0f, 0.0f));
174                 inputs.push_back(tcu::Vec2(-1.0f, 1.0f));
175                 inputs.push_back(tcu::Vec2(0.5f, -0.5f));
176                 inputs.push_back(tcu::Vec2(-1.5f, 1.5f));
177                 inputs.push_back(tcu::Vec2(0.25f, -0.75f));
178
179                 // Random values, mostly in range.
180                 for (int ndx = 0; ndx < 15; ndx++)
181                 {
182                         const float x = rnd.getFloat()*2.5f - 1.25f;
183                         const float y = rnd.getFloat()*2.5f - 1.25f;
184                         inputs.push_back(tcu::Vec2(x, y));
185                 }
186
187                 // Large random values.
188                 for (int ndx = 0; ndx < 80; ndx++)
189                 {
190                         const float x = rnd.getFloat()*1e6f - 0.5e6f;
191                         const float y = rnd.getFloat()*1e6f - 0.5e6f;
192                         inputs.push_back(tcu::Vec2(x, y));
193                 }
194
195                 outputs.resize(inputs.size());
196
197                 m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values" << tcu::TestLog::EndMessage;
198
199                 {
200                         const void*     in      = &inputs[0];
201                         void*           out     = &outputs[0];
202
203                         m_executor->execute((int)inputs.size(), &in, &out);
204                 }
205
206                 // Verify
207                 {
208                         const int       numValues       = (int)inputs.size();
209                         const int       maxPrints       = 10;
210                         int                     numFailed       = 0;
211
212                         for (int valNdx = 0; valNdx < numValues; valNdx++)
213                         {
214                                 const deUint16  ref0    = (deUint16)de::clamp(deRoundFloatToInt32(de::clamp(inputs[valNdx].x(), -1.0f, 1.0f) * 32767.0f), -(1<<15), (1<<15)-1);
215                                 const deUint16  ref1    = (deUint16)de::clamp(deRoundFloatToInt32(de::clamp(inputs[valNdx].y(), -1.0f, 1.0f) * 32767.0f), -(1<<15), (1<<15)-1);
216                                 const deUint32  ref             = (ref1 << 16) | ref0;
217                                 const deUint32  res             = outputs[valNdx];
218                                 const deUint16  res0    = (deUint16)(res & 0xffff);
219                                 const deUint16  res1    = (deUint16)(res >> 16);
220                                 const int               diff0   = de::abs((int)ref0 - (int)res0);
221                                 const int               diff1   = de::abs((int)ref1 - (int)res1);
222
223                                 if (diff0 > maxDiff || diff1 > maxDiff)
224                                 {
225                                         if (numFailed < maxPrints)
226                                         {
227                                                 m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx
228                                                                                                                            << ", expected packSnorm2x16(" << inputs[valNdx] << ") = " << tcu::toHex(ref)
229                                                                                                                            << ", got " << tcu::toHex(res)
230                                                                                                                            << "\n  diffs = (" << diff0 << ", " << diff1 << "), max diff = " << maxDiff
231                                                                                    << TestLog::EndMessage;
232                                         }
233                                         else if (numFailed == maxPrints)
234                                                 m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
235
236                                         numFailed += 1;
237                                 }
238                         }
239
240                         m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed" << TestLog::EndMessage;
241
242                         if (numFailed == 0)
243                                 return tcu::TestStatus::pass("Pass");
244                         else
245                                 return tcu::TestStatus::fail("Result comparison failed");
246
247                 }
248         }
249
250 private:
251         const glu::Precision m_precision;
252 };
253
254 class PackSnorm2x16Case : public ShaderPackingFunctionCase
255 {
256 public:
257         PackSnorm2x16Case (tcu::TestContext& testCtx, glu::ShaderType shaderType, glu::Precision precision)
258                 : ShaderPackingFunctionCase     (testCtx, (string("packsnorm2x16") + getPrecisionPostfix(precision) + getShaderTypePostfix(shaderType)).c_str(), "packSnorm2x16", shaderType)
259                 , m_precision                           (precision)
260         {
261                 m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_FLOAT_VEC2, precision)));
262                 m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
263
264                 m_spec.source = "out0 = packSnorm2x16(in0);";
265         }
266
267         TestInstance* createInstance (Context& ctx) const
268         {
269                 return new PackSnorm2x16CaseInstance(ctx, m_shaderType, m_spec, m_precision, getName());
270         }
271
272 private:
273         const glu::Precision m_precision;
274 };
275
276 class UnpackSnorm2x16CaseInstance : public ShaderPackingFunctionTestInstance
277 {
278 public:
279         UnpackSnorm2x16CaseInstance (Context& context, glu::ShaderType shaderType, const ShaderSpec& spec, const char* name)
280         : ShaderPackingFunctionTestInstance (context, shaderType, spec, name)
281         {
282         }
283
284         tcu::TestStatus iterate (void)
285         {
286                 const deUint32                          maxDiff         = 1; // Rounding error.
287                 de::Random                                      rnd                     (deStringHash(m_name) ^ 0x776002);
288                 std::vector<deUint32>           inputs;
289                 std::vector<tcu::Vec2>          outputs;
290
291                 inputs.push_back(0x00000000u);
292                 inputs.push_back(0x7fff8000u);
293                 inputs.push_back(0x80007fffu);
294                 inputs.push_back(0xffffffffu);
295                 inputs.push_back(0x0001fffeu);
296
297                 // Random values.
298                 for (int ndx = 0; ndx < 95; ndx++)
299                         inputs.push_back(rnd.getUint32());
300
301                 outputs.resize(inputs.size());
302
303                 m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values" << tcu::TestLog::EndMessage;
304
305                 {
306                         const void*     in      = &inputs[0];
307                         void*           out     = &outputs[0];
308
309                         m_executor->execute((int)inputs.size(), &in, &out);
310                 }
311
312                 // Verify
313                 {
314                         const int       numValues       = (int)inputs.size();
315                         const int       maxPrints       = 10;
316                         int                     numFailed       = 0;
317
318                         for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
319                         {
320                                 const deInt16   in0                     = (deInt16)(deUint16)(inputs[valNdx] & 0xffff);
321                                 const deInt16   in1                     = (deInt16)(deUint16)(inputs[valNdx] >> 16);
322                                 const float             ref0            = de::clamp(float(in0) / 32767.f, -1.0f, 1.0f);
323                                 const float             ref1            = de::clamp(float(in1) / 32767.f, -1.0f, 1.0f);
324                                 const float             res0            = outputs[valNdx].x();
325                                 const float             res1            = outputs[valNdx].y();
326
327                                 const deUint32  diff0   = getUlpDiff(ref0, res0);
328                                 const deUint32  diff1   = getUlpDiff(ref1, res1);
329
330                                 if (diff0 > maxDiff || diff1 > maxDiff)
331                                 {
332                                         if (numFailed < maxPrints)
333                                         {
334                                                 m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx << ",\n"
335                                                                                                                            << "  expected unpackSnorm2x16(" << tcu::toHex(inputs[valNdx]) << ") = "
336                                                                                                                            << "vec2(" << HexFloat(ref0) << ", " << HexFloat(ref1) << ")"
337                                                                                                                            << ", got vec2(" << HexFloat(res0) << ", " << HexFloat(res1) << ")"
338                                                                                                                            << "\n  ULP diffs = (" << diff0 << ", " << diff1 << "), max diff = " << maxDiff
339                                                                                    << TestLog::EndMessage;
340                                         }
341                                         else if (numFailed == maxPrints)
342                                                 m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
343
344                                         numFailed += 1;
345                                 }
346                         }
347
348                         m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed" << TestLog::EndMessage;
349
350                         if (numFailed == 0)
351                                 return tcu::TestStatus::pass("Pass");
352                         else
353                                 return tcu::TestStatus::fail("Result comparison failed");
354
355                 }
356         }
357 };
358
359 class UnpackSnorm2x16Case : public ShaderPackingFunctionCase
360 {
361 public:
362         UnpackSnorm2x16Case (tcu::TestContext& testCtx, glu::ShaderType shaderType)
363                 : ShaderPackingFunctionCase     (testCtx, (string("unpacksnorm2x16") + getShaderTypePostfix(shaderType)).c_str(), "unpackSnorm2x16", shaderType)
364         {
365                 m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
366                 m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_FLOAT_VEC2, glu::PRECISION_HIGHP)));
367
368                 m_spec.source = "out0 = unpackSnorm2x16(in0);";
369         }
370
371         TestInstance* createInstance (Context& ctx) const
372         {
373                 return new UnpackSnorm2x16CaseInstance(ctx, m_shaderType, m_spec, getName());
374         }
375 };
376
377 class PackUnorm2x16CaseInstance : public ShaderPackingFunctionTestInstance
378 {
379 public:
380         PackUnorm2x16CaseInstance (Context& context, glu::ShaderType shaderType, const ShaderSpec& spec, glu::Precision precision, const char* name)
381         : ShaderPackingFunctionTestInstance     (context, shaderType, spec, name)
382         , m_precision                                           (precision)
383         {
384         }
385
386         tcu::TestStatus iterate (void)
387         {
388                 de::Random                                      rnd                     (deStringHash(m_name) ^ 0x776002);
389                 std::vector<tcu::Vec2>          inputs;
390                 std::vector<deUint32>           outputs;
391                 const int                                       maxDiff         = m_precision == glu::PRECISION_HIGHP   ? 1             :               // Rounding only.
392                                                                                                   m_precision == glu::PRECISION_MEDIUMP ? 65    :               // (2^-10) * (2^16) + 1
393                                                                                                   m_precision == glu::PRECISION_LOWP    ? 257   : 0;    // (2^-8) * (2^16) + 1
394
395                 // Special values to check.
396                 inputs.push_back(tcu::Vec2(0.0f, 0.0f));
397                 inputs.push_back(tcu::Vec2(0.5f, 1.0f));
398                 inputs.push_back(tcu::Vec2(1.0f, 0.5f));
399                 inputs.push_back(tcu::Vec2(-0.5f, 1.5f));
400                 inputs.push_back(tcu::Vec2(0.25f, 0.75f));
401
402                 // Random values, mostly in range.
403                 for (int ndx = 0; ndx < 15; ndx++)
404                 {
405                         const float x = rnd.getFloat()*1.25f;
406                         const float y = rnd.getFloat()*1.25f;
407                         inputs.push_back(tcu::Vec2(x, y));
408                 }
409
410                 // Large random values.
411                 for (int ndx = 0; ndx < 80; ndx++)
412                 {
413                         const float x = rnd.getFloat()*1e6f - 1e5f;
414                         const float y = rnd.getFloat()*1e6f - 1e5f;
415                         inputs.push_back(tcu::Vec2(x, y));
416                 }
417
418                 outputs.resize(inputs.size());
419
420                 m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values" << tcu::TestLog::EndMessage;
421
422                 {
423                         const void*     in      = &inputs[0];
424                         void*           out     = &outputs[0];
425
426                         m_executor->execute((int)inputs.size(), &in, &out);
427                 }
428
429                 // Verify
430                 {
431                         const int       numValues       = (int)inputs.size();
432                         const int       maxPrints       = 10;
433                         int                     numFailed       = 0;
434
435                         for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
436                         {
437                                 const deUint16  ref0    = (deUint16)de::clamp(deRoundFloatToInt32(de::clamp(inputs[valNdx].x(), 0.0f, 1.0f) * 65535.0f), 0, (1<<16)-1);
438                                 const deUint16  ref1    = (deUint16)de::clamp(deRoundFloatToInt32(de::clamp(inputs[valNdx].y(), 0.0f, 1.0f) * 65535.0f), 0, (1<<16)-1);
439                                 const deUint32  ref             = (ref1 << 16) | ref0;
440                                 const deUint32  res             = outputs[valNdx];
441                                 const deUint16  res0    = (deUint16)(res & 0xffff);
442                                 const deUint16  res1    = (deUint16)(res >> 16);
443                                 const int               diff0   = de::abs((int)ref0 - (int)res0);
444                                 const int               diff1   = de::abs((int)ref1 - (int)res1);
445
446                                 if (diff0 > maxDiff || diff1 > maxDiff)
447                                 {
448                                         if (numFailed < maxPrints)
449                                         {
450                                                 m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx
451                                                                                                                            << ", expected packUnorm2x16(" << inputs[valNdx] << ") = " << tcu::toHex(ref)
452                                                                                                                            << ", got " << tcu::toHex(res)
453                                                                                                                            << "\n  diffs = (" << diff0 << ", " << diff1 << "), max diff = " << maxDiff
454                                                                                    << TestLog::EndMessage;
455                                         }
456                                         else if (numFailed == maxPrints)
457                                                 m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
458
459                                         numFailed += 1;
460                                 }
461                         }
462
463                         m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed" << TestLog::EndMessage;
464
465                         if (numFailed == 0)
466                                 return tcu::TestStatus::pass("Pass");
467                         else
468                                 return tcu::TestStatus::fail("Result comparison failed");
469
470                 }
471         }
472
473 private:
474         const glu::Precision m_precision;
475 };
476
477 class PackUnorm2x16Case : public ShaderPackingFunctionCase
478 {
479 public:
480         PackUnorm2x16Case (tcu::TestContext& testCtx, glu::ShaderType shaderType, glu::Precision precision)
481                 : ShaderPackingFunctionCase     (testCtx, (string("packunorm2x16") + getPrecisionPostfix(precision) + getShaderTypePostfix(shaderType)).c_str(), "packUnorm2x16", shaderType)
482                 , m_precision                           (precision)
483         {
484                 m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_FLOAT_VEC2, precision)));
485                 m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
486
487                 m_spec.source = "out0 = packUnorm2x16(in0);";
488         }
489
490         TestInstance* createInstance (Context& ctx) const
491         {
492                 return new PackUnorm2x16CaseInstance(ctx, m_shaderType, m_spec, m_precision, getName());
493         }
494
495 private:
496         const glu::Precision m_precision;
497 };
498
499 class UnpackUnorm2x16CaseInstance : public ShaderPackingFunctionTestInstance
500 {
501 public:
502         UnpackUnorm2x16CaseInstance (Context& context, glu::ShaderType shaderType, const ShaderSpec& spec, const char* name)
503                 : ShaderPackingFunctionTestInstance (context, shaderType, spec, name)
504         {
505         }
506
507         tcu::TestStatus iterate (void)
508         {
509                 const deUint32                          maxDiff         = 1; // Rounding error.
510                 de::Random                                      rnd                     (deStringHash(m_name) ^ 0x776002);
511                 std::vector<deUint32>           inputs;
512                 std::vector<tcu::Vec2>          outputs;
513
514                 inputs.push_back(0x00000000u);
515                 inputs.push_back(0x7fff8000u);
516                 inputs.push_back(0x80007fffu);
517                 inputs.push_back(0xffffffffu);
518                 inputs.push_back(0x0001fffeu);
519
520                 // Random values.
521                 for (int ndx = 0; ndx < 95; ndx++)
522                         inputs.push_back(rnd.getUint32());
523
524                 outputs.resize(inputs.size());
525
526                 m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values" << tcu::TestLog::EndMessage;
527
528                 {
529                         const void*     in      = &inputs[0];
530                         void*           out     = &outputs[0];
531
532                         m_executor->execute((int)inputs.size(), &in, &out);
533                 }
534
535                 // Verify
536                 {
537                         const int       numValues       = (int)inputs.size();
538                         const int       maxPrints       = 10;
539                         int                     numFailed       = 0;
540
541                         for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
542                         {
543                                 const deUint16  in0                     = (deUint16)(inputs[valNdx] & 0xffff);
544                                 const deUint16  in1                     = (deUint16)(inputs[valNdx] >> 16);
545                                 const float             ref0            = float(in0) / 65535.0f;
546                                 const float             ref1            = float(in1) / 65535.0f;
547                                 const float             res0            = outputs[valNdx].x();
548                                 const float             res1            = outputs[valNdx].y();
549
550                                 const deUint32  diff0           = getUlpDiff(ref0, res0);
551                                 const deUint32  diff1           = getUlpDiff(ref1, res1);
552
553                                 if (diff0 > maxDiff || diff1 > maxDiff)
554                                 {
555                                         if (numFailed < maxPrints)
556                                         {
557                                                 m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx << ",\n"
558                                                                                                                            << "  expected unpackUnorm2x16(" << tcu::toHex(inputs[valNdx]) << ") = "
559                                                                                                                            << "vec2(" << HexFloat(ref0) << ", " << HexFloat(ref1) << ")"
560                                                                                                                            << ", got vec2(" << HexFloat(res0) << ", " << HexFloat(res1) << ")"
561                                                                                                                            << "\n  ULP diffs = (" << diff0 << ", " << diff1 << "), max diff = " << maxDiff
562                                                                                    << TestLog::EndMessage;
563                                         }
564                                         else if (numFailed == maxPrints)
565                                                 m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
566
567                                         numFailed += 1;
568                                 }
569                         }
570
571                         m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed" << TestLog::EndMessage;
572
573                         if (numFailed == 0)
574                                 return tcu::TestStatus::pass("Pass");
575                         else
576                                 return tcu::TestStatus::fail("Result comparison failed");
577
578                 }
579         }
580 };
581
582
583 class UnpackUnorm2x16Case : public ShaderPackingFunctionCase
584 {
585 public:
586         UnpackUnorm2x16Case (tcu::TestContext& testCtx, glu::ShaderType shaderType)
587                 : ShaderPackingFunctionCase(testCtx, (string("unpackunorm2x16") + getShaderTypePostfix(shaderType)).c_str(), "unpackUnorm2x16", shaderType)
588         {
589                 m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
590                 m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_FLOAT_VEC2, glu::PRECISION_HIGHP)));
591
592                 m_spec.source = "out0 = unpackUnorm2x16(in0);";
593         }
594
595         TestInstance* createInstance (Context& ctx) const
596         {
597                 return new UnpackUnorm2x16CaseInstance(ctx, m_shaderType, m_spec, getName());
598         }
599
600 };
601
602 class PackHalf2x16CaseInstance : public ShaderPackingFunctionTestInstance
603 {
604 public:
605         PackHalf2x16CaseInstance (Context& context, glu::ShaderType shaderType, const ShaderSpec& spec, const char* name)
606         : ShaderPackingFunctionTestInstance (context, shaderType, spec, name)
607         {
608         }
609
610         tcu::TestStatus iterate (void)
611         {
612                 const int                                       maxDiff         = 0; // Values can be represented exactly in mediump.
613                 de::Random                                      rnd                     (deStringHash(m_name) ^ 0x776002);
614                 std::vector<tcu::Vec2>          inputs;
615                 std::vector<deUint32>           outputs;
616
617                 // Special values to check.
618                 inputs.push_back(tcu::Vec2(0.0f, 0.0f));
619                 inputs.push_back(tcu::Vec2(0.5f, 1.0f));
620                 inputs.push_back(tcu::Vec2(1.0f, 0.5f));
621                 inputs.push_back(tcu::Vec2(-0.5f, 1.5f));
622                 inputs.push_back(tcu::Vec2(0.25f, 0.75f));
623
624                 // Random values.
625                 {
626                         const int       minExp  = -14;
627                         const int       maxExp  = 15;
628
629                         for (int ndx = 0; ndx < 95; ndx++)
630                         {
631                                 tcu::Vec2 v;
632                                 for (int c = 0; c < 2; c++)
633                                 {
634                                         const int               s                       = rnd.getBool() ? 1 : -1;
635                                         const int               exp                     = rnd.getInt(minExp, maxExp);
636                                         const deUint32  mantissa        = rnd.getUint32() & ((1<<23)-1);
637
638                                         v[c] = tcu::Float32::construct(s, exp ? exp : 1 /* avoid denormals */, (1u<<23) | mantissa).asFloat();
639                                 }
640                                 inputs.push_back(v);
641                         }
642                 }
643
644                 // Convert input values to fp16 and back to make sure they can be represented exactly in mediump.
645                 for (std::vector<tcu::Vec2>::iterator inVal = inputs.begin(); inVal != inputs.end(); ++inVal)
646                         *inVal = tcu::Vec2(tcu::Float16(inVal->x()).asFloat(), tcu::Float16(inVal->y()).asFloat());
647
648                 outputs.resize(inputs.size());
649
650                 m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values" << tcu::TestLog::EndMessage;
651
652                 {
653                         const void*     in      = &inputs[0];
654                         void*           out     = &outputs[0];
655
656                         m_executor->execute((int)inputs.size(), &in, &out);
657                 }
658
659                 // Verify
660                 {
661                         const int       numValues       = (int)inputs.size();
662                         const int       maxPrints       = 10;
663                         int                     numFailed       = 0;
664
665                         for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
666                         {
667                                 const deUint16  ref0    = (deUint16)tcu::Float16(inputs[valNdx].x()).bits();
668                                 const deUint16  ref1    = (deUint16)tcu::Float16(inputs[valNdx].y()).bits();
669                                 const deUint32  ref             = (ref1 << 16) | ref0;
670                                 const deUint32  res             = outputs[valNdx];
671                                 const deUint16  res0    = (deUint16)(res & 0xffff);
672                                 const deUint16  res1    = (deUint16)(res >> 16);
673                                 const int               diff0   = de::abs((int)ref0 - (int)res0);
674                                 const int               diff1   = de::abs((int)ref1 - (int)res1);
675
676                                 if (diff0 > maxDiff || diff1 > maxDiff)
677                                 {
678                                         if (numFailed < maxPrints)
679                                         {
680                                                 m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx
681                                                                                                                            << ", expected packHalf2x16(" << inputs[valNdx] << ") = " << tcu::toHex(ref)
682                                                                                                                            << ", got " << tcu::toHex(res)
683                                                                                                                            << "\n  diffs = (" << diff0 << ", " << diff1 << "), max diff = " << maxDiff
684                                                                                    << TestLog::EndMessage;
685                                         }
686                                         else if (numFailed == maxPrints)
687                                                 m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
688
689                                         numFailed += 1;
690                                 }
691                         }
692
693                         m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed" << TestLog::EndMessage;
694
695                         if (numFailed == 0)
696                                 return tcu::TestStatus::pass("Pass");
697                         else
698                                 return tcu::TestStatus::fail("Result comparison failed");
699
700                 }
701         }
702 };
703
704 class PackHalf2x16Case : public ShaderPackingFunctionCase
705 {
706 public:
707         PackHalf2x16Case (tcu::TestContext& testCtx, glu::ShaderType shaderType)
708                 : ShaderPackingFunctionCase     (testCtx, (string("packhalf2x16") + getShaderTypePostfix(shaderType)).c_str(), "packHalf2x16", shaderType)
709         {
710                 m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_FLOAT_VEC2, glu::PRECISION_HIGHP)));
711                 m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
712
713                 m_spec.source = "out0 = packHalf2x16(in0);";
714         }
715
716         TestInstance* createInstance (Context& ctx) const
717         {
718                 return new PackHalf2x16CaseInstance(ctx, m_shaderType, m_spec, getName());
719         }
720
721 };
722
723 class UnpackHalf2x16CaseInstance : public ShaderPackingFunctionTestInstance
724 {
725 public:
726         UnpackHalf2x16CaseInstance (Context& context, glu::ShaderType shaderType, const ShaderSpec& spec, const char* name)
727         : ShaderPackingFunctionTestInstance (context, shaderType, spec, name)
728         {
729         }
730
731         tcu::TestStatus iterate (void)
732         {
733                 const int                                       maxDiff         = 0; // All bits must be accurate.
734                 de::Random                                      rnd                     (deStringHash(m_name) ^ 0x776002);
735                 std::vector<deUint32>           inputs;
736                 std::vector<tcu::Vec2>          outputs;
737
738                 // Special values.
739                 inputs.push_back((tcu::Float16( 0.0f).bits() << 16) | tcu::Float16( 1.0f).bits());
740                 inputs.push_back((tcu::Float16( 1.0f).bits() << 16) | tcu::Float16( 0.0f).bits());
741                 inputs.push_back((tcu::Float16(-1.0f).bits() << 16) | tcu::Float16( 0.5f).bits());
742                 inputs.push_back((tcu::Float16( 0.5f).bits() << 16) | tcu::Float16(-0.5f).bits());
743
744                 // Construct random values.
745                 {
746                         const int       minExp          = -14;
747                         const int       maxExp          = 15;
748                         const int       mantBits        = 10;
749
750                         for (int ndx = 0; ndx < 96; ndx++)
751                         {
752                                 deUint32 inVal = 0;
753                                 for (int c = 0; c < 2; c++)
754                                 {
755                                         const int               s                       = rnd.getBool() ? 1 : -1;
756                                         const int               exp                     = rnd.getInt(minExp, maxExp);
757                                         const deUint32  mantissa        = rnd.getUint32() & ((1<<mantBits)-1);
758                                         const deUint16  value           = tcu::Float16::construct(s, exp ? exp : 1 /* avoid denorm */, (deUint16)((1u<<10) | mantissa)).bits();
759
760                                         inVal |= value << (16*c);
761                                 }
762                                 inputs.push_back(inVal);
763                         }
764                 }
765
766                 outputs.resize(inputs.size());
767
768                 m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values" << tcu::TestLog::EndMessage;
769
770                 {
771                         const void*     in      = &inputs[0];
772                         void*           out     = &outputs[0];
773
774                         m_executor->execute((int)inputs.size(), &in, &out);
775                 }
776
777                 // Verify
778                 {
779                         const int       numValues       = (int)inputs.size();
780                         const int       maxPrints       = 10;
781                         int                     numFailed       = 0;
782
783                         for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
784                         {
785                                 const deUint16  in0                     = (deUint16)(inputs[valNdx] & 0xffff);
786                                 const deUint16  in1                     = (deUint16)(inputs[valNdx] >> 16);
787                                 const float             ref0            = tcu::Float16(in0).asFloat();
788                                 const float             ref1            = tcu::Float16(in1).asFloat();
789                                 const float             res0            = outputs[valNdx].x();
790                                 const float             res1            = outputs[valNdx].y();
791
792                                 const deUint32  refBits0        = tcu::Float32(ref0).bits();
793                                 const deUint32  refBits1        = tcu::Float32(ref1).bits();
794                                 const deUint32  resBits0        = tcu::Float32(res0).bits();
795                                 const deUint32  resBits1        = tcu::Float32(res1).bits();
796
797                                 const int               diff0   = de::abs((int)refBits0 - (int)resBits0);
798                                 const int               diff1   = de::abs((int)refBits1 - (int)resBits1);
799
800                                 if (diff0 > maxDiff || diff1 > maxDiff)
801                                 {
802                                         if (numFailed < maxPrints)
803                                         {
804                                                 m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx << ",\n"
805                                                                                                                            << "  expected unpackHalf2x16(" << tcu::toHex(inputs[valNdx]) << ") = "
806                                                                                                                            << "vec2(" << ref0 << " / " << tcu::toHex(refBits0) << ", " << ref1 << " / " << tcu::toHex(refBits1) << ")"
807                                                                                                                            << ", got vec2(" << res0 << " / " << tcu::toHex(resBits0) << ", " << res1 << " / " << tcu::toHex(resBits1) << ")"
808                                                                                                                            << "\n  ULP diffs = (" << diff0 << ", " << diff1 << "), max diff = " << maxDiff
809                                                                                    << TestLog::EndMessage;
810                                         }
811                                         else if (numFailed == maxPrints)
812                                                 m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
813
814                                         numFailed += 1;
815                                 }
816                         }
817
818                         m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed" << TestLog::EndMessage;
819
820                         if (numFailed == 0)
821                                 return tcu::TestStatus::pass("Pass");
822                         else
823                                 return tcu::TestStatus::fail("Result comparison failed");
824
825                 }
826         }
827 };
828
829 class UnpackHalf2x16Case : public ShaderPackingFunctionCase
830 {
831 public:
832         UnpackHalf2x16Case (tcu::TestContext& testCtx, glu::ShaderType shaderType)
833                 : ShaderPackingFunctionCase     (testCtx, (string("unpackhalf2x16") + getShaderTypePostfix(shaderType)).c_str(), "unpackHalf2x16", shaderType)
834         {
835                 m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
836                 m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_FLOAT_VEC2, glu::PRECISION_MEDIUMP)));
837
838                 m_spec.source = "out0 = unpackHalf2x16(in0);";
839         }
840
841         TestInstance* createInstance (Context& ctx) const
842         {
843                 return new UnpackHalf2x16CaseInstance(ctx, m_shaderType, m_spec, getName());
844         }
845
846 };
847
848 class PackSnorm4x8CaseInstance : public ShaderPackingFunctionTestInstance
849 {
850 public:
851         PackSnorm4x8CaseInstance (Context& context, glu::ShaderType shaderType, const ShaderSpec& spec, glu::Precision precision, const char* name)
852                 : ShaderPackingFunctionTestInstance     (context, shaderType, spec, name)
853                 , m_precision                                           (precision)
854         {
855         }
856
857         tcu::TestStatus iterate (void)
858         {
859                 de::Random                                      rnd                     (deStringHash(m_name) ^ 0x42f2c0);
860                 std::vector<tcu::Vec4>          inputs;
861                 std::vector<deUint32>           outputs;
862                 const int                                       maxDiff         = m_precision == glu::PRECISION_HIGHP   ? 1     :               // Rounding only.
863                                                                                                   m_precision == glu::PRECISION_MEDIUMP ? 1     :               // (2^-10) * (2^7) + 1
864                                                                                                   m_precision == glu::PRECISION_LOWP    ? 2     : 0;    // (2^-8) * (2^7) + 1
865
866                 // Special values to check.
867                 inputs.push_back(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f));
868                 inputs.push_back(tcu::Vec4(-1.0f, 1.0f, -1.0f, 1.0f));
869                 inputs.push_back(tcu::Vec4(0.5f, -0.5f, -0.5f, 0.5f));
870                 inputs.push_back(tcu::Vec4(-1.5f, 1.5f, -1.5f, 1.5f));
871                 inputs.push_back(tcu::Vec4(0.25f, -0.75f, -0.25f, 0.75f));
872
873                 // Random values, mostly in range.
874                 for (int ndx = 0; ndx < 15; ndx++)
875                 {
876                         const float x = rnd.getFloat()*2.5f - 1.25f;
877                         const float y = rnd.getFloat()*2.5f - 1.25f;
878                         const float z = rnd.getFloat()*2.5f - 1.25f;
879                         const float w = rnd.getFloat()*2.5f - 1.25f;
880                         inputs.push_back(tcu::Vec4(x, y, z, w));
881                 }
882
883                 // Large random values.
884                 for (int ndx = 0; ndx < 80; ndx++)
885                 {
886                         const float x = rnd.getFloat()*1e6f - 0.5e6f;
887                         const float y = rnd.getFloat()*1e6f - 0.5e6f;
888                         const float z = rnd.getFloat()*1e6f - 0.5e6f;
889                         const float w = rnd.getFloat()*1e6f - 0.5e6f;
890                         inputs.push_back(tcu::Vec4(x, y, z, w));
891                 }
892
893                 outputs.resize(inputs.size());
894
895                 m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values" << tcu::TestLog::EndMessage;
896
897                 {
898                         const void*     in      = &inputs[0];
899                         void*           out     = &outputs[0];
900
901                         m_executor->execute((int)inputs.size(), &in, &out);
902                 }
903
904                 // Verify
905                 {
906                         const int       numValues       = (int)inputs.size();
907                         const int       maxPrints       = 10;
908                         int                     numFailed       = 0;
909
910                         for (int valNdx = 0; valNdx < numValues; valNdx++)
911                         {
912                                 const deUint16  ref0    = (deUint8)de::clamp(deRoundFloatToInt32(de::clamp(inputs[valNdx].x(), -1.0f, 1.0f) * 127.0f), -(1<<7), (1<<7)-1);
913                                 const deUint16  ref1    = (deUint8)de::clamp(deRoundFloatToInt32(de::clamp(inputs[valNdx].y(), -1.0f, 1.0f) * 127.0f), -(1<<7), (1<<7)-1);
914                                 const deUint16  ref2    = (deUint8)de::clamp(deRoundFloatToInt32(de::clamp(inputs[valNdx].z(), -1.0f, 1.0f) * 127.0f), -(1<<7), (1<<7)-1);
915                                 const deUint16  ref3    = (deUint8)de::clamp(deRoundFloatToInt32(de::clamp(inputs[valNdx].w(), -1.0f, 1.0f) * 127.0f), -(1<<7), (1<<7)-1);
916                                 const deUint32  ref             = (deUint32(ref3) << 24) | (deUint32(ref2) << 16) | (deUint32(ref1) << 8) | deUint32(ref0);
917                                 const deUint32  res             = outputs[valNdx];
918                                 const deUint16  res0    = (deUint8)(res & 0xff);
919                                 const deUint16  res1    = (deUint8)((res >> 8) & 0xff);
920                                 const deUint16  res2    = (deUint8)((res >> 16) & 0xff);
921                                 const deUint16  res3    = (deUint8)((res >> 24) & 0xff);
922                                 const int               diff0   = de::abs((int)ref0 - (int)res0);
923                                 const int               diff1   = de::abs((int)ref1 - (int)res1);
924                                 const int               diff2   = de::abs((int)ref2 - (int)res2);
925                                 const int               diff3   = de::abs((int)ref3 - (int)res3);
926
927                                 if (diff0 > maxDiff || diff1 > maxDiff || diff2 > maxDiff || diff3 > maxDiff)
928                                 {
929                                         if (numFailed < maxPrints)
930                                         {
931                                                 m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx
932                                                                                                                            << ", expected packSnorm4x8(" << inputs[valNdx] << ") = " << tcu::toHex(ref)
933                                                                                                                            << ", got " << tcu::toHex(res)
934                                                                                                                            << "\n  diffs = " << tcu::IVec4(diff0, diff1, diff2, diff3) << ", max diff = " << maxDiff
935                                                                                    << TestLog::EndMessage;
936                                         }
937                                         else if (numFailed == maxPrints)
938                                                 m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
939
940                                         numFailed += 1;
941                                 }
942                         }
943
944                         m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed" << TestLog::EndMessage;
945
946                         if (numFailed == 0)
947                                 return tcu::TestStatus::pass("Pass");
948                         else
949                                 return tcu::TestStatus::fail("Result comparison failed");
950
951                 }
952         }
953
954 private:
955         const glu::Precision m_precision;
956 };
957
958 class PackSnorm4x8Case : public ShaderPackingFunctionCase
959 {
960 public:
961         PackSnorm4x8Case (tcu::TestContext& testCtx, glu::ShaderType shaderType, glu::Precision precision)
962                 : ShaderPackingFunctionCase     (testCtx, (string("packsnorm4x8") + getPrecisionPostfix(precision) + getShaderTypePostfix(shaderType)).c_str(), "packSnorm4x8", shaderType)
963                 , m_precision                           (precision)
964         {
965                 m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_FLOAT_VEC4, precision)));
966                 m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
967
968                 m_spec.source = "out0 = packSnorm4x8(in0);";
969         }
970
971         TestInstance* createInstance (Context& ctx) const
972         {
973                 return new PackSnorm4x8CaseInstance(ctx, m_shaderType, m_spec, m_precision, getName());
974         }
975
976 private:
977         const glu::Precision m_precision;
978 };
979
980 class UnpackSnorm4x8CaseInstance : public ShaderPackingFunctionTestInstance
981 {
982 public:
983         UnpackSnorm4x8CaseInstance (Context& context, glu::ShaderType shaderType, const ShaderSpec& spec, const char* name)
984                 : ShaderPackingFunctionTestInstance     (context, shaderType, spec, name)
985         {
986         }
987
988         tcu::TestStatus iterate (void)
989         {
990                 const deUint32                          maxDiff         = 1; // Rounding error.
991                 de::Random                                      rnd                     (deStringHash(m_name) ^ 0x776002);
992                 std::vector<deUint32>           inputs;
993                 std::vector<tcu::Vec4>          outputs;
994
995                 inputs.push_back(0x00000000u);
996                 inputs.push_back(0x7fff8000u);
997                 inputs.push_back(0x80007fffu);
998                 inputs.push_back(0xffffffffu);
999                 inputs.push_back(0x0001fffeu);
1000
1001                 // Random values.
1002                 for (int ndx = 0; ndx < 95; ndx++)
1003                         inputs.push_back(rnd.getUint32());
1004
1005                 outputs.resize(inputs.size());
1006
1007                 m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values" << tcu::TestLog::EndMessage;
1008
1009                 {
1010                         const void*     in      = &inputs[0];
1011                         void*           out     = &outputs[0];
1012
1013                         m_executor->execute((int)inputs.size(), &in, &out);
1014                 }
1015
1016                 // Verify
1017                 {
1018                         const int       numValues       = (int)inputs.size();
1019                         const int       maxPrints       = 10;
1020                         int                     numFailed       = 0;
1021
1022                         for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
1023                         {
1024                                 const deInt8    in0             = (deInt8)(deUint8)(inputs[valNdx] & 0xff);
1025                                 const deInt8    in1             = (deInt8)(deUint8)((inputs[valNdx] >> 8) & 0xff);
1026                                 const deInt8    in2             = (deInt8)(deUint8)((inputs[valNdx] >> 16) & 0xff);
1027                                 const deInt8    in3             = (deInt8)(deUint8)(inputs[valNdx] >> 24);
1028                                 const float             ref0    = de::clamp(float(in0) / 127.f, -1.0f, 1.0f);
1029                                 const float             ref1    = de::clamp(float(in1) / 127.f, -1.0f, 1.0f);
1030                                 const float             ref2    = de::clamp(float(in2) / 127.f, -1.0f, 1.0f);
1031                                 const float             ref3    = de::clamp(float(in3) / 127.f, -1.0f, 1.0f);
1032                                 const float             res0    = outputs[valNdx].x();
1033                                 const float             res1    = outputs[valNdx].y();
1034                                 const float             res2    = outputs[valNdx].z();
1035                                 const float             res3    = outputs[valNdx].w();
1036
1037                                 const deUint32  diff0   = getUlpDiff(ref0, res0);
1038                                 const deUint32  diff1   = getUlpDiff(ref1, res1);
1039                                 const deUint32  diff2   = getUlpDiff(ref2, res2);
1040                                 const deUint32  diff3   = getUlpDiff(ref3, res3);
1041
1042                                 if (diff0 > maxDiff || diff1 > maxDiff || diff2 > maxDiff || diff3 > maxDiff)
1043                                 {
1044                                         if (numFailed < maxPrints)
1045                                         {
1046                                                 m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx << ",\n"
1047                                                                                                                            << "  expected unpackSnorm4x8(" << tcu::toHex(inputs[valNdx]) << ") = "
1048                                                                                                                            << "vec4(" << HexFloat(ref0) << ", " << HexFloat(ref1) << ", " << HexFloat(ref2) << ", " << HexFloat(ref3) << ")"
1049                                                                                                                            << ", got vec4(" << HexFloat(res0) << ", " << HexFloat(res1) << ", " << HexFloat(res2) << ", " << HexFloat(res3) << ")"
1050                                                                                                                            << "\n  ULP diffs = (" << diff0 << ", " << diff1 << ", " << diff2 << ", " << diff3 << "), max diff = " << maxDiff
1051                                                                                    << TestLog::EndMessage;
1052                                         }
1053                                         else if (numFailed == maxPrints)
1054                                                 m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
1055
1056                                         numFailed += 1;
1057                                 }
1058                         }
1059
1060                         m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed" << TestLog::EndMessage;
1061
1062                         if (numFailed == 0)
1063                                 return tcu::TestStatus::pass("Pass");
1064                         else
1065                                 return tcu::TestStatus::fail("Result comparison failed");
1066
1067                 }
1068         }
1069 };
1070
1071
1072 class UnpackSnorm4x8Case : public ShaderPackingFunctionCase
1073 {
1074 public:
1075         UnpackSnorm4x8Case (tcu::TestContext& testCtx, glu::ShaderType shaderType)
1076                 : ShaderPackingFunctionCase     (testCtx, (string("unpacksnorm4x8") + getShaderTypePostfix(shaderType)).c_str(), "unpackSnorm4x8", shaderType)
1077         {
1078                 m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
1079                 m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_FLOAT_VEC4, glu::PRECISION_HIGHP)));
1080
1081                 m_spec.source = "out0 = unpackSnorm4x8(in0);";
1082         }
1083
1084         TestInstance* createInstance (Context& ctx) const
1085         {
1086                 return new UnpackSnorm4x8CaseInstance(ctx, m_shaderType, m_spec, getName());
1087         }
1088
1089 };
1090
1091 class PackUnorm4x8CaseInstance : public ShaderPackingFunctionTestInstance
1092 {
1093 public:
1094         PackUnorm4x8CaseInstance (Context& context, glu::ShaderType shaderType, const ShaderSpec& spec, glu::Precision precision, const char* name)
1095                 : ShaderPackingFunctionTestInstance     (context, shaderType, spec, name)
1096                 , m_precision                                           (precision)
1097         {
1098         }
1099
1100         tcu::TestStatus iterate (void)
1101         {
1102                 de::Random                                      rnd                     (deStringHash(m_name) ^ 0x776002);
1103                 std::vector<tcu::Vec4>          inputs;
1104                 std::vector<deUint32>           outputs;
1105                 const int                                       maxDiff         = m_precision == glu::PRECISION_HIGHP   ? 1     :               // Rounding only.
1106                                                                                                   m_precision == glu::PRECISION_MEDIUMP ? 1     :               // (2^-10) * (2^8) + 1
1107                                                                                                   m_precision == glu::PRECISION_LOWP    ? 2     : 0;    // (2^-8) * (2^8) + 1
1108
1109                 // Special values to check.
1110                 inputs.push_back(tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f));
1111                 inputs.push_back(tcu::Vec4(-1.0f, 1.0f, -1.0f, 1.0f));
1112                 inputs.push_back(tcu::Vec4(0.5f, -0.5f, -0.5f, 0.5f));
1113                 inputs.push_back(tcu::Vec4(-1.5f, 1.5f, -1.5f, 1.5f));
1114                 inputs.push_back(tcu::Vec4(0.25f, -0.75f, -0.25f, 0.75f));
1115
1116                 // Random values, mostly in range.
1117                 for (int ndx = 0; ndx < 15; ndx++)
1118                 {
1119                         const float x = rnd.getFloat()*1.25f - 0.125f;
1120                         const float y = rnd.getFloat()*1.25f - 0.125f;
1121                         const float z = rnd.getFloat()*1.25f - 0.125f;
1122                         const float w = rnd.getFloat()*1.25f - 0.125f;
1123                         inputs.push_back(tcu::Vec4(x, y, z, w));
1124                 }
1125
1126                 // Large random values.
1127                 for (int ndx = 0; ndx < 80; ndx++)
1128                 {
1129                         const float x = rnd.getFloat()*1e6f - 1e5f;
1130                         const float y = rnd.getFloat()*1e6f - 1e5f;
1131                         const float z = rnd.getFloat()*1e6f - 1e5f;
1132                         const float w = rnd.getFloat()*1e6f - 1e5f;
1133                         inputs.push_back(tcu::Vec4(x, y, z, w));
1134                 }
1135
1136                 outputs.resize(inputs.size());
1137
1138                 m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values" << tcu::TestLog::EndMessage;
1139
1140                 {
1141                         const void*     in      = &inputs[0];
1142                         void*           out     = &outputs[0];
1143
1144                         m_executor->execute((int)inputs.size(), &in, &out);
1145                 }
1146
1147                 // Verify
1148                 {
1149                         const int       numValues       = (int)inputs.size();
1150                         const int       maxPrints       = 10;
1151                         int                     numFailed       = 0;
1152
1153                         for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
1154                         {
1155                                 const deUint16  ref0    = (deUint8)de::clamp(deRoundFloatToInt32(de::clamp(inputs[valNdx].x(), 0.0f, 1.0f) * 255.0f), 0, (1<<8)-1);
1156                                 const deUint16  ref1    = (deUint8)de::clamp(deRoundFloatToInt32(de::clamp(inputs[valNdx].y(), 0.0f, 1.0f) * 255.0f), 0, (1<<8)-1);
1157                                 const deUint16  ref2    = (deUint8)de::clamp(deRoundFloatToInt32(de::clamp(inputs[valNdx].z(), 0.0f, 1.0f) * 255.0f), 0, (1<<8)-1);
1158                                 const deUint16  ref3    = (deUint8)de::clamp(deRoundFloatToInt32(de::clamp(inputs[valNdx].w(), 0.0f, 1.0f) * 255.0f), 0, (1<<8)-1);
1159                                 const deUint32  ref             = (deUint32(ref3) << 24) | (deUint32(ref2) << 16) | (deUint32(ref1) << 8) | deUint32(ref0);
1160                                 const deUint32  res             = outputs[valNdx];
1161                                 const deUint16  res0    = (deUint8)(res & 0xff);
1162                                 const deUint16  res1    = (deUint8)((res >> 8) & 0xff);
1163                                 const deUint16  res2    = (deUint8)((res >> 16) & 0xff);
1164                                 const deUint16  res3    = (deUint8)((res >> 24) & 0xff);
1165                                 const int               diff0   = de::abs((int)ref0 - (int)res0);
1166                                 const int               diff1   = de::abs((int)ref1 - (int)res1);
1167                                 const int               diff2   = de::abs((int)ref2 - (int)res2);
1168                                 const int               diff3   = de::abs((int)ref3 - (int)res3);
1169
1170                                 if (diff0 > maxDiff || diff1 > maxDiff || diff2 > maxDiff || diff3 > maxDiff)
1171                                 {
1172                                         if (numFailed < maxPrints)
1173                                         {
1174                                                 m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx
1175                                                                                                                            << ", expected packUnorm4x8(" << inputs[valNdx] << ") = " << tcu::toHex(ref)
1176                                                                                                                            << ", got " << tcu::toHex(res)
1177                                                                                                                            << "\n  diffs = " << tcu::IVec4(diff0, diff1, diff2, diff3) << ", max diff = " << maxDiff
1178                                                                                    << TestLog::EndMessage;
1179                                         }
1180                                         else if (numFailed == maxPrints)
1181                                                 m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
1182
1183                                         numFailed += 1;
1184                                 }
1185                         }
1186
1187                         m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed" << TestLog::EndMessage;
1188
1189                         if (numFailed == 0)
1190                                 return tcu::TestStatus::pass("Pass");
1191                         else
1192                                 return tcu::TestStatus::fail("Result comparison failed");
1193
1194                 }
1195         }
1196
1197 private:
1198         const glu::Precision m_precision;
1199 };
1200
1201 class PackUnorm4x8Case : public ShaderPackingFunctionCase
1202 {
1203 public:
1204         PackUnorm4x8Case (tcu::TestContext& testCtx, glu::ShaderType shaderType, glu::Precision precision)
1205                 : ShaderPackingFunctionCase     (testCtx, (string("packunorm4x8") + getPrecisionPostfix(precision) + getShaderTypePostfix(shaderType)).c_str(), "packUnorm4x8", shaderType)
1206                 , m_precision                           (precision)
1207         {
1208                 m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_FLOAT_VEC4, precision)));
1209                 m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
1210
1211                 m_spec.source = "out0 = packUnorm4x8(in0);";
1212         }
1213
1214         TestInstance* createInstance (Context& ctx) const
1215         {
1216                 return new PackUnorm4x8CaseInstance(ctx, m_shaderType, m_spec, m_precision, getName());
1217         }
1218
1219 private:
1220         const glu::Precision m_precision;
1221 };
1222
1223 class UnpackUnorm4x8CaseInstance : public ShaderPackingFunctionTestInstance
1224 {
1225 public:
1226         UnpackUnorm4x8CaseInstance (Context& context, glu::ShaderType shaderType, const ShaderSpec& spec, const char* name)
1227                 : ShaderPackingFunctionTestInstance     (context, shaderType, spec, name)
1228         {
1229         }
1230
1231         tcu::TestStatus iterate (void)
1232         {
1233                 const deUint32                          maxDiff         = 1; // Rounding error.
1234                 de::Random                                      rnd                     (deStringHash(m_name) ^ 0x776002);
1235                 std::vector<deUint32>           inputs;
1236                 std::vector<tcu::Vec4>          outputs;
1237
1238                 inputs.push_back(0x00000000u);
1239                 inputs.push_back(0x7fff8000u);
1240                 inputs.push_back(0x80007fffu);
1241                 inputs.push_back(0xffffffffu);
1242                 inputs.push_back(0x0001fffeu);
1243
1244                 // Random values.
1245                 for (int ndx = 0; ndx < 95; ndx++)
1246                         inputs.push_back(rnd.getUint32());
1247
1248                 outputs.resize(inputs.size());
1249
1250                 m_testCtx.getLog() << TestLog::Message << "Executing shader for " << inputs.size() << " input values" << tcu::TestLog::EndMessage;
1251
1252                 {
1253                         const void*     in      = &inputs[0];
1254                         void*           out     = &outputs[0];
1255
1256                         m_executor->execute((int)inputs.size(), &in, &out);
1257                 }
1258
1259                 // Verify
1260                 {
1261                         const int       numValues       = (int)inputs.size();
1262                         const int       maxPrints       = 10;
1263                         int                     numFailed       = 0;
1264
1265                         for (int valNdx = 0; valNdx < (int)inputs.size(); valNdx++)
1266                         {
1267                                 const deUint8   in0             = (deUint8)(inputs[valNdx] & 0xff);
1268                                 const deUint8   in1             = (deUint8)((inputs[valNdx] >> 8) & 0xff);
1269                                 const deUint8   in2             = (deUint8)((inputs[valNdx] >> 16) & 0xff);
1270                                 const deUint8   in3             = (deUint8)(inputs[valNdx] >> 24);
1271                                 const float             ref0    = de::clamp(float(in0) / 255.f, 0.0f, 1.0f);
1272                                 const float             ref1    = de::clamp(float(in1) / 255.f, 0.0f, 1.0f);
1273                                 const float             ref2    = de::clamp(float(in2) / 255.f, 0.0f, 1.0f);
1274                                 const float             ref3    = de::clamp(float(in3) / 255.f, 0.0f, 1.0f);
1275                                 const float             res0    = outputs[valNdx].x();
1276                                 const float             res1    = outputs[valNdx].y();
1277                                 const float             res2    = outputs[valNdx].z();
1278                                 const float             res3    = outputs[valNdx].w();
1279
1280                                 const deUint32  diff0   = getUlpDiff(ref0, res0);
1281                                 const deUint32  diff1   = getUlpDiff(ref1, res1);
1282                                 const deUint32  diff2   = getUlpDiff(ref2, res2);
1283                                 const deUint32  diff3   = getUlpDiff(ref3, res3);
1284
1285                                 if (diff0 > maxDiff || diff1 > maxDiff || diff2 > maxDiff || diff3 > maxDiff)
1286                                 {
1287                                         if (numFailed < maxPrints)
1288                                         {
1289                                                 m_testCtx.getLog() << TestLog::Message << "ERROR: Mismatch in value " << valNdx << ",\n"
1290                                                                                                                            << "  expected unpackUnorm4x8(" << tcu::toHex(inputs[valNdx]) << ") = "
1291                                                                                                                            << "vec4(" << HexFloat(ref0) << ", " << HexFloat(ref1) << ", " << HexFloat(ref2) << ", " << HexFloat(ref3) << ")"
1292                                                                                                                            << ", got vec4(" << HexFloat(res0) << ", " << HexFloat(res1) << ", " << HexFloat(res2) << ", " << HexFloat(res3) << ")"
1293                                                                                                                            << "\n  ULP diffs = (" << diff0 << ", " << diff1 << ", " << diff2 << ", " << diff3 << "), max diff = " << maxDiff
1294                                                                                    << TestLog::EndMessage;
1295                                         }
1296                                         else if (numFailed == maxPrints)
1297                                                 m_testCtx.getLog() << TestLog::Message << "..." << TestLog::EndMessage;
1298
1299                                         numFailed += 1;
1300                                 }
1301                         }
1302
1303                         m_testCtx.getLog() << TestLog::Message << (numValues - numFailed) << " / " << numValues << " values passed" << TestLog::EndMessage;
1304
1305                         if (numFailed == 0)
1306                                 return tcu::TestStatus::pass("Pass");
1307                         else
1308                                 return tcu::TestStatus::fail("Result comparison failed");
1309
1310                 }
1311         }
1312 };
1313
1314 class UnpackUnorm4x8Case : public ShaderPackingFunctionCase
1315 {
1316 public:
1317         UnpackUnorm4x8Case (tcu::TestContext& testCtx, glu::ShaderType shaderType)
1318                 : ShaderPackingFunctionCase     (testCtx, (string("unpackunorm4x8") + getShaderTypePostfix(shaderType)).c_str(), "unpackUnorm4x8", shaderType)
1319         {
1320                 m_spec.inputs.push_back(Symbol("in0", glu::VarType(glu::TYPE_UINT, glu::PRECISION_HIGHP)));
1321                 m_spec.outputs.push_back(Symbol("out0", glu::VarType(glu::TYPE_FLOAT_VEC4, glu::PRECISION_HIGHP)));
1322
1323                 m_spec.source = "out0 = unpackUnorm4x8(in0);";
1324         }
1325
1326         TestInstance* createInstance (Context& ctx) const
1327         {
1328                 return new UnpackUnorm4x8CaseInstance(ctx, m_shaderType, m_spec, getName());
1329         }
1330
1331 };
1332
1333 ShaderPackingFunctionTests::ShaderPackingFunctionTests (tcu::TestContext& testCtx)
1334         : tcu::TestCaseGroup    (testCtx, "pack_unpack", "Floating-point pack and unpack function tests")
1335 {
1336 }
1337
1338 ShaderPackingFunctionTests::~ShaderPackingFunctionTests (void)
1339 {
1340 }
1341
1342 void ShaderPackingFunctionTests::init (void)
1343 {
1344         // New built-in functions in GLES 3.1
1345         {
1346                 const glu::ShaderType allShaderTypes[] =
1347                 {
1348                         glu::SHADERTYPE_VERTEX,
1349                         glu::SHADERTYPE_TESSELLATION_CONTROL,
1350                         glu::SHADERTYPE_TESSELLATION_EVALUATION,
1351                         glu::SHADERTYPE_GEOMETRY,
1352                         glu::SHADERTYPE_FRAGMENT,
1353                         glu::SHADERTYPE_COMPUTE
1354                 };
1355
1356                 // packSnorm4x8
1357                 for (int prec = glu::PRECISION_MEDIUMP; prec < glu::PRECISION_LAST; prec++)
1358                 {
1359                         for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(allShaderTypes); shaderTypeNdx++)
1360                                 addChild(new PackSnorm4x8Case(m_testCtx, allShaderTypes[shaderTypeNdx], glu::Precision(prec)));
1361                 }
1362
1363                 // unpackSnorm4x8
1364                 for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(allShaderTypes); shaderTypeNdx++)
1365                         addChild(new UnpackSnorm4x8Case(m_testCtx, allShaderTypes[shaderTypeNdx]));
1366
1367                 // packUnorm4x8
1368                 for (int prec = glu::PRECISION_MEDIUMP; prec < glu::PRECISION_LAST; prec++)
1369                 {
1370                         for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(allShaderTypes); shaderTypeNdx++)
1371                                 addChild(new PackUnorm4x8Case(m_testCtx, allShaderTypes[shaderTypeNdx], glu::Precision(prec)));
1372                 }
1373
1374                 // unpackUnorm4x8
1375                 for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(allShaderTypes); shaderTypeNdx++)
1376                         addChild(new UnpackUnorm4x8Case(m_testCtx, allShaderTypes[shaderTypeNdx]));
1377         }
1378
1379         // GLES 3 functions in new shader types.
1380         {
1381                 const glu::ShaderType newShaderTypes[] =
1382                 {
1383                         glu::SHADERTYPE_GEOMETRY,
1384                         glu::SHADERTYPE_COMPUTE
1385                 };
1386
1387                 // packSnorm2x16
1388                 for (int prec = glu::PRECISION_MEDIUMP; prec < glu::PRECISION_LAST; prec++)
1389                 {
1390                         for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(newShaderTypes); shaderTypeNdx++)
1391                                 addChild(new PackSnorm2x16Case(m_testCtx, newShaderTypes[shaderTypeNdx], glu::Precision(prec)));
1392                 }
1393
1394                 // unpackSnorm2x16
1395                 for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(newShaderTypes); shaderTypeNdx++)
1396                         addChild(new UnpackSnorm2x16Case(m_testCtx, newShaderTypes[shaderTypeNdx]));
1397
1398                 // packUnorm2x16
1399                 for (int prec = glu::PRECISION_MEDIUMP; prec < glu::PRECISION_LAST; prec++)
1400                 {
1401                         for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(newShaderTypes); shaderTypeNdx++)
1402                                 addChild(new PackUnorm2x16Case(m_testCtx, newShaderTypes[shaderTypeNdx], glu::Precision(prec)));
1403                 }
1404
1405                 // unpackUnorm2x16
1406                 for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(newShaderTypes); shaderTypeNdx++)
1407                         addChild(new UnpackUnorm2x16Case(m_testCtx, newShaderTypes[shaderTypeNdx]));
1408
1409                 // packHalf2x16
1410                 for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(newShaderTypes); shaderTypeNdx++)
1411                         addChild(new PackHalf2x16Case(m_testCtx, newShaderTypes[shaderTypeNdx]));
1412
1413                 // unpackHalf2x16
1414                 for (int shaderTypeNdx = 0; shaderTypeNdx < DE_LENGTH_OF_ARRAY(newShaderTypes); shaderTypeNdx++)
1415                         addChild(new UnpackHalf2x16Case(m_testCtx, newShaderTypes[shaderTypeNdx]));
1416         }
1417 }
1418
1419 } // shaderexecutor
1420 } // vkt