Rename various things for more inclusive language
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / spirv_assembly / vktSpvAsmFloatControlsTests.cpp
1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2018 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief VK_KHR_shader_float_controls tests.
22  *//*--------------------------------------------------------------------*/
23
24
25 #include "vktSpvAsmFloatControlsTests.hpp"
26 #include "vktSpvAsmComputeShaderCase.hpp"
27 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
28 #include "vktTestGroupUtil.hpp"
29 #include "tcuFloat.hpp"
30 #include "tcuFloatFormat.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "deUniquePtr.hpp"
33 #include "deFloat16.h"
34 #include "vkQueryUtil.hpp"
35 #include "vkRefUtil.hpp"
36 #include <cstring>
37 #include <vector>
38 #include <limits>
39 #include <fenv.h>
40
41 namespace vkt
42 {
43 namespace SpirVAssembly
44 {
45
46 namespace
47 {
48
49 using namespace std;
50 using namespace tcu;
51
// Floating-point widths that the tests in this file are generated for.
enum FloatType
{
	FP16 = 0,	// 16-bit float
	FP32 = 1,	// 32-bit float
	FP64 = 2	// 64-bit float
};
58
// Tag identifying the float width of the data held in a buffer.
// DATA_UNKNOWN is the zero value; the remaining tags follow in increasing width.
enum class BufferDataType
{
	DATA_UNKNOWN = 0,
	DATA_FP16,		// == 1
	DATA_FP32,		// == 2
	DATA_FP64,		// == 3
};
66
// How a float type is used by a test.
enum FloatUsage
{
	// The type is only loaded/stored; for a 16-bit float this usage is
	// covered by VK_KHR_16bit_storage.
	FLOAT_STORAGE_ONLY = 0,

	// The type is used beyond what VK_KHR_16bit_storage covers.
	FLOAT_ARITHMETIC = 1
};
75
// Single-bit flags, one per combination of statement category (ARGS_CONST,
// TYPES_TYPE, CONSTS_TYPE, COMMANDS_CONST, COMMANDS_TYPE) and float kind
// (FLOAT, FP16, FP32, FP64). Bits 0..19 are used, four consecutive bits per category.
enum FloatStatementUsageBits
{
	// bits 0-3
	B_STATEMENT_USAGE_ARGS_CONST_FLOAT		= 0x00000001,
	B_STATEMENT_USAGE_ARGS_CONST_FP16		= 0x00000002,
	B_STATEMENT_USAGE_ARGS_CONST_FP32		= 0x00000004,
	B_STATEMENT_USAGE_ARGS_CONST_FP64		= 0x00000008,

	// bits 4-7
	B_STATEMENT_USAGE_TYPES_TYPE_FLOAT		= 0x00000010,
	B_STATEMENT_USAGE_TYPES_TYPE_FP16		= 0x00000020,
	B_STATEMENT_USAGE_TYPES_TYPE_FP32		= 0x00000040,
	B_STATEMENT_USAGE_TYPES_TYPE_FP64		= 0x00000080,

	// bits 8-11
	B_STATEMENT_USAGE_CONSTS_TYPE_FLOAT		= 0x00000100,
	B_STATEMENT_USAGE_CONSTS_TYPE_FP16		= 0x00000200,
	B_STATEMENT_USAGE_CONSTS_TYPE_FP32		= 0x00000400,
	B_STATEMENT_USAGE_CONSTS_TYPE_FP64		= 0x00000800,

	// bits 12-15
	B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT	= 0x00001000,
	B_STATEMENT_USAGE_COMMANDS_CONST_FP16	= 0x00002000,
	B_STATEMENT_USAGE_COMMANDS_CONST_FP32	= 0x00004000,
	B_STATEMENT_USAGE_COMMANDS_CONST_FP64	= 0x00008000,

	// bits 16-19
	B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT	= 0x00010000,
	B_STATEMENT_USAGE_COMMANDS_TYPE_FP16	= 0x00020000,
	B_STATEMENT_USAGE_COMMANDS_TYPE_FP32	= 0x00040000,
	B_STATEMENT_USAGE_COMMANDS_TYPE_FP64	= 0x00080000,
};
99
100 typedef deUint32 FloatStatementUsageFlags;
101
// Float-controls behaviors that it is possible to test. Each enumerator is a
// single bit and is named after the execution mode given in its comment.
enum BehaviorFlagBits
{
	B_DENORM_PRESERVE	= (1 << 0),		// DenormPreserve
	B_DENORM_FLUSH		= (1 << 1),		// DenormFlushToZero
	B_ZIN_PRESERVE		= (1 << 2),		// SignedZeroInfNanPreserve
	B_RTE_ROUNDING		= (1 << 3),		// RoundingModeRTE
	B_RTZ_ROUNDING		= (1 << 4)		// RoundingModeRTZ
};
111
112 typedef deUint32 BehaviorFlags;
113
// Symbolic codes for every float value used by the tests, either as an
// operation argument or as an expected result. Referring to values by code
// lets the same test description be reused for each float type, because the
// concrete value is looked up per type.
enum ValueId
{
	// --- common values used as both arguments and results ---

	// Marks an argument that is not used by the operation, or the result of
	// a test case that should be skipped.
	V_UNUSED = 0,
	V_MINUS_INF,
	V_MINUS_ONE,			// -1.0
	V_MINUS_ZERO,			// -0.0
	V_ZERO,					//  0.0
	V_HALF,					//  0.5
	V_ONE,					//  1.0
	V_INF,
	V_DENORM,
	V_NAN,

	// --- arguments for rounding mode tests; used only when arguments are passed from input ---
	V_ADD_ARG_A,
	V_ADD_ARG_B,
	V_SUB_ARG_A,
	V_SUB_ARG_B,
	V_MUL_ARG_A,
	V_MUL_ARG_B,
	V_DOT_ARG_A,
	V_DOT_ARG_B,

	// --- arguments of conversion operations; used only when arguments are passed from input ---
	V_CONV_FROM_FP32_ARG,
	V_CONV_FROM_FP64_ARG,

	// --- results of rounding operations (RTZ / RTE variant per operation) ---
	V_ADD_RTZ_RESULT,
	V_ADD_RTE_RESULT,
	V_SUB_RTZ_RESULT,
	V_SUB_RTE_RESULT,
	V_MUL_RTZ_RESULT,
	V_MUL_RTE_RESULT,
	V_DOT_RTZ_RESULT,
	V_DOT_RTE_RESULT,

	// --- non-common results of some operations - corner cases ---

	// fp16 addition of non-flushed denorm with itself (or equivalent
	// dot-product or vector-matrix multiply)
	V_ZERO_OR_DENORM_TIMES_TWO,
	// value used only for fp16 subtraction result of preserved denorm and one
	V_MINUS_ONE_OR_CLOSE,
	V_PI_DIV_2,
	// both +0 and -0 are accepted
	V_ZERO_OR_MINUS_ZERO,
	// both +0 and 1 are accepted
	V_ZERO_OR_ONE,
	// both 0 and fp32 representation of fp16 denorm are accepted
	V_ZERO_OR_FP16_DENORM_TO_FP32,
	V_ZERO_OR_FP16_DENORM_TO_FP64,
	V_ZERO_OR_FP32_DENORM_TO_FP64,
	V_DENORM_TIMES_TWO,
	V_DEGREES_DENORM,
	// 1.0 for trigonometric operations, including precision margin
	V_TRIG_ONE,
	V_MINUS_INF_OR_LOG_DENORM,
	V_MINUS_INF_OR_LOG2_DENORM,
	V_ZERO_OR_SQRT_DENORM,
	V_INF_OR_INV_SQRT_DENORM,

	// --- results of conversion operations ---
	V_CONV_TO_FP16_RTZ_RESULT,
	V_CONV_TO_FP16_RTE_RESULT,
	V_CONV_TO_FP32_RTZ_RESULT,
	V_CONV_TO_FP32_RTE_RESULT,
	// used e.g. when converting fp16 denorm to fp32
	V_CONV_DENORM_SMALLER,
	V_CONV_DENORM_BIGGER,
};
179
// All tested operations. Operations are identified generically, so that a
// single id can be used to generate tests operating on arguments with
// different values of a specified float type.
enum OperationId
{
	// --- spir-v unary operations ---
	O_NEGATE = 0,
	O_COMPOSITE,
	O_COMPOSITE_INS,
	O_COPY,
	O_D_EXTRACT,
	O_D_INSERT,
	O_SHUFFLE,
	O_TRANSPOSE,
	O_CONV_FROM_FP16,
	O_CONV_FROM_FP32,
	O_CONV_FROM_FP64,
	O_SCONST_CONV_FROM_FP32_TO_FP16,
	O_SCONST_CONV_FROM_FP64_TO_FP32,
	O_SCONST_CONV_FROM_FP64_TO_FP16,
	O_RETURN_VAL,

	// --- spir-v binary operations ---
	O_ADD,
	O_SUB,
	O_MUL,
	O_DIV,
	O_REM,
	O_MOD,
	O_PHI,
	O_SELECT,
	O_DOT,
	O_VEC_MUL_S,
	O_VEC_MUL_M,
	O_MAT_MUL_S,
	O_MAT_MUL_V,
	O_MAT_MUL_M,
	O_OUT_PROD,
	O_ORD_EQ,
	O_UORD_EQ,
	O_ORD_NEQ,
	O_UORD_NEQ,
	O_ORD_LS,
	O_UORD_LS,
	O_ORD_GT,
	O_UORD_GT,
	O_ORD_LE,
	O_UORD_LE,
	O_ORD_GE,
	O_UORD_GE,

	// --- glsl unary operations ---
	O_ROUND,
	O_ROUND_EV,
	O_TRUNC,
	O_ABS,
	O_SIGN,
	O_FLOOR,
	O_CEIL,
	O_FRACT,
	O_RADIANS,
	O_DEGREES,
	O_SIN,
	O_COS,
	O_TAN,
	O_ASIN,
	O_ACOS,
	O_ATAN,
	O_SINH,
	O_COSH,
	O_TANH,
	O_ASINH,
	O_ACOSH,
	O_ATANH,
	O_EXP,
	O_LOG,
	O_EXP2,
	O_LOG2,
	O_SQRT,
	O_INV_SQRT,
	O_MODF,
	O_MODF_ST,
	O_FREXP,
	O_FREXP_ST,
	O_LENGHT,				// (sic) identifier spelling is kept as-is
	O_NORMALIZE,
	O_REFLECT,
	O_REFRACT,
	O_MAT_DET,
	O_MAT_INV,
	O_PH_DENORM,			// PackHalf2x16
	O_UPH_DENORM,
	O_PD_DENORM,			// PackDouble2x32
	O_UPD_DENORM_FLUSH,
	O_UPD_DENORM_PRESERVE,

	// --- glsl binary operations ---
	O_ATAN2,
	O_POW,
	O_MIX,
	O_FMA,
	O_MIN,
	O_MAX,
	O_CLAMP,
	O_STEP,
	O_SSTEP,
	O_DIST,
	O_CROSS,
	O_FACE_FWD,
	O_NMIN,
	O_NMAX,
	O_NCLAMP,

	// --- listed separately from the groups above ---
	O_ORTE_ROUND,
	O_ORTZ_ROUND
};
296
297 // Structures storing data required to test DenormPreserve and DenormFlushToZero modes.
298 // Operations are separated into binary and unary lists because binary operations can be tested with
299 // two attributes and thus denorms can be tested in combination with value, denorm, inf and nan.
300 // Unary operations are only tested with denorms.
301 struct BinaryCase
302 {
303         OperationId     operationId;
304         ValueId         opVarResult;
305         ValueId         opDenormResult;
306         ValueId         opInfResult;
307         ValueId         opNanResult;
308 };
309 struct UnaryCase
310 {
311         OperationId     operationId;
312         ValueId         result;
313 };
314
// Returns a copy of str in which every occurrence of `from` is replaced by `to`.
//
// To keep the spir-v code clean and easier to read, parts of it are processed
// with this function instead of StringTemplate; the main usage is replacing
// "float_" with "f16_", "f32_" or "f64_" depending on the test case.
//
// The search resumes right after each inserted `to`, so replaced text is never
// rescanned (replacing "a" with "aa" terminates). An empty `from` matches at
// every position and would never terminate, so in that case str is returned
// unchanged.
std::string replace(std::string str, const std::string& from, const std::string& to)
{
	if (from.empty())
		return str;

	for (std::size_t pos = str.find(from); pos != std::string::npos; pos = str.find(from, pos + to.length()))
		str.replace(pos, from.length(), to);

	return str;
}
330
// Helper for converting bits between an integer type and a float type.
// The nested union overlays one FLOAT_TYPE (fp) and one UINT_TYPE (ui) in the
// same storage.
template <class FLOAT_TYPE, class UINT_TYPE>
struct RawConvert
{
	union Value
	{
		FLOAT_TYPE	fp;		// float view of the storage
		UINT_TYPE	ui;		// integer view of the storage
	};
};
341
342 // Traits used to get int type that can store equivalent float type.
343 template<typename FLOAT_TYPE>
344 struct GetCoresponding
345 {
346         typedef deUint16 uint_type;
347 };
348 template<>
349 struct GetCoresponding<float>
350 {
351         typedef deUint32 uint_type;
352 };
353 template<>
354 struct GetCoresponding<double>
355 {
356         typedef deUint64 uint_type;
357 };
358
359 // All values used for arguments and operation results are stored in single map.
360 // Each float type (fp16, fp32, fp64) has its own map that is used during
361 // test setup and during verification. TypeValuesBase is interface to that map.
362 class TypeValuesBase
363 {
364 public:
365         TypeValuesBase();
366         virtual ~TypeValuesBase() = default;
367
368         virtual BufferSp        constructInputBuffer    (const ValueId* twoArguments) const = 0;
369         virtual BufferSp        constructOutputBuffer   (ValueId result) const = 0;
370         virtual void            fillInputData                   (const ValueId* twoArguments, vector<deUint8>& bufferData, deUint32& offset) const = 0;
371
372 protected:
373         const double    pi;
374 };
375
376 TypeValuesBase::TypeValuesBase()
377         : pi(3.14159265358979323846)
378 {
379 }
380
381 typedef de::SharedPtr<TypeValuesBase> TypeValuesSP;
382
383 template <typename FLOAT_TYPE>
384 class TypeValues: public TypeValuesBase
385 {
386 public:
387         TypeValues();
388
389         BufferSp        constructInputBuffer    (const ValueId* twoArguments) const override;
390         BufferSp        constructOutputBuffer   (ValueId result) const override;
391         void            fillInputData                   (const ValueId* twoArguments, vector<deUint8>& bufferData, deUint32& offset) const override;
392
393         FLOAT_TYPE getValue(ValueId id) const;
394
395         template <typename UINT_TYPE>
396         FLOAT_TYPE exactByteEquivalent(UINT_TYPE byteValue) const;
397
398 private:
399         typedef map<ValueId, FLOAT_TYPE> ValueMap;
400         ValueMap m_valueIdToFloatType;
401 };
402
403 template <typename FLOAT_TYPE>
404 BufferSp TypeValues<FLOAT_TYPE>::constructInputBuffer(const ValueId* twoArguments) const
405 {
406         std::vector<FLOAT_TYPE> inputData(2);
407         inputData[0] = m_valueIdToFloatType.at(twoArguments[0]);
408         inputData[1] = m_valueIdToFloatType.at(twoArguments[1]);
409         return BufferSp(new Buffer<FLOAT_TYPE>(inputData));
410 }
411
412 template <typename FLOAT_TYPE>
413 BufferSp TypeValues<FLOAT_TYPE>::constructOutputBuffer(ValueId result) const
414 {
415         // note: we are not doing maping here, ValueId is directly saved in
416         // float type in order to be able to retireve it during verification
417
418         typedef typename GetCoresponding<FLOAT_TYPE>::uint_type uint_t;
419         uint_t value = static_cast<uint_t>(result);
420
421         // For FP16 we increase the buffer size to hold an unsigned integer, as
422         // we can be in the no 16bit_storage case.
423         const uint_t outputSize = sizeof(FLOAT_TYPE) == 2u ? 2u : 1u;
424         std::vector<FLOAT_TYPE> outputData(outputSize, exactByteEquivalent<uint_t>(value));
425         return BufferSp(new Buffer<FLOAT_TYPE>(outputData));
426 }
427
428 template <typename FLOAT_TYPE>
429 void TypeValues<FLOAT_TYPE>::fillInputData(const ValueId* twoArguments, vector<deUint8>& bufferData, deUint32& offset) const
430 {
431         deUint32 typeSize = sizeof(FLOAT_TYPE);
432
433         FLOAT_TYPE argA = getValue(twoArguments[0]);
434         deMemcpy(&bufferData[offset], &argA, typeSize);
435         offset += typeSize;
436
437         FLOAT_TYPE argB = getValue(twoArguments[1]);
438         deMemcpy(&bufferData[offset], &argB, typeSize);
439         offset += typeSize;
440 }
441
442 template <typename FLOAT_TYPE>
443 FLOAT_TYPE TypeValues<FLOAT_TYPE>::getValue(ValueId id) const
444 {
445         return m_valueIdToFloatType.at(id);
446 }
447
448 template <typename FLOAT_TYPE>
449 template <typename UINT_TYPE>
450 FLOAT_TYPE TypeValues<FLOAT_TYPE>::exactByteEquivalent(UINT_TYPE byteValue) const
451 {
452         typename RawConvert<FLOAT_TYPE, UINT_TYPE>::Value value;
453         value.ui = byteValue;
454         return value.fp;
455 }
456
457 template <>
458 TypeValues<deFloat16>::TypeValues()
459         : TypeValuesBase()
460 {
461         // NOTE: when updating entries in m_valueIdToFloatType make sure to
462         // update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
463         ValueMap& vm = m_valueIdToFloatType;
464         vm[V_UNUSED]                    = deFloat32To16(0.0f);
465         vm[V_MINUS_INF]                 = 0xfc00;
466         vm[V_MINUS_ONE]                 = deFloat32To16(-1.0f);
467         vm[V_MINUS_ZERO]                = 0x8000;
468         vm[V_ZERO]                              = 0x0000;
469         vm[V_HALF]                              = deFloat32To16(0.5f);
470         vm[V_ONE]                               = deFloat32To16(1.0f);
471         vm[V_INF]                               = 0x7c00;
472         vm[V_DENORM]                    = 0x03f0; // this value should be the same as the result of denormBase - epsilon
473         vm[V_NAN]                               = 0x7cf0;
474
475         vm[V_PI_DIV_2]                  = 0x3e48;
476         vm[V_DENORM_TIMES_TWO]  = 0x07e0;
477         vm[V_DEGREES_DENORM]    = 0x1b0c;
478
479         vm[V_ADD_ARG_A]                                 = 0x3c03;
480         vm[V_ADD_ARG_B]                                 = vm[V_ONE];
481         vm[V_SUB_ARG_A]                                 = vm[V_ADD_ARG_A];
482         vm[V_SUB_ARG_B]                                 = 0x4203;
483         vm[V_MUL_ARG_A]                                 = vm[V_ADD_ARG_A];
484         vm[V_MUL_ARG_B]                                 = 0x1900;
485         vm[V_DOT_ARG_A]                                 = vm[V_ADD_ARG_A];
486         vm[V_DOT_ARG_B]                                 = vm[V_MUL_ARG_B];
487         vm[V_CONV_FROM_FP32_ARG]                = vm[V_UNUSED];
488         vm[V_CONV_FROM_FP64_ARG]                = vm[V_UNUSED];
489
490         vm[V_ADD_RTZ_RESULT]                    = 0x4001;       // deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rtz)
491         vm[V_SUB_RTZ_RESULT]                    = 0xc001;       // deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rtz)
492         vm[V_MUL_RTZ_RESULT]                    = 0x1903;       // deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rtz)
493         vm[V_DOT_RTZ_RESULT]                    = 0x1d03;
494         vm[V_CONV_TO_FP16_RTZ_RESULT]   = deFloat32To16Round(1.22334445f, DE_ROUNDINGMODE_TO_ZERO);
495         vm[V_CONV_TO_FP32_RTZ_RESULT]   = vm[V_UNUSED];
496
497         vm[V_ADD_RTE_RESULT]                    = 0x4002;       // deFloat16Add(vm[V_ADD_ARG_A], vm[V_ADD_ARG_B], rte)
498         vm[V_SUB_RTE_RESULT]                    = 0xc002;       // deFloat16Sub(vm[V_SUB_ARG_A], vm[V_SUB_ARG_B], rte)
499         vm[V_MUL_RTE_RESULT]                    = 0x1904;       // deFloat16Mul(vm[V_MUL_ARG_A], vm[V_MUL_ARG_B], rte)
500         vm[V_DOT_RTE_RESULT]                    = 0x1d04;
501         vm[V_CONV_TO_FP16_RTE_RESULT]   = deFloat32To16Round(1.22334445f, DE_ROUNDINGMODE_TO_NEAREST_EVEN);
502         vm[V_CONV_TO_FP32_RTE_RESULT]   = vm[V_UNUSED];
503
504         // there is no precision to store fp32 denorm nor fp64 denorm
505         vm[V_CONV_DENORM_SMALLER]               = vm[V_ZERO];
506         vm[V_CONV_DENORM_BIGGER]                = vm[V_ZERO];
507 }
508
509 template <>
510 TypeValues<float>::TypeValues()
511         : TypeValuesBase()
512 {
513         // NOTE: when updating entries in m_valueIdToFloatType make sure to
514         // update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
515         ValueMap& vm = m_valueIdToFloatType;
516         vm[V_UNUSED]                    =  0.0f;
517         vm[V_MINUS_INF]                 = -std::numeric_limits<float>::infinity();
518         vm[V_MINUS_ONE]                 = -1.0f;
519         vm[V_MINUS_ZERO]                = -0.0f;
520         vm[V_ZERO]                              =  0.0f;
521         vm[V_HALF]                              =  0.5f;
522         vm[V_ONE]                               =  1.0f;
523         vm[V_INF]                               =  std::numeric_limits<float>::infinity();
524         vm[V_DENORM]                    =  static_cast<float>(1.413e-42); // 0x000003f0
525         vm[V_NAN]                               =  std::numeric_limits<float>::quiet_NaN();
526
527         vm[V_PI_DIV_2]                  =  static_cast<float>(pi / 2);
528         vm[V_DENORM_TIMES_TWO]  =  vm[V_DENORM] + vm[V_DENORM];
529         vm[V_DEGREES_DENORM]    =  deFloatDegrees(vm[V_DENORM]);
530
531         float e = std::numeric_limits<float>::epsilon();
532         vm[V_ADD_ARG_A]                                 = 1.0f + 3 * e;
533         vm[V_ADD_ARG_B]                                 = 1.0f;
534         vm[V_SUB_ARG_A]                                 = vm[V_ADD_ARG_A];
535         vm[V_SUB_ARG_B]                                 = 3.0f + 6 * e;
536         vm[V_MUL_ARG_A]                                 = vm[V_ADD_ARG_A];
537         vm[V_MUL_ARG_B]                                 = 5 * e;
538         vm[V_DOT_ARG_A]                                 = vm[V_ADD_ARG_A];
539         vm[V_DOT_ARG_B]                                 = 5 * e;
540         vm[V_CONV_FROM_FP32_ARG]                = 1.22334445f;
541         vm[V_CONV_FROM_FP64_ARG]                = vm[V_UNUSED];
542
543         int prevRound = fegetround();
544         fesetround(FE_TOWARDZERO);
545         vm[V_ADD_RTZ_RESULT]                    = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
546         vm[V_SUB_RTZ_RESULT]                    = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
547         vm[V_MUL_RTZ_RESULT]                    = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
548         vm[V_DOT_RTZ_RESULT]                    = vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
549         vm[V_CONV_TO_FP16_RTZ_RESULT]   = vm[V_UNUSED];
550         vm[V_CONV_TO_FP32_RTZ_RESULT]   = exactByteEquivalent<deUint32>(0x3f9c968d); // result of conversion from double(1.22334455)
551
552         fesetround(FE_TONEAREST);
553         vm[V_ADD_RTE_RESULT]                    = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
554         vm[V_SUB_RTE_RESULT]                    = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
555         vm[V_MUL_RTE_RESULT]                    = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
556         vm[V_DOT_RTE_RESULT]                    = vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
557         vm[V_CONV_TO_FP16_RTE_RESULT]   = vm[V_UNUSED];
558         vm[V_CONV_TO_FP32_RTE_RESULT]   = exactByteEquivalent<deUint32>(0x3f9c968e); // result of conversion from double(1.22334455)
559         fesetround(prevRound);
560
561         // there is no precision to store fp64 denorm
562         vm[V_CONV_DENORM_SMALLER]               = exactByteEquivalent<deUint32>(0x387c0000); // fp16 denorm
563         vm[V_CONV_DENORM_BIGGER]                = vm[V_ZERO];
564 }
565
566 template <>
567 TypeValues<double>::TypeValues()
568         : TypeValuesBase()
569 {
570         // NOTE: when updating entries in m_valueIdToFloatType make sure to
571         // update also valueIdToSnippetArgMap defined in updateSpirvSnippets()
572         ValueMap& vm = m_valueIdToFloatType;
573         vm[V_UNUSED]                    =  0.0;
574         vm[V_MINUS_INF]                 = -std::numeric_limits<double>::infinity();
575         vm[V_MINUS_ONE]                 = -1.0;
576         vm[V_MINUS_ZERO]                = -0.0;
577         vm[V_ZERO]                              =  0.0;
578         vm[V_HALF]                              =  0.5;
579         vm[V_ONE]                               =  1.0;
580         vm[V_INF]                               =  std::numeric_limits<double>::infinity();
581         vm[V_DENORM]                    =  4.98e-321; // 0x00000000000003F0
582         vm[V_NAN]                               =  std::numeric_limits<double>::quiet_NaN();
583
584         vm[V_PI_DIV_2]                  =  pi / 2;
585         vm[V_DENORM_TIMES_TWO]  =  vm[V_DENORM] + vm[V_DENORM];
586         vm[V_DEGREES_DENORM]    =  vm[V_UNUSED];
587
588         double e = std::numeric_limits<double>::epsilon();
589         vm[V_ADD_ARG_A]                         = 1.0 + 3 * e;
590         vm[V_ADD_ARG_B]                         = 1.0;
591         vm[V_SUB_ARG_A]                         = vm[V_ADD_ARG_A];
592         vm[V_SUB_ARG_B]                         = 3.0 + 6 * e;
593         vm[V_MUL_ARG_A]                         = vm[V_ADD_ARG_A];
594         vm[V_MUL_ARG_B]                         = 5 * e;
595         vm[V_DOT_ARG_A]                         = vm[V_ADD_ARG_A];
596         vm[V_DOT_ARG_B]                         = 5 * e;
597         vm[V_CONV_FROM_FP32_ARG]        = vm[V_UNUSED];
598         vm[V_CONV_FROM_FP64_ARG]        = 1.22334455;
599
600         int prevRound = fegetround();
601         fesetround(FE_TOWARDZERO);
602         vm[V_ADD_RTZ_RESULT]                    = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
603         vm[V_SUB_RTZ_RESULT]                    = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
604         vm[V_MUL_RTZ_RESULT]                    = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
605         vm[V_DOT_RTZ_RESULT]                    = vm[V_MUL_RTZ_RESULT] + vm[V_MUL_RTZ_RESULT];
606         vm[V_CONV_TO_FP16_RTZ_RESULT]   = vm[V_UNUSED];
607         vm[V_CONV_TO_FP32_RTZ_RESULT]   = vm[V_UNUSED];
608
609         fesetround(FE_TONEAREST);
610         vm[V_ADD_RTE_RESULT]                    = vm[V_ADD_ARG_A] + vm[V_ADD_ARG_B];
611         vm[V_SUB_RTE_RESULT]                    = vm[V_SUB_ARG_A] - vm[V_SUB_ARG_B];
612         vm[V_MUL_RTE_RESULT]                    = vm[V_MUL_ARG_A] * vm[V_MUL_ARG_B];
613         vm[V_DOT_RTE_RESULT]                    = vm[V_MUL_RTE_RESULT] + vm[V_MUL_RTE_RESULT];
614         vm[V_CONV_TO_FP16_RTE_RESULT]   = vm[V_UNUSED];
615         vm[V_CONV_TO_FP32_RTE_RESULT]   = vm[V_UNUSED];
616         fesetround(prevRound);
617
618         vm[V_CONV_DENORM_SMALLER]               = exactByteEquivalent<deUint64>(0x3f0f800000000000); // 0x03f0 is fp16 denorm
619         vm[V_CONV_DENORM_BIGGER]                = exactByteEquivalent<deUint64>(0x373f800000000000); // 0x000003f0 is fp32 denorm
620 }
621
622 // Each float type (fp16, fp32, fp64) has specific set of SPIR-V snippets
623 // that was extracted to separate template specialization. Those snippets
624 // are used to compose final test shaders. With this approach
625 // parameterization can be done just once per type and reused for many tests.
626 class TypeSnippetsBase
627 {
628 public:
629         virtual ~TypeSnippetsBase() = default;
630
631 protected:
632         void updateSpirvSnippets();
633
634 public: // Type specific data:
635
636         // Number of bits consumed by float type
637         string bitWidth;
638
639         // Minimum positive normal
640         string epsilon;
641
642         // denormBase is a normal value (found empirically) used to generate denorm value.
643         // Denorm is generated by substracting epsilon from denormBase.
644         // denormBase is not a denorm - it is used to create denorm.
645         // This value is needed when operations are tested with arguments that were
646         // generated in the code. Generated denorm should be the same as denorm
647         // used when arguments are passed via input (m_valueIdToFloatType[V_DENORM]).
648         // This is required as result of some operations depends on actual denorm value
649         // e.g. OpRadians(0x0001) is 0 but OpRadians(0x03f0) is denorm.
650         string denormBase;
651
652         string capabilities;
653         string extensions;
654         string capabilitiesFp16Without16BitStorage;
655         string extensionsFp16Without16BitStorage;
656         string arrayStride;
657
658         bool loadStoreRequiresShaderFloat16;
659
660 public: // Type specific spir-v snippets:
661
662         // Common annotations
663         string typeAnnotationsSnippet;
664
665         // Definitions of all types commonly used by operation tests
666         string typeDefinitionsSnippet;
667
668         // Definitions of all types commonly used by settings tests
669         string minTypeDefinitionsSnippet;
670
671         // Definitions of all constants commonly used by tests
672         string constantsDefinitionsSnippet;
673
674         // Map that stores instructions that generate arguments of specified value.
675         // Every test that uses generated inputod will select up to two items from this map
676         typedef map<ValueId, string> SnippetMap;
677         SnippetMap valueIdToSnippetArgMap;
678
679         // Spir-v snippets that read argument from SSBO
680         string argumentsFromInputSnippet;
681         string multiArgumentsFromInputSnippet;
682
683         // SSBO with stage input/output definitions
684         string inputAnnotationsSnippet;
685         string inputDefinitionsSnippet;
686         string outputAnnotationsSnippet;
687         string multiOutputAnnotationsSnippet;
688         string outputDefinitionsSnippet;
689         string multiOutputDefinitionsSnippet;
690
691         // Varying is required to pass result from vertex stage to fragment stage,
692         // one of requirements was to not use SSBO writes in vertex stage so we
693         // need to do that in fragment stage; we also cant pass operation result
694         // directly because of interpolation, to avoid it we do a bitcast to uint
695         string varyingsTypesSnippet;
696         string inputVaryingsSnippet;
697         string outputVaryingsSnippet;
698         string storeVertexResultSnippet;
699         string loadVertexResultSnippet;
700
701         string storeResultsSnippet;
702         string multiStoreResultsSnippet;
703
704         string argumentsFromInputFp16Snippet;
705         string storeResultsFp16Snippet;
706         string multiArgumentsFromInputFp16Snippet;
707         string multiOutputAnnotationsFp16Snippet;
708         string multiStoreResultsFp16Snippet;
709         string multiOutputDefinitionsFp16Snippet;
710         string inputDefinitionsFp16Snippet;
711         string outputDefinitionsFp16Snippet;
712         string typeAnnotationsFp16Snippet;
713         string typeDefinitionsFp16Snippet;
714 };
715
716 void TypeSnippetsBase::updateSpirvSnippets()
717 {
718         // annotations to types that are commonly used by tests
719         const string typeAnnotationsTemplate =
720                 "OpDecorate %type_float_arr_1 ArrayStride " + arrayStride + "\n"
721                 "OpDecorate %type_float_arr_2 ArrayStride " + arrayStride + "\n";
722
723         // definition off all types that are commonly used by tests
724         const string typeDefinitionsTemplate =
725                 "%type_float             = OpTypeFloat " + bitWidth + "\n"
726                 "%type_float_uptr        = OpTypePointer Uniform %type_float\n"
727                 "%type_float_fptr        = OpTypePointer Function %type_float\n"
728                 "%type_float_vec2        = OpTypeVector %type_float 2\n"
729                 "%type_float_vec3        = OpTypeVector %type_float 3\n"
730                 "%type_float_vec4        = OpTypeVector %type_float 4\n"
731                 "%type_float_vec4_iptr   = OpTypePointer Input %type_float_vec4\n"
732                 "%type_float_vec4_optr   = OpTypePointer Output %type_float_vec4\n"
733                 "%type_float_mat2x2      = OpTypeMatrix %type_float_vec2 2\n"
734                 "%type_float_arr_1       = OpTypeArray %type_float %c_i32_1\n"
735                 "%type_float_arr_2       = OpTypeArray %type_float %c_i32_2\n";
736
737         // minimal type definition set that is used by settings tests
738         const string minTypeDefinitionsTemplate =
739                 "%type_float             = OpTypeFloat " + bitWidth + "\n"
740                 "%type_float_uptr        = OpTypePointer Uniform %type_float\n"
741                 "%type_float_arr_2       = OpTypeArray %type_float %c_i32_2\n";
742
743         // definition off all constants that are used by tests
744         const string constantsDefinitionsTemplate =
745                 "%c_float_n1             = OpConstant %type_float -1\n"
746                 "%c_float_0              = OpConstant %type_float 0.0\n"
747                 "%c_float_0_5            = OpConstant %type_float 0.5\n"
748                 "%c_float_1              = OpConstant %type_float 1\n"
749                 "%c_float_2              = OpConstant %type_float 2\n"
750                 "%c_float_3              = OpConstant %type_float 3\n"
751                 "%c_float_4              = OpConstant %type_float 4\n"
752                 "%c_float_5              = OpConstant %type_float 5\n"
753                 "%c_float_6              = OpConstant %type_float 6\n"
754                 "%c_float_eps            = OpConstant %type_float " + epsilon + "\n"
755                 "%c_float_denorm_base    = OpConstant %type_float " + denormBase + "\n";
756
757         // when arguments are read from SSBO this snipped is placed in main function
758         const string argumentsFromInputTemplate =
759                 "%arg1loc                = OpAccessChain %type_float_uptr %ssbo_in %c_i32_0 %c_i32_0\n"
760                 "%arg1                   = OpLoad %type_float %arg1loc\n"
761                 "%arg2loc                = OpAccessChain %type_float_uptr %ssbo_in %c_i32_0 %c_i32_1\n"
762                 "%arg2                   = OpLoad %type_float %arg2loc\n";
763
764         const string multiArgumentsFromInputTemplate =
765                 "%arg1_float_loc         = OpAccessChain %type_float_uptr %ssbo_in %c_i32_${attr} %c_i32_0\n"
766                 "%arg2_float_loc         = OpAccessChain %type_float_uptr %ssbo_in %c_i32_${attr} %c_i32_1\n"
767                 "%arg1_float             = OpLoad %type_float %arg1_float_loc\n"
768                 "%arg2_float             = OpLoad %type_float %arg2_float_loc\n";
769
770         // when tested shader stage reads from SSBO it has to have this snippet
771         inputAnnotationsSnippet =
772                 "OpMemberDecorate %SSBO_in 0 Offset 0\n"
773                 "OpDecorate %SSBO_in BufferBlock\n"
774                 "OpDecorate %ssbo_in DescriptorSet 0\n"
775                 "OpDecorate %ssbo_in Binding 0\n"
776                 "OpDecorate %ssbo_in NonWritable\n";
777
778         const string inputDefinitionsTemplate =
779                 "%SSBO_in              = OpTypeStruct %type_float_arr_2\n"
780                 "%up_SSBO_in           = OpTypePointer Uniform %SSBO_in\n"
781                 "%ssbo_in              = OpVariable %up_SSBO_in Uniform\n";
782
783         outputAnnotationsSnippet =
784                 "OpMemberDecorate %SSBO_out 0 Offset 0\n"
785                 "OpDecorate %SSBO_out BufferBlock\n"
786                 "OpDecorate %ssbo_out DescriptorSet 0\n"
787                 "OpDecorate %ssbo_out Binding 1\n";
788
789         const string multiOutputAnnotationsTemplate =
790                 "OpMemberDecorate %SSBO_float_out 0 Offset 0\n"
791                 "OpDecorate %type_float_arr_2 ArrayStride "+ arrayStride + "\n"
792                 "OpDecorate %SSBO_float_out BufferBlock\n"
793                 "OpDecorate %ssbo_float_out DescriptorSet 0\n";
794
795         const string outputDefinitionsTemplate =
796                 "%SSBO_out             = OpTypeStruct %type_float_arr_1\n"
797                 "%up_SSBO_out          = OpTypePointer Uniform %SSBO_out\n"
798                 "%ssbo_out             = OpVariable %up_SSBO_out Uniform\n";
799
800         const string multiOutputDefinitionsTemplate =
801                 "%SSBO_float_out         = OpTypeStruct %type_float\n"
802                 "%up_SSBO_float_out      = OpTypePointer Uniform %SSBO_float_out\n"
803                 "%ssbo_float_out         = OpVariable %up_SSBO_float_out Uniform\n";
804
805         // this snippet is used by compute and fragment stage but not by vertex stage
806         const string storeResultsTemplate =
807                 "%outloc               = OpAccessChain %type_float_uptr %ssbo_out %c_i32_0 %c_i32_0\n"
808                 "OpStore %outloc %result\n";
809
810         const string multiStoreResultsTemplate =
811                 "%outloc" + bitWidth + "             = OpAccessChain %type_float_uptr %ssbo_float_out %c_i32_0\n"
812                 "                        OpStore %outloc" + bitWidth + " %result" + bitWidth + "\n";
813
814         const string typeToken  = "_float";
815         const string typeName   = "_f" + bitWidth;
816
817         typeAnnotationsSnippet                  = replace(typeAnnotationsTemplate, typeToken, typeName);
818         typeDefinitionsSnippet                  = replace(typeDefinitionsTemplate, typeToken, typeName);
819         minTypeDefinitionsSnippet               = replace(minTypeDefinitionsTemplate, typeToken, typeName);
820         constantsDefinitionsSnippet             = replace(constantsDefinitionsTemplate, typeToken, typeName);
821         argumentsFromInputSnippet               = replace(argumentsFromInputTemplate, typeToken, typeName);
822         multiArgumentsFromInputSnippet  = replace(multiArgumentsFromInputTemplate, typeToken, typeName);
823         inputDefinitionsSnippet                 = replace(inputDefinitionsTemplate, typeToken, typeName);
824         multiOutputAnnotationsSnippet   = replace(multiOutputAnnotationsTemplate, typeToken, typeName);
825         outputDefinitionsSnippet                = replace(outputDefinitionsTemplate, typeToken, typeName);
826         multiOutputDefinitionsSnippet   = replace(multiOutputDefinitionsTemplate, typeToken, typeName);
827         storeResultsSnippet                             = replace(storeResultsTemplate, typeToken, typeName);
828         multiStoreResultsSnippet                = replace(multiStoreResultsTemplate, typeToken, typeName);
829
830         argumentsFromInputFp16Snippet           = "";
831         storeResultsFp16Snippet                         = "";
832         multiArgumentsFromInputFp16Snippet      = "";
833         multiOutputAnnotationsFp16Snippet       = "";
834         multiStoreResultsFp16Snippet            = "";
835         multiOutputDefinitionsFp16Snippet       = "";
836         inputDefinitionsFp16Snippet                     = "";
837         typeAnnotationsFp16Snippet                      = "";
838         outputDefinitionsFp16Snippet            = "";
839         typeDefinitionsFp16Snippet                      = "";
840
841         if (bitWidth.compare("16") == 0)
842         {
843                 typeDefinitionsFp16Snippet              =
844                         "%type_u32_uptr       = OpTypePointer Uniform %type_u32\n"
845                         "%type_u32_arr_1      = OpTypeArray %type_u32 %c_i32_1\n";
846
847                 typeAnnotationsFp16Snippet              = "OpDecorate %type_u32_arr_1 ArrayStride 4\n";
848                 const string inputToken                 = "_f16_arr_2";
849                 const string inputName                  = "_u32_arr_1";
850                 inputDefinitionsFp16Snippet             = replace(inputDefinitionsSnippet, inputToken, inputName);
851
852                 argumentsFromInputFp16Snippet   =
853                         "%argloc            = OpAccessChain %type_u32_uptr %ssbo_in %c_i32_0 %c_i32_0\n"
854                         "%inval             = OpLoad %type_u32 %argloc\n"
855                         "%arg               = OpBitcast %type_f16_vec2 %inval\n"
856                         "%arg1              = OpCompositeExtract %type_f16 %arg 0\n"
857                         "%arg2              = OpCompositeExtract %type_f16 %arg 1\n";
858
859                 const string outputToken                = "_f16_arr_1";
860                 const string outputName                 = "_u32_arr_1";
861                 outputDefinitionsFp16Snippet    = replace(outputDefinitionsSnippet, outputToken, outputName);
862
863                 storeResultsFp16Snippet =
864                         "%result_f16_vec2   = OpCompositeConstruct %type_f16_vec2 %result %c_f16_0\n"
865                         "%result_u32            = OpBitcast %type_u32 %result_f16_vec2\n"
866                         "%outloc            = OpAccessChain %type_u32_uptr %ssbo_out %c_i32_0 %c_i32_0\n"
867                         "OpStore %outloc %result_u32\n";
868
869                 multiArgumentsFromInputFp16Snippet      =
870                         "%arg_u32_loc         = OpAccessChain %type_u32_uptr %ssbo_in %c_i32_${attr} %c_i32_0\n"
871                         "%arg_u32             = OpLoad %type_u32 %arg_u32_loc\n"
872                         "%arg_f16_vec2        = OpBitcast %type_f16_vec2 %arg_u32\n"
873                         "%arg1_f16            = OpCompositeExtract %type_f16 %arg_f16_vec2 0\n"
874                         "%arg2_f16            = OpCompositeExtract %type_f16 %arg_f16_vec2 1\n";
875
876                 multiOutputAnnotationsFp16Snippet       =
877                         "OpMemberDecorate %SSBO_u32_out 0 Offset 0\n"
878                         "OpDecorate %type_u32_arr_1 ArrayStride 4\n"
879                         "OpDecorate %SSBO_u32_out BufferBlock\n"
880                         "OpDecorate %ssbo_u32_out DescriptorSet 0\n";
881
882                 multiStoreResultsFp16Snippet            =
883                         "%outloc_u32            = OpAccessChain %type_u32_uptr %ssbo_u32_out %c_i32_0\n"
884                         "%result16_vec2                 = OpCompositeConstruct %type_f16_vec2 %result16 %c_f16_0\n"
885                         "%result_u32            = OpBitcast %type_u32 %result16_vec2\n"
886                         "                        OpStore %outloc_u32 %result_u32\n";
887
888                 multiOutputDefinitionsFp16Snippet       =
889                         "%c_f16_0              = OpConstant %type_f16 0.0\n"
890                         "%SSBO_u32_out         = OpTypeStruct %type_u32\n"
891                         "%up_SSBO_u32_out      = OpTypePointer Uniform %SSBO_u32_out\n"
892                         "%ssbo_u32_out         = OpVariable %up_SSBO_u32_out Uniform\n";
893         }
894
895         // NOTE: only values used as _generated_ arguments in test operations
896         // need to be in this map, arguments that are only used by tests,
897         // that grab arguments from input, do need to be in this map
898         // NOTE: when updating entries in valueIdToSnippetArgMap make
899         // sure to update also m_valueIdToFloatType for all float width
900         SnippetMap& sm = valueIdToSnippetArgMap;
901         sm[V_UNUSED]            = "OpFSub %type_float %c_float_0 %c_float_0\n";
902         sm[V_MINUS_INF]         = "OpFDiv %type_float %c_float_n1 %c_float_0\n";
903         sm[V_MINUS_ONE]         = "OpFAdd %type_float %c_float_n1 %c_float_0\n";
904         sm[V_MINUS_ZERO]        = "OpFMul %type_float %c_float_n1 %c_float_0\n";
905         sm[V_ZERO]                      = "OpFMul %type_float %c_float_0 %c_float_0\n";
906         sm[V_HALF]                      = "OpFAdd %type_float %c_float_0_5 %c_float_0\n";
907         sm[V_ONE]                       = "OpFAdd %type_float %c_float_1 %c_float_0\n";
908         sm[V_INF]                       = "OpFDiv %type_float %c_float_1 %c_float_0\n";                                 // x / 0                == Inf
909         sm[V_DENORM]            = "OpFSub %type_float %c_float_denorm_base %c_float_eps\n";
910         sm[V_NAN]                       = "OpFDiv %type_float %c_float_0 %c_float_0\n";                                 // 0 / 0                == Nan
911
912         map<ValueId, string>::iterator it;
913         for ( it = sm.begin(); it != sm.end(); it++ )
914                 sm[it->first] = replace(it->second, typeToken, typeName);
915 }
916
917 typedef de::SharedPtr<TypeSnippetsBase> TypeSnippetsSP;
918
919 template<typename FLOAT_TYPE>
920 class TypeSnippets: public TypeSnippetsBase
921 {
922 public:
923         TypeSnippets();
924 };
925
926 template<>
927 TypeSnippets<deFloat16>::TypeSnippets()
928 {
929         bitWidth                = "16";
930         epsilon                 = "6.104e-5";   // 2^-14 = 0x0400
931
932         // 1.2113e-4 is 0x07f0 which after substracting epsilon will give 0x03f0 (same as vm[V_DENORM])
933         // NOTE: constants in SPIR-V cant be specified as exact fp16 - there is conversion from double to fp16
934         denormBase              = "1.2113e-4";
935
936         capabilities    = "OpCapability StorageUniform16\n";
937         extensions              = "OpExtension \"SPV_KHR_16bit_storage\"\n";
938
939         capabilitiesFp16Without16BitStorage     = "OpCapability Float16\n";
940         extensionsFp16Without16BitStorage       = "";
941
942         arrayStride             = "2";
943
944         varyingsTypesSnippet =
945                                         "%type_u32_iptr        = OpTypePointer Input %type_u32\n"
946                                         "%type_u32_optr        = OpTypePointer Output %type_u32\n";
947         inputVaryingsSnippet =
948                                         "%BP_vertex_result    = OpVariable %type_u32_iptr Input\n";
949         outputVaryingsSnippet =
950                                         "%BP_vertex_result    = OpVariable %type_u32_optr Output\n";
951         storeVertexResultSnippet =
952                                         "%tmp_vec2            = OpCompositeConstruct %type_f16_vec2 %result %c_f16_0\n"
953                                         "%packed_result       = OpBitcast %type_u32 %tmp_vec2\n"
954                                         "OpStore %BP_vertex_result %packed_result\n";
955         loadVertexResultSnippet =
956                                         "%packed_result       = OpLoad %type_u32 %BP_vertex_result\n"
957                                         "%tmp_vec2            = OpBitcast %type_f16_vec2 %packed_result\n"
958                                         "%result              = OpCompositeExtract %type_f16 %tmp_vec2 0\n";
959
960         loadStoreRequiresShaderFloat16 = true;
961
962         updateSpirvSnippets();
963 }
964
965 template<>
966 TypeSnippets<float>::TypeSnippets()
967 {
968         bitWidth                = "32";
969         epsilon                 = "1.175494351e-38";
970         denormBase              = "1.1756356e-38";
971         capabilities    = "";
972         extensions              = "";
973         capabilitiesFp16Without16BitStorage     = "";
974         extensionsFp16Without16BitStorage       = "";
975         arrayStride             = "4";
976
977         varyingsTypesSnippet =
978                                         "%type_u32_iptr        = OpTypePointer Input %type_u32\n"
979                                         "%type_u32_optr        = OpTypePointer Output %type_u32\n";
980         inputVaryingsSnippet =
981                                         "%BP_vertex_result    = OpVariable %type_u32_iptr Input\n";
982         outputVaryingsSnippet =
983                                         "%BP_vertex_result    = OpVariable %type_u32_optr Output\n";
984         storeVertexResultSnippet =
985                                         "%packed_result       = OpBitcast %type_u32 %result\n"
986                                         "OpStore %BP_vertex_result %packed_result\n";
987         loadVertexResultSnippet =
988                                         "%packed_result       = OpLoad %type_u32 %BP_vertex_result\n"
989                                         "%result              = OpBitcast %type_f32 %packed_result\n";
990
991         loadStoreRequiresShaderFloat16 = false;
992
993         updateSpirvSnippets();
994 }
995
996 template<>
997 TypeSnippets<double>::TypeSnippets()
998 {
999         bitWidth                = "64";
1000         epsilon                 = "2.2250738585072014e-308"; // 0x0010000000000000
1001         denormBase              = "2.2250738585076994e-308"; // 0x00100000000003F0
1002         capabilities    = "OpCapability Float64\n";
1003         extensions              = "";
1004         capabilitiesFp16Without16BitStorage     = "";
1005         extensionsFp16Without16BitStorage       = "";
1006         arrayStride             = "8";
1007
1008         varyingsTypesSnippet =
1009                                         "%type_u32_vec2_iptr   = OpTypePointer Input %type_u32_vec2\n"
1010                                         "%type_u32_vec2_optr   = OpTypePointer Output %type_u32_vec2\n";
1011         inputVaryingsSnippet =
1012                                         "%BP_vertex_result     = OpVariable %type_u32_vec2_iptr Input\n";
1013         outputVaryingsSnippet =
1014                                         "%BP_vertex_result     = OpVariable %type_u32_vec2_optr Output\n";
1015         storeVertexResultSnippet =
1016                                         "%packed_result        = OpBitcast %type_u32_vec2 %result\n"
1017                                         "OpStore %BP_vertex_result %packed_result\n";
1018         loadVertexResultSnippet =
1019                                         "%packed_result        = OpLoad %type_u32_vec2 %BP_vertex_result\n"
1020                                         "%result               = OpBitcast %type_f64 %packed_result\n";
1021
1022         loadStoreRequiresShaderFloat16 = false;
1023
1024         updateSpirvSnippets();
1025 }
1026
1027 class TypeTestResultsBase
1028 {
1029 public:
1030         virtual ~TypeTestResultsBase() {}
1031         FloatType floatType() const;
1032
1033 protected:
1034         FloatType m_floatType;
1035
1036 public:
1037         // Vectors containing test data for float controls
1038         vector<BinaryCase>      binaryOpFTZ;
1039         vector<UnaryCase>       unaryOpFTZ;
1040         vector<BinaryCase>      binaryOpDenormPreserve;
1041         vector<UnaryCase>       unaryOpDenormPreserve;
1042 };
1043
1044 FloatType TypeTestResultsBase::floatType() const
1045 {
1046         return m_floatType;
1047 }
1048
1049 typedef de::SharedPtr<TypeTestResultsBase> TypeTestResultsSP;
1050
1051 template<typename FLOAT_TYPE>
1052 class TypeTestResults: public TypeTestResultsBase
1053 {
1054 public:
1055         TypeTestResults();
1056 };
1057
1058 template<>
1059 TypeTestResults<deFloat16>::TypeTestResults()
1060 {
1061         m_floatType = FP16;
1062
1063         // note: there are many FTZ test cases that can produce diferent result depending
1064         // on input denorm being flushed or not; because of that FTZ tests can be limited
1065         // to those that return denorm as those are the ones affected by tested extension
1066         const BinaryCase binaryOpFTZArr[] = {
1067                 //operation             den op one              den op den              den op inf              den op nan
1068                 { O_ADD,                V_ONE,                  V_ZERO_OR_DENORM_TIMES_TWO,
1069                                                                                                                 V_INF,                  V_UNUSED },
1070                 { O_SUB,                V_MINUS_ONE,    V_ZERO,                 V_MINUS_INF,    V_UNUSED },
1071                 { O_MUL,                V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1072                 { O_DIV,                V_ZERO,                 V_UNUSED,               V_ZERO,                 V_UNUSED },
1073                 { O_REM,                V_ZERO,                 V_UNUSED,               V_UNUSED,               V_UNUSED },
1074                 { O_MOD,                V_ZERO,                 V_UNUSED,               V_UNUSED,               V_UNUSED },
1075                 { O_VEC_MUL_S,  V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1076                 { O_VEC_MUL_M,  V_ZERO_OR_DENORM_TIMES_TWO,
1077                                                                                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1078                 { O_MAT_MUL_S,  V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1079                 { O_MAT_MUL_V,  V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1080                 { O_MAT_MUL_M,  V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1081                 { O_OUT_PROD,   V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1082                 { O_DOT,                V_ZERO_OR_DENORM_TIMES_TWO,
1083                                                                                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1084                 { O_ATAN2,              V_ZERO,                 V_UNUSED,               V_ZERO,                 V_UNUSED },
1085                 { O_POW,                V_ZERO,                 V_UNUSED,               V_ZERO,                 V_UNUSED },
1086                 { O_MIX,                V_HALF,                 V_ZERO,                 V_INF,                  V_UNUSED },
1087                 { O_MIN,                V_ZERO,                 V_ZERO,                 V_ZERO,                 V_UNUSED },
1088                 { O_MAX,                V_ONE,                  V_ZERO,                 V_INF,                  V_UNUSED },
1089                 { O_CLAMP,              V_ONE,                  V_ZERO,                 V_INF,                  V_UNUSED },
1090                 { O_STEP,               V_ONE,                  V_ONE,                  V_ONE,                  V_UNUSED },
1091                 { O_SSTEP,              V_HALF,                 V_ONE,                  V_ZERO,                 V_UNUSED },
1092                 { O_FMA,                V_HALF,                 V_HALF,                 V_UNUSED,               V_UNUSED },
1093                 { O_FACE_FWD,   V_MINUS_ONE,    V_MINUS_ONE,    V_MINUS_ONE,    V_MINUS_ONE },
1094                 { O_NMIN,               V_ZERO,                 V_ZERO,                 V_ZERO,                 V_ZERO },
1095                 { O_NMAX,               V_ONE,                  V_ZERO,                 V_INF,                  V_ZERO },
1096                 { O_NCLAMP,             V_ONE,                  V_ZERO,                 V_INF,                  V_ZERO },
1097                 { O_DIST,               V_ONE,                  V_ZERO,                 V_INF,                  V_UNUSED },
1098                 { O_CROSS,              V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1099         };
1100
1101         const UnaryCase unaryOpFTZArr[] = {
1102                 //operation                     op den
1103                 { O_NEGATE,                     V_MINUS_ZERO },
1104                 { O_ROUND,                      V_ZERO },
1105                 { O_ROUND_EV,           V_ZERO },
1106                 { O_TRUNC,                      V_ZERO },
1107                 { O_ABS,                        V_ZERO },
1108                 { O_FLOOR,                      V_ZERO },
1109                 { O_CEIL,                       V_ZERO_OR_ONE },
1110                 { O_FRACT,                      V_ZERO },
1111                 { O_RADIANS,            V_ZERO },
1112                 { O_DEGREES,            V_ZERO },
1113                 { O_SIN,                        V_ZERO },
1114                 { O_COS,                        V_TRIG_ONE },
1115                 { O_TAN,                        V_ZERO },
1116                 { O_ASIN,                       V_ZERO },
1117                 { O_ACOS,                       V_PI_DIV_2 },
1118                 { O_ATAN,                       V_ZERO },
1119                 { O_SINH,                       V_ZERO },
1120                 { O_COSH,                       V_ONE },
1121                 { O_TANH,                       V_ZERO },
1122                 { O_ASINH,                      V_ZERO },
1123                 { O_ACOSH,                      V_UNUSED },
1124                 { O_ATANH,                      V_ZERO },
1125                 { O_EXP,                        V_ONE },
1126                 { O_LOG,                        V_MINUS_INF_OR_LOG_DENORM },
1127                 { O_EXP2,                       V_ONE },
1128                 { O_LOG2,                       V_MINUS_INF_OR_LOG2_DENORM },
1129                 { O_SQRT,                       V_ZERO_OR_SQRT_DENORM },
1130                 { O_INV_SQRT,           V_INF_OR_INV_SQRT_DENORM },
1131                 { O_MAT_DET,            V_ZERO },
1132                 { O_MAT_INV,            V_ZERO_OR_MINUS_ZERO },
1133                 { O_MODF,                       V_ZERO },
1134                 { O_MODF_ST,            V_ZERO },
1135                 { O_NORMALIZE,          V_ZERO },
1136                 { O_REFLECT,            V_ZERO },
1137                 { O_REFRACT,            V_ZERO },
1138                 { O_LENGHT,                     V_ZERO },
1139         };
1140
1141         const BinaryCase binaryOpDenormPreserveArr[] = {
1142                 //operation                     den op one                              den op den                              den op inf              den op nan
1143                 { O_PHI,                        V_DENORM,                               V_DENORM,                               V_DENORM,               V_DENORM },
1144                 { O_SELECT,                     V_DENORM,                               V_DENORM,                               V_DENORM,               V_DENORM },
1145                 { O_ADD,                        V_ONE,                                  V_DENORM_TIMES_TWO,             V_INF,                  V_NAN },
1146                 { O_SUB,                        V_MINUS_ONE_OR_CLOSE,   V_ZERO,                                 V_MINUS_INF,    V_NAN },
1147                 { O_MUL,                        V_DENORM,                               V_ZERO,                                 V_INF,                  V_NAN },
1148                 { O_VEC_MUL_S,          V_DENORM,                               V_ZERO,                                 V_INF,                  V_NAN },
1149                 { O_VEC_MUL_M,          V_DENORM_TIMES_TWO,             V_ZERO,                                 V_INF,                  V_NAN },
1150                 { O_MAT_MUL_S,          V_DENORM,                               V_ZERO,                                 V_INF,                  V_NAN },
1151                 { O_MAT_MUL_V,          V_DENORM_TIMES_TWO,             V_ZERO,                                 V_INF,                  V_NAN },
1152                 { O_MAT_MUL_M,          V_DENORM_TIMES_TWO,             V_ZERO,                                 V_INF,                  V_NAN },
1153                 { O_OUT_PROD,           V_DENORM,                               V_ZERO,                                 V_INF,                  V_NAN },
1154                 { O_DOT,                        V_DENORM_TIMES_TWO,             V_ZERO,                                 V_INF,                  V_NAN },
1155                 { O_MIX,                        V_HALF,                                 V_DENORM,                               V_INF,                  V_NAN },
1156                 { O_FMA,                        V_HALF,                                 V_HALF,                                 V_INF,                  V_NAN },
1157                 { O_MIN,                        V_DENORM,                               V_DENORM,                               V_DENORM,               V_UNUSED },
1158                 { O_MAX,                        V_ONE,                                  V_DENORM,                               V_INF,                  V_UNUSED },
1159                 { O_CLAMP,                      V_ONE,                                  V_DENORM,                               V_INF,                  V_UNUSED },
1160                 { O_NMIN,                       V_DENORM,                               V_DENORM,                               V_DENORM,               V_DENORM },
1161                 { O_NMAX,                       V_ONE,                                  V_DENORM,                               V_INF,                  V_DENORM },
1162                 { O_NCLAMP,                     V_ONE,                                  V_DENORM,                               V_INF,                  V_DENORM },
1163         };
1164
1165         const UnaryCase unaryOpDenormPreserveArr[] = {
1166                 //operation                     op den
1167                 { O_RETURN_VAL,         V_DENORM },
1168                 { O_D_EXTRACT,          V_DENORM },
1169                 { O_D_INSERT,           V_DENORM },
1170                 { O_SHUFFLE,            V_DENORM },
1171                 { O_COMPOSITE,          V_DENORM },
1172                 { O_COMPOSITE_INS,      V_DENORM },
1173                 { O_COPY,                       V_DENORM },
1174                 { O_TRANSPOSE,          V_DENORM },
1175                 { O_NEGATE,                     V_DENORM },
1176                 { O_ABS,                        V_DENORM },
1177                 { O_SIGN,                       V_ONE },
1178                 { O_RADIANS,            V_DENORM },
1179                 { O_DEGREES,            V_DEGREES_DENORM },
1180         };
1181
1182         binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1183                                            binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1184         unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1185                                           unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1186         binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1187                                                                   binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1188         unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1189                                                                  unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1190 }
1191
// Constructor specialization for 32-bit floats: sets m_floatType to FP32 and
// fills the four expected-result containers (binaryOpFTZ, unaryOpFTZ,
// binaryOpDenormPreserve, unaryOpDenormPreserve) from the local tables below.
1192 template<>
1193 TypeTestResults<float>::TypeTestResults()
1194 {
1195         m_floatType = FP32;
1196
             // Expected results of binary operations for the flush-to-zero tables.
             // Each row holds the operation id followed by one result id per operand
             // pairing named in the column header.
             // NOTE(review): V_UNUSED presumably marks pairings for which no test case
             // is generated - confirm against the code that consumes these tables.
1197         const BinaryCase binaryOpFTZArr[] = {
1198                 //operation             den op one              den op den              den op inf              den op nan
1199                 { O_ADD,                V_ONE,                  V_ZERO,                 V_INF,                  V_UNUSED },
1200                 { O_SUB,                V_MINUS_ONE,    V_ZERO,                 V_MINUS_INF,    V_UNUSED },
1201                 { O_MUL,                V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1202                 { O_DIV,                V_ZERO,                 V_UNUSED,               V_ZERO,                 V_UNUSED },
1203                 { O_REM,                V_ZERO,                 V_UNUSED,               V_UNUSED,               V_UNUSED },
1204                 { O_MOD,                V_ZERO,                 V_UNUSED,               V_UNUSED,               V_UNUSED },
1205                 { O_VEC_MUL_S,  V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1206                 { O_VEC_MUL_M,  V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1207                 { O_MAT_MUL_S,  V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1208                 { O_MAT_MUL_V,  V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1209                 { O_MAT_MUL_M,  V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1210                 { O_OUT_PROD,   V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1211                 { O_DOT,                V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1212                 { O_ATAN2,              V_ZERO,                 V_UNUSED,               V_ZERO,                 V_UNUSED },
1213                 { O_POW,                V_ZERO,                 V_UNUSED,               V_ZERO,                 V_UNUSED },
1214                 { O_MIX,                V_HALF,                 V_ZERO,                 V_INF,                  V_UNUSED },
1215                 { O_MIN,                V_ZERO,                 V_ZERO,                 V_ZERO,                 V_UNUSED },
1216                 { O_MAX,                V_ONE,                  V_ZERO,                 V_INF,                  V_UNUSED },
1217                 { O_CLAMP,              V_ONE,                  V_ZERO,                 V_INF,                  V_UNUSED },
1218                 { O_STEP,               V_ONE,                  V_ONE,                  V_ONE,                  V_UNUSED },
1219                 { O_SSTEP,              V_HALF,                 V_ONE,                  V_ZERO,                 V_UNUSED },
1220                 { O_FMA,                V_HALF,                 V_HALF,                 V_UNUSED,               V_UNUSED },
1221                 { O_FACE_FWD,   V_MINUS_ONE,    V_MINUS_ONE,    V_MINUS_ONE,    V_MINUS_ONE },
1222                 { O_NMIN,               V_ZERO,                 V_ZERO,                 V_ZERO,                 V_ZERO },
1223                 { O_NMAX,               V_ONE,                  V_ZERO,                 V_INF,                  V_ZERO },
1224                 { O_NCLAMP,             V_ONE,                  V_ZERO,                 V_INF,                  V_ZERO },
1225                 { O_DIST,               V_ONE,                  V_ZERO,                 V_INF,                  V_UNUSED },
1226                 { O_CROSS,              V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1227         };
1228
             // Expected results of unary operations applied to a denorm for the
             // flush-to-zero tables; one result id per operation.
1229         const UnaryCase unaryOpFTZArr[] = {
1230                 //operation                     op den
1231                 { O_NEGATE,                     V_MINUS_ZERO },
1232                 { O_ROUND,                      V_ZERO },
1233                 { O_ROUND_EV,           V_ZERO },
1234                 { O_TRUNC,                      V_ZERO },
1235                 { O_ABS,                        V_ZERO },
1236                 { O_FLOOR,                      V_ZERO },
1237                 { O_CEIL,                       V_ZERO_OR_ONE },
1238                 { O_FRACT,                      V_ZERO },
1239                 { O_RADIANS,            V_ZERO },
1240                 { O_DEGREES,            V_ZERO },
1241                 { O_SIN,                        V_ZERO },
1242                 { O_COS,                        V_TRIG_ONE },
1243                 { O_TAN,                        V_ZERO },
1244                 { O_ASIN,                       V_ZERO },
1245                 { O_ACOS,                       V_PI_DIV_2 },
1246                 { O_ATAN,                       V_ZERO },
1247                 { O_SINH,                       V_ZERO },
1248                 { O_COSH,                       V_ONE },
1249                 { O_TANH,                       V_ZERO },
1250                 { O_ASINH,                      V_ZERO },
1251                 { O_ACOSH,                      V_UNUSED },
1252                 { O_ATANH,                      V_ZERO },
1253                 { O_EXP,                        V_ONE },
1254                 { O_LOG,                        V_MINUS_INF_OR_LOG_DENORM },
1255                 { O_EXP2,                       V_ONE },
1256                 { O_LOG2,                       V_MINUS_INF_OR_LOG2_DENORM },
1257                 { O_SQRT,                       V_ZERO_OR_SQRT_DENORM },
1258                 { O_INV_SQRT,           V_INF_OR_INV_SQRT_DENORM },
1259                 { O_MAT_DET,            V_ZERO },
1260                 { O_MAT_INV,            V_ZERO_OR_MINUS_ZERO },
1261                 { O_MODF,                       V_ZERO },
1262                 { O_MODF_ST,            V_ZERO },
1263                 { O_NORMALIZE,          V_ZERO },
1264                 { O_REFLECT,            V_ZERO },
1265                 { O_REFRACT,            V_ZERO },
1266                 { O_LENGHT,                     V_ZERO },
1267         };
1268
             // Expected results of binary operations for the denorm-preserve tables
             // (same row layout as binaryOpFTZArr).
1269         const BinaryCase binaryOpDenormPreserveArr[] = {
1270                 //operation                     den op one                      den op den                              den op inf              den op nan
1271                 { O_PHI,                        V_DENORM,                       V_DENORM,                               V_DENORM,               V_DENORM },
1272                 { O_SELECT,                     V_DENORM,                       V_DENORM,                               V_DENORM,               V_DENORM },
1273                 { O_ADD,                        V_ONE,                          V_DENORM_TIMES_TWO,             V_INF,                  V_NAN },
1274                 { O_SUB,                        V_MINUS_ONE,            V_ZERO,                                 V_MINUS_INF,    V_NAN },
1275                 { O_MUL,                        V_DENORM,                       V_ZERO,                                 V_INF,                  V_NAN },
1276                 { O_VEC_MUL_S,          V_DENORM,                       V_ZERO,                                 V_INF,                  V_NAN },
1277                 { O_VEC_MUL_M,          V_DENORM,                       V_ZERO,                                 V_INF,                  V_NAN },
1278                 { O_MAT_MUL_S,          V_DENORM,                       V_ZERO,                                 V_INF,                  V_NAN },
1279                 { O_MAT_MUL_V,          V_DENORM,                       V_ZERO,                                 V_INF,                  V_NAN },
1280                 { O_MAT_MUL_M,          V_DENORM,                       V_ZERO,                                 V_INF,                  V_NAN },
1281                 { O_OUT_PROD,           V_DENORM,                       V_ZERO,                                 V_INF,                  V_NAN },
1282                 { O_DOT,                        V_DENORM_TIMES_TWO,     V_ZERO,                                 V_INF,                  V_NAN },
1283                 { O_MIX,                        V_HALF,                         V_DENORM,                               V_INF,                  V_NAN },
1284                 { O_FMA,                        V_HALF,                         V_HALF,                                 V_INF,                  V_NAN },
1285                 { O_MIN,                        V_DENORM,                       V_DENORM,                               V_DENORM,               V_UNUSED },
1286                 { O_MAX,                        V_ONE,                          V_DENORM,                               V_INF,                  V_UNUSED },
1287                 { O_CLAMP,                      V_ONE,                          V_DENORM,                               V_INF,                  V_UNUSED },
1288                 { O_NMIN,                       V_DENORM,                       V_DENORM,                               V_DENORM,               V_DENORM },
1289                 { O_NMAX,                       V_ONE,                          V_DENORM,                               V_INF,                  V_DENORM },
1290                 { O_NCLAMP,                     V_ONE,                          V_DENORM,                               V_INF,                  V_DENORM },
1291         };
1292
             // Expected results of unary operations applied to a denorm for the
             // denorm-preserve tables.
1293         const UnaryCase unaryOpDenormPreserveArr[] = {
1294                 //operation                     op den
1295                 { O_RETURN_VAL,         V_DENORM },
1296                 { O_D_EXTRACT,          V_DENORM },
1297                 { O_D_INSERT,           V_DENORM },
1298                 { O_SHUFFLE,            V_DENORM },
1299                 { O_COMPOSITE,          V_DENORM },
1300                 { O_COMPOSITE_INS,      V_DENORM },
1301                 { O_COPY,                       V_DENORM },
1302                 { O_TRANSPOSE,          V_DENORM },
1303                 { O_NEGATE,                     V_DENORM },
1304                 { O_ABS,                        V_DENORM },
1305                 { O_SIGN,                       V_ONE },
1306                 { O_RADIANS,            V_DENORM },
1307                 { O_DEGREES,            V_DEGREES_DENORM },
1308         };
1309
             // Copy every local table into the matching member container; the rows are
             // inserted at begin() in array order.
1310         binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1311                                            binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1312         unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1313                                           unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1314         binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1315                                                                   binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1316         unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1317                                                                  unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1318 }
1319
// Constructor specialization for 64-bit floats: sets m_floatType to FP64 and
// fills the four expected-result containers (binaryOpFTZ, unaryOpFTZ,
// binaryOpDenormPreserve, unaryOpDenormPreserve) from the local tables below.
1320 template<>
1321 TypeTestResults<double>::TypeTestResults()
1322 {
1323         m_floatType = FP64;
1324
1325         // fp64 is supported by fewer operations than fp16 and fp32
1326         // e.g. Radians and Degrees functions are not supported
             // Compared with the fp32 tables, the tables below have no rows for
             // O_ATAN2 and O_POW, for the trigonometric/hyperbolic/exp/log unary
             // operations, for O_REFRACT, or for O_RADIANS/O_DEGREES.
             // Each row holds the operation id followed by one result id per operand
             // pairing named in the column header.
1327         const BinaryCase binaryOpFTZArr[] = {
1328                 //operation             den op one              den op den              den op inf              den op nan
1329                 { O_ADD,                V_ONE,                  V_ZERO,                 V_INF,                  V_UNUSED },
1330                 { O_SUB,                V_MINUS_ONE,    V_ZERO,                 V_MINUS_INF,    V_UNUSED },
1331                 { O_MUL,                V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1332                 { O_DIV,                V_ZERO,                 V_UNUSED,               V_ZERO,                 V_UNUSED },
1333                 { O_REM,                V_ZERO,                 V_UNUSED,               V_UNUSED,               V_UNUSED },
1334                 { O_MOD,                V_ZERO,                 V_UNUSED,               V_UNUSED,               V_UNUSED },
1335                 { O_VEC_MUL_S,  V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1336                 { O_VEC_MUL_M,  V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1337                 { O_MAT_MUL_S,  V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1338                 { O_MAT_MUL_V,  V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1339                 { O_MAT_MUL_M,  V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1340                 { O_OUT_PROD,   V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1341                 { O_DOT,                V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1342                 { O_MIX,                V_HALF,                 V_ZERO,                 V_INF,                  V_UNUSED },
1343                 { O_MIN,                V_ZERO,                 V_ZERO,                 V_ZERO,                 V_UNUSED },
1344                 { O_MAX,                V_ONE,                  V_ZERO,                 V_INF,                  V_UNUSED },
1345                 { O_CLAMP,              V_ONE,                  V_ZERO,                 V_INF,                  V_UNUSED },
1346                 { O_STEP,               V_ONE,                  V_ONE,                  V_ONE,                  V_UNUSED },
1347                 { O_SSTEP,              V_HALF,                 V_ONE,                  V_ZERO,                 V_UNUSED },
1348                 { O_FMA,                V_HALF,                 V_HALF,                 V_UNUSED,               V_UNUSED },
1349                 { O_FACE_FWD,   V_MINUS_ONE,    V_MINUS_ONE,    V_MINUS_ONE,    V_MINUS_ONE },
1350                 { O_NMIN,               V_ZERO,                 V_ZERO,                 V_ZERO,                 V_ZERO },
1351                 { O_NMAX,               V_ONE,                  V_ZERO,                 V_INF,                  V_ZERO },
1352                 { O_NCLAMP,             V_ONE,                  V_ZERO,                 V_INF,                  V_ZERO },
1353                 { O_DIST,               V_ONE,                  V_ZERO,                 V_INF,                  V_UNUSED },
1354                 { O_CROSS,              V_ZERO,                 V_ZERO,                 V_UNUSED,               V_UNUSED },
1355         };
1356
             // Expected results of unary operations applied to a denorm for the
             // flush-to-zero tables; one result id per operation.
1357         const UnaryCase unaryOpFTZArr[] = {
1358                 //operation                     op den
1359                 { O_NEGATE,                     V_MINUS_ZERO },
1360                 { O_ROUND,                      V_ZERO },
1361                 { O_ROUND_EV,           V_ZERO },
1362                 { O_TRUNC,                      V_ZERO },
1363                 { O_ABS,                        V_ZERO },
1364                 { O_FLOOR,                      V_ZERO },
1365                 { O_CEIL,                       V_ZERO_OR_ONE },
1366                 { O_FRACT,                      V_ZERO },
1367                 { O_SQRT,                       V_ZERO_OR_SQRT_DENORM },
1368                 { O_INV_SQRT,           V_INF_OR_INV_SQRT_DENORM },
1369                 { O_MAT_DET,            V_ZERO },
1370                 { O_MAT_INV,            V_ZERO_OR_MINUS_ZERO },
1371                 { O_MODF,                       V_ZERO },
1372                 { O_MODF_ST,            V_ZERO },
1373                 { O_NORMALIZE,          V_ZERO },
1374                 { O_REFLECT,            V_ZERO },
1375                 { O_LENGHT,                     V_ZERO },
1376         };
1377
             // Expected results of binary operations for the denorm-preserve tables.
             // NOTE(review): the "den op one" result for O_VEC_MUL_M, O_MAT_MUL_V and
             // O_MAT_MUL_M is V_DENORM_TIMES_TWO here, whereas the fp32 table in this
             // file uses V_DENORM for the same rows - confirm the difference is
             // intentional.
1378         const BinaryCase binaryOpDenormPreserveArr[] = {
1379                 //operation                     den op one                      den op den                              den op inf              den op nan
1380                 { O_PHI,                        V_DENORM,                       V_DENORM,                               V_DENORM,               V_DENORM },
1381                 { O_SELECT,                     V_DENORM,                       V_DENORM,                               V_DENORM,               V_DENORM },
1382                 { O_ADD,                        V_ONE,                          V_DENORM_TIMES_TWO,             V_INF,                  V_NAN },
1383                 { O_SUB,                        V_MINUS_ONE,            V_ZERO,                                 V_MINUS_INF,    V_NAN },
1384                 { O_MUL,                        V_DENORM,                       V_ZERO,                                 V_INF,                  V_NAN },
1385                 { O_VEC_MUL_S,          V_DENORM,                       V_ZERO,                                 V_INF,                  V_NAN },
1386                 { O_VEC_MUL_M,          V_DENORM_TIMES_TWO,     V_ZERO,                                 V_INF,                  V_NAN },
1387                 { O_MAT_MUL_S,          V_DENORM,                       V_ZERO,                                 V_INF,                  V_NAN },
1388                 { O_MAT_MUL_V,          V_DENORM_TIMES_TWO,     V_ZERO,                                 V_INF,                  V_NAN },
1389                 { O_MAT_MUL_M,          V_DENORM_TIMES_TWO,     V_ZERO,                                 V_INF,                  V_NAN },
1390                 { O_OUT_PROD,           V_DENORM,                       V_ZERO,                                 V_INF,                  V_NAN },
1391                 { O_DOT,                        V_DENORM_TIMES_TWO,     V_ZERO,                                 V_INF,                  V_NAN },
1392                 { O_MIX,                        V_HALF,                         V_DENORM,                               V_INF,                  V_NAN },
1393                 { O_FMA,                        V_HALF,                         V_HALF,                                 V_INF,                  V_NAN },
1394                 { O_MIN,                        V_DENORM,                       V_DENORM,                               V_DENORM,               V_UNUSED },
1395                 { O_MAX,                        V_ONE,                          V_DENORM,                               V_INF,                  V_UNUSED },
1396                 { O_CLAMP,                      V_ONE,                          V_DENORM,                               V_INF,                  V_UNUSED },
1397                 { O_NMIN,                       V_DENORM,                       V_DENORM,                               V_DENORM,               V_DENORM },
1398                 { O_NMAX,                       V_ONE,                          V_DENORM,                               V_INF,                  V_DENORM },
1399                 { O_NCLAMP,                     V_ONE,                          V_DENORM,                               V_INF,                  V_DENORM },
1400         };
1401
             // Expected results of unary operations applied to a denorm for the
             // denorm-preserve tables.
1402         const UnaryCase unaryOpDenormPreserveArr[] = {
1403                 //operation                     op den
1404                 { O_RETURN_VAL,         V_DENORM },
1405                 { O_D_EXTRACT,          V_DENORM },
1406                 { O_D_INSERT,           V_DENORM },
1407                 { O_SHUFFLE,            V_DENORM },
1408                 { O_COMPOSITE,          V_DENORM },
1409                 { O_COMPOSITE_INS,      V_DENORM },
1410                 { O_COPY,                       V_DENORM },
1411                 { O_TRANSPOSE,          V_DENORM },
1412                 { O_NEGATE,                     V_DENORM },
1413                 { O_ABS,                        V_DENORM },
1414                 { O_SIGN,                       V_ONE },
1415         };
1416
             // Copy every local table into the matching member container; the rows are
             // inserted at begin() in array order.
1417         binaryOpFTZ.insert(binaryOpFTZ.begin(), binaryOpFTZArr,
1418                                            binaryOpFTZArr + DE_LENGTH_OF_ARRAY(binaryOpFTZArr));
1419         unaryOpFTZ.insert(unaryOpFTZ.begin(), unaryOpFTZArr,
1420                                           unaryOpFTZArr + DE_LENGTH_OF_ARRAY(unaryOpFTZArr));
1421         binaryOpDenormPreserve.insert(binaryOpDenormPreserve.begin(), binaryOpDenormPreserveArr,
1422                                                                   binaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(binaryOpDenormPreserveArr));
1423         unaryOpDenormPreserve.insert(unaryOpDenormPreserve.begin(), unaryOpDenormPreserveArr,
1424                                                                  unaryOpDenormPreserveArr + DE_LENGTH_OF_ARRAY(unaryOpDenormPreserveArr));
1425 }
1426
1427 // Operation structure holds data needed to test specified SPIR-V operation. This class contains
1428 // additional annotations, additional types and aditional constants that should be properly included
1429 // in SPIR-V code. Commands attribute in this structure contains code that performs tested operation
1430 // on given arguments, in some cases verification is also performed there.
1431 // All snipets stroed in this structure are generic and can be specialized for fp16, fp32 or fp64,
1432 // thanks to that this data can be shared by many OperationTestCase instances (testing diferent
1433 // float behaviours on diferent float widths).
1434 struct Operation
1435 {
1436         // operation name is included in test case name
1437         const char*     name;
1438
1439         // How extensively is the floating point type used?
1440         FloatUsage floatUsage;
1441
1442         // operation specific spir-v snippets that will be
1443         // placed in proper places in final test shader
1444         const char*     annotations;
1445         const char*     types;
1446         const char*     constants;
1447         const char*     variables;
1448         const char*     functions;
1449         const char*     commands;
1450
1451         // conversion operations operate on one float type and produce float
1452         // type with different bit width; restrictedInputType is used only when
1453         // isInputTypeRestricted is set to true and it restricts usage of this
1454         // operation to specified input type
1455         bool            isInputTypeRestricted;
1456         FloatType       restrictedInputType;
1457
1458         // arguments for OpSpecConstant need to be specified also as constant
1459         bool            isSpecConstant;
1460
1461         // set if c_float* constant is used in operation
1462         FloatStatementUsageFlags        statementUsageFlags;
1463
1464         Operation()             {}
1465
1466         // Minimal constructor - used by most of operations
1467         Operation(const char* _name, FloatUsage _floatUsage, const char* _commands, const FloatStatementUsageFlags _statementUsageFlags = 0)
1468                 : name(_name)
1469                 , floatUsage(_floatUsage)
1470                 , annotations("")
1471                 , types("")
1472                 , constants("")
1473                 , variables("")
1474                 , functions("")
1475                 , commands(_commands)
1476                 , isInputTypeRestricted(false)
1477                 , restrictedInputType(FP16)             // not used as isInputTypeRestricted is false
1478                 , isSpecConstant(false)
1479                 , statementUsageFlags(_statementUsageFlags)
1480         {}
1481
1482         // Conversion operations constructor (used also by conversions done in SpecConstantOp)
1483         Operation(const char* _name,
1484                           FloatUsage _floatUsage,
1485                           bool specConstant,
1486                           FloatType _inputType,
1487                           const char* _constants,
1488                           const char* _commands,
1489                           const FloatStatementUsageFlags _statementUsageFlags = 0)
1490                 : name(_name)
1491                 , floatUsage(_floatUsage)
1492                 , annotations("")
1493                 , types("")
1494                 , constants(_constants)
1495                 , variables("")
1496                 , functions("")
1497                 , commands(_commands)
1498                 , isInputTypeRestricted(true)
1499                 , restrictedInputType(_inputType)
1500                 , isSpecConstant(specConstant)
1501                 , statementUsageFlags(_statementUsageFlags)
1502         {}
1503
1504         // Full constructor - used by few operations, that are more complex to test
1505         Operation(const char* _name,
1506                           FloatUsage _floatUsage,
1507                           const char* _annotations,
1508                           const char* _types,
1509                           const char* _constants,
1510                           const char* _variables,
1511                           const char* _functions,
1512                           const char* _commands,
1513                           const FloatStatementUsageFlags _statementUsageFlags = 0)
1514                 : name(_name)
1515                 , floatUsage(_floatUsage)
1516                 , annotations(_annotations)
1517                 , types(_types)
1518                 , constants(_constants)
1519                 , variables(_variables)
1520                 , functions(_functions)
1521                 , commands(_commands)
1522                 , isInputTypeRestricted(false)
1523                 , restrictedInputType(FP16)             // not used as isInputTypeRestricted is false
1524                 , isSpecConstant(false)
1525                 , statementUsageFlags(_statementUsageFlags)
1526         {}
1527
1528         // Full constructor - used by rounding override cases
1529         Operation(const char* _name,
1530                           FloatUsage _floatUsage,
1531                           FloatType _inputType,
1532                           const char* _annotations,
1533                           const char* _types,
1534                           const char* _constants,
1535                           const char* _commands,
1536                           const FloatStatementUsageFlags _statementUsageFlags = 0)
1537                 : name(_name)
1538                 , floatUsage(_floatUsage)
1539                 , annotations(_annotations)
1540                 , types(_types)
1541                 , constants(_constants)
1542                 , variables("")
1543                 , functions("")
1544                 , commands(_commands)
1545                 , isInputTypeRestricted(true)
1546                 , restrictedInputType(_inputType)
1547                 , isSpecConstant(false)
1548                 , statementUsageFlags(_statementUsageFlags)
1549         {}
1550 };
1551
1552 // Class storing input that will be passed to operation and expected
1553 // output that should be generated for specified behaviour.
1554 class OperationTestCase
1555 {
1556 public:
1557
1558         OperationTestCase()             {}
1559
1560         OperationTestCase(const char*   _baseName,
1561                                           BehaviorFlags _behaviorFlags,
1562                                           OperationId   _operatinId,
1563                                           ValueId               _input1,
1564                                           ValueId               _input2,
1565                                           ValueId               _expectedOutput,
1566                                           deBool                _fp16Without16BitStorage = DE_FALSE)
1567                 : baseName(_baseName)
1568                 , behaviorFlags(_behaviorFlags)
1569                 , operationId(_operatinId)
1570                 , expectedOutput(_expectedOutput)
1571                 , fp16Without16BitStorage(_fp16Without16BitStorage)
1572         {
1573                 input[0] = _input1;
1574                 input[1] = _input2;
1575         }
1576
1577 public:
1578
1579         string                                  baseName;
1580         BehaviorFlags                   behaviorFlags;
1581         OperationId                             operationId;
1582         ValueId                                 input[2];
1583         ValueId                                 expectedOutput;
1584         deBool                                  fp16Without16BitStorage;
1585 };
1586
1587 // Helper structure used to store specialized operation
1588 // data. This data is ready to be used during shader assembly.
     // NOTE(review): no constructor is declared here, so inFloatType and
     // argumentsUsesFloatConstant are left uninitialized when an instance is
     // default-initialized - verify that every producer assigns them.
1589 struct SpecializedOperation
1590 {
             // SPIR-V text snippets held as strings; presumably the specialized
             // counterparts of the snippets stored in Operation - confirm at the
             // place where this structure is filled.
1591         string constants;
1592         string annotations;
1593         string types;
1594         string arguments;
1595         string variables;
1596         string functions;
1597         string commands;
1598
             // Float type of the operation input and the type snippet sets for
             // the input and output types (names suggest this; confirm with the
             // code that fills them).
1599         FloatType                                       inFloatType;
1600         TypeSnippetsSP                          inTypeSnippets;
1601         TypeSnippetsSP                          outTypeSnippets;
             // Usage flags of the same type as Operation::statementUsageFlags.
1602         FloatStatementUsageFlags        argumentsUsesFloatConstant;
1603 };
1604
1605 // Class responsible for constructing list of test cases for specified
1606 // float type and specified way of preparation of arguments.
1607 // Arguments can be either read from input SSBO or generated via math
1608 // operations in spir-v code.
1609 class TestCasesBuilder
1610 {
1611 public:
1612
             // Fills m_operations with the generic operation definitions.
1613         void init();
             // Appends test cases to testCases using the expected results stored in
             // typeTestResults; argumentsFromInput selects how arguments are prepared
             // (see the class comment).
1614         void build(vector<OperationTestCase>& testCases, TypeTestResultsSP typeTestResults, bool argumentsFromInput);
             // Returns the operation definition registered under the given id.
             // NOTE(review): behaviour for an id that was never registered is not
             // visible in this declaration - check the definition.
1615         const Operation& getOperation(OperationId id) const;
1616
1617 private:
1618
             // Helper that adds the test cases for one unary operation, given the
             // result ids expected with denorms preserved and with denorms flushed
             // to zero.
1619         void createUnaryTestCases(vector<OperationTestCase>& testCases,
1620                                                           OperationId operationId,
1621                                                           ValueId denormPreserveResult,
1622                                                           ValueId denormFTZResult,
1623                                                           deBool fp16WithoutStorage = DE_FALSE) const;
1624
1625 private:
1626
1627         // Operations are shared between test cases so they are
1628         // passed to them as pointers to data stored in TestCasesBuilder.
             // NOTE(review): getOperation() above hands the data out by const
             // reference rather than by pointer - the wording may be outdated.
1629         typedef OperationTestCase OTC;
1630         typedef Operation Op;
             // Operation definitions keyed by operation id (stored as int).
1631         map<int, Op> m_operations;
1632 };
1633
1634 void TestCasesBuilder::init()
1635 {
1636         map<int, Op>& mo = m_operations;
1637
1638         // predefine operations repeatedly used in tests; note that "_float"
1639         // in every operation command will be replaced with either "_f16",
1640         // "_f32" or "_f64" - StringTemplate is not used here because it
1641         // would make code less readable
1642         // m_operations contains generic operation definitions that can be
1643         // used for all float types
1644
1645         mo[O_NEGATE]            = Op("negate",          FLOAT_ARITHMETIC,
1646                                                                                         "%result             = OpFNegate %type_float %arg1\n",
1647                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1648         mo[O_COMPOSITE]         = Op("composite",       FLOAT_ARITHMETIC,
1649                                                                                         "%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1650                                                                                         "%result             = OpCompositeExtract %type_float %vec1 0\n",
1651                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1652         mo[O_COMPOSITE_INS]     = Op("comp_ins",        FLOAT_ARITHMETIC,
1653                                                                                         "%vec1               = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_0\n"
1654                                                                                         "%vec2               = OpCompositeInsert %type_float_vec2 %arg1 %vec1 0\n"
1655                                                                                         "%result             = OpCompositeExtract %type_float %vec2 0\n",
1656                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1657         mo[O_COPY]                      = Op("copy",            FLOAT_STORAGE_ONLY,
1658                                                                                         "%result             = OpCopyObject %type_float %arg1\n",
1659                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1660         mo[O_D_EXTRACT]         = Op("extract",         FLOAT_ARITHMETIC,
1661                                                                                         "%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1662                                                                                         "%result             = OpVectorExtractDynamic %type_float %vec1 %c_i32_0\n",
1663                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1664         mo[O_D_INSERT]          = Op("insert",          FLOAT_ARITHMETIC,
1665                                                                                         "%tmpVec             = OpCompositeConstruct %type_float_vec2 %c_float_2 %c_float_2\n"
1666                                                                                         "%vec1               = OpVectorInsertDynamic %type_float_vec2 %tmpVec %arg1 %c_i32_0\n"
1667                                                                                         "%result             = OpCompositeExtract %type_float %vec1 0\n",
1668                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1669         mo[O_SHUFFLE]           = Op("shuffle",         FLOAT_ARITHMETIC,
1670                                                                                         "%tmpVec1            = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1671                                                                                         "%tmpVec2            = OpCompositeConstruct %type_float_vec2 %c_float_2 %c_float_2\n"   // NOTE: its impossible to test shuffle with denorms flushed
1672                                                                                         "%vec1               = OpVectorShuffle %type_float_vec2 %tmpVec1 %tmpVec2 0 2\n"                //       to zero as this will be done by earlier operation
1673                                                                                         "%result             = OpCompositeExtract %type_float %vec1 0\n",                                               //       (this also applies to few other operations)
1674                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1675         mo[O_TRANSPOSE]         = Op("transpose",       FLOAT_ARITHMETIC,
1676                                                                                         "%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1677                                                                                         "%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1678                                                                                         "%tmat               = OpTranspose %type_float_mat2x2 %mat\n"
1679                                                                                         "%tcol               = OpCompositeExtract %type_float_vec2 %tmat 0\n"
1680                                                                                         "%result             = OpCompositeExtract %type_float %tcol 0\n",
1681                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1682         mo[O_RETURN_VAL]        = Op("ret_val",         FLOAT_ARITHMETIC,
1683                                                                                         "",
1684                                                                                         "%type_test_fun      = OpTypeFunction %type_float %type_float\n",
1685                                                                                         "",
1686                                                                                         "",
1687                                                                                         "%test_fun = OpFunction %type_float None %type_test_fun\n"
1688                                                                                         "%param = OpFunctionParameter %type_float\n"
1689                                                                                         "%entry = OpLabel\n"
1690                                                                                         "OpReturnValue %param\n"
1691                                                                                         "OpFunctionEnd\n",
1692                                                                                         "%result             = OpFunctionCall %type_float %test_fun %arg1\n",
1693                                                                                         B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1694
1695         // conversion operations that are meant to be used only for single output type (defined by the second number in name)
1696         const char* convertSource =                             "%result             = OpFConvert %type_float %arg1\n";
1697         mo[O_CONV_FROM_FP16]    = Op("conv_from_fp16", FLOAT_STORAGE_ONLY, false, FP16, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1698         mo[O_CONV_FROM_FP32]    = Op("conv_from_fp32", FLOAT_STORAGE_ONLY, false, FP32, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1699         mo[O_CONV_FROM_FP64]    = Op("conv_from_fp64", FLOAT_STORAGE_ONLY, false, FP64, "", convertSource, B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1700
1701         // from all operands supported by OpSpecConstantOp we can only test FConvert opcode with literals as everything
1702         // else requires Kernel capability (OpenCL); values of literals used in SPIR-V code must be equivalent to
1703         // V_CONV_FROM_FP32_ARG and V_CONV_FROM_FP64_ARG so we can use same expected rounded values as for regular OpFConvert
1704         mo[O_SCONST_CONV_FROM_FP32_TO_FP16]
1705                                                 = Op("sconst_conv_from_fp32", FLOAT_ARITHMETIC, true, FP32,
1706                                                                                         "%c_arg              = OpConstant %type_f32 1.22334445\n"
1707                                                                                         "%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n",
1708                                                                                         "",
1709                                                                                         B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP32);
1710         mo[O_SCONST_CONV_FROM_FP64_TO_FP32]
1711                                                 = Op("sconst_conv_from_fp64", FLOAT_ARITHMETIC, true, FP64,
1712                                                                                         "%c_arg              = OpConstant %type_f64 1.22334455\n"
1713                                                                                         "%result             = OpSpecConstantOp %type_f32 FConvert %c_arg\n",
1714                                                                                         "",
1715                                                                                         B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
1716         mo[O_SCONST_CONV_FROM_FP64_TO_FP16]
1717                                                 = Op("sconst_conv_from_fp64", FLOAT_ARITHMETIC, true, FP64,
1718                                                                                         "%c_arg              = OpConstant %type_f64 1.22334445\n"
1719                                                                                         "%result             = OpSpecConstantOp %type_f16 FConvert %c_arg\n",
1720                                                                                         "",
1721                                                                                         B_STATEMENT_USAGE_CONSTS_TYPE_FP16 | B_STATEMENT_USAGE_CONSTS_TYPE_FP64);
1722
1723         mo[O_ADD]                       = Op("add",                     FLOAT_ARITHMETIC, "%result             = OpFAdd %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1724         mo[O_SUB]                       = Op("sub",                     FLOAT_ARITHMETIC, "%result             = OpFSub %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1725         mo[O_MUL]                       = Op("mul",                     FLOAT_ARITHMETIC, "%result             = OpFMul %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1726         mo[O_DIV]                       = Op("div",                     FLOAT_ARITHMETIC, "%result             = OpFDiv %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1727         mo[O_REM]                       = Op("rem",                     FLOAT_ARITHMETIC, "%result             = OpFRem %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1728         mo[O_MOD]                       = Op("mod",                     FLOAT_ARITHMETIC, "%result             = OpFMod %type_float %arg1 %arg2\n", B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1729         mo[O_PHI]                       = Op("phi",                     FLOAT_ARITHMETIC,
1730                                                                                         "%comp               = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
1731                                                                                         "                      OpSelectionMerge %comp_merge None\n"
1732                                                                                         "                      OpBranchConditional %comp %true_branch %false_branch\n"
1733                                                                                         "%true_branch        = OpLabel\n"
1734                                                                                         "                      OpBranch %comp_merge\n"
1735                                                                                         "%false_branch       = OpLabel\n"
1736                                                                                         "                      OpBranch %comp_merge\n"
1737                                                                                         "%comp_merge         = OpLabel\n"
1738                                                                                         "%result             = OpPhi %type_float %arg2 %true_branch %arg1 %false_branch\n",
1739                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1740         mo[O_SELECT]            = Op("select",          FLOAT_ARITHMETIC,
1741                                                                                         "%always_true        = OpFOrdGreaterThan %type_bool %c_float_1 %c_float_0\n"
1742                                                                                         "%result             = OpSelect %type_float %always_true %arg1 %arg2\n",
1743                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1744         mo[O_DOT]                       = Op("dot",                     FLOAT_ARITHMETIC,
1745                                                                                         "%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1746                                                                                         "%vec2               = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1747                                                                                         "%result             = OpDot %type_float %vec1 %vec2\n",
1748                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1749         mo[O_VEC_MUL_S]         = Op("vmuls",           FLOAT_ARITHMETIC,
1750                                                                                         "%vec                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1751                                                                                         "%tmpVec             = OpVectorTimesScalar %type_float_vec2 %vec %arg2\n"
1752                                                                                         "%result             = OpCompositeExtract %type_float %tmpVec 0\n",
1753                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1754         mo[O_VEC_MUL_M]         = Op("vmulm",           FLOAT_ARITHMETIC,
1755                                                                                         "%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1756                                                                                         "%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1757                                                                                         "%vec                = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1758                                                                                         "%tmpVec             = OpVectorTimesMatrix %type_float_vec2 %vec %mat\n"
1759                                                                                         "%result             = OpCompositeExtract %type_float %tmpVec 0\n",
1760                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1761         mo[O_MAT_MUL_S]         = Op("mmuls",           FLOAT_ARITHMETIC,
1762                                                                                         "%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1763                                                                                         "%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1764                                                                                         "%mulMat             = OpMatrixTimesScalar %type_float_mat2x2 %mat %arg2\n"
1765                                                                                         "%extCol             = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1766                                                                                         "%result             = OpCompositeExtract %type_float %extCol 0\n",
1767                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1768         mo[O_MAT_MUL_V]         = Op("mmulv",           FLOAT_ARITHMETIC,
1769                                                                                         "%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1770                                                                                         "%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
1771                                                                                         "%vec                = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1772                                                                                         "%mulVec             = OpMatrixTimesVector %type_float_vec2 %mat %vec\n"
1773                                                                                         "%result             = OpCompositeExtract %type_float %mulVec 0\n",
1774                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1775         mo[O_MAT_MUL_M]         = Op("mmulm",           FLOAT_ARITHMETIC,
1776                                                                                         "%col1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1777                                                                                         "%mat1               = OpCompositeConstruct %type_float_mat2x2 %col1 %col1\n"
1778                                                                                         "%col2               = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1779                                                                                         "%mat2               = OpCompositeConstruct %type_float_mat2x2 %col2 %col2\n"
1780                                                                                         "%mulMat             = OpMatrixTimesMatrix %type_float_mat2x2 %mat1 %mat2\n"
1781                                                                                         "%extCol             = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1782                                                                                         "%result             = OpCompositeExtract %type_float %extCol 0\n",
1783                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1784         mo[O_OUT_PROD]          = Op("out_prod",        FLOAT_ARITHMETIC,
1785                                                                                         "%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
1786                                                                                         "%vec2               = OpCompositeConstruct %type_float_vec2 %arg2 %arg2\n"
1787                                                                                         "%mulMat             = OpOuterProduct %type_float_mat2x2 %vec1 %vec2\n"
1788                                                                                         "%extCol             = OpCompositeExtract %type_float_vec2 %mulMat 0\n"
1789                                                                                         "%result             = OpCompositeExtract %type_float %extCol 0\n",
1790                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1791
1792         // comparison operations
1793         mo[O_ORD_EQ]            = Op("ord_eq",          FLOAT_ARITHMETIC,
1794                                                                                         "%boolVal           = OpFOrdEqual %type_bool %arg1 %arg2\n"
1795                                                                                         "%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1796                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1797         mo[O_UORD_EQ]           = Op("uord_eq",         FLOAT_ARITHMETIC,
1798                                                                                         "%boolVal           = OpFUnordEqual %type_bool %arg1 %arg2\n"
1799                                                                                         "%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1800                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1801         mo[O_ORD_NEQ]           = Op("ord_neq",         FLOAT_ARITHMETIC,
1802                                                                                         "%boolVal           = OpFOrdNotEqual %type_bool %arg1 %arg2\n"
1803                                                                                         "%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1804                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1805         mo[O_UORD_NEQ]          = Op("uord_neq",        FLOAT_ARITHMETIC,
1806                                                                                         "%boolVal           = OpFUnordNotEqual %type_bool %arg1 %arg2\n"
1807                                                                                         "%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1808                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1809         mo[O_ORD_LS]            = Op("ord_ls",          FLOAT_ARITHMETIC,
1810                                                                                         "%boolVal           = OpFOrdLessThan %type_bool %arg1 %arg2\n"
1811                                                                                         "%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1812                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1813         mo[O_UORD_LS]           = Op("uord_ls",         FLOAT_ARITHMETIC,
1814                                                                                         "%boolVal           = OpFUnordLessThan %type_bool %arg1 %arg2\n"
1815                                                                                         "%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1816                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1817         mo[O_ORD_GT]            = Op("ord_gt",          FLOAT_ARITHMETIC,
1818                                                                                         "%boolVal           = OpFOrdGreaterThan %type_bool %arg1 %arg2\n"
1819                                                                                         "%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1820                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1821         mo[O_UORD_GT]           = Op("uord_gt",         FLOAT_ARITHMETIC,
1822                                                                                         "%boolVal           = OpFUnordGreaterThan %type_bool %arg1 %arg2\n"
1823                                                                                         "%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1824                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1825         mo[O_ORD_LE]            = Op("ord_le",          FLOAT_ARITHMETIC,
1826                                                                                         "%boolVal           = OpFOrdLessThanEqual %type_bool %arg1 %arg2\n"
1827                                                                                         "%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1828                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1829         mo[O_UORD_LE]           = Op("uord_le",         FLOAT_ARITHMETIC,
1830                                                                                         "%boolVal           = OpFUnordLessThanEqual %type_bool %arg1 %arg2\n"
1831                                                                                         "%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1832                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1833         mo[O_ORD_GE]            = Op("ord_ge",          FLOAT_ARITHMETIC,
1834                                                                                         "%boolVal           = OpFOrdGreaterThanEqual %type_bool %arg1 %arg2\n"
1835                                                                                         "%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1836                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1837         mo[O_UORD_GE]           = Op("uord_ge",         FLOAT_ARITHMETIC,
1838                                                                                         "%boolVal           = OpFUnordGreaterThanEqual %type_bool %arg1 %arg2\n"
1839                                                                                         "%result            = OpSelect %type_float %boolVal %c_float_1 %c_float_0\n",
1840                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1841
1842         mo[O_ATAN2]                     = Op("atan2",           FLOAT_ARITHMETIC,
1843                                                                                         "%result             = OpExtInst %type_float %std450 Atan2 %arg1 %arg2\n",
1844                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1845         mo[O_POW]                       = Op("pow",                     FLOAT_ARITHMETIC,
1846                                                                                         "%result             = OpExtInst %type_float %std450 Pow %arg1 %arg2\n",
1847                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1848         mo[O_MIX]                       = Op("mix",                     FLOAT_ARITHMETIC,
1849                                                                                         "%result             = OpExtInst %type_float %std450 FMix %arg1 %arg2 %c_float_0_5\n",
1850                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1851         mo[O_FMA]                       = Op("fma",                     FLOAT_ARITHMETIC,
1852                                                                                         "%result             = OpExtInst %type_float %std450 Fma %arg1 %arg2 %c_float_0_5\n",
1853                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1854         mo[O_MIN]                       = Op("min",                     FLOAT_ARITHMETIC,
1855                                                                                         "%result             = OpExtInst %type_float %std450 FMin %arg1 %arg2\n",
1856                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1857         mo[O_MAX]                       = Op("max",                     FLOAT_ARITHMETIC,
1858                                                                                         "%result             = OpExtInst %type_float %std450 FMax %arg1 %arg2\n",
1859                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1860         mo[O_CLAMP]                     = Op("clamp",           FLOAT_ARITHMETIC,
1861                                                                                         "%result             = OpExtInst %type_float %std450 FClamp %arg1 %arg2 %arg2\n",
1862                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1863         mo[O_STEP]                      = Op("step",            FLOAT_ARITHMETIC,
1864                                                                                         "%result             = OpExtInst %type_float %std450 Step %arg1 %arg2\n",
1865                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1866         mo[O_SSTEP]                     = Op("sstep",           FLOAT_ARITHMETIC,
1867                                                                                         "%result             = OpExtInst %type_float %std450 SmoothStep %arg1 %arg2 %c_float_0_5\n",
1868                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1869         mo[O_DIST]                      = Op("distance",        FLOAT_ARITHMETIC,
1870                                                                                         "%result             = OpExtInst %type_float %std450 Distance %arg1 %arg2\n",
1871                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1872         mo[O_CROSS]                     = Op("cross",           FLOAT_ARITHMETIC,
1873                                                                                         "%vec1               = OpCompositeConstruct %type_float_vec3 %arg1 %arg1 %arg1\n"
1874                                                                                         "%vec2               = OpCompositeConstruct %type_float_vec3 %arg2 %arg2 %arg2\n"
1875                                                                                         "%tmpVec             = OpExtInst %type_float_vec3 %std450 Cross %vec1 %vec2\n"
1876                                                                                         "%result             = OpCompositeExtract %type_float %tmpVec 0\n",
1877                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1878         mo[O_FACE_FWD]          = Op("face_fwd",        FLOAT_ARITHMETIC,
1879                                                                                         "%result             = OpExtInst %type_float %std450 FaceForward %c_float_1 %arg1 %arg2\n",
1880                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1881         mo[O_NMIN]                      = Op("nmin",            FLOAT_ARITHMETIC,
1882                                                                                         "%result             = OpExtInst %type_float %std450 NMin %arg1 %arg2\n",
1883                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1884         mo[O_NMAX]                      = Op("nmax",            FLOAT_ARITHMETIC,
1885                                                                                         "%result             = OpExtInst %type_float %std450 NMax %arg1 %arg2\n",
1886                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1887         mo[O_NCLAMP]            = Op("nclamp",          FLOAT_ARITHMETIC,
1888                                                                                         "%result             = OpExtInst %type_float %std450 NClamp %arg2 %arg1 %arg2\n",
1889                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1890
1891         mo[O_ROUND]                     = Op("round",           FLOAT_ARITHMETIC,
1892                                                                                         "%result             = OpExtInst %type_float %std450 Round %arg1\n",
1893                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1894         mo[O_ROUND_EV]          = Op("round_ev",        FLOAT_ARITHMETIC,
1895                                                                                         "%result             = OpExtInst %type_float %std450 RoundEven %arg1\n",
1896                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1897         mo[O_TRUNC]                     = Op("trunc",           FLOAT_ARITHMETIC,
1898                                                                                         "%result             = OpExtInst %type_float %std450 Trunc %arg1\n",
1899                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1900         mo[O_ABS]                       = Op("abs",                     FLOAT_ARITHMETIC,
1901                                                                                         "%result             = OpExtInst %type_float %std450 FAbs %arg1\n",
1902                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1903         mo[O_SIGN]                      = Op("sign",            FLOAT_ARITHMETIC,
1904                                                                                         "%result             = OpExtInst %type_float %std450 FSign %arg1\n",
1905                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1906         mo[O_FLOOR]                     = Op("floor",           FLOAT_ARITHMETIC,
1907                                                                                         "%result             = OpExtInst %type_float %std450 Floor %arg1\n",
1908                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1909         mo[O_CEIL]                      = Op("ceil",            FLOAT_ARITHMETIC,
1910                                                                                         "%result             = OpExtInst %type_float %std450 Ceil %arg1\n",
1911                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1912         mo[O_FRACT]                     = Op("fract",           FLOAT_ARITHMETIC,
1913                                                                                         "%result             = OpExtInst %type_float %std450 Fract %arg1\n",
1914                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1915         mo[O_RADIANS]           = Op("radians",         FLOAT_ARITHMETIC,
1916                                                                                         "%result             = OpExtInst %type_float %std450 Radians %arg1\n",
1917                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1918         mo[O_DEGREES]           = Op("degrees",         FLOAT_ARITHMETIC,
1919                                                                                         "%result             = OpExtInst %type_float %std450 Degrees %arg1\n",
1920                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1921         mo[O_SIN]                       = Op("sin",                     FLOAT_ARITHMETIC,
1922                                                                                         "%result             = OpExtInst %type_float %std450 Sin %arg1\n",
1923                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1924         mo[O_COS]                       = Op("cos",                     FLOAT_ARITHMETIC,
1925                                                                                         "%result             = OpExtInst %type_float %std450 Cos %arg1\n",
1926                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1927         mo[O_TAN]                       = Op("tan",                     FLOAT_ARITHMETIC,
1928                                                                                         "%result             = OpExtInst %type_float %std450 Tan %arg1\n",
1929                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1930         mo[O_ASIN]                      = Op("asin",            FLOAT_ARITHMETIC,
1931                                                                                         "%result             = OpExtInst %type_float %std450 Asin %arg1\n",
1932                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1933         mo[O_ACOS]                      = Op("acos",            FLOAT_ARITHMETIC,
1934                                                                                         "%result             = OpExtInst %type_float %std450 Acos %arg1\n",
1935                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1936         mo[O_ATAN]                      = Op("atan",            FLOAT_ARITHMETIC,
1937                                                                                         "%result             = OpExtInst %type_float %std450 Atan %arg1\n",
1938                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1939         mo[O_SINH]                      = Op("sinh",            FLOAT_ARITHMETIC,
1940                                                                                         "%result             = OpExtInst %type_float %std450 Sinh %arg1\n",
1941                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1942         mo[O_COSH]                      = Op("cosh",            FLOAT_ARITHMETIC,
1943                                                                                         "%result             = OpExtInst %type_float %std450 Cosh %arg1\n",
1944                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1945         mo[O_TANH]                      = Op("tanh",            FLOAT_ARITHMETIC,
1946                                                                                         "%result             = OpExtInst %type_float %std450 Tanh %arg1\n",
1947                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1948         mo[O_ASINH]                     = Op("asinh",           FLOAT_ARITHMETIC,
1949                                                                                         "%result             = OpExtInst %type_float %std450 Asinh %arg1\n",
1950                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1951         mo[O_ACOSH]                     = Op("acosh",           FLOAT_ARITHMETIC,
1952                                                                                         "%result             = OpExtInst %type_float %std450 Acosh %arg1\n",
1953                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1954         mo[O_ATANH]                     = Op("atanh",           FLOAT_ARITHMETIC,
1955                                                                                         "%result             = OpExtInst %type_float %std450 Atanh %arg1\n",
1956                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1957         mo[O_EXP]                       = Op("exp",                     FLOAT_ARITHMETIC,
1958                                                                                         "%result             = OpExtInst %type_float %std450 Exp %arg1\n",
1959                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1960         mo[O_LOG]                       = Op("log",                     FLOAT_ARITHMETIC,
1961                                                                                         "%result             = OpExtInst %type_float %std450 Log %arg1\n",
1962                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1963         mo[O_EXP2]                      = Op("exp2",            FLOAT_ARITHMETIC,
1964                                                                                         "%result             = OpExtInst %type_float %std450 Exp2 %arg1\n",
1965                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1966         mo[O_LOG2]                      = Op("log2",            FLOAT_ARITHMETIC,
1967                                                                                         "%result             = OpExtInst %type_float %std450 Log2 %arg1\n",
1968                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1969         mo[O_SQRT]                      = Op("sqrt",            FLOAT_ARITHMETIC,
1970                                                                                         "%result             = OpExtInst %type_float %std450 Sqrt %arg1\n",
1971                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1972         mo[O_INV_SQRT]          = Op("inv_sqrt",        FLOAT_ARITHMETIC,
1973                                                                                         "%result             = OpExtInst %type_float %std450 InverseSqrt %arg1\n",
1974                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1975         mo[O_MODF]                      = Op("modf",            FLOAT_ARITHMETIC,
1976                                                                                         "",
1977                                                                                         "",
1978                                                                                         "",
1979                                                                                         "%tmpVarPtr          = OpVariable %type_float_fptr Function\n",
1980                                                                                         "",
1981                                                                                         "%result             = OpExtInst %type_float %std450 Modf %arg1 %tmpVarPtr\n",
1982                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1983         mo[O_MODF_ST]           = Op("modf_st",         FLOAT_ARITHMETIC,
1984                                                                                         "OpMemberDecorate %struct_ff 0 Offset ${float_width}\n"
1985                                                                                         "OpMemberDecorate %struct_ff 1 Offset ${float_width}\n",
1986                                                                                         "%struct_ff          = OpTypeStruct %type_float %type_float\n"
1987                                                                                         "%struct_ff_fptr     = OpTypePointer Function %struct_ff\n",
1988                                                                                         "",
1989                                                                                         "%tmpStructPtr       = OpVariable %struct_ff_fptr Function\n",
1990                                                                                         "",
1991                                                                                         "%tmpStruct          = OpExtInst %struct_ff %std450 ModfStruct %arg1\n"
1992                                                                                         "                      OpStore %tmpStructPtr %tmpStruct\n"
1993                                                                                         "%tmpLoc             = OpAccessChain %type_float_fptr %tmpStructPtr %c_i32_0\n"
1994                                                                                         "%result             = OpLoad %type_float %tmpLoc\n",
1995                                                                                         B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
1996         mo[O_FREXP]                     = Op("frexp",           FLOAT_ARITHMETIC,
1997                                                                                         "",
1998                                                                                         "",
1999                                                                                         "",
2000                                                                                         "%tmpVarPtr          = OpVariable %type_i32_fptr Function\n",
2001                                                                                         "",
2002                                                                                         "%result             = OpExtInst %type_float %std450 Frexp %arg1 %tmpVarPtr\n",
2003                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2004         mo[O_FREXP_ST]          = Op("frexp_st",        FLOAT_ARITHMETIC,
2005                                                                                         "OpMemberDecorate %struct_fi 0 Offset ${float_width}\n"
2006                                                                                         "OpMemberDecorate %struct_fi 1 Offset 32\n",
2007                                                                                         "%struct_fi          = OpTypeStruct %type_float %type_i32\n"
2008                                                                                         "%struct_fi_fptr     = OpTypePointer Function %struct_fi\n",
2009                                                                                         "",
2010                                                                                         "%tmpStructPtr       = OpVariable %struct_fi_fptr Function\n",
2011                                                                                         "",
2012                                                                                         "%tmpStruct          = OpExtInst %struct_fi %std450 FrexpStruct %arg1\n"
2013                                                                                         "                      OpStore %tmpStructPtr %tmpStruct\n"
2014                                                                                         "%tmpLoc             = OpAccessChain %type_float_fptr %tmpStructPtr %c_i32_0\n"
2015                                                                                         "%result             = OpLoad %type_float %tmpLoc\n",
2016                                                                                         B_STATEMENT_USAGE_TYPES_TYPE_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2017         mo[O_LENGHT]            = Op("length",          FLOAT_ARITHMETIC,
2018                                                                                         "%result             = OpExtInst %type_float %std450 Length %arg1\n",
2019                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2020         mo[O_NORMALIZE]         = Op("normalize",       FLOAT_ARITHMETIC,
2021                                                                                         "%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %c_float_2\n"
2022                                                                                         "%tmpVec             = OpExtInst %type_float_vec2 %std450 Normalize %vec1\n"
2023                                                                                         "%result             = OpCompositeExtract %type_float %tmpVec 0\n",
2024                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2025         mo[O_REFLECT]           = Op("reflect",         FLOAT_ARITHMETIC,
2026                                                                                         "%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
2027                                                                                         "%vecN               = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_n1\n"
2028                                                                                         "%tmpVec             = OpExtInst %type_float_vec2 %std450 Reflect %vec1 %vecN\n"
2029                                                                                         "%result             = OpCompositeExtract %type_float %tmpVec 0\n",
2030                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2031         mo[O_REFRACT]           = Op("refract",         FLOAT_ARITHMETIC,
2032                                                                                         "%vec1               = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
2033                                                                                         "%vecN               = OpCompositeConstruct %type_float_vec2 %c_float_0 %c_float_n1\n"
2034                                                                                         "%tmpVec             = OpExtInst %type_float_vec2 %std450 Refract %vec1 %vecN %c_float_0_5\n"
2035                                                                                         "%result             = OpCompositeExtract %type_float %tmpVec 0\n",
2036                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2037         mo[O_MAT_DET]           = Op("mat_det",         FLOAT_ARITHMETIC,
2038                                                                                         "%col                = OpCompositeConstruct %type_float_vec2 %arg1 %arg1\n"
2039                                                                                         "%mat                = OpCompositeConstruct %type_float_mat2x2 %col %col\n"
2040                                                                                         "%result             = OpExtInst %type_float %std450 Determinant %mat\n",
2041                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2042         mo[O_MAT_INV]           = Op("mat_inv",         FLOAT_ARITHMETIC,
2043                                                                                         "%col1               = OpCompositeConstruct %type_float_vec2 %arg1 %c_float_1\n"
2044                                                                                         "%col2               = OpCompositeConstruct %type_float_vec2 %c_float_1 %c_float_1\n"
2045                                                                                         "%mat                = OpCompositeConstruct %type_float_mat2x2 %col1 %col2\n"
2046                                                                                         "%invMat             = OpExtInst %type_float_mat2x2 %std450 MatrixInverse %mat\n"
2047                                                                                         "%extCol             = OpCompositeExtract %type_float_vec2 %invMat 1\n"
2048                                                                                         "%result             = OpCompositeExtract %type_float %extCol 1\n",
2049                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_TYPE_FLOAT);
2050
2051         // PackHalf2x16 is a special case as it operates on fp32 vec2 and returns unsigned int,
2052         // the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
2053         mo[O_PH_DENORM]         = Op("ph_denorm",       FLOAT_STORAGE_ONLY,
2054                                                                                         "",
2055                                                                                         "",
2056                                                                                         "%c_fp32_denorm_fp16 = OpConstant %type_f32 6.01e-5\n"          // fp32 representation of fp16 denorm value
2057                                                                                         "%c_ref              = OpConstant %type_u32 66061296\n",
2058                                                                                         "",
2059                                                                                         "",
2060                                                                                         "%srcVec             = OpCompositeConstruct %type_f32_vec2 %c_fp32_denorm_fp16 %c_fp32_denorm_fp16\n"
2061                                                                                         "%packedInt          = OpExtInst %type_u32 %std450 PackHalf2x16 %srcVec\n"
2062                                                                                         "%boolVal            = OpIEqual %type_bool %c_ref %packedInt\n"
2063                                                                                         "%result             = OpSelect %type_f32 %boolVal %c_f32_1 %c_f32_0\n",
2064                                                                                         B_STATEMENT_USAGE_CONSTS_TYPE_FP32 | B_STATEMENT_USAGE_COMMANDS_CONST_FP32 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP32);
2065
2066         // UnpackHalf2x16 is a special case that operates on uint32 and returns two 32-bit floats,
2067         // this function is tested using constants
2068         mo[O_UPH_DENORM]        = Op("uph_denorm",      FLOAT_STORAGE_ONLY,
2069                                                                                         "",
2070                                                                                         "",
2071                                                                                         "%c_u32_2_16_pack    = OpConstant %type_u32 66061296\n", // == packHalf2x16(vec2(denorm))
2072                                                                                         "",
2073                                                                                         "",
2074                                                                                         "%tmpVec             = OpExtInst %type_f32_vec2 %std450 UnpackHalf2x16 %c_u32_2_16_pack\n"
2075                                                                                         "%result             = OpCompositeExtract %type_f32 %tmpVec 0\n",
2076                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FP32);
2077
2078         // PackDouble2x32 is a special case that operates on two uint32 and returns
2079         // double, this function is tested using constants
2080         mo[O_PD_DENORM]         = Op("pd_denorm",       FLOAT_STORAGE_ONLY,
2081                                                                                         "",
2082                                                                                         "",
2083                                                                                         "%c_p1               = OpConstant %type_u32 0\n"
2084                                                                                         "%c_p2               = OpConstant %type_u32 262144\n",          // == UnpackDouble2x32(denorm)
2085                                                                                         "",
2086                                                                                         "",
2087                                                                                         "%srcVec             = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
2088                                                                                         "%result             = OpExtInst %type_f64 %std450 PackDouble2x32 %srcVec\n",
2089                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2090
2091         // UnpackDouble2x32 is a special case as it operates only on FP64 and returns two ints,
2092         // the verification is done in SPIR-V code (if result is correct 1.0 will be written to SSBO)
2093         const char* unpackDouble2x32Types       =       "%type_bool_vec2     = OpTypeVector %type_bool 2\n";
2094         const char* unpackDouble2x32Source      =       "%refVec2            = OpCompositeConstruct %type_u32_vec2 %c_p1 %c_p2\n"
2095                                                                                         "%resVec2            = OpExtInst %type_u32_vec2 %std450 UnpackDouble2x32 %arg1\n"
2096                                                                                         "%boolVec2           = OpIEqual %type_bool_vec2 %refVec2 %resVec2\n"
2097                                                                                         "%boolVal            = OpAll %type_bool %boolVec2\n"
2098                                                                                         "%result             = OpSelect %type_f64 %boolVal %c_f64_1 %c_f64_0\n";
2099         mo[O_UPD_DENORM_FLUSH]          = Op("upd_denorm",      FLOAT_STORAGE_ONLY, "",
2100                                                                                         unpackDouble2x32Types,
2101                                                                                         "%c_p1               = OpConstant %type_u32 0\n"
2102                                                                                         "%c_p2               = OpConstant %type_u32 0\n",
2103                                                                                         "",
2104                                                                                         "",
2105                                                                                         unpackDouble2x32Source,
2106                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FP64 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2107         mo[O_UPD_DENORM_PRESERVE]       = Op("upd_denorm",      FLOAT_STORAGE_ONLY, "",
2108                                                                                         unpackDouble2x32Types,
2109                                                                                         "%c_p1               = OpConstant %type_u32 1008\n"
2110                                                                                         "%c_p2               = OpConstant %type_u32 0\n",
2111                                                                                         "",
2112                                                                                         "",
2113                                                                                         unpackDouble2x32Source,
2114                                                                                         B_STATEMENT_USAGE_COMMANDS_CONST_FP64 | B_STATEMENT_USAGE_COMMANDS_TYPE_FP64);
2115
2116         mo[O_ORTE_ROUND]        = Op("orte_round",      FLOAT_STORAGE_ONLY, FP32,
2117                                                                                         "OpDecorate %result FPRoundingMode RTE\n",
2118                                                                                         "",
2119                                                                                         "",
2120                                                                                         "%result             = OpFConvert %type_f16 %arg1\n",
2121                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FP16);
2122         mo[O_ORTZ_ROUND]        = Op("ortz_round",      FLOAT_STORAGE_ONLY, FP32,
2123                                                                                         "OpDecorate %result FPRoundingMode RTZ\n",
2124                                                                                         "",
2125                                                                                         "",
2126                                                                                         "%result             = OpFConvert %type_f16 %arg1\n",
2127                                                                                         B_STATEMENT_USAGE_COMMANDS_TYPE_FP16);
2128 }
2129
2130 void TestCasesBuilder::build(vector<OperationTestCase>& testCases, TypeTestResultsSP typeTestResults, bool argumentsFromInput)
2131 {
2132         // this method constructs a list of test cases; this list is a bit different
2133         // for every combination of float type, arguments preparation method and tested float control
2134
2135         testCases.reserve(750);
2136
2137         bool isFP16 = typeTestResults->floatType() == FP16;
2138
2139         // Denorm - FlushToZero - binary operations
2140         for (size_t i = 0 ; i < typeTestResults->binaryOpFTZ.size() ; ++i)
2141         {
2142                 const BinaryCase&       binaryCase      = typeTestResults->binaryOpFTZ[i];
2143                 OperationId                     operation       = binaryCase.operationId;
2144                 testCases.push_back(OTC("denorm_op_var_flush_to_zero",          B_DENORM_FLUSH,                                  operation, V_DENORM, V_ONE,            binaryCase.opVarResult));
2145                 testCases.push_back(OTC("denorm_op_denorm_flush_to_zero",       B_DENORM_FLUSH,                                  operation, V_DENORM, V_DENORM,         binaryCase.opDenormResult));
2146                 testCases.push_back(OTC("denorm_op_inf_flush_to_zero",          B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_INF,            binaryCase.opInfResult));
2147                 testCases.push_back(OTC("denorm_op_nan_flush_to_zero",          B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_NAN,            binaryCase.opNanResult));
2148
2149                 if (isFP16)
2150                 {
2151                         testCases.push_back(OTC("denorm_op_var_flush_to_zero_nostorage",                B_DENORM_FLUSH,                                  operation, V_DENORM, V_ONE,            binaryCase.opVarResult, DE_TRUE));
2152                         testCases.push_back(OTC("denorm_op_denorm_flush_to_zero_nostorage",     B_DENORM_FLUSH,                                  operation, V_DENORM, V_DENORM,         binaryCase.opDenormResult, DE_TRUE));
2153                         testCases.push_back(OTC("denorm_op_inf_flush_to_zero_nostorage",                B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_INF,            binaryCase.opInfResult, DE_TRUE));
2154                         testCases.push_back(OTC("denorm_op_nan_flush_to_zero_nostorage",                B_DENORM_FLUSH | B_ZIN_PRESERVE, operation, V_DENORM, V_NAN,            binaryCase.opNanResult, DE_TRUE));
2155                 }
2156         }
2157
2158         // Denorm - FlushToZero - unary operations
2159         for (size_t i = 0 ; i < typeTestResults->unaryOpFTZ.size() ; ++i)
2160         {
2161                 const UnaryCase&        unaryCase = typeTestResults->unaryOpFTZ[i];
2162                 OperationId                     operation = unaryCase.operationId;
2163                 testCases.push_back(OTC("op_denorm_flush_to_zero", B_DENORM_FLUSH, operation, V_DENORM, V_UNUSED, unaryCase.result));
2164                 if (isFP16)
2165                         testCases.push_back(OTC("op_denorm_flush_to_zero_nostorage", B_DENORM_FLUSH, operation, V_DENORM, V_UNUSED, unaryCase.result, DE_TRUE));
2166
2167         }
2168
2169         // Denom - Preserve - binary operations
2170         for (size_t i = 0 ; i < typeTestResults->binaryOpDenormPreserve.size() ; ++i)
2171         {
2172                 const BinaryCase&       binaryCase      = typeTestResults->binaryOpDenormPreserve[i];
2173                 OperationId                     operation       = binaryCase.operationId;
2174                 testCases.push_back(OTC("denorm_op_var_preserve",                       B_DENORM_PRESERVE,                                      operation, V_DENORM,    V_ONE,          binaryCase.opVarResult));
2175                 testCases.push_back(OTC("denorm_op_denorm_preserve",            B_DENORM_PRESERVE,                                      operation, V_DENORM,    V_DENORM,       binaryCase.opDenormResult));
2176                 testCases.push_back(OTC("denorm_op_inf_preserve",                       B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM,        V_INF,          binaryCase.opInfResult));
2177                 testCases.push_back(OTC("denorm_op_nan_preserve",                       B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM,        V_NAN,          binaryCase.opNanResult));
2178
2179                 if (isFP16)
2180                 {
2181                         testCases.push_back(OTC("denorm_op_var_preserve_nostorage",                     B_DENORM_PRESERVE,                                      operation, V_DENORM,    V_ONE,          binaryCase.opVarResult, DE_TRUE));
2182                         testCases.push_back(OTC("denorm_op_denorm_preserve_nostorage",          B_DENORM_PRESERVE,                                      operation, V_DENORM,    V_DENORM,       binaryCase.opDenormResult, DE_TRUE));
2183                         testCases.push_back(OTC("denorm_op_inf_preserve_nostorage",                     B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM,        V_INF,          binaryCase.opInfResult, DE_TRUE));
2184                         testCases.push_back(OTC("denorm_op_nan_preserve_nostorage",                     B_DENORM_PRESERVE | B_ZIN_PRESERVE, operation, V_DENORM,        V_NAN,          binaryCase.opNanResult, DE_TRUE));
2185                 }
2186         }
2187
2188         // Denom - Preserve - unary operations
2189         for (size_t i = 0 ; i < typeTestResults->unaryOpDenormPreserve.size() ; ++i)
2190         {
2191                 const UnaryCase&        unaryCase       = typeTestResults->unaryOpDenormPreserve[i];
2192                 OperationId                     operation       = unaryCase.operationId;
2193                 testCases.push_back(OTC("op_denorm_preserve", B_DENORM_PRESERVE, operation, V_DENORM, V_UNUSED, unaryCase.result));
2194                 if (isFP16)
2195                         testCases.push_back(OTC("op_denorm_preserve_nostorage", B_DENORM_PRESERVE, operation, V_DENORM, V_UNUSED, unaryCase.result, DE_TRUE));
2196         }
2197
2198         struct ZINCase
2199         {
2200                 OperationId     operationId;
2201                 bool            supportedByFP64;
2202                 ValueId         secondArgument;
2203                 ValueId         preserveZeroResult;
2204                 ValueId         preserveSZeroResult;
2205                 ValueId         preserveInfResult;
2206                 ValueId         preserveSInfResult;
2207                 ValueId         preserveNanResult;
2208         };
2209
2210         const ZINCase binaryOpZINPreserve[] = {
2211                 // operation            fp64    second arg              preserve zero   preserve szero          preserve inf    preserve sinf           preserve nan
2212                 { O_PHI,                        true,   V_INF,                  V_ZERO,                 V_MINUS_ZERO,           V_INF,                  V_MINUS_INF,            V_NAN },
2213                 { O_SELECT,                     true,   V_ONE,                  V_ZERO,                 V_MINUS_ZERO,           V_INF,                  V_MINUS_INF,            V_NAN },
2214                 { O_ADD,                        true,   V_ZERO,                 V_ZERO,                 V_ZERO,                         V_INF,                  V_MINUS_INF,            V_NAN },
2215                 { O_SUB,                        true,   V_ZERO,                 V_ZERO,                 V_MINUS_ZERO,           V_INF,                  V_MINUS_INF,            V_NAN },
2216                 { O_MUL,                        true,   V_ONE,                  V_ZERO,                 V_MINUS_ZERO,           V_INF,                  V_MINUS_INF,            V_NAN },
2217         };
2218
2219         const ZINCase unaryOpZINPreserve[] = {
2220                 // operation                            fp64    second arg              preserve zero   preserve szero          preserve inf    preserve sinf           preserve nan
2221                 { O_RETURN_VAL,                         true,   V_UNUSED,               V_ZERO,                 V_MINUS_ZERO,           V_INF,                  V_MINUS_INF,            V_NAN },
2222                 { O_D_EXTRACT,                          true,   V_UNUSED,               V_ZERO,                 V_MINUS_ZERO,           V_INF,                  V_MINUS_INF,            V_NAN },
2223                 { O_D_INSERT,                           true,   V_UNUSED,               V_ZERO,                 V_MINUS_ZERO,           V_INF,                  V_MINUS_INF,            V_NAN },
2224                 { O_SHUFFLE,                            true,   V_UNUSED,               V_ZERO,                 V_MINUS_ZERO,           V_INF,                  V_MINUS_INF,            V_NAN },
2225                 { O_COMPOSITE,                          true,   V_UNUSED,               V_ZERO,                 V_MINUS_ZERO,           V_INF,                  V_MINUS_INF,            V_NAN },
2226                 { O_COMPOSITE_INS,                      true,   V_UNUSED,               V_ZERO,                 V_MINUS_ZERO,           V_INF,                  V_MINUS_INF,            V_NAN },
2227                 { O_COPY,                                       true,   V_UNUSED,               V_ZERO,                 V_MINUS_ZERO,           V_INF,                  V_MINUS_INF,            V_NAN },
2228                 { O_TRANSPOSE,                          true,   V_UNUSED,               V_ZERO,                 V_MINUS_ZERO,           V_INF,                  V_MINUS_INF,            V_NAN },
2229                 { O_NEGATE,                                     true,   V_UNUSED,               V_MINUS_ZERO,   V_ZERO,                         V_MINUS_INF,    V_INF,                          V_NAN },
2230         };
2231
2232         bool isFP64 = typeTestResults->floatType() == FP64;
2233
2234         // Signed Zero Inf Nan - Preserve - binary operations
2235         for (size_t i = 0 ; i < DE_LENGTH_OF_ARRAY(binaryOpZINPreserve) ; ++i)
2236         {
2237                 const ZINCase& zc = binaryOpZINPreserve[i];
2238                 if (isFP64 && !zc.supportedByFP64)
2239                         continue;
2240
2241                 testCases.push_back(OTC("zero_op_var_preserve",                         B_ZIN_PRESERVE, zc.operationId, V_ZERO,                 zc.secondArgument,      zc.preserveZeroResult));
2242                 testCases.push_back(OTC("signed_zero_op_var_preserve",          B_ZIN_PRESERVE, zc.operationId, V_MINUS_ZERO,   zc.secondArgument,      zc.preserveSZeroResult));
2243                 testCases.push_back(OTC("inf_op_var_preserve",                          B_ZIN_PRESERVE, zc.operationId, V_INF,                  zc.secondArgument,      zc.preserveInfResult));
2244                 testCases.push_back(OTC("signed_inf_op_var_preserve",           B_ZIN_PRESERVE, zc.operationId, V_MINUS_INF,    zc.secondArgument,      zc.preserveSInfResult));
2245                 testCases.push_back(OTC("nan_op_var_preserve",                          B_ZIN_PRESERVE, zc.operationId, V_NAN,                  zc.secondArgument,      zc.preserveNanResult));
2246
2247                 if (isFP16)
2248                 {
2249                         testCases.push_back(OTC("zero_op_var_preserve_nostorage",                               B_ZIN_PRESERVE, zc.operationId, V_ZERO,                 zc.secondArgument,      zc.preserveZeroResult, DE_TRUE));
2250                         testCases.push_back(OTC("signed_zero_op_var_preserve_nostorage",                B_ZIN_PRESERVE, zc.operationId, V_MINUS_ZERO,   zc.secondArgument,      zc.preserveSZeroResult, DE_TRUE));
2251                         testCases.push_back(OTC("inf_op_var_preserve_nostorage",                                B_ZIN_PRESERVE, zc.operationId, V_INF,                  zc.secondArgument,      zc.preserveInfResult, DE_TRUE));
2252                         testCases.push_back(OTC("signed_inf_op_var_preserve_nostorage",                 B_ZIN_PRESERVE, zc.operationId, V_MINUS_INF,    zc.secondArgument,      zc.preserveSInfResult, DE_TRUE));
2253                         testCases.push_back(OTC("nan_op_var_preserve_nostorage",                                B_ZIN_PRESERVE, zc.operationId, V_NAN,                  zc.secondArgument,      zc.preserveNanResult, DE_TRUE));
2254                 }
2255         }
2256
2257         // Signed Zero Inf Nan - Preserve - unary operations
2258         for (size_t i = 0 ; i < DE_LENGTH_OF_ARRAY(unaryOpZINPreserve) ; ++i)
2259         {
2260                 const ZINCase& zc = unaryOpZINPreserve[i];
2261                 if (isFP64 && !zc.supportedByFP64)
2262                         continue;
2263
2264                 testCases.push_back(OTC("op_zero_preserve",                     B_ZIN_PRESERVE,zc.operationId, V_ZERO,                  V_UNUSED,       zc.preserveZeroResult));
2265                 testCases.push_back(OTC("op_signed_zero_preserve",      B_ZIN_PRESERVE,zc.operationId, V_MINUS_ZERO,    V_UNUSED,       zc.preserveSZeroResult));
2266                 testCases.push_back(OTC("op_inf_preserve",                      B_ZIN_PRESERVE,zc.operationId, V_INF,                   V_UNUSED,       zc.preserveInfResult));
2267                 testCases.push_back(OTC("op_signed_inf_preserve",       B_ZIN_PRESERVE,zc.operationId, V_MINUS_INF,             V_UNUSED,       zc.preserveSInfResult));
2268                 testCases.push_back(OTC("op_nan_preserve",                      B_ZIN_PRESERVE,zc.operationId, V_NAN,                   V_UNUSED,       zc.preserveNanResult));
2269
2270                 if (isFP16)
2271                 {
2272                         testCases.push_back(OTC("op_zero_preserve_nostorage",                   B_ZIN_PRESERVE,zc.operationId, V_ZERO,                  V_UNUSED,       zc.preserveZeroResult, DE_TRUE));
2273                         testCases.push_back(OTC("op_signed_zero_preserve_nostorage",    B_ZIN_PRESERVE,zc.operationId, V_MINUS_ZERO,    V_UNUSED,       zc.preserveSZeroResult, DE_TRUE));
2274                         testCases.push_back(OTC("op_inf_preserve_nostorage",                    B_ZIN_PRESERVE,zc.operationId, V_INF,                   V_UNUSED,       zc.preserveInfResult, DE_TRUE));
2275                         testCases.push_back(OTC("op_signed_inf_preserve_nostorage",             B_ZIN_PRESERVE,zc.operationId, V_MINUS_INF,             V_UNUSED,       zc.preserveSInfResult, DE_TRUE));
2276                         testCases.push_back(OTC("op_nan_preserve_nostorage",                    B_ZIN_PRESERVE,zc.operationId, V_NAN,                   V_UNUSED,       zc.preserveNanResult, DE_TRUE));
2277                 }
2278         }
2279
2280         // comparison operations - tested differently because they return true/false
2281         struct ComparisonCase
2282         {
2283                 OperationId     operationId;
2284                 ValueId         denormPreserveResult;
2285         };
2286         const ComparisonCase comparisonCases[] =
2287         {
2288                 // operation    denorm
2289                 { O_ORD_EQ,             V_ZERO },
2290                 { O_UORD_EQ,    V_ZERO },
2291                 { O_ORD_NEQ,    V_ONE  },
2292                 { O_UORD_NEQ,   V_ONE  },
2293                 { O_ORD_LS,             V_ONE  },
2294                 { O_UORD_LS,    V_ONE  },
2295                 { O_ORD_GT,             V_ZERO },
2296                 { O_UORD_GT,    V_ZERO },
2297                 { O_ORD_LE,             V_ONE  },
2298                 { O_UORD_LE,    V_ONE  },
2299                 { O_ORD_GE,             V_ZERO },
2300                 { O_UORD_GE,    V_ZERO }
2301         };
2302         for (int op = 0 ; op < DE_LENGTH_OF_ARRAY(comparisonCases) ; ++op)
2303         {
2304                 const ComparisonCase& cc = comparisonCases[op];
2305                 testCases.push_back(OTC("denorm_op_var_preserve", B_DENORM_PRESERVE, cc.operationId, V_DENORM, V_ONE, cc.denormPreserveResult));
2306                 if (isFP16)
2307                         testCases.push_back(OTC("denorm_op_var_preserve_nostorage", B_DENORM_PRESERVE, cc.operationId, V_DENORM, V_ONE, cc.denormPreserveResult, DE_TRUE));
2308         }
2309
2310         if (argumentsFromInput)
2311         {
2312                 struct RoundingModeCase
2313                 {
2314                         OperationId     operationId;
2315                         ValueId         arg1;
2316                         ValueId         arg2;
2317                         ValueId         expectedRTEResult;
2318                         ValueId         expectedRTZResult;
2319                 };
2320
2321                 const RoundingModeCase roundingCases[] =
2322                 {
2323                         { O_ADD,                        V_ADD_ARG_A,    V_ADD_ARG_B,    V_ADD_RTE_RESULT,       V_ADD_RTZ_RESULT },
2324                         { O_SUB,                        V_SUB_ARG_A,    V_SUB_ARG_B,    V_SUB_RTE_RESULT,       V_SUB_RTZ_RESULT },
2325                         { O_MUL,                        V_MUL_ARG_A,    V_MUL_ARG_B,    V_MUL_RTE_RESULT,       V_MUL_RTZ_RESULT },
2326                         { O_DOT,                        V_DOT_ARG_A,    V_DOT_ARG_B,    V_DOT_RTE_RESULT,       V_DOT_RTZ_RESULT },
2327
2328                         // in vect/mat multiplication by scalar operations only first element of result is checked
2329                         // so argument and result values prepared for multiplication can be reused for those cases
2330                         { O_VEC_MUL_S,          V_MUL_ARG_A,    V_MUL_ARG_B,    V_MUL_RTE_RESULT,       V_MUL_RTZ_RESULT },
2331                         { O_MAT_MUL_S,          V_MUL_ARG_A,    V_MUL_ARG_B,    V_MUL_RTE_RESULT,       V_MUL_RTZ_RESULT },
2332                         { O_OUT_PROD,           V_MUL_ARG_A,    V_MUL_ARG_B,    V_MUL_RTE_RESULT,       V_MUL_RTZ_RESULT },
2333
2334                         // in SPIR-V code we return first element of operation result so for following
2335                         // cases argument and result values prepared for dot product can be reused
2336                         { O_VEC_MUL_M,          V_DOT_ARG_A,    V_DOT_ARG_B,    V_DOT_RTE_RESULT,       V_DOT_RTZ_RESULT },
2337                         { O_MAT_MUL_V,          V_DOT_ARG_A,    V_DOT_ARG_B,    V_DOT_RTE_RESULT,       V_DOT_RTZ_RESULT },
2338                         { O_MAT_MUL_M,          V_DOT_ARG_A,    V_DOT_ARG_B,    V_DOT_RTE_RESULT,       V_DOT_RTZ_RESULT },
2339
2340                         // conversion operations are added separately - depending on float type width
2341                 };
2342
2343                 for (int c = 0 ; c < DE_LENGTH_OF_ARRAY(roundingCases) ; ++c)
2344                 {
2345                         const RoundingModeCase& rmc = roundingCases[c];
2346                         testCases.push_back(OTC("rounding_rte_op", B_RTE_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTEResult));
2347                         testCases.push_back(OTC("rounding_rtz_op", B_RTZ_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTZResult));
2348                         if (isFP16)
2349                         {
2350                                 testCases.push_back(OTC("rounding_rte_op_nostorage", B_RTE_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTEResult, DE_TRUE));
2351                                 testCases.push_back(OTC("rounding_rtz_op_nostorage", B_RTZ_ROUNDING, rmc.operationId, rmc.arg1, rmc.arg2, rmc.expectedRTZResult, DE_TRUE));
2352                         }
2353                 }
2354         }
2355
2356         // special cases
2357         if (typeTestResults->floatType() == FP16)
2358         {
2359                 if (argumentsFromInput)
2360                 {
2361                         testCases.push_back(OTC("rounding_rte_conv_from_fp32", B_RTE_ROUNDING, O_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2362                         testCases.push_back(OTC("rounding_rtz_conv_from_fp32", B_RTZ_ROUNDING, O_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2363                         testCases.push_back(OTC("rounding_rte_conv_from_fp64", B_RTE_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2364                         testCases.push_back(OTC("rounding_rtz_conv_from_fp64", B_RTZ_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2365
2366                         testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2367                         testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2368                         testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2369                         testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2370
2371                         testCases.push_back(OTC("rounding_rte_conv_from_fp32_nostorage", B_RTE_ROUNDING, O_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, DE_TRUE));
2372                         testCases.push_back(OTC("rounding_rtz_conv_from_fp32_nostorage", B_RTZ_ROUNDING, O_CONV_FROM_FP32, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, DE_TRUE));
2373                         testCases.push_back(OTC("rounding_rte_conv_from_fp64_nostorage", B_RTE_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, DE_TRUE));
2374                         testCases.push_back(OTC("rounding_rtz_conv_from_fp64_nostorage", B_RTZ_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, DE_TRUE));
2375
2376                         testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp32_nostorage", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, DE_TRUE));
2377                         testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp32_nostorage", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP32_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, DE_TRUE));
2378                         testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64_nostorage", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT, DE_TRUE));
2379                         testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64_nostorage", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP16, V_UNUSED, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT, DE_TRUE));
2380
2381                         // verify that VkShaderFloatingPointRoundingModeKHR can be overridden for a given instruction by the FPRoundingMode decoration.
2382                         // FPRoundingMode decoration requires VK_KHR_16bit_storage.
2383                         testCases.push_back(OTC("rounding_rte_override", B_RTE_ROUNDING, O_ORTZ_ROUND, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTZ_RESULT));
2384                         testCases.push_back(OTC("rounding_rtz_override", B_RTZ_ROUNDING, O_ORTE_ROUND, V_CONV_FROM_FP32_ARG, V_UNUSED, V_CONV_TO_FP16_RTE_RESULT));
2385                 }
2386
2387                 createUnaryTestCases(testCases, O_CONV_FROM_FP32, V_CONV_DENORM_SMALLER, V_ZERO);
2388                 createUnaryTestCases(testCases, O_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
2389                 createUnaryTestCases(testCases, O_CONV_FROM_FP32, V_CONV_DENORM_SMALLER, V_ZERO, DE_TRUE);
2390                 createUnaryTestCases(testCases, O_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO, DE_TRUE);
2391
2392         }
2393         else if (typeTestResults->floatType() == FP32)
2394         {
2395                 if (argumentsFromInput)
2396                 {
2397                         // convert from fp64 to fp32
2398                         testCases.push_back(OTC("rounding_rte_conv_from_fp64", B_RTE_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP32_RTE_RESULT));
2399                         testCases.push_back(OTC("rounding_rtz_conv_from_fp64", B_RTZ_ROUNDING, O_CONV_FROM_FP64, V_CONV_FROM_FP64_ARG, V_UNUSED, V_CONV_TO_FP32_RTZ_RESULT));
2400
2401                         testCases.push_back(OTC("rounding_rte_sconst_conv_from_fp64", B_RTE_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP32, V_UNUSED, V_UNUSED, V_CONV_TO_FP32_RTE_RESULT));
2402                         testCases.push_back(OTC("rounding_rtz_sconst_conv_from_fp64", B_RTZ_ROUNDING, O_SCONST_CONV_FROM_FP64_TO_FP32, V_UNUSED, V_UNUSED, V_CONV_TO_FP32_RTZ_RESULT));
2403                 }
2404                 else
2405                 {
2406                         // PackHalf2x16 - verification done in SPIR-V
2407                         testCases.push_back(OTC("pack_half_denorm_preserve",            B_DENORM_PRESERVE,      O_PH_DENORM,    V_UNUSED, V_UNUSED, V_ONE));
2408
2409                         // UnpackHalf2x16 - custom arguments defined as constants
2410                         testCases.push_back(OTC("upack_half_denorm_flush_to_zero",      B_DENORM_FLUSH,         O_UPH_DENORM,   V_UNUSED, V_UNUSED, V_ZERO));
2411                         testCases.push_back(OTC("upack_half_denorm_preserve",           B_DENORM_PRESERVE,      O_UPH_DENORM,   V_UNUSED, V_UNUSED, V_CONV_DENORM_SMALLER));
2412                 }
2413
2414                 createUnaryTestCases(testCases, O_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP32);
2415                 createUnaryTestCases(testCases, O_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP32, DE_TRUE);
2416                 createUnaryTestCases(testCases, O_CONV_FROM_FP64, V_CONV_DENORM_BIGGER, V_ZERO);
2417         }
2418         else // FP64
2419         {
2420                 if (!argumentsFromInput)
2421                 {
2422                         // PackDouble2x32 - custom arguments defined as constants
2423                         testCases.push_back(OTC("pack_double_denorm_preserve",                  B_DENORM_PRESERVE,      O_PD_DENORM,                    V_UNUSED, V_UNUSED, V_DENORM));
2424
2425                         // UnpackDouble2x32 - verification done in SPIR-V
2426                         testCases.push_back(OTC("upack_double_denorm_flush_to_zero",    B_DENORM_FLUSH,         O_UPD_DENORM_FLUSH,             V_DENORM, V_UNUSED, V_ONE));
2427                         testCases.push_back(OTC("upack_double_denorm_preserve",                 B_DENORM_PRESERVE,      O_UPD_DENORM_PRESERVE,  V_DENORM, V_UNUSED, V_ONE));
2428                 }
2429
2430                 createUnaryTestCases(testCases, O_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP64);
2431                 createUnaryTestCases(testCases, O_CONV_FROM_FP16, V_CONV_DENORM_SMALLER, V_ZERO_OR_FP16_DENORM_TO_FP64, DE_TRUE);
2432                 createUnaryTestCases(testCases, O_CONV_FROM_FP32, V_CONV_DENORM_BIGGER, V_ZERO_OR_FP32_DENORM_TO_FP64);
2433         }
2434 }
2435
2436 const Operation& TestCasesBuilder::getOperation(OperationId id) const
2437 {
2438         return m_operations.at(id);
2439 }
2440
2441 void TestCasesBuilder::createUnaryTestCases(vector<OperationTestCase>& testCases, OperationId operationId, ValueId denormPreserveResult, ValueId denormFTZResult, deBool fp16WithoutStorage) const
2442 {
2443         if (fp16WithoutStorage)
2444         {
2445                 // Denom - Preserve
2446                 testCases.push_back(OTC("op_denorm_preserve_nostorage",         B_DENORM_PRESERVE,      operationId, V_DENORM,  V_UNUSED, denormPreserveResult, DE_TRUE));
2447
2448                 // Denorm - FlushToZero
2449                 testCases.push_back(OTC("op_denorm_flush_to_zero_nostorage",    B_DENORM_FLUSH,         operationId, V_DENORM,  V_UNUSED, denormFTZResult, DE_TRUE));
2450
2451                 // Signed Zero Inf Nan - Preserve
2452                 testCases.push_back(OTC("op_zero_preserve_nostorage",                   B_ZIN_PRESERVE,         operationId, V_ZERO,            V_UNUSED, V_ZERO, DE_TRUE));
2453                 testCases.push_back(OTC("op_signed_zero_preserve_nostorage",    B_ZIN_PRESERVE,         operationId, V_MINUS_ZERO,      V_UNUSED, V_MINUS_ZERO, DE_TRUE));
2454                 testCases.push_back(OTC("op_inf_preserve_nostorage",                    B_ZIN_PRESERVE,         operationId, V_INF,                     V_UNUSED, V_INF, DE_TRUE));
2455                 testCases.push_back(OTC("op_nan_preserve_nostorage",                    B_ZIN_PRESERVE,         operationId, V_NAN,                     V_UNUSED, V_NAN, DE_TRUE));
2456         }
2457         else
2458         {
2459                 // Denom - Preserve
2460                 testCases.push_back(OTC("op_denorm_preserve",           B_DENORM_PRESERVE,      operationId, V_DENORM,  V_UNUSED, denormPreserveResult));
2461
2462                 // Denorm - FlushToZero
2463                 testCases.push_back(OTC("op_denorm_flush_to_zero",      B_DENORM_FLUSH,         operationId, V_DENORM,  V_UNUSED, denormFTZResult));
2464
2465                 // Signed Zero Inf Nan - Preserve
2466                 testCases.push_back(OTC("op_zero_preserve",                     B_ZIN_PRESERVE,         operationId, V_ZERO,            V_UNUSED, V_ZERO));
2467                 testCases.push_back(OTC("op_signed_zero_preserve",      B_ZIN_PRESERVE,         operationId, V_MINUS_ZERO,      V_UNUSED, V_MINUS_ZERO));
2468                 testCases.push_back(OTC("op_inf_preserve",                      B_ZIN_PRESERVE,         operationId, V_INF,                     V_UNUSED, V_INF));
2469                 testCases.push_back(OTC("op_nan_preserve",                      B_ZIN_PRESERVE,         operationId, V_NAN,                     V_UNUSED, V_NAN));
2470         }
2471 }
2472
2473 template <typename TYPE, typename FLOAT_TYPE>
2474 bool isZeroOrOtherValue(const TYPE& returnedFloat, ValueId secondAcceptableResult, TestLog& log)
2475 {
2476         if (returnedFloat.isZero() && !returnedFloat.signBit())
2477                 return true;
2478
2479         TypeValues<FLOAT_TYPE> typeValues;
2480         typedef typename TYPE::StorageType SType;
2481         typename RawConvert<FLOAT_TYPE, SType>::Value value;
2482         value.fp = typeValues.getValue(secondAcceptableResult);
2483
2484         if (returnedFloat.bits() == value.ui)
2485                 return true;
2486
2487         log << TestLog::Message << "Expected 0 or " << toHex(value.ui)
2488                 << " (" << value.fp << ")" << TestLog::EndMessage;
2489         return false;
2490 }
2491
2492 template <typename TYPE>
2493 bool isAcosResultCorrect(const TYPE& returnedFloat, TestLog& log)
2494 {
2495         // pi/2 is result of acos(0) which in the specs is defined as equivalent to
2496         // atan2(sqrt(1.0 - x^2), x), where atan2 has 4096 ULP, sqrt is equivalent to
2497         // 1.0 /inversesqrt(), inversesqrt() is 2 ULP and rcp is another 2.5 ULP
2498
2499         double precision = 0;
2500         const double piDiv2 = 3.14159265358979323846 / 2;
2501         if (returnedFloat.MANTISSA_BITS == 23)
2502         {
2503                 FloatFormat fp32Format(-126, 127, 23, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
2504                 precision = fp32Format.ulp(piDiv2, 4096.0);
2505         }
2506         else
2507         {
2508                 FloatFormat fp16Format(-14, 15, 10, true, tcu::MAYBE);
2509                 precision = fp16Format.ulp(piDiv2, 5.0);
2510         }
2511
2512         if (deAbs(returnedFloat.asDouble() - piDiv2) < precision)
2513                 return true;
2514
2515         log << TestLog::Message << "Expected result to be in range"
2516                 << " (" << piDiv2 - precision << ", " << piDiv2 + precision << "), got "
2517                 << returnedFloat.asDouble() << TestLog::EndMessage;
2518         return false;
2519 }
2520
2521 template <typename TYPE>
2522 bool isCosResultCorrect(const TYPE& returnedFloat, TestLog& log)
2523 {
2524         // for cos(x) with x between -pi and pi, the precision error is 2^-11 for fp32 and 2^-7 for fp16.
2525         double precision = returnedFloat.MANTISSA_BITS == 23 ? dePow(2, -11) : dePow(2, -7);
2526         const double expected = 1.0;
2527
2528         if (deAbs(returnedFloat.asDouble() - expected) < precision)
2529                 return true;
2530
2531         log << TestLog::Message << "Expected result to be in range"
2532                 << " (" << expected - precision << ", " << expected + precision << "), got "
2533                 << returnedFloat.asDouble() << TestLog::EndMessage;
2534         return false;
2535 }
2536
2537 template <typename FLOAT_TYPE>
2538 double getFloatTypeAsDouble(FLOAT_TYPE param)
2539 {
2540         return param;
2541 }
2542 template<> double getFloatTypeAsDouble(deFloat16 param)
2543 {
2544         return deFloat16To64(param);
2545 }
2546
2547
2548 double getPrecisionAt(double value, float ulp, int mantissaBits)
2549 {
2550         if (mantissaBits == 23)
2551         {
2552                 FloatFormat fp32Format(-126, 127, 23, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
2553                 return fp32Format.ulp(value, ulp);
2554         }
2555         else if (mantissaBits == 52)
2556         {
2557                 FloatFormat fp32Format(-1022, 1023, 52, true, tcu::MAYBE, tcu::YES, tcu::MAYBE);
2558                 return fp32Format.ulp(value, ulp);
2559         }
2560         else
2561         {
2562                 DE_ASSERT(mantissaBits == 10);
2563                 FloatFormat fp16Format(-14, 15, 10, true, tcu::MAYBE);
2564                 return fp16Format.ulp(value, ulp);
2565         }
2566 }
2567
2568 template <typename TYPE, typename FLOAT_TYPE, typename REF_FUNCTION>
2569 bool isLogResultCorrect(const TYPE& returnedFloat, FLOAT_TYPE param, REF_FUNCTION refFunction, TestLog& log)
2570 {
2571         if (returnedFloat.isInf() && returnedFloat.signBit())
2572                 return true;
2573
2574         const double expected   = refFunction(getFloatTypeAsDouble(param));
2575         const double precision  = getPrecisionAt(expected, 3.0, returnedFloat.MANTISSA_BITS);
2576
2577         if (deAbs(returnedFloat.asDouble() - expected) < precision)
2578                 return true;
2579
2580         log << TestLog::Message << "Expected result to be -INF or in range"
2581                 << " (" << expected - precision << ", " << expected + precision << "), got "
2582                 << returnedFloat.asDouble() << TestLog::EndMessage;
2583         return false;
2584 }
2585
2586 template <typename TYPE, typename FLOAT_TYPE>
2587 bool isInverseSqrtResultCorrect(const TYPE& returnedFloat, FLOAT_TYPE param, TestLog& log)
2588 {
2589         if (returnedFloat.isInf() && !returnedFloat.signBit())
2590                 return true;
2591
2592         const double expected   = 1.0/ deSqrt(getFloatTypeAsDouble(param));
2593         const double precision  = getPrecisionAt(expected, 2.0, returnedFloat.MANTISSA_BITS);
2594
2595         if (deAbs(returnedFloat.asDouble() - expected) < precision)
2596                 return true;
2597
2598         log << TestLog::Message << "Expected result to be INF or in range"
2599                 << " (" << expected - precision << ", " << expected + precision << "), got "
2600                 << returnedFloat.asDouble() << TestLog::EndMessage;
2601         return false;
2602 }
2603
2604 template <typename TYPE, typename FLOAT_TYPE>
2605 bool isSqrtResultCorrect(const TYPE& returnedFloat, FLOAT_TYPE param, TestLog& log)
2606 {
2607         if (returnedFloat.isZero() && !returnedFloat.signBit())
2608                 return true;
2609
2610
2611         const double expected                           = deSqrt(getFloatTypeAsDouble(param));
2612         const double expectedInverseSqrt        = 1.0 / expected;
2613         const double inverseSqrtPrecision       = getPrecisionAt(expectedInverseSqrt, 2.0, returnedFloat.MANTISSA_BITS);
2614
2615         double expectedMin = deMin(1.0 / (expectedInverseSqrt - inverseSqrtPrecision), 1.0 / (expectedInverseSqrt + inverseSqrtPrecision));
2616         double expectedMax = deMax(1.0 / (expectedInverseSqrt - inverseSqrtPrecision), 1.0 / (expectedInverseSqrt + inverseSqrtPrecision));
2617
2618         expectedMin -= getPrecisionAt(expectedMin, 2.5, returnedFloat.MANTISSA_BITS);
2619         expectedMax += getPrecisionAt(expectedMax, 2.5, returnedFloat.MANTISSA_BITS);
2620
2621         if (returnedFloat.asDouble() >= expectedMin  && returnedFloat.asDouble() <= expectedMax)
2622                 return true;
2623
2624         log << TestLog::Message << "Expected result to be +0 or in range"
2625                 << " (" << expectedMin << ", " << expectedMax << "), got "
2626                 << returnedFloat.asDouble() << TestLog::EndMessage;
2627         return false;
2628 }
2629
2630 // Function used to compare test result with expected output.
2631 // TYPE can be Float16, Float32 or Float64.
2632 // FLOAT_TYPE can be deFloat16, float, double.
2633 template <typename TYPE, typename FLOAT_TYPE>
2634 bool compareBytes(vector<deUint8>& expectedBytes, AllocationSp outputAlloc, TestLog& log)
2635 {
2636         const TYPE* returned    = static_cast<const TYPE*>(outputAlloc->getHostPtr());
2637         const TYPE* fValueId    = reinterpret_cast<const TYPE*>(&expectedBytes.front());
2638
2639         // all test return single value
2640         // Fp16 nostorage tests get their values from a deUint32 value, but we create the
2641         // buffer with the same size for both cases: 4 bytes.
2642         if (sizeof(TYPE) == 2u)
2643                 DE_ASSERT((expectedBytes.size() / sizeof(TYPE)) == 2);
2644         else
2645                 DE_ASSERT((expectedBytes.size() / sizeof(TYPE)) == 1);
2646
2647         // during test setup we do not store expected value but id that can be used to
2648         // retrieve actual value - this is done to handle special cases like multiple
2649         // allowed results or epsilon checks for some cases
2650         // note that this is workaround - this should be done by changing
2651         // ComputerShaderCase and GraphicsShaderCase so that additional arguments can
2652         // be passed to this verification callback
2653         typedef typename TYPE::StorageType SType;
2654         SType           expectedInt             = fValueId[0].bits();
2655         ValueId         expectedValueId = static_cast<ValueId>(expectedInt);
2656
2657         // something went wrong, expected value cant be V_UNUSED,
2658         // if this is the case then test shouldn't be created at all
2659         DE_ASSERT(expectedValueId != V_UNUSED);
2660
2661         TYPE returnedFloat = returned[0];
2662
2663         log << TestLog::Message << "Calculated result: " << toHex(returnedFloat.bits())
2664                 << " (" << returnedFloat.asFloat() << ")" << TestLog::EndMessage;
2665
2666         if (expectedValueId == V_NAN)
2667         {
2668                 if (returnedFloat.isNaN())
2669                         return true;
2670
2671                 log << TestLog::Message << "Expected NaN" << TestLog::EndMessage;
2672                 return false;
2673         }
2674
2675         if (expectedValueId == V_DENORM)
2676         {
2677                 if (returnedFloat.isDenorm())
2678                         return true;
2679
2680                 log << TestLog::Message << "Expected Denorm" << TestLog::EndMessage;
2681                 return false;
2682         }
2683
2684         // handle multiple acceptable results cases
2685         if (expectedValueId == V_ZERO_OR_MINUS_ZERO)
2686         {
2687                 if (returnedFloat.isZero())
2688                         return true;
2689
2690                 log << TestLog::Message << "Expected 0 or -0" << TestLog::EndMessage;
2691                 return false;
2692         }
2693         if (expectedValueId == V_ZERO_OR_ONE)
2694                 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_ONE, log);
2695         if ((expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP32) || (expectedValueId == V_ZERO_OR_FP16_DENORM_TO_FP64))
2696                 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_SMALLER, log);
2697         if (expectedValueId == V_ZERO_OR_FP32_DENORM_TO_FP64)
2698                 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_CONV_DENORM_BIGGER, log);
2699         if (expectedValueId == V_ZERO_OR_DENORM_TIMES_TWO)
2700         {
2701                 // this expected value is only needed for fp16
2702                 DE_ASSERT(returnedFloat.EXPONENT_BIAS == 15);
2703                 return isZeroOrOtherValue<TYPE, FLOAT_TYPE>(returnedFloat, V_DENORM_TIMES_TWO, log);
2704         }
2705         if (expectedValueId == V_MINUS_ONE_OR_CLOSE)
2706         {
2707                 // this expected value is only needed for fp16
2708                 DE_ASSERT(returnedFloat.EXPONENT_BIAS == 15);
2709                 typename TYPE::StorageType returnedValue = returnedFloat.bits();
2710                 return (returnedValue == 0xbc00) || (returnedValue == 0xbbff);
2711         }
2712
2713         // handle trigonometric operations precision errors
2714         if (expectedValueId == V_TRIG_ONE)
2715                 return isCosResultCorrect<TYPE>(returnedFloat, log);
2716
2717         // handle acos(0) case
2718         if (expectedValueId == V_PI_DIV_2)
2719                 return isAcosResultCorrect<TYPE>(returnedFloat, log);
2720
2721         TypeValues<FLOAT_TYPE> typeValues;
2722
2723         if (expectedValueId == V_MINUS_INF_OR_LOG_DENORM)
2724                 return isLogResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), deLog, log);
2725
2726         if (expectedValueId == V_MINUS_INF_OR_LOG2_DENORM)
2727                 return isLogResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), deLog2, log);
2728
2729         if (expectedValueId == V_ZERO_OR_SQRT_DENORM)
2730                 return isSqrtResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), log);
2731
2732         if (expectedValueId == V_INF_OR_INV_SQRT_DENORM)
2733                 return isInverseSqrtResultCorrect<TYPE>(returnedFloat, typeValues.getValue(V_DENORM), log);
2734
2735
2736         typename RawConvert<FLOAT_TYPE, SType>::Value value;
2737         value.fp = typeValues.getValue(expectedValueId);
2738
2739         if (returnedFloat.bits() == value.ui)
2740                 return true;
2741
2742         log << TestLog::Message << "Expected " << toHex(value.ui)
2743                 << " (" << value.fp << ")" << TestLog::EndMessage;
2744         return false;
2745 }
2746
2747 template <typename TYPE, typename FLOAT_TYPE>
2748 bool checkFloats (const vector<Resource>&               ,
2749                                   const vector<AllocationSp>&   outputAllocs,
2750                                   const vector<Resource>&               expectedOutputs,
2751                                   TestLog&                                              log)
2752 {
2753         if (outputAllocs.size() != expectedOutputs.size())
2754                 return false;
2755
2756         for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
2757         {
2758                 vector<deUint8> expectedBytes;
2759                 expectedOutputs[outputNdx].getBytes(expectedBytes);
2760
2761                 if (!compareBytes<TYPE, FLOAT_TYPE>(expectedBytes, outputAllocs[outputNdx], log))
2762                         return false;
2763         }
2764
2765         return true;
2766 }
2767
2768 bool checkMixedFloats (const vector<Resource>&          ,
2769                                            const vector<AllocationSp>&  outputAllocs,
2770                                            const vector<Resource>&              expectedOutputs,
2771                                            TestLog&                                             log)
2772 {
2773         // this function validates buffers containing floats of diferent widths, order is not important
2774
2775         if (outputAllocs.size() != expectedOutputs.size())
2776                 return false;
2777
2778         // The comparison function depends on the data type stored in the resource.
2779         using compareFun = bool (*)(vector<deUint8>& expectedBytes, AllocationSp outputAlloc, TestLog& log);
2780         const map<BufferDataType, compareFun> compareMap =
2781         {
2782                 { BufferDataType::DATA_FP16, compareBytes<Float16, deFloat16> },
2783                 { BufferDataType::DATA_FP32, compareBytes<Float32, float> },
2784                 { BufferDataType::DATA_FP64, compareBytes<Float64, double>},
2785         };
2786
2787         vector<deUint8> expectedBytes;
2788         bool                    allResultsAreCorrect    = true;
2789         int                             resultIndex                             = static_cast<int>(outputAllocs.size());
2790
2791         while (resultIndex--)
2792         {
2793                 expectedOutputs[resultIndex].getBytes(expectedBytes);
2794                 BufferDataType type              = static_cast<BufferDataType>(reinterpret_cast<std::uintptr_t>(expectedOutputs[resultIndex].getUserData()));
2795                 allResultsAreCorrect    &= compareMap.at(type)(expectedBytes, outputAllocs[resultIndex], log);
2796         }
2797
2798         return allResultsAreCorrect;
2799 }
2800
2801 // Base class for ComputeTestGroupBuilder and GrephicstestGroupBuilder classes.
2802 // It contains all functionalities that are used by both child classes.
2803 class TestGroupBuilderBase
2804 {
2805 public:
2806
2807         TestGroupBuilderBase();
2808         virtual ~TestGroupBuilderBase() = default;
2809
2810         virtual void createOperationTests(TestCaseGroup* parentGroup,
2811                                                                           const char* groupName,
2812                                                                           FloatType floatType,
2813                                                                           bool argumentsFromInput) = 0;
2814
2815         virtual void createSettingsTests(TestCaseGroup* parentGroup) = 0;
2816
2817 protected:
2818
2819         typedef vector<OperationTestCase> TestCaseVect;
2820
2821         // Structure containing all data required to create single operation test.
2822         struct OperationTestCaseInfo
2823         {
2824                 FloatType                                       outFloatType;
2825                 bool                                            argumentsFromInput;
2826                 VkShaderStageFlagBits           testedStage;
2827                 const Operation&                        operation;
2828                 const OperationTestCase&        testCase;
2829         };
2830
2831         // Mode used by SettingsTestCaseInfo to specify what settings do we want to test.
2832         enum SettingsMode
2833         {
2834                 SM_ROUNDING                     = 0,
2835                 SM_DENORMS
2836         };
2837
2838         // Enum containing available options. When rounding is tested only SO_RTE and SO_RTZ
2839         // should be used. SO_FLUSH and SO_PRESERVE should be used only for denorm tests.
2840         enum SettingsOption
2841         {
2842                 SO_UNUSED                       = 0,
2843                 SO_RTE,
2844                 SO_RTZ,
2845                 SO_FLUSH,
2846                 SO_PRESERVE
2847         };
2848
2849         // Structure containing all data required to create single settings test.
2850         struct SettingsTestCaseInfo
2851         {
2852                 const char*                                                             name;
2853                 SettingsMode                                                    testedMode;
2854                 VkShaderFloatControlsIndependence               independenceSetting;
2855
2856                 SettingsOption                                                  fp16Option;
2857                 SettingsOption                                                  fp32Option;
2858                 SettingsOption                                                  fp64Option;
2859                 deBool                                                                  fp16Without16BitStorage;
2860         };
2861
2862         void specializeOperation(const OperationTestCaseInfo&   testCaseInfo,
2863                                                          SpecializedOperation&                  specializedOperation) const;
2864
2865         void getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,
2866                                                                                            const string inBitWidth,
2867                                                                                            const string outBitWidth,
2868                                                                                            string& capability,
2869                                                                                            string& executionMode) const;
2870
2871         void setupVulkanFeatures(FloatType                      inFloatType,
2872                                                          FloatType                      outFloatType,
2873                                                          BehaviorFlags          behaviorFlags,
2874                                                          bool                           float64FeatureRequired,
2875                                                          VulkanFeatures&        features) const;
2876
2877 protected:
2878
2879         struct TypeData
2880         {
2881                 TypeValuesSP            values;
2882                 TypeSnippetsSP          snippets;
2883                 TypeTestResultsSP       testResults;
2884         };
2885
2886         // Type specific parameters are stored in this map.
2887         map<FloatType, TypeData> m_typeData;
2888
2889         // Map converting behaviuor id to OpCapability instruction
2890         typedef map<BehaviorFlagBits, string> BehaviorNameMap;
2891         BehaviorNameMap m_behaviorToName;
2892 };
2893
2894 TestGroupBuilderBase::TestGroupBuilderBase()
2895 {
2896         m_typeData[FP16] = TypeData();
2897         m_typeData[FP16].values                 = TypeValuesSP(new TypeValues<deFloat16>);
2898         m_typeData[FP16].snippets               = TypeSnippetsSP(new TypeSnippets<deFloat16>);
2899         m_typeData[FP16].testResults    = TypeTestResultsSP(new TypeTestResults<deFloat16>);
2900         m_typeData[FP32] = TypeData();
2901         m_typeData[FP32].values                 = TypeValuesSP(new TypeValues<float>);
2902         m_typeData[FP32].snippets               = TypeSnippetsSP(new TypeSnippets<float>);
2903         m_typeData[FP32].testResults    = TypeTestResultsSP(new TypeTestResults<float>);
2904         m_typeData[FP64] = TypeData();
2905         m_typeData[FP64].values                 = TypeValuesSP(new TypeValues<double>);
2906         m_typeData[FP64].snippets               = TypeSnippetsSP(new TypeSnippets<double>);
2907         m_typeData[FP64].testResults    = TypeTestResultsSP(new TypeTestResults<double>);
2908
2909         m_behaviorToName[B_DENORM_PRESERVE]     = "DenormPreserve";
2910         m_behaviorToName[B_DENORM_FLUSH]        = "DenormFlushToZero";
2911         m_behaviorToName[B_ZIN_PRESERVE]        = "SignedZeroInfNanPreserve";
2912         m_behaviorToName[B_RTE_ROUNDING]        = "RoundingModeRTE";
2913         m_behaviorToName[B_RTZ_ROUNDING]        = "RoundingModeRTZ";
2914 }
2915
2916 void TestGroupBuilderBase::specializeOperation (const OperationTestCaseInfo&    testCaseInfo,
2917                                                                                                 SpecializedOperation&                   specializedOperation) const
2918 {
2919         const string            typeToken               = "_float";
2920         const string            widthToken              = "${float_width}";
2921
2922         FloatType                               outFloatType    = testCaseInfo.outFloatType;
2923         const Operation&                operation               = testCaseInfo.operation;
2924         const TypeSnippetsSP    outTypeSnippets = m_typeData.at(outFloatType).snippets;
2925         const bool                              inputRestricted = operation.isInputTypeRestricted;
2926         FloatType                               inFloatType             = operation.restrictedInputType;
2927
2928         // usually input type is same as output but this is not the case for conversion
2929         // operations; in those cases operation definitions have restricted input type
2930         inFloatType = inputRestricted ? inFloatType : outFloatType;
2931
2932         TypeSnippetsSP inTypeSnippets = m_typeData.at(inFloatType).snippets;
2933
2934         const string inTypePrefix       = string("_f") + inTypeSnippets->bitWidth;
2935         const string outTypePrefix      = string("_f") + outTypeSnippets->bitWidth;
2936
2937         specializedOperation.constants          = replace(operation.constants, typeToken, inTypePrefix);
2938         specializedOperation.annotations        = replace(operation.annotations, widthToken, outTypeSnippets->bitWidth);
2939         specializedOperation.types                      = replace(operation.types, typeToken, outTypePrefix);
2940         specializedOperation.variables          = replace(operation.variables, typeToken, outTypePrefix);
2941         specializedOperation.functions          = replace(operation.functions, typeToken, outTypePrefix);
2942         specializedOperation.commands           = replace(operation.commands, typeToken, outTypePrefix);
2943
2944         specializedOperation.inFloatType                                = inFloatType;
2945         specializedOperation.inTypeSnippets                             = inTypeSnippets;
2946         specializedOperation.outTypeSnippets                    = outTypeSnippets;
2947         specializedOperation.argumentsUsesFloatConstant = 0;
2948
2949         if (operation.isSpecConstant)
2950                 return;
2951
2952         // select way arguments are prepared
2953         if (testCaseInfo.argumentsFromInput)
2954         {
2955                 // read arguments from input SSBO in main function
2956                 specializedOperation.arguments = inTypeSnippets->argumentsFromInputSnippet;
2957
2958                 if (inFloatType == FP16 && testCaseInfo.testCase.fp16Without16BitStorage)
2959                         specializedOperation.arguments = inTypeSnippets->argumentsFromInputFp16Snippet;
2960         }
2961         else
2962         {
2963                 // generate proper values in main function
2964                 const string arg1 = "%arg1                 = ";
2965                 const string arg2 = "%arg2                 = ";
2966
2967                 const ValueId* inputArguments = testCaseInfo.testCase.input;
2968                 if (inputArguments[0] != V_UNUSED)
2969                 {
2970                         specializedOperation.arguments                                  = arg1 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[0]);
2971                         specializedOperation.argumentsUsesFloatConstant |= B_STATEMENT_USAGE_ARGS_CONST_FLOAT;
2972                 }
2973                 if (inputArguments[1] != V_UNUSED)
2974                 {
2975                         specializedOperation.arguments                                  += arg2 + inTypeSnippets->valueIdToSnippetArgMap.at(inputArguments[1]);
2976                         specializedOperation.argumentsUsesFloatConstant |= B_STATEMENT_USAGE_ARGS_CONST_FLOAT;
2977                 }
2978         }
2979 }
2980
2981
2982 void TestGroupBuilderBase::getBehaviorCapabilityAndExecutionMode(BehaviorFlags behaviorFlags,
2983                                                                                                                                  const string inBitWidth,
2984                                                                                                                                  const string outBitWidth,
2985                                                                                                                                  string& capability,
2986                                                                                                                                  string& executionMode) const
2987 {
2988         // iterate over all behaviours and request those that are needed
2989         BehaviorNameMap::const_iterator it = m_behaviorToName.begin();
2990         while (it != m_behaviorToName.end())
2991         {
2992                 BehaviorFlagBits        behaviorId              = it->first;
2993                 string                          behaviorName    = it->second;
2994
2995                 if (behaviorFlags & behaviorId)
2996                 {
2997                         capability += "OpCapability " + behaviorName + "\n";
2998
2999                         // rounding mode should be obeyed for destination type
3000                         bool rounding = (behaviorId == B_RTE_ROUNDING) || (behaviorId == B_RTZ_ROUNDING);
3001                         executionMode += "OpExecutionMode %main " + behaviorName + " " +
3002                                                          (rounding ? outBitWidth : inBitWidth) + "\n";
3003                 }
3004
3005                 ++it;
3006         }
3007
3008         DE_ASSERT(!capability.empty() && !executionMode.empty());
3009 }
3010
3011 void TestGroupBuilderBase::setupVulkanFeatures(FloatType                inFloatType,
3012                                                                                            FloatType            outFloatType,
3013                                                                                            BehaviorFlags        behaviorFlags,
3014                                                                                            bool                         float64FeatureRequired,
3015                                                                                            VulkanFeatures&      features) const
3016 {
3017         features.coreFeatures.shaderFloat64 = float64FeatureRequired;
3018
3019         // request proper float controls features
3020         ExtensionFloatControlsFeatures& floatControls = features.floatControlsProperties;
3021
3022         // rounding mode should obey the destination type
3023         bool rteRounding = (behaviorFlags & B_RTE_ROUNDING) != 0;
3024         bool rtzRounding = (behaviorFlags & B_RTZ_ROUNDING) != 0;
3025         if (rteRounding || rtzRounding)
3026         {
3027                 switch(outFloatType)
3028                 {
3029                 case FP16:
3030                         floatControls.shaderRoundingModeRTEFloat16 = rteRounding;
3031                         floatControls.shaderRoundingModeRTZFloat16 = rtzRounding;
3032                         return;
3033                 case FP32:
3034                         floatControls.shaderRoundingModeRTEFloat32 = rteRounding;
3035                         floatControls.shaderRoundingModeRTZFloat32 = rtzRounding;
3036                         return;
3037                 case FP64:
3038                         floatControls.shaderRoundingModeRTEFloat64 = rteRounding;
3039                         floatControls.shaderRoundingModeRTZFloat64 = rtzRounding;
3040                         return;
3041                 }
3042         }
3043
3044         switch(inFloatType)
3045         {
3046         case FP16:
3047                 floatControls.shaderDenormPreserveFloat16                       = behaviorFlags & B_DENORM_PRESERVE;
3048                 floatControls.shaderDenormFlushToZeroFloat16            = behaviorFlags & B_DENORM_FLUSH;
3049                 floatControls.shaderSignedZeroInfNanPreserveFloat16     = behaviorFlags & B_ZIN_PRESERVE;
3050                 return;
3051         case FP32:
3052                 floatControls.shaderDenormPreserveFloat32                       = behaviorFlags & B_DENORM_PRESERVE;
3053                 floatControls.shaderDenormFlushToZeroFloat32            = behaviorFlags & B_DENORM_FLUSH;
3054                 floatControls.shaderSignedZeroInfNanPreserveFloat32     = behaviorFlags & B_ZIN_PRESERVE;
3055                 return;
3056         case FP64:
3057                 floatControls.shaderDenormPreserveFloat64                       = behaviorFlags & B_DENORM_PRESERVE;
3058                 floatControls.shaderDenormFlushToZeroFloat64            = behaviorFlags & B_DENORM_FLUSH;
3059                 floatControls.shaderSignedZeroInfNanPreserveFloat64     = behaviorFlags & B_ZIN_PRESERVE;
3060                 return;
3061         }
3062 }
3063
3064 // Test case not related to SPIR-V but executed with compute tests. It checks if specified
3065 // features are set to the same value when specific independence settings are used.
3066 tcu::TestStatus verifyIndependenceSettings(Context& context)
3067 {
3068         if (!context.isDeviceFunctionalitySupported("VK_KHR_shader_float_controls"))
3069                 TCU_THROW(NotSupportedError, "VK_KHR_shader_float_controls not supported");
3070
3071         vk::VkPhysicalDeviceFloatControlsPropertiesKHR  fcProperties;
3072         fcProperties.sType      = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR;
3073         fcProperties.pNext      = DE_NULL;
3074
3075         vk::VkPhysicalDeviceProperties2 deviceProperties;
3076         deviceProperties.sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
3077         deviceProperties.pNext  = &fcProperties;
3078
3079         auto fail = [](const string& featureGroup)
3080         {
3081                 return tcu::TestStatus::fail(featureGroup + " features should be set to the same value");
3082         };
3083
3084         const VkPhysicalDevice                  physicalDevice          = context.getPhysicalDevice();
3085         const vk::InstanceInterface&    instanceInterface       = context.getInstanceInterface();
3086         instanceInterface.getPhysicalDeviceProperties2(physicalDevice, &deviceProperties);
3087
3088         if (fcProperties.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR)
3089         {
3090                 vk::VkBool32 fp16rte = fcProperties.shaderRoundingModeRTEFloat16;
3091                 vk::VkBool32 fp32rte = fcProperties.shaderRoundingModeRTEFloat32;
3092                 vk::VkBool32 fp64rte = fcProperties.shaderRoundingModeRTEFloat64;
3093                 if ((fp16rte != fp32rte) || (fp32rte != fp64rte))
3094                         return fail("shaderRoundingModeRTEFloat*");
3095
3096                 vk::VkBool32 fp16rtz = fcProperties.shaderRoundingModeRTZFloat16;
3097                 vk::VkBool32 fp32rtz = fcProperties.shaderRoundingModeRTZFloat32;
3098                 vk::VkBool32 fp64rtz = fcProperties.shaderRoundingModeRTZFloat64;
3099                 if ((fp16rtz != fp32rtz) || (fp32rtz != fp64rtz))
3100                         return fail("shaderRoundingModeRTZFloat*");
3101         }
3102         else if (fcProperties.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR)
3103         {
3104                 vk::VkBool32 fp16rte = fcProperties.shaderRoundingModeRTEFloat16;
3105                 vk::VkBool32 fp64rte = fcProperties.shaderRoundingModeRTEFloat64;
3106                 if ((fp16rte != fp64rte))
3107                         return fail("shaderRoundingModeRTEFloat16 and 64");
3108
3109                 vk::VkBool32 fp16rtz = fcProperties.shaderRoundingModeRTZFloat16;
3110                 vk::VkBool32 fp64rtz = fcProperties.shaderRoundingModeRTZFloat64;
3111                 if ((fp16rtz != fp64rtz))
3112                         return fail("shaderRoundingModeRTZFloat16 and 64");
3113         }
3114
3115         if (fcProperties.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR)
3116         {
3117                 vk::VkBool32 fp16flush = fcProperties.shaderDenormFlushToZeroFloat16;
3118                 vk::VkBool32 fp32flush = fcProperties.shaderDenormFlushToZeroFloat32;
3119                 vk::VkBool32 fp64flush = fcProperties.shaderDenormFlushToZeroFloat64;
3120                 if ((fp16flush != fp32flush) || (fp32flush != fp64flush))
3121                         return fail("shaderDenormFlushToZeroFloat*");
3122
3123                 vk::VkBool32 fp16preserve = fcProperties.shaderDenormPreserveFloat16;
3124                 vk::VkBool32 fp32preserve = fcProperties.shaderDenormPreserveFloat32;
3125                 vk::VkBool32 fp64preserve = fcProperties.shaderDenormPreserveFloat64;
3126                 if ((fp16preserve != fp32preserve) || (fp32preserve != fp64preserve))
3127                         return fail("shaderDenormPreserveFloat*");
3128         }
3129         else if (fcProperties.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR)
3130         {
3131                 vk::VkBool32 fp16flush = fcProperties.shaderDenormFlushToZeroFloat16;
3132                 vk::VkBool32 fp64flush = fcProperties.shaderDenormFlushToZeroFloat64;
3133                 if ((fp16flush != fp64flush))
3134                         return fail("shaderDenormFlushToZeroFloat16 and 64");
3135
3136                 vk::VkBool32 fp16preserve = fcProperties.shaderDenormPreserveFloat16;
3137                 vk::VkBool32 fp64preserve = fcProperties.shaderDenormPreserveFloat64;
3138                 if ((fp16preserve != fp64preserve))
3139                         return fail("shaderDenormPreserveFloat16 and 64");
3140         }
3141
3142         return tcu::TestStatus::pass("Pass");
3143 }
3144
3145 // ComputeTestGroupBuilder contains logic that creates compute shaders
3146 // for all test cases. As most tests in spirv-assembly it uses functionality
3147 // implemented in vktSpvAsmComputeShaderTestUtil.cpp.
3148 class ComputeTestGroupBuilder: public TestGroupBuilderBase
3149 {
3150 public:
3151
3152         void init();
3153
3154         void createOperationTests(TestCaseGroup* parentGroup,
3155                                                           const char* groupName,
3156                                                           FloatType floatType,
3157                                                           bool argumentsFromInput) override;
3158
3159         void createSettingsTests(TestCaseGroup* parentGroup) override;
3160
3161 protected:
3162
3163         void fillShaderSpec(const OperationTestCaseInfo&        testCaseInfo,
3164                                                 ComputeShaderSpec&                              csSpec) const;
3165         void fillShaderSpec(const SettingsTestCaseInfo&         testCaseInfo,
3166                                                 ComputeShaderSpec&                              csSpec) const;
3167
3168 private:
3169
3170
3171         StringTemplate          m_operationShaderTemplate;
3172         StringTemplate          m_settingsShaderTemplate;
3173         TestCasesBuilder        m_operationTestCaseBuilder;
3174 };
3175
3176 void ComputeTestGroupBuilder::init()
3177 {
3178         m_operationTestCaseBuilder.init();
3179
3180         // generic compute shader template with common code for all
3181         // float types and all possible operations listed in OperationId enum
3182         m_operationShaderTemplate.setString(
3183                 "OpCapability Shader\n"
3184                 "${capabilities}"
3185
3186                 "OpExtension \"SPV_KHR_float_controls\"\n"
3187                 "${extensions}"
3188
3189                 "%std450            = OpExtInstImport \"GLSL.std.450\"\n"
3190                 "OpMemoryModel Logical GLSL450\n"
3191                 "OpEntryPoint GLCompute %main \"main\" %id\n"
3192                 "OpExecutionMode %main LocalSize 1 1 1\n"
3193                 "${execution_mode}"
3194
3195                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3196
3197                 // some tests require additional annotations
3198                 "${annotations}"
3199
3200                 "%type_void            = OpTypeVoid\n"
3201                 "%type_voidf           = OpTypeFunction %type_void\n"
3202                 "%type_bool            = OpTypeBool\n"
3203                 "%type_u32             = OpTypeInt 32 0\n"
3204                 "%type_i32             = OpTypeInt 32 1\n"
3205                 "%type_i32_fptr        = OpTypePointer Function %type_i32\n"
3206                 "%type_u32_vec2        = OpTypeVector %type_u32 2\n"
3207                 "%type_u32_vec3        = OpTypeVector %type_u32 3\n"
3208                 "%type_u32_vec3_ptr    = OpTypePointer Input %type_u32_vec3\n"
3209
3210                 "%c_i32_0              = OpConstant %type_i32 0\n"
3211                 "%c_i32_1              = OpConstant %type_i32 1\n"
3212                 "%c_i32_2              = OpConstant %type_i32 2\n"
3213                 "%c_u32_1              = OpConstant %type_u32 1\n"
3214
3215                 // if input float type has different width then output then
3216                 // both types are defined here along with all types derived from
3217                 // them that are commonly used by tests; some tests also define
3218                 // their own types (those that are needed just by this single test)
3219                 "${types}"
3220
3221                 // SSBO definitions
3222                 "${io_definitions}"
3223
3224                 "%id                   = OpVariable %type_u32_vec3_ptr Input\n"
3225
3226                 // set of default constants per float type is placed here,
3227                 // operation tests can also define additional constants.
3228                 "${constants}"
3229
3230                 // O_RETURN_VAL defines function here and becouse
3231                 // of that this token needs to be directly before main function
3232                 "${functions}"
3233
3234                 "%main                 = OpFunction %type_void None %type_voidf\n"
3235                 "%label                = OpLabel\n"
3236
3237                 "${variables}"
3238
3239                 // depending on test case arguments are either read from input ssbo
3240                 // or generated in spir-v code - in later case shader input is not used
3241                 "${arguments}"
3242
3243                 // perform test commands
3244                 "${commands}"
3245
3246                 // save result to SSBO
3247                 "${save_result}"
3248
3249                 "OpReturn\n"
3250                 "OpFunctionEnd\n");
3251
3252         m_settingsShaderTemplate.setString(
3253                 "OpCapability Shader\n"
3254                 "${capabilities}"
3255
3256                 "OpExtension \"SPV_KHR_float_controls\"\n"
3257                 "${extensions}"
3258
3259                 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
3260                 "OpMemoryModel Logical GLSL450\n"
3261                 "OpEntryPoint GLCompute %main \"main\" %id\n"
3262                 "OpExecutionMode %main LocalSize 1 1 1\n"
3263                 "${execution_modes}"
3264
3265                 // annotations
3266                 "OpDecorate %SSBO_in BufferBlock\n"
3267                 "OpDecorate %ssbo_in DescriptorSet 0\n"
3268                 "OpDecorate %ssbo_in Binding 0\n"
3269                 "OpDecorate %ssbo_in NonWritable\n"
3270                 "${io_annotations}"
3271
3272                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3273
3274                 // types
3275                 "%type_void            = OpTypeVoid\n"
3276                 "%type_voidf           = OpTypeFunction %type_void\n"
3277                 "%type_u32             = OpTypeInt 32 0\n"
3278                 "%type_i32             = OpTypeInt 32 1\n"
3279                 "%type_i32_fptr        = OpTypePointer Function %type_i32\n"
3280                 "%type_u32_vec3        = OpTypeVector %type_u32 3\n"
3281                 "%type_u32_vec3_ptr    = OpTypePointer Input %type_u32_vec3\n"
3282
3283                 "%c_i32_0              = OpConstant %type_i32 0\n"
3284                 "%c_i32_1              = OpConstant %type_i32 1\n"
3285                 "%c_i32_2              = OpConstant %type_i32 2\n"
3286
3287                 "${types}"
3288
3289                 // in SSBO definition
3290                 "%SSBO_in              = OpTypeStruct ${in_struct}\n"
3291                 "%up_SSBO_in           = OpTypePointer Uniform %SSBO_in\n"
3292                 "%ssbo_in              = OpVariable %up_SSBO_in Uniform\n"
3293
3294                 // out SSBO definitions
3295                 "${out_definitions}"
3296
3297                 "%id                   = OpVariable %type_u32_vec3_ptr Input\n"
3298                 "%main                 = OpFunction %type_void None %type_voidf\n"
3299                 "%label                = OpLabel\n"
3300
3301                 "${commands}"
3302
3303                 "${save_result}"
3304
3305                 "OpReturn\n"
3306                 "OpFunctionEnd\n");
3307 }
3308
3309 void ComputeTestGroupBuilder::createOperationTests(TestCaseGroup* parentGroup, const char* groupName, FloatType floatType, bool argumentsFromInput)
3310 {
3311         TestContext&    testCtx = parentGroup->getTestContext();
3312         TestCaseGroup*  group   = new TestCaseGroup(testCtx, groupName, "");
3313         parentGroup->addChild(group);
3314
3315         TestCaseVect testCases;
3316         m_operationTestCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
3317
3318         TestCaseVect::const_iterator currTestCase = testCases.begin();
3319         TestCaseVect::const_iterator lastTestCase = testCases.end();
3320         while(currTestCase != lastTestCase)
3321         {
3322                 const OperationTestCase& testCase = *currTestCase;
3323                 ++currTestCase;
3324
3325                 // skip cases with undefined output
3326                 if (testCase.expectedOutput == V_UNUSED)
3327                         continue;
3328
3329                 OperationTestCaseInfo testCaseInfo =
3330                 {
3331                         floatType,
3332                         argumentsFromInput,
3333                         VK_SHADER_STAGE_COMPUTE_BIT,
3334                         m_operationTestCaseBuilder.getOperation(testCase.operationId),
3335                         testCase
3336                 };
3337
3338                 ComputeShaderSpec       csSpec;
3339
3340                 fillShaderSpec(testCaseInfo, csSpec);
3341
3342                 string testName = replace(testCase.baseName, "op", testCaseInfo.operation.name);
3343                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", csSpec));
3344         }
3345 }
3346
3347 void ComputeTestGroupBuilder::createSettingsTests(TestCaseGroup* parentGroup)
3348 {
3349         TestContext&    testCtx = parentGroup->getTestContext();
3350         TestCaseGroup*  group   = new TestCaseGroup(testCtx, "independence_settings", "");
3351         parentGroup->addChild(group);
3352
3353         using SFCI = VkShaderFloatControlsIndependence;
3354         const SFCI independence32       = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
3355         const SFCI independenceAll      = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
3356
3357         vector<SettingsTestCaseInfo> testCases =
3358         {
3359                 // name                                                                                                                 mode                    independenceSetting             fp16Option              fp32Option              fp64Option              fp16Without16bitstorage
3360
3361                 // test rounding modes when only two float widths are available
3362                 { "rounding_ind_all_fp16_rte_fp32_rtz",                                                 SM_ROUNDING,    independenceAll,                SO_RTE,                 SO_RTZ,                 SO_UNUSED,              DE_FALSE },
3363                 { "rounding_ind_all_fp16_rtz_fp32_rte",                                                 SM_ROUNDING,    independenceAll,                SO_RTZ,                 SO_RTE,                 SO_UNUSED,              DE_FALSE },
3364                 { "rounding_ind_32_fp16_rte_fp32_rtz",                                                  SM_ROUNDING,    independence32,                 SO_RTE,                 SO_RTZ,                 SO_UNUSED,              DE_FALSE },
3365                 { "rounding_ind_32_fp16_rtz_fp32_rte",                                                  SM_ROUNDING,    independence32,                 SO_RTZ,                 SO_RTE,                 SO_UNUSED,              DE_FALSE },
3366                 { "rounding_ind_all_fp16_rte_fp64_rtz",                                                 SM_ROUNDING,    independenceAll,                SO_RTE,                 SO_UNUSED,              SO_RTZ,                 DE_FALSE },
3367                 { "rounding_ind_all_fp16_rtz_fp64_rte",                                                 SM_ROUNDING,    independenceAll,                SO_RTZ,                 SO_UNUSED,              SO_RTE,                 DE_FALSE },
3368                 { "rounding_ind_all_fp32_rte_fp64_rtz",                                                 SM_ROUNDING,    independenceAll,                SO_UNUSED,              SO_RTE,                 SO_RTZ,                 DE_FALSE },
3369                 { "rounding_ind_all_fp32_rtz_fp64_rte",                                                 SM_ROUNDING,    independenceAll,                SO_UNUSED,              SO_RTZ,                 SO_RTE,                 DE_FALSE },
3370                 { "rounding_ind_32_fp32_rte_fp64_rtz",                                                  SM_ROUNDING,    independence32,                 SO_UNUSED,              SO_RTE,                 SO_RTZ,                 DE_FALSE },
3371                 { "rounding_ind_32_fp32_rtz_fp64_rte",                                                  SM_ROUNDING,    independence32,                 SO_UNUSED,              SO_RTZ,                 SO_RTE,                 DE_FALSE },
3372
3373                 // test rounding modes when three widths are available
3374                 { "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rtz",                                SM_ROUNDING,    independenceAll,                SO_RTZ,                 SO_RTE,                 SO_RTZ,                 DE_FALSE },
3375                 { "rounding_ind_32_fp16_rtz_fp32_rte_fp64_rtz",                                 SM_ROUNDING,    independence32,                 SO_RTZ,                 SO_RTE,                 SO_RTZ,                 DE_FALSE },
3376                 { "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rte",                                SM_ROUNDING,    independenceAll,                SO_RTE,                 SO_RTZ,                 SO_RTE,                 DE_FALSE },
3377                 { "rounding_ind_32_fp16_rte_fp32_rtz_fp64_rte",                                 SM_ROUNDING,    independence32,                 SO_RTE,                 SO_RTZ,                 SO_RTE,                 DE_FALSE },
3378                 { "rounding_ind_all_fp16_rtz_fp32_rtz_fp64_rte",                                SM_ROUNDING,    independenceAll,                SO_RTZ,                 SO_RTZ,                 SO_RTE,                 DE_FALSE },
3379                 { "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rte",                                SM_ROUNDING,    independenceAll,                SO_RTZ,                 SO_RTE,                 SO_RTE,                 DE_FALSE },
3380                 { "rounding_ind_all_fp16_rte_fp32_rte_fp64_rtz",                                SM_ROUNDING,    independenceAll,                SO_RTE,                 SO_RTE,                 SO_RTZ,                 DE_FALSE },
3381                 { "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rtz",                                SM_ROUNDING,    independenceAll,                SO_RTE,                 SO_RTZ,                 SO_RTZ,                 DE_FALSE },
3382
3383                 // test denorm settings when only two float widths are available
3384                 { "denorm_ind_all_fp16_flush_fp32_preserve",                                    SM_DENORMS,             independenceAll,                SO_FLUSH,               SO_PRESERVE,    SO_UNUSED,              DE_FALSE },
3385                 { "denorm_ind_all_fp16_preserve_fp32_flush",                                    SM_DENORMS,             independenceAll,                SO_PRESERVE,    SO_FLUSH,               SO_UNUSED,              DE_FALSE },
3386                 { "denorm_ind_32_fp16_flush_fp32_preserve",                                             SM_DENORMS,             independence32,                 SO_FLUSH,               SO_PRESERVE,    SO_UNUSED,              DE_FALSE },
3387                 { "denorm_ind_32_fp16_preserve_fp32_flush",                                             SM_DENORMS,             independence32,                 SO_PRESERVE,    SO_FLUSH,               SO_UNUSED,              DE_FALSE },
3388                 { "denorm_ind_all_fp16_flush_fp64_preserve",                                    SM_DENORMS,             independenceAll,                SO_FLUSH,               SO_UNUSED,              SO_PRESERVE,    DE_FALSE },
3389                 { "denorm_ind_all_fp16_preserve_fp64_flush",                                    SM_DENORMS,             independenceAll,                SO_PRESERVE,    SO_UNUSED,              SO_FLUSH,               DE_FALSE },
3390                 { "denorm_ind_all_fp32_flush_fp64_preserve",                                    SM_DENORMS,             independenceAll,                SO_UNUSED,              SO_FLUSH,               SO_PRESERVE,    DE_FALSE },
3391                 { "denorm_ind_all_fp32_preserve_fp64_flush",                                    SM_DENORMS,             independenceAll,                SO_UNUSED,              SO_PRESERVE,    SO_FLUSH,               DE_FALSE },
3392                 { "denorm_ind_32_fp32_flush_fp64_preserve",                                             SM_DENORMS,             independence32,                 SO_UNUSED,              SO_FLUSH,               SO_PRESERVE,    DE_FALSE },
3393                 { "denorm_ind_32_fp32_preserve_fp64_flush",                                             SM_DENORMS,             independence32,                 SO_UNUSED,              SO_PRESERVE,    SO_FLUSH,               DE_FALSE },
3394
3395                 // test denorm settings when three widths are available
3396                 { "denorm_ind_all_fp16_preserve_fp32_flush_fp64_preserve",              SM_DENORMS,             independenceAll,                SO_PRESERVE,    SO_FLUSH,               SO_PRESERVE,    DE_FALSE },
3397                 { "denorm_ind_32_fp16_preserve_fp32_flush_fp64_preserve",               SM_DENORMS,             independence32,                 SO_PRESERVE,    SO_FLUSH,               SO_PRESERVE,    DE_FALSE },
3398                 { "denorm_ind_all_fp16_flush_fp32_preserve_fp64_flush",                 SM_DENORMS,             independenceAll,                SO_FLUSH,               SO_PRESERVE,    SO_FLUSH,               DE_FALSE },
3399                 { "denorm_ind_32_fp16_flush_fp32_preserve_fp64_flush",                  SM_DENORMS,             independence32,                 SO_FLUSH,               SO_PRESERVE,    SO_FLUSH,               DE_FALSE },
3400                 { "denorm_ind_all_fp16_preserve_fp32_preserve_fp64_flush",              SM_DENORMS,             independenceAll,                SO_PRESERVE,    SO_PRESERVE,    SO_FLUSH,               DE_FALSE },
3401                 { "denorm_ind_all_fp16_preserve_fp32_flush_fp64_flush",                 SM_DENORMS,             independenceAll,                SO_PRESERVE,    SO_FLUSH,               SO_FLUSH,               DE_FALSE },
3402                 { "denorm_ind_all_fp16_flush_fp32_flush_fp64_preserve",                 SM_DENORMS,             independenceAll,                SO_FLUSH,               SO_FLUSH,               SO_PRESERVE,    DE_FALSE },
3403                 { "denorm_ind_all_fp16_flush_fp32_preserve_fp64_preserve",              SM_DENORMS,             independenceAll,                SO_FLUSH,               SO_PRESERVE,    SO_PRESERVE,    DE_FALSE },
3404
3405                 // Same fp16 tests but without requiring VK_KHR_16bit_storage
3406                 // test rounding modes when only two float widths are available
3407                 { "rounding_ind_all_fp16_rte_fp32_rtz_nostorage",                               SM_ROUNDING,    independenceAll,                SO_RTE,                 SO_RTZ,                 SO_UNUSED,              DE_TRUE },
3408                 { "rounding_ind_all_fp16_rtz_fp32_rte_nostorage",                               SM_ROUNDING,    independenceAll,                SO_RTZ,                 SO_RTE,                 SO_UNUSED,              DE_TRUE },
3409                 { "rounding_ind_32_fp16_rte_fp32_rtz_nostorage",                                SM_ROUNDING,    independence32,                 SO_RTE,                 SO_RTZ,                 SO_UNUSED,              DE_TRUE },
3410                 { "rounding_ind_32_fp16_rtz_fp32_rte_nostorage",                                SM_ROUNDING,    independence32,                 SO_RTZ,                 SO_RTE,                 SO_UNUSED,              DE_TRUE },
3411                 { "rounding_ind_all_fp16_rte_fp64_rtz_nostorage",                               SM_ROUNDING,    independenceAll,                SO_RTE,                 SO_UNUSED,              SO_RTZ,                 DE_TRUE },
3412                 { "rounding_ind_all_fp16_rtz_fp64_rte_nostorage",                               SM_ROUNDING,    independenceAll,                SO_RTZ,                 SO_UNUSED,              SO_RTE,                 DE_TRUE },
3413
3414                 // test rounding modes when three widths are available
3415                 { "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rtz_nostorage",              SM_ROUNDING,    independenceAll,                SO_RTZ,                 SO_RTE,                 SO_RTZ,                 DE_TRUE },
3416                 { "rounding_ind_32_fp16_rtz_fp32_rte_fp64_rtz_nostorage",               SM_ROUNDING,    independence32,                 SO_RTZ,                 SO_RTE,                 SO_RTZ,                 DE_TRUE },
3417                 { "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rte_nostorage",              SM_ROUNDING,    independenceAll,                SO_RTE,                 SO_RTZ,                 SO_RTE,                 DE_TRUE },
3418                 { "rounding_ind_32_fp16_rte_fp32_rtz_fp64_rte_nostorage",               SM_ROUNDING,    independence32,                 SO_RTE,                 SO_RTZ,                 SO_RTE,                 DE_TRUE },
3419                 { "rounding_ind_all_fp16_rtz_fp32_rtz_fp64_rte_nostorage",              SM_ROUNDING,    independenceAll,                SO_RTZ,                 SO_RTZ,                 SO_RTE,                 DE_TRUE },
3420                 { "rounding_ind_all_fp16_rtz_fp32_rte_fp64_rte_nostorage",              SM_ROUNDING,    independenceAll,                SO_RTZ,                 SO_RTE,                 SO_RTE,                 DE_TRUE },
3421                 { "rounding_ind_all_fp16_rte_fp32_rte_fp64_rtz_nostorage",              SM_ROUNDING,    independenceAll,                SO_RTE,                 SO_RTE,                 SO_RTZ,                 DE_TRUE },
3422                 { "rounding_ind_all_fp16_rte_fp32_rtz_fp64_rtz_nostorage",              SM_ROUNDING,    independenceAll,                SO_RTE,                 SO_RTZ,                 SO_RTZ,                 DE_TRUE },
3423
3424                 // test denorm settings when only two float widths are available
3425                 { "denorm_ind_all_fp16_flush_fp32_preserve_nostorage",                  SM_DENORMS,             independenceAll,                SO_FLUSH,               SO_PRESERVE,    SO_UNUSED,              DE_TRUE },
3426                 { "denorm_ind_all_fp16_preserve_fp32_flush_nostorage",                  SM_DENORMS,             independenceAll,                SO_PRESERVE,    SO_FLUSH,               SO_UNUSED,              DE_TRUE },
3427                 { "denorm_ind_32_fp16_flush_fp32_preserve_nostorage",                   SM_DENORMS,             independence32,                 SO_FLUSH,               SO_PRESERVE,    SO_UNUSED,              DE_TRUE },
3428                 { "denorm_ind_32_fp16_preserve_fp32_flush_nostorage",                   SM_DENORMS,             independence32,                 SO_PRESERVE,    SO_FLUSH,               SO_UNUSED,              DE_TRUE },
3429                 { "denorm_ind_all_fp16_flush_fp64_preserve_nostorage",                  SM_DENORMS,             independenceAll,                SO_FLUSH,               SO_UNUSED,              SO_PRESERVE,    DE_TRUE },
3430                 { "denorm_ind_all_fp16_preserve_fp64_flush_nostorage",                  SM_DENORMS,             independenceAll,                SO_PRESERVE,    SO_UNUSED,              SO_FLUSH,               DE_TRUE },
3431
3432                 // test denorm settings when three widths are available
3433                 { "denorm_ind_all_fp16_preserve_fp32_flush_fp64_preserve_nostorage",    SM_DENORMS,             independenceAll,                SO_PRESERVE,    SO_FLUSH,               SO_PRESERVE,    DE_TRUE },
3434                 { "denorm_ind_32_fp16_preserve_fp32_flush_fp64_preserve_nostorage",             SM_DENORMS,             independence32,                 SO_PRESERVE,    SO_FLUSH,               SO_PRESERVE,    DE_TRUE },
3435                 { "denorm_ind_all_fp16_flush_fp32_preserve_fp64_flush_nostorage",               SM_DENORMS,             independenceAll,                SO_FLUSH,               SO_PRESERVE,    SO_FLUSH,               DE_TRUE },
3436                 { "denorm_ind_32_fp16_flush_fp32_preserve_fp64_flush_nostorage",                SM_DENORMS,             independence32,                 SO_FLUSH,               SO_PRESERVE,    SO_FLUSH,               DE_TRUE },
3437                 { "denorm_ind_all_fp16_preserve_fp32_preserve_fp64_flush_nostorage",    SM_DENORMS,             independenceAll,                SO_PRESERVE,    SO_PRESERVE,    SO_FLUSH,               DE_TRUE },
3438                 { "denorm_ind_all_fp16_preserve_fp32_flush_fp64_flush_nostorage",               SM_DENORMS,             independenceAll,                SO_PRESERVE,    SO_FLUSH,               SO_FLUSH,               DE_TRUE },
3439                 { "denorm_ind_all_fp16_flush_fp32_flush_fp64_preserve_nostorage",               SM_DENORMS,             independenceAll,                SO_FLUSH,               SO_FLUSH,               SO_PRESERVE,    DE_TRUE },
3440                 { "denorm_ind_all_fp16_flush_fp32_preserve_fp64_preserve_nostorage",    SM_DENORMS,             independenceAll,                SO_FLUSH,               SO_PRESERVE,    SO_PRESERVE,    DE_TRUE },
3441         };
3442
3443         for(const auto& testCase : testCases)
3444         {
3445                 ComputeShaderSpec       csSpec;
3446                 fillShaderSpec(testCase, csSpec);
3447                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testCase.name, "", csSpec));
3448         }
3449
3450         addFunctionCase(group, "independence_settings", "", verifyIndependenceSettings);
3451 }
3452
3453 void ComputeTestGroupBuilder::fillShaderSpec(const OperationTestCaseInfo&       testCaseInfo,
3454                                                                                          ComputeShaderSpec&                             csSpec) const
3455 {
3456         // LUT storing functions used to verify test results
3457         const VerifyIOFunc checkFloatsLUT[] =
3458         {
3459                 checkFloats<Float16, deFloat16>,
3460                 checkFloats<Float32, float>,
3461                 checkFloats<Float64, double>
3462         };
3463
3464         const Operation&                        testOperation   = testCaseInfo.operation;
3465         const OperationTestCase&        testCase                = testCaseInfo.testCase;
3466         FloatType                                       outFloatType    = testCaseInfo.outFloatType;
3467
3468         SpecializedOperation specOpData;
3469         specializeOperation(testCaseInfo, specOpData);
3470
3471         TypeSnippetsSP  inTypeSnippets          = specOpData.inTypeSnippets;
3472         TypeSnippetsSP  outTypeSnippets         = specOpData.outTypeSnippets;
3473         FloatType               inFloatType                     = specOpData.inFloatType;
3474
3475         deBool                  outFp16WithoutStorage   = (outFloatType == FP16) && testCase.fp16Without16BitStorage;
3476         deBool                  inFp16WithoutStorage    = (inFloatType == FP16) && testCase.fp16Without16BitStorage;
3477
3478         // UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
3479         // internaly operates on fp16 and this type should be used by float controls
3480         FloatType               inFloatTypeForCaps              = inFloatType;
3481         string                  inFloatWidthForCaps             = inTypeSnippets->bitWidth;
3482         if (testCase.operationId == O_UPH_DENORM)
3483         {
3484                 inFloatTypeForCaps      = FP16;
3485                 inFloatWidthForCaps     = "16";
3486         }
3487
3488         string behaviorCapability;
3489         string behaviorExecutionMode;
3490         getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags,
3491                                                                                   inFloatWidthForCaps,
3492                                                                                   outTypeSnippets->bitWidth,
3493                                                                                   behaviorCapability,
3494                                                                                   behaviorExecutionMode);
3495
3496         string capabilities             = behaviorCapability + outTypeSnippets->capabilities;
3497         string extensions               = outTypeSnippets->extensions;
3498         string annotations              = inTypeSnippets->inputAnnotationsSnippet + outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
3499         string types                    = outTypeSnippets->typeDefinitionsSnippet;
3500         string constants                = outTypeSnippets->constantsDefinitionsSnippet;
3501         string ioDefinitions    = "";
3502
3503         // Getting rid of 16bit_storage dependency imply replacing lots of snippets.
3504         {
3505                 if (inFp16WithoutStorage)
3506                 {
3507                         ioDefinitions   = inTypeSnippets->inputDefinitionsFp16Snippet;
3508                 }
3509                 else
3510                 {
3511                         ioDefinitions   = inTypeSnippets->inputDefinitionsSnippet;
3512                 }
3513
3514                 if (outFp16WithoutStorage)
3515                 {
3516                         extensions              = outTypeSnippets->extensionsFp16Without16BitStorage;
3517                         capabilities    = behaviorCapability + outTypeSnippets->capabilitiesFp16Without16BitStorage;
3518                         types                   += outTypeSnippets->typeDefinitionsFp16Snippet;
3519                         annotations     += outTypeSnippets->typeAnnotationsFp16Snippet;
3520                         ioDefinitions   += outTypeSnippets->outputDefinitionsFp16Snippet;
3521                 }
3522                 else
3523                 {
3524                         ioDefinitions   += outTypeSnippets->outputDefinitionsSnippet;
3525                 }
3526         }
3527
3528         bool outFp16TypeUsage   = outTypeSnippets->loadStoreRequiresShaderFloat16;
3529         bool inFp16TypeUsage    = false;
3530
3531         if (testOperation.isInputTypeRestricted)
3532         {
3533                 annotations             += inTypeSnippets->typeAnnotationsSnippet;
3534                 types                   += inTypeSnippets->typeDefinitionsSnippet;
3535                 constants               += inTypeSnippets->constantsDefinitionsSnippet;
3536
3537                 if (inFp16WithoutStorage)
3538                 {
3539                         annotations             += inTypeSnippets->typeAnnotationsFp16Snippet;
3540                         types                   += inTypeSnippets->typeDefinitionsFp16Snippet;
3541                         capabilities    += inTypeSnippets->capabilitiesFp16Without16BitStorage;
3542                         extensions              += inTypeSnippets->extensionsFp16Without16BitStorage;
3543                 }
3544                 else
3545                 {
3546                         capabilities    += inTypeSnippets->capabilities;
3547                         extensions              += inTypeSnippets->extensions;
3548                 }
3549
3550                 inFp16TypeUsage = inTypeSnippets->loadStoreRequiresShaderFloat16;
3551         }
3552
3553         map<string, string> specializations;
3554         specializations["extensions"]           = extensions;
3555         specializations["execution_mode"]       = behaviorExecutionMode;
3556         specializations["annotations"]          = annotations + specOpData.annotations;
3557         specializations["types"]                        = types + specOpData.types;
3558         specializations["io_definitions"]       = ioDefinitions;
3559         specializations["variables"]            = specOpData.variables;
3560         specializations["functions"]            = specOpData.functions;
3561         specializations["save_result"]          = (outFp16WithoutStorage ? outTypeSnippets->storeResultsFp16Snippet : outTypeSnippets->storeResultsSnippet);
3562         specializations["arguments"]            = specOpData.arguments;
3563         specializations["commands"]                     = specOpData.commands;
3564
3565         // Build constants. They are only needed sometimes.
3566         const FloatStatementUsageFlags  argsAnyFloatConstMask                           = B_STATEMENT_USAGE_ARGS_CONST_FLOAT | B_STATEMENT_USAGE_ARGS_CONST_FP16 | B_STATEMENT_USAGE_ARGS_CONST_FP32 | B_STATEMENT_USAGE_ARGS_CONST_FP64;
3567         const bool                                              argsUseFPConstants                                      = (specOpData.argumentsUsesFloatConstant & argsAnyFloatConstMask) != 0;
3568         const FloatStatementUsageFlags  commandsAnyFloatConstMask                       = B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_CONST_FP16 | B_STATEMENT_USAGE_COMMANDS_CONST_FP32 | B_STATEMENT_USAGE_COMMANDS_CONST_FP64;
3569         const bool                                              commandsUseFPConstants                          = (testCaseInfo.operation.statementUsageFlags & commandsAnyFloatConstMask) != 0;
3570         const bool                                              needConstants                                           = argsUseFPConstants || commandsUseFPConstants;
3571         const FloatStatementUsageFlags  constsFloatTypeMask                                     = B_STATEMENT_USAGE_CONSTS_TYPE_FLOAT | B_STATEMENT_USAGE_CONSTS_TYPE_FP16;
3572         const bool                                              constsUsesFP16Type                                      = (testCaseInfo.operation.statementUsageFlags & constsFloatTypeMask) != 0;
3573         const bool                                              loadStoreRequiresShaderFloat16          = inFp16TypeUsage || outFp16TypeUsage;
3574         const bool                                              usesFP16Constants                                       = constsUsesFP16Type || (needConstants && loadStoreRequiresShaderFloat16);
3575
3576         specializations["constants"]            = "";
3577         if (needConstants || outFp16WithoutStorage)
3578         {
3579                 specializations["constants"]    = constants;
3580         }
3581         specializations["constants"]            += specOpData.constants;
3582
3583         // check which format features are needed
3584         bool float16FeatureRequired = (outFloatType == FP16) || (inFloatType == FP16);
3585         bool float64FeatureRequired = (outFloatType == FP64) || (inFloatType == FP64);
3586
3587         // Determine required capabilities.
3588         bool float16CapabilityAlreadyAdded = inFp16WithoutStorage || outFp16WithoutStorage;
3589         if ((testOperation.floatUsage == FLOAT_ARITHMETIC && float16FeatureRequired && !float16CapabilityAlreadyAdded) || usesFP16Constants)
3590         {
3591                 capabilities += "OpCapability Float16\n";
3592         }
3593         specializations["capabilities"]         = capabilities;
3594
3595         // specialize shader
3596         const string shaderCode = m_operationShaderTemplate.specialize(specializations);
3597
3598         // construct input and output buffers of proper types
3599         TypeValuesSP inTypeValues       = m_typeData.at(inFloatType).values;
3600         TypeValuesSP outTypeValues      = m_typeData.at(outFloatType).values;
3601         BufferSp inBufferSp                     = inTypeValues->constructInputBuffer(testCase.input);
3602         BufferSp outBufferSp            = outTypeValues->constructOutputBuffer(testCase.expectedOutput);
3603         csSpec.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3604         csSpec.outputs.push_back(Resource(outBufferSp));
3605
3606         // check which format features are needed
3607         setupVulkanFeatures(inFloatTypeForCaps,         // usualy same as inFloatType - different only for UnpackHalf2x16
3608                                                 outFloatType,
3609                                                 testCase.behaviorFlags,
3610                                                 float64FeatureRequired,
3611                                                 csSpec.requestedVulkanFeatures);
3612
3613         csSpec.assembly                 = shaderCode;
3614         csSpec.numWorkGroups    = IVec3(1, 1, 1);
3615         csSpec.verifyIO                 = checkFloatsLUT[outFloatType];
3616
3617         csSpec.extensions.push_back("VK_KHR_shader_float_controls");
3618         bool needShaderFloat16 = float16CapabilityAlreadyAdded;
3619
3620         if (float16FeatureRequired && !testCase.fp16Without16BitStorage)
3621         {
3622                 csSpec.extensions.push_back("VK_KHR_16bit_storage");
3623                 csSpec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
3624                 needShaderFloat16 |= testOperation.floatUsage == FLOAT_ARITHMETIC;
3625         }
3626         needShaderFloat16 |= usesFP16Constants;
3627         if (needShaderFloat16)
3628         {
3629                 csSpec.extensions.push_back("VK_KHR_shader_float16_int8");
3630                 csSpec.requestedVulkanFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
3631         }
3632         if (float64FeatureRequired)
3633                 csSpec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3634 }
3635
3636 void ComputeTestGroupBuilder::fillShaderSpec(const SettingsTestCaseInfo&        testCaseInfo,
3637                                                                                          ComputeShaderSpec&                             csSpec) const
3638 {
3639         string          capabilities;
3640         string          fp16behaviorName;
3641         string          fp32behaviorName;
3642         string          fp64behaviorName;
3643
3644         ValueId         addArgs[2];
3645         ValueId         fp16resultValue;
3646         ValueId         fp32resultValue;
3647         ValueId         fp64resultValue;
3648
3649         ExtensionFloatControlsFeatures& floatControls = csSpec.requestedVulkanFeatures.floatControlsProperties;
3650         bool fp16Required       = testCaseInfo.fp16Option != SO_UNUSED;
3651         bool fp32Required       = testCaseInfo.fp32Option != SO_UNUSED;
3652         bool fp64Required       = testCaseInfo.fp64Option != SO_UNUSED;
3653
3654         if (testCaseInfo.testedMode == SM_ROUNDING)
3655         {
3656                 // make sure that only rounding options are used
3657                 DE_ASSERT((testCaseInfo.fp16Option != SO_FLUSH) ||
3658                                   (testCaseInfo.fp16Option != SO_PRESERVE) ||
3659                                   (testCaseInfo.fp32Option != SO_FLUSH) ||
3660                                   (testCaseInfo.fp32Option != SO_PRESERVE) ||
3661                                   (testCaseInfo.fp64Option != SO_FLUSH) ||
3662                                   (testCaseInfo.fp64Option != SO_PRESERVE));
3663
3664                 bool fp16RteRounding    = testCaseInfo.fp16Option == SO_RTE;
3665                 bool fp32RteRounding    = testCaseInfo.fp32Option == SO_RTE;
3666                 bool fp64RteRounding    = testCaseInfo.fp64Option == SO_RTE;
3667
3668                 const string& rte               = m_behaviorToName.at(B_RTE_ROUNDING);
3669                 const string& rtz               = m_behaviorToName.at(B_RTZ_ROUNDING);
3670
3671                 fp16behaviorName                = fp16RteRounding ? rte : rtz;
3672                 fp32behaviorName                = fp32RteRounding ? rte : rtz;
3673                 fp64behaviorName                = fp64RteRounding ? rte : rtz;
3674
3675                 addArgs[0]                              = V_ADD_ARG_A;
3676                 addArgs[1]                              = V_ADD_ARG_B;
3677                 fp16resultValue                 = fp16RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
3678                 fp32resultValue                 = fp32RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
3679                 fp64resultValue                 = fp64RteRounding ? V_ADD_RTE_RESULT : V_ADD_RTZ_RESULT;
3680
3681                 capabilities                    = "OpCapability " + rte + "\n"
3682                                                                   "OpCapability " + rtz + "\n";
3683
3684                 floatControls.roundingModeIndependence          = testCaseInfo.independenceSetting;
3685                 floatControls.denormBehaviorIndependence        = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR;
3686                 floatControls.shaderRoundingModeRTEFloat16      = fp16RteRounding;
3687                 floatControls.shaderRoundingModeRTZFloat16      = fp16Required && !fp16RteRounding;
3688                 floatControls.shaderRoundingModeRTEFloat32      = fp32RteRounding;
3689                 floatControls.shaderRoundingModeRTZFloat32      = fp32Required && !fp32RteRounding;
3690                 floatControls.shaderRoundingModeRTEFloat64      = fp64RteRounding;
3691                 floatControls.shaderRoundingModeRTZFloat64      = fp64Required && !fp64RteRounding;
3692         }
3693         else // SM_DENORMS
3694         {
3695                 // make sure that only denorm options are used
3696                 DE_ASSERT((testCaseInfo.fp16Option != SO_RTE) ||
3697                                   (testCaseInfo.fp16Option != SO_RTZ) ||
3698                                   (testCaseInfo.fp32Option != SO_RTE) ||
3699                                   (testCaseInfo.fp32Option != SO_RTZ) ||
3700                                   (testCaseInfo.fp64Option != SO_RTE) ||
3701                                   (testCaseInfo.fp64Option != SO_RTZ));
3702
3703                 bool fp16DenormPreserve         = testCaseInfo.fp16Option == SO_PRESERVE;
3704                 bool fp32DenormPreserve         = testCaseInfo.fp32Option == SO_PRESERVE;
3705                 bool fp64DenormPreserve         = testCaseInfo.fp64Option == SO_PRESERVE;
3706
3707                 const string& preserve          = m_behaviorToName.at(B_DENORM_PRESERVE);
3708                 const string& flush                     = m_behaviorToName.at(B_DENORM_FLUSH);
3709
3710                 fp16behaviorName                        = fp16DenormPreserve ? preserve : flush;
3711                 fp32behaviorName                        = fp32DenormPreserve ? preserve : flush;
3712                 fp64behaviorName                        = fp64DenormPreserve ? preserve : flush;
3713
3714                 addArgs[0]                                      = V_DENORM;
3715                 addArgs[1]                                      = V_DENORM;
3716                 fp16resultValue                         = fp16DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO_OR_DENORM_TIMES_TWO;
3717                 fp32resultValue                         = fp32DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO;
3718                 fp64resultValue                         = fp64DenormPreserve ? V_DENORM_TIMES_TWO : V_ZERO;
3719
3720                 capabilities                            = "OpCapability " + preserve + "\n"
3721                                                                           "OpCapability " + flush + "\n";
3722
3723                 floatControls.denormBehaviorIndependence                = testCaseInfo.independenceSetting;
3724                 floatControls.roundingModeIndependence                  = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR;
3725                 floatControls.shaderDenormPreserveFloat16               = fp16DenormPreserve;
3726                 floatControls.shaderDenormFlushToZeroFloat16    = fp16Required && !fp16DenormPreserve;
3727                 floatControls.shaderDenormPreserveFloat32               = fp32DenormPreserve;
3728                 floatControls.shaderDenormFlushToZeroFloat32    = fp32Required && !fp32DenormPreserve;
3729                 floatControls.shaderDenormPreserveFloat64               = fp64DenormPreserve;
3730                 floatControls.shaderDenormFlushToZeroFloat64    = fp64Required && !fp64DenormPreserve;
3731         }
3732
3733         const auto&     fp64Data                        = m_typeData.at(FP64);
3734         const auto&     fp32Data                        = m_typeData.at(FP32);
3735         const auto&     fp16Data                        = m_typeData.at(FP16);
3736
3737         deUint32        attributeIndex          = 0;
3738         deUint32        attributeOffset         = 0;
3739         string          attribute;
3740         string          extensions                      = "";
3741         string          executionModes          = "";
3742         string          ioAnnotations           = "";
3743         string          types                           = "";
3744         string          inStruct                        = "";
3745         string          outDefinitions          = "";
3746         string          commands                        = "";
3747         string          saveResult                      = "";
3748
3749         // construct single input buffer containing arguments for all float widths
3750         // (maxPerStageDescriptorStorageBuffers can be min 4 and we need 3 for outputs)
3751         deUint32                                inputOffset     = 0;
3752         std::vector<deUint8>    inputData       ((fp64Required * sizeof(double) + sizeof(float) + fp16Required * sizeof(deFloat16)) * 2);
3753
3754         // to follow storage buffer layout rules we store data in ssbo in order 64 -> 16
3755         if (fp64Required)
3756         {
3757                 capabilities    += fp64Data.snippets->capabilities;
3758                 executionModes  += "OpExecutionMode %main " + fp64behaviorName + " 64\n";
3759                 attribute                = to_string(attributeIndex);
3760                 ioAnnotations   += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3761                                                    fp64Data.snippets->multiOutputAnnotationsSnippet +
3762                                                    "OpDecorate %ssbo_f64_out Binding " + to_string(attributeIndex+1) + "\n";
3763                 types                   += fp64Data.snippets->minTypeDefinitionsSnippet;
3764                 inStruct                += " %type_f64_arr_2";
3765                 outDefinitions  += fp64Data.snippets->multiOutputDefinitionsSnippet;
3766                 commands                += replace(fp64Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
3767                                                    "%result64             = OpFAdd %type_f64 %arg1_f64 %arg2_f64\n";
3768                 saveResult              += fp64Data.snippets->multiStoreResultsSnippet;
3769                 attributeOffset += 2 * static_cast<deUint32>(sizeof(double));
3770                 attributeIndex++;
3771
3772                 fp64Data.values->fillInputData(addArgs, inputData, inputOffset);
3773
3774                 // construct separate buffers for outputs to make validation easier
3775                 BufferSp fp64OutBufferSp = fp64Data.values->constructOutputBuffer(fp64resultValue);
3776                 csSpec.outputs.push_back(Resource(fp64OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, reinterpret_cast<void*>(BufferDataType::DATA_FP64)));
3777
3778                 csSpec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3779         }
3780         if (fp32Required)
3781         {
3782                 executionModes          += "OpExecutionMode %main " + fp32behaviorName + " 32\n";
3783                 attribute                        = to_string(attributeIndex);
3784                 ioAnnotations           += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3785                                                            fp32Data.snippets->multiOutputAnnotationsSnippet +
3786                                                            "OpDecorate %ssbo_f32_out Binding " + to_string(attributeIndex+1) + "\n";
3787                 types                           += fp32Data.snippets->minTypeDefinitionsSnippet;
3788                 inStruct                        += " %type_f32_arr_2";
3789                 outDefinitions          += fp32Data.snippets->multiOutputDefinitionsSnippet;
3790                 commands                        += replace(fp32Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
3791                                                            "%result32             = OpFAdd %type_f32 %arg1_f32 %arg2_f32\n";
3792                 saveResult                      += fp32Data.snippets->multiStoreResultsSnippet;
3793                 attributeOffset         += 2 * static_cast<deUint32>(sizeof(float));
3794                 attributeIndex++;
3795
3796                 fp32Data.values->fillInputData(addArgs, inputData, inputOffset);
3797
3798                 BufferSp fp32OutBufferSp = fp32Data.values->constructOutputBuffer(fp32resultValue);
3799                 csSpec.outputs.push_back(Resource(fp32OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, reinterpret_cast<void*>(BufferDataType::DATA_FP32)));
3800         }
3801         if (fp16Required)
3802         {
3803                 if (testCaseInfo.fp16Without16BitStorage)
3804                 {
3805                         capabilities    += fp16Data.snippets->capabilitiesFp16Without16BitStorage;
3806                         extensions              += fp16Data.snippets->extensionsFp16Without16BitStorage;
3807                         executionModes  += "OpExecutionMode %main " + fp16behaviorName + " 16\n";
3808                         attribute                = to_string(attributeIndex);
3809                         ioAnnotations   += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3810                                                            fp16Data.snippets->multiOutputAnnotationsFp16Snippet +
3811                                                            "OpDecorate %ssbo_u32_out Binding " + to_string(attributeIndex+1) + "\n";
3812                         types                   += fp16Data.snippets->minTypeDefinitionsSnippet + fp16Data.snippets->typeDefinitionsFp16Snippet + "%type_f16_vec2        = OpTypeVector %type_f16 2\n";
3813                         inStruct                += " %type_u32_arr_1";
3814                         outDefinitions  += fp16Data.snippets->multiOutputDefinitionsFp16Snippet;
3815                         commands                += replace(fp16Data.snippets->multiArgumentsFromInputFp16Snippet, "${attr}", attribute) +
3816                                                            "%result16             = OpFAdd %type_f16 %arg1_f16 %arg2_f16\n";
3817                         saveResult              += fp16Data.snippets->multiStoreResultsFp16Snippet;
3818
3819                         csSpec.extensions.push_back("VK_KHR_shader_float16_int8");
3820                         csSpec.requestedVulkanFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
3821                 }
3822                 else
3823                 {
3824                         capabilities    += fp16Data.snippets->capabilities +
3825                                                            "OpCapability Float16\n";
3826                         extensions              += fp16Data.snippets->extensions;
3827                         executionModes  += "OpExecutionMode %main " + fp16behaviorName + " 16\n";
3828                         attribute               = to_string(attributeIndex);
3829                         ioAnnotations   += "OpMemberDecorate %SSBO_in " + attribute + " Offset " + to_string(attributeOffset) +"\n" +
3830                                                            fp16Data.snippets->multiOutputAnnotationsSnippet +
3831                                                            "OpDecorate %ssbo_f16_out Binding " + to_string(attributeIndex+1) + "\n";
3832                         types                   += fp16Data.snippets->minTypeDefinitionsSnippet;
3833                         inStruct                += " %type_f16_arr_2";
3834                         outDefinitions  += fp16Data.snippets->multiOutputDefinitionsSnippet;
3835                         commands                += replace(fp16Data.snippets->multiArgumentsFromInputSnippet, "${attr}", attribute) +
3836                                                            "%result16             = OpFAdd %type_f16 %arg1_f16 %arg2_f16\n";
3837                         saveResult              += fp16Data.snippets->multiStoreResultsSnippet;
3838
3839                         csSpec.extensions.push_back("VK_KHR_16bit_storage");
3840                         csSpec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
3841                 }
3842
3843                 fp16Data.values->fillInputData(addArgs, inputData, inputOffset);
3844
3845                 BufferSp fp16OutBufferSp = fp16Data.values->constructOutputBuffer(fp16resultValue);
3846                 csSpec.outputs.push_back(Resource(fp16OutBufferSp, vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, reinterpret_cast<void*>(BufferDataType::DATA_FP16)));
3847         }
3848
3849         BufferSp inBufferSp(new Buffer<deUint8>(inputData));
3850         csSpec.inputs.push_back(Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
3851
3852         map<string, string> specializations =
3853         {
3854                 { "capabilities",               capabilities },
3855                 { "extensions",                 extensions },
3856                 { "execution_modes",    executionModes },
3857                 { "io_annotations",             ioAnnotations },
3858                 { "types",                              types },
3859                 { "in_struct",                  inStruct },
3860                 { "out_definitions",    outDefinitions },
3861                 { "commands",                   commands },
3862                 { "save_result",                saveResult }
3863         };
3864
3865         // specialize shader
3866         const string shaderCode = m_settingsShaderTemplate.specialize(specializations);
3867
3868         csSpec.assembly                 = shaderCode;
3869         csSpec.numWorkGroups    = IVec3(1, 1, 1);
3870         csSpec.verifyIO                 = checkMixedFloats;
3871         csSpec.extensions.push_back("VK_KHR_shader_float_controls");
3872 }
3873
3874 void getGraphicsShaderCode (vk::SourceCollections& dst, InstanceContext context)
3875 {
3876         // this function is used only by GraphicsTestGroupBuilder but it couldn't
3877         // be implemented as a method because of how addFunctionCaseWithPrograms
3878         // was implemented
3879
3880         SpirvVersion    targetSpirvVersion      = context.resources.spirvVersion;
3881         const deUint32  vulkanVersion           = dst.usedVulkanVersion;
3882
3883         static const string vertexTemplate =
3884                 "OpCapability Shader\n"
3885                 "${vert_capabilities}"
3886
3887                 "OpExtension \"SPV_KHR_float_controls\"\n"
3888                 "${vert_extensions}"
3889
3890                 "%std450            = OpExtInstImport \"GLSL.std.450\"\n"
3891                 "OpMemoryModel Logical GLSL450\n"
3892                 "OpEntryPoint Vertex %main \"main\" %BP_stream %BP_position %BP_color %BP_gl_VertexIndex %BP_gl_InstanceIndex %BP_vertex_color %BP_vertex_result \n"
3893                 "${vert_execution_mode}"
3894
3895                 "OpMemberDecorate %BP_gl_PerVertex 0 BuiltIn Position\n"
3896                 "OpMemberDecorate %BP_gl_PerVertex 1 BuiltIn PointSize\n"
3897                 "OpMemberDecorate %BP_gl_PerVertex 2 BuiltIn ClipDistance\n"
3898                 "OpMemberDecorate %BP_gl_PerVertex 3 BuiltIn CullDistance\n"
3899                 "OpDecorate %BP_gl_PerVertex Block\n"
3900                 "OpDecorate %BP_position Location 0\n"
3901                 "OpDecorate %BP_color Location 1\n"
3902                 "OpDecorate %BP_vertex_color Location 1\n"
3903                 "OpDecorate %BP_vertex_result Location 2\n"
3904                 "OpDecorate %BP_vertex_result Flat\n"
3905                 "OpDecorate %BP_gl_VertexIndex BuiltIn VertexIndex\n"
3906                 "OpDecorate %BP_gl_InstanceIndex BuiltIn InstanceIndex\n"
3907
3908                 // some tests require additional annotations
3909                 "${vert_annotations}"
3910
3911                 // types required by most of tests
3912                 "%type_void            = OpTypeVoid\n"
3913                 "%type_voidf           = OpTypeFunction %type_void\n"
3914                 "%type_bool            = OpTypeBool\n"
3915                 "%type_i32             = OpTypeInt 32 1\n"
3916                 "%type_u32             = OpTypeInt 32 0\n"
3917                 "%type_u32_vec2        = OpTypeVector %type_u32 2\n"
3918                 "%type_i32_iptr        = OpTypePointer Input %type_i32\n"
3919                 "%type_i32_optr        = OpTypePointer Output %type_i32\n"
3920                 "%type_i32_fptr        = OpTypePointer Function %type_i32\n"
3921
3922                 // constants required by most of tests
3923                 "%c_i32_0              = OpConstant %type_i32 0\n"
3924                 "%c_i32_1              = OpConstant %type_i32 1\n"
3925                 "%c_i32_2              = OpConstant %type_i32 2\n"
3926                 "%c_u32_1              = OpConstant %type_u32 1\n"
3927
3928                 // if input float type has different width then output then
3929                 // both types are defined here along with all types derived from
3930                 // them that are commonly used by tests; some tests also define
3931                 // their own types (those that are needed just by this single test)
3932                 "${vert_types}"
3933
3934                 // SSBO is not universally supported for storing
3935                 // data in vertex stages - it is only read here
3936                 "${vert_io_definitions}"
3937
3938                 "%BP_gl_PerVertex      = OpTypeStruct %type_f32_vec4 %type_f32 %type_f32_arr_1 %type_f32_arr_1\n"
3939                 "%BP_gl_PerVertex_optr = OpTypePointer Output %BP_gl_PerVertex\n"
3940                 "%BP_stream            = OpVariable %BP_gl_PerVertex_optr Output\n"
3941                 "%BP_position          = OpVariable %type_f32_vec4_iptr Input\n"
3942                 "%BP_color             = OpVariable %type_f32_vec4_iptr Input\n"
3943                 "%BP_gl_VertexIndex    = OpVariable %type_i32_iptr Input\n"
3944                 "%BP_gl_InstanceIndex  = OpVariable %type_i32_iptr Input\n"
3945                 "%BP_vertex_color      = OpVariable %type_f32_vec4_optr Output\n"
3946
3947                 // set of default constants per float type is placed here,
3948                 // operation tests can also define additional constants.
3949                 "${vert_constants}"
3950
3951                 // O_RETURN_VAL defines function here and because
3952                 // of that this token needs to be directly before main function.
3953                 "${vert_functions}"
3954
3955                 "%main                 = OpFunction %type_void None %type_voidf\n"
3956                 "%label                = OpLabel\n"
3957
3958                 "${vert_variables}"
3959
3960                 "%position             = OpLoad %type_f32_vec4 %BP_position\n"
3961                 "%gl_pos               = OpAccessChain %type_f32_vec4_optr %BP_stream %c_i32_0\n"
3962                 "OpStore %gl_pos %position\n"
3963                 "%color                = OpLoad %type_f32_vec4 %BP_color\n"
3964                 "OpStore %BP_vertex_color %color\n"
3965
3966                 // this token is filled only when vertex stage is tested;
3967                 // depending on test case arguments are either read from input ssbo
3968                 // or generated in spir-v code - in later case ssbo is not used
3969                 "${vert_arguments}"
3970
3971                 // when vertex shader is tested then test operations are performed
3972                 // here and passed to fragment stage; if fragment stage is tested
3973                 // then ${vert_commands} and ${vert_process_result} are replaced with nop
3974                 "${vert_commands}"
3975
3976                 "${vert_process_result}"
3977
3978                 "OpReturn\n"
3979                 "OpFunctionEnd\n";
3980
3981
3982         static const string fragmentTemplate =
3983                 "OpCapability Shader\n"
3984                 "${frag_capabilities}"
3985
3986                 "OpExtension \"SPV_KHR_float_controls\"\n"
3987                 "${frag_extensions}"
3988
3989                 "%std450            = OpExtInstImport \"GLSL.std.450\"\n"
3990                 "OpMemoryModel Logical GLSL450\n"
3991                 "OpEntryPoint Fragment %main \"main\" %BP_vertex_color %BP_vertex_result %BP_fragColor %BP_gl_FragCoord \n"
3992                 "OpExecutionMode %main OriginUpperLeft\n"
3993                 "${frag_execution_mode}"
3994
3995                 "OpDecorate %BP_fragColor Location 0\n"
3996                 "OpDecorate %BP_vertex_color Location 1\n"
3997                 "OpDecorate %BP_vertex_result Location 2\n"
3998                 "OpDecorate %BP_vertex_result Flat\n"
3999                 "OpDecorate %BP_gl_FragCoord BuiltIn FragCoord\n"
4000
4001                 // some tests require additional annotations
4002                 "${frag_annotations}"
4003
4004                 // types required by most of tests
4005                 "%type_void            = OpTypeVoid\n"
4006                 "%type_voidf           = OpTypeFunction %type_void\n"
4007                 "%type_bool            = OpTypeBool\n"
4008                 "%type_i32             = OpTypeInt 32 1\n"
4009                 "%type_u32             = OpTypeInt 32 0\n"
4010                 "%type_u32_vec2        = OpTypeVector %type_u32 2\n"
4011                 "%type_i32_iptr        = OpTypePointer Input %type_i32\n"
4012                 "%type_i32_optr        = OpTypePointer Output %type_i32\n"
4013                 "%type_i32_fptr        = OpTypePointer Function %type_i32\n"
4014
4015                 // constants required by most of tests
4016                 "%c_i32_0              = OpConstant %type_i32 0\n"
4017                 "%c_i32_1              = OpConstant %type_i32 1\n"
4018                 "%c_i32_2              = OpConstant %type_i32 2\n"
4019                 "%c_u32_1              = OpConstant %type_u32 1\n"
4020
4021                 // if input float type has different width then output then
4022                 // both types are defined here along with all types derived from
4023                 // them that are commonly used by tests; some tests also define
4024                 // their own types (those that are needed just by this single test)
4025                 "${frag_types}"
4026
4027                 "%BP_gl_FragCoord      = OpVariable %type_f32_vec4_iptr Input\n"
4028                 "%BP_vertex_color      = OpVariable %type_f32_vec4_iptr Input\n"
4029                 "%BP_fragColor         = OpVariable %type_f32_vec4_optr Output\n"
4030
4031                 // SSBO definitions
4032                 "${frag_io_definitions}"
4033
4034                 // set of default constants per float type is placed here,
4035                 // operation tests can also define additional constants.
4036                 "${frag_constants}"
4037
4038                 // O_RETURN_VAL defines function here and because
4039                 // of that this token needs to be directly before main function.
4040                 "${frag_functions}"
4041
4042                 "%main                 = OpFunction %type_void None %type_voidf\n"
4043                 "%label                = OpLabel\n"
4044
4045                 "${frag_variables}"
4046
4047                 // just pass vertex color - rendered image is not important in our case
4048                 "%vertex_color         = OpLoad %type_f32_vec4 %BP_vertex_color\n"
4049                 "OpStore %BP_fragColor %vertex_color\n"
4050
4051                 // this token is filled only when fragment stage is tested;
4052                 // depending on test case arguments are either read from input ssbo or
4053                 // generated in spir-v code - in later case ssbo is used only for output
4054                 "${frag_arguments}"
4055
4056                 // when fragment shader is tested then test operations are performed
4057                 // here and saved to ssbo; if vertex stage was tested then its
4058                 // result is just saved to ssbo here
4059                 "${frag_commands}"
4060                 "${frag_process_result}"
4061
4062                 "OpReturn\n"
4063                 "OpFunctionEnd\n";
4064
4065         dst.spirvAsmSources.add("vert", DE_NULL)
4066                 << StringTemplate(vertexTemplate).specialize(context.testCodeFragments)
4067                 << SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4068         dst.spirvAsmSources.add("frag", DE_NULL)
4069                 << StringTemplate(fragmentTemplate).specialize(context.testCodeFragments)
4070                 << SpirVAsmBuildOptions(vulkanVersion, targetSpirvVersion);
4071 }
4072
// GraphicsTestGroupBuilder iterates over all test cases and creates tests for both
// vertex and fragment stages. As in most spirv-assembly tests, tests here are also
// executed using functionality defined in vktSpvAsmGraphicsShaderTestUtil.cpp, but
// because one of the requirements during development was that SSBOs won't be used
// in the vertex stage we couldn't use the createTestForStage functions - we need a
// custom version for both vertex and fragment shaders at the same time. This was
// required as we needed to pass the result from the vertex stage to the fragment
// stage, where it could be saved to an SSBO. To achieve that, InstanceContext is
// created manually in the createInstanceContext method.
4082 class GraphicsTestGroupBuilder: public TestGroupBuilderBase
4083 {
4084 public:
4085
4086         void init();
4087
4088         void createOperationTests(TestCaseGroup* parentGroup, const char* groupName, FloatType floatType, bool argumentsFromInput) override;
4089         void createSettingsTests(TestCaseGroup* parentGroup) override;
4090
4091 protected:
4092
4093         InstanceContext createInstanceContext(const OperationTestCaseInfo& testCaseInfo) const;
4094
4095 private:
4096
4097         TestCasesBuilder        m_testCaseBuilder;
4098 };
4099
4100 void GraphicsTestGroupBuilder::init()
4101 {
4102         m_testCaseBuilder.init();
4103 }
4104
4105 void GraphicsTestGroupBuilder::createOperationTests(TestCaseGroup* parentGroup, const char* groupName, FloatType floatType, bool argumentsFromInput)
4106 {
4107         TestContext&    testCtx = parentGroup->getTestContext();
4108         TestCaseGroup*  group   = new TestCaseGroup(testCtx, groupName, "");
4109         parentGroup->addChild(group);
4110
4111         // create test cases for vertex stage
4112         TestCaseVect testCases;
4113         m_testCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
4114
4115         TestCaseVect::const_iterator currTestCase = testCases.begin();
4116         TestCaseVect::const_iterator lastTestCase = testCases.end();
4117         while(currTestCase != lastTestCase)
4118         {
4119                 const OperationTestCase& testCase = *currTestCase;
4120                 ++currTestCase;
4121
4122                 // skip cases with undefined output
4123                 if (testCase.expectedOutput == V_UNUSED)
4124                         continue;
4125
4126                 // FPRoundingMode decoration can be applied only to conversion instruction that is used as the object
4127                 // argument of an OpStore storing through a pointer to a 16-bit floating-point object in Uniform, or
4128                 // PushConstant, or Input, or Output Storage Classes. SSBO writes are not commonly supported
4129                 // in VS so this test case needs to be skiped for vertex stage.
4130                 if ((testCase.operationId == O_ORTZ_ROUND) || (testCase.operationId == O_ORTE_ROUND))
4131                         continue;
4132
4133                 OperationTestCaseInfo testCaseInfo =
4134                 {
4135                         floatType,
4136                         argumentsFromInput,
4137                         VK_SHADER_STAGE_VERTEX_BIT,
4138                         m_testCaseBuilder.getOperation(testCase.operationId),
4139                         testCase
4140                 };
4141
4142                 InstanceContext ctxVertex       = createInstanceContext(testCaseInfo);
4143                 string                  testName        = replace(testCase.baseName, "op", testCaseInfo.operation.name);
4144
4145                 addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_vert", "", getGraphicsShaderCode, runAndVerifyDefaultPipeline, ctxVertex);
4146         }
4147
4148         // create test cases for fragment stage
4149         testCases.clear();
4150         m_testCaseBuilder.build(testCases, m_typeData[floatType].testResults, argumentsFromInput);
4151
4152         currTestCase = testCases.begin();
4153         lastTestCase = testCases.end();
4154         while(currTestCase != lastTestCase)
4155         {
4156                 const OperationTestCase& testCase = *currTestCase;
4157                 ++currTestCase;
4158
4159                 // skip cases with undefined output
4160                 if (testCase.expectedOutput == V_UNUSED)
4161                         continue;
4162
4163                 OperationTestCaseInfo testCaseInfo =
4164                 {
4165                         floatType,
4166                         argumentsFromInput,
4167                         VK_SHADER_STAGE_FRAGMENT_BIT,
4168                         m_testCaseBuilder.getOperation(testCase.operationId),
4169                         testCase
4170                 };
4171
4172                 InstanceContext ctxFragment     = createInstanceContext(testCaseInfo);
4173                 string                  testName        = replace(testCase.baseName, "op", testCaseInfo.operation.name);
4174
4175                 addFunctionCaseWithPrograms<InstanceContext>(group, testName + "_frag", "", getGraphicsShaderCode, runAndVerifyDefaultPipeline, ctxFragment);
4176         }
4177 }
4178
4179 void GraphicsTestGroupBuilder::createSettingsTests(TestCaseGroup* parentGroup)
4180 {
4181         DE_UNREF(parentGroup);
4182
4183         // WG decided that testing settings only for compute stage is sufficient
4184 }
4185
4186 InstanceContext GraphicsTestGroupBuilder::createInstanceContext(const OperationTestCaseInfo& testCaseInfo) const
4187 {
4188         // LUT storing functions used to verify test results
4189         const VerifyIOFunc checkFloatsLUT[] =
4190         {
4191                 checkFloats<Float16, deFloat16>,
4192                 checkFloats<Float32, float>,
4193                 checkFloats<Float64, double>
4194         };
4195
4196         // 32-bit float types are always needed for standard operations on color
4197         // if tested operation does not require fp32 for either input or output
4198         // then this minimal type definitions must be appended to types section
4199         const string f32TypeMinimalRequired =
4200                 "%type_f32             = OpTypeFloat 32\n"
4201                 "%type_f32_arr_1       = OpTypeArray %type_f32 %c_i32_1\n"
4202                 "%type_f32_iptr        = OpTypePointer Input %type_f32\n"
4203                 "%type_f32_optr        = OpTypePointer Output %type_f32\n"
4204                 "%type_f32_vec4        = OpTypeVector %type_f32 4\n"
4205                 "%type_f32_vec4_iptr   = OpTypePointer Input %type_f32_vec4\n"
4206                 "%type_f32_vec4_optr   = OpTypePointer Output %type_f32_vec4\n";
4207
4208         const Operation&                        testOperation   = testCaseInfo.operation;
4209         const OperationTestCase&        testCase                = testCaseInfo.testCase;
4210         FloatType                                       outFloatType    = testCaseInfo.outFloatType;
4211         VkShaderStageFlagBits           testedStage             = testCaseInfo.testedStage;
4212
4213         DE_ASSERT((testedStage == VK_SHADER_STAGE_VERTEX_BIT) || (testedStage == VK_SHADER_STAGE_FRAGMENT_BIT));
4214
4215         SpecializedOperation specOpData;
4216         specializeOperation(testCaseInfo, specOpData);
4217
4218         TypeSnippetsSP  inTypeSnippets          = specOpData.inTypeSnippets;
4219         TypeSnippetsSP  outTypeSnippets         = specOpData.outTypeSnippets;
4220         FloatType               inFloatType                     = specOpData.inFloatType;
4221
4222         deBool                  outFp16WithoutStorage   = (outFloatType == FP16) && testCase.fp16Without16BitStorage;
4223         deBool                  inFp16WithoutStorage    = (inFloatType == FP16) && testCase.fp16Without16BitStorage;
4224
4225         // There may be several reasons why we need the shaderFloat16 Vulkan feature.
4226         bool needsShaderFloat16 = inFp16WithoutStorage || outFp16WithoutStorage;
4227         // There are some weird cases where we need the constants, but would otherwise drop them.
4228         bool needsSpecialConstants = false;
4229
4230         // UnpackHalf2x16 is a corner case - it returns two 32-bit floats but
4231         // internaly operates on fp16 and this type should be used by float controls
4232         FloatType               inFloatTypeForCaps              = inFloatType;
4233         string                  inFloatWidthForCaps             = inTypeSnippets->bitWidth;
4234         if (testCase.operationId == O_UPH_DENORM)
4235         {
4236                 inFloatTypeForCaps      = FP16;
4237                 inFloatWidthForCaps     = "16";
4238         }
4239
4240         string behaviorCapability;
4241         string behaviorExecutionMode;
4242         getBehaviorCapabilityAndExecutionMode(testCase.behaviorFlags,
4243                                                                                   inFloatWidthForCaps,
4244                                                                                   outTypeSnippets->bitWidth,
4245                                                                                   behaviorCapability,
4246                                                                                   behaviorExecutionMode);
4247
4248         // check which format features are needed
4249         bool float16FeatureRequired = (inFloatType == FP16) || (outFloatType == FP16);
4250         bool float64FeatureRequired = (inFloatType == FP64) || (outFloatType == FP64);
4251
4252         string vertExecutionMode;
4253         string fragExecutionMode;
4254         string vertCapabilities;
4255         string fragCapabilities;
4256         string vertExtensions;
4257         string fragExtensions;
4258         string vertAnnotations;
4259         string fragAnnotations;
4260         string vertTypes;
4261         string fragTypes;
4262         string vertConstants;
4263         string fragConstants;
4264         string vertFunctions;
4265         string fragFunctions;
4266         string vertIODefinitions;
4267         string fragIODefinitions;
4268         string vertArguments;
4269         string fragArguments;
4270         string vertVariables;
4271         string fragVariables;
4272         string vertCommands;
4273         string fragCommands;
4274         string vertProcessResult;
4275         string fragProcessResult;
4276
4277         // check if operation should be executed in vertex stage
4278         if (testedStage == VK_SHADER_STAGE_VERTEX_BIT)
4279         {
4280                 vertAnnotations = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet;
4281                 fragAnnotations = outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
4282                 vertFunctions = specOpData.functions;
4283
4284                 // check if input type is different from tested type (conversion operations)
4285                 if (testOperation.isInputTypeRestricted)
4286                 {
4287                         vertCapabilities        = behaviorCapability + inTypeSnippets->capabilities + outTypeSnippets->capabilities;
4288                         fragCapabilities        = outTypeSnippets->capabilities;
4289                         vertExtensions          = inTypeSnippets->extensions + outTypeSnippets->extensions;
4290                         fragExtensions          = outTypeSnippets->extensions;
4291                         vertTypes                       = inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
4292                         if (inFp16WithoutStorage)
4293                                 vertTypes                       += inTypeSnippets->typeDefinitionsFp16Snippet;
4294
4295                         fragTypes                       = outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
4296                         vertConstants           = inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
4297                         fragConstants           = outTypeSnippets->constantsDefinitionsSnippet;
4298                 }
4299                 else
4300                 {
4301                         // input and output types are the same (majority of operations)
4302
4303                         vertCapabilities        = behaviorCapability + outTypeSnippets->capabilities;
4304                         fragCapabilities        = vertCapabilities;
4305                         vertExtensions          = outTypeSnippets->extensions;
4306                         fragExtensions          = vertExtensions;
4307                         vertTypes                       = outTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->varyingsTypesSnippet;
4308                         fragTypes                       = vertTypes;
4309                         vertConstants           = outTypeSnippets->constantsDefinitionsSnippet;
4310                         fragConstants           = outTypeSnippets->constantsDefinitionsSnippet;
4311                 }
4312
4313                 if (outFloatType != FP32)
4314                 {
4315                         fragTypes += f32TypeMinimalRequired;
4316                         if (inFloatType != FP32)
4317                                 vertTypes += f32TypeMinimalRequired;
4318                 }
4319
4320                 vertAnnotations += specOpData.annotations;
4321                 vertTypes               += specOpData.types;
4322                 vertConstants   += specOpData.constants;
4323
4324                 vertExecutionMode               = behaviorExecutionMode;
4325                 fragExecutionMode               = "";
4326                 vertIODefinitions               = inTypeSnippets->inputDefinitionsSnippet + outTypeSnippets->outputVaryingsSnippet;
4327                 fragIODefinitions               = outTypeSnippets->inputVaryingsSnippet + outTypeSnippets->outputDefinitionsSnippet;
4328                 vertArguments                   = specOpData.arguments;
4329                 fragArguments                   = "";
4330                 vertVariables                   = specOpData.variables;
4331                 fragVariables                   = "";
4332                 vertCommands                    = specOpData.commands;
4333                 fragCommands                    = "";
4334                 vertProcessResult               = outTypeSnippets->storeVertexResultSnippet;
4335                 fragProcessResult               = outTypeSnippets->loadVertexResultSnippet + outTypeSnippets->storeResultsSnippet;
4336
4337                 if (inFp16WithoutStorage)
4338                 {
4339                         vertAnnotations         += inTypeSnippets->typeAnnotationsFp16Snippet;
4340                         vertIODefinitions       = inTypeSnippets->inputDefinitionsFp16Snippet + outTypeSnippets->outputVaryingsSnippet;
4341                 }
4342
4343                 if (outFp16WithoutStorage)
4344                 {
4345                         vertTypes                       += outTypeSnippets->typeDefinitionsFp16Snippet;
4346                         fragTypes                       += outTypeSnippets->typeDefinitionsFp16Snippet;
4347                         fragAnnotations         += outTypeSnippets->typeAnnotationsFp16Snippet;
4348                         fragIODefinitions       = outTypeSnippets->inputVaryingsSnippet + outTypeSnippets->outputDefinitionsFp16Snippet;
4349                         fragProcessResult       = outTypeSnippets->loadVertexResultSnippet + outTypeSnippets->storeResultsFp16Snippet;
4350
4351                 }
4352
4353                 needsShaderFloat16              |= outTypeSnippets->loadStoreRequiresShaderFloat16;
4354         }
4355         else // perform test in fragment stage - vertex stage is empty
4356         {
4357                 fragFunctions = specOpData.functions;
4358                 // check if input type is different from tested type
4359                 if (testOperation.isInputTypeRestricted)
4360                 {
4361                         fragAnnotations         = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
4362                                                                   outTypeSnippets->outputAnnotationsSnippet + outTypeSnippets->typeAnnotationsSnippet;
4363                         fragCapabilities        = behaviorCapability +
4364                                 (inFp16WithoutStorage ? inTypeSnippets->capabilitiesFp16Without16BitStorage : inTypeSnippets->capabilities) +
4365                                 (outFp16WithoutStorage ? outTypeSnippets->capabilitiesFp16Without16BitStorage : outTypeSnippets->capabilities);
4366                         fragExtensions          =
4367                                 (inFp16WithoutStorage ? inTypeSnippets->extensionsFp16Without16BitStorage : inTypeSnippets->extensions) +
4368                                 (outFp16WithoutStorage ? outTypeSnippets->extensionsFp16Without16BitStorage : outTypeSnippets->extensions);
4369                         fragTypes                       = inTypeSnippets->typeDefinitionsSnippet + outTypeSnippets->typeDefinitionsSnippet;
4370                         fragConstants           = inTypeSnippets->constantsDefinitionsSnippet + outTypeSnippets->constantsDefinitionsSnippet;
4371                 }
4372                 else
4373                 {
4374                         // input and output types are the same
4375
4376                         fragAnnotations         = inTypeSnippets->inputAnnotationsSnippet + inTypeSnippets->typeAnnotationsSnippet +
4377                                                                   outTypeSnippets->outputAnnotationsSnippet;
4378                         fragCapabilities        = behaviorCapability +
4379                                 (outFp16WithoutStorage ? outTypeSnippets->capabilitiesFp16Without16BitStorage : outTypeSnippets->capabilities);
4380                         fragExtensions          = (outFp16WithoutStorage ? outTypeSnippets->extensionsFp16Without16BitStorage : outTypeSnippets->extensions);
4381                         fragTypes                       = outTypeSnippets->typeDefinitionsSnippet;
4382                         fragConstants           = outTypeSnippets->constantsDefinitionsSnippet;
4383                 }
4384
4385                 // varying is not used but it needs to be specified so lets use type_i32 for it
4386                 string unusedVertVarying = "%BP_vertex_result     = OpVariable %type_i32_optr Output\n";
4387                 string unusedFragVarying = "%BP_vertex_result     = OpVariable %type_i32_iptr Input\n";
4388
4389                 vertCapabilities        = "";
4390                 vertExtensions          = "";
4391                 vertAnnotations         = "OpDecorate %type_f32_arr_1 ArrayStride 4\n";
4392                 vertTypes                       = f32TypeMinimalRequired;
4393                 vertConstants           = "";
4394
4395                 if ((outFloatType != FP32) && (inFloatType != FP32))
4396                         fragTypes += f32TypeMinimalRequired;
4397
4398                 fragAnnotations += specOpData.annotations;
4399                 fragTypes               += specOpData.types;
4400                 fragConstants   += specOpData.constants;
4401
4402                 vertExecutionMode       = "";
4403                 fragExecutionMode       = behaviorExecutionMode;
4404                 vertIODefinitions       = unusedVertVarying;
4405                 fragIODefinitions       = unusedFragVarying;
4406
4407                 vertArguments           = "";
4408                 fragArguments           = specOpData.arguments;
4409                 vertVariables           = "";
4410                 fragVariables           = specOpData.variables;
4411                 vertCommands            = "";
4412                 fragCommands            = specOpData.commands;
4413                 vertProcessResult       = "";
4414                 fragProcessResult       = outTypeSnippets->storeResultsSnippet;
4415
4416                 if (inFp16WithoutStorage)
4417                 {
4418                         fragAnnotations         += inTypeSnippets->typeAnnotationsFp16Snippet;
4419                         if (testOperation.isInputTypeRestricted)
4420                         {
4421                                 fragTypes                       += inTypeSnippets->typeDefinitionsFp16Snippet;
4422                         }
4423                         fragIODefinitions       += inTypeSnippets->inputDefinitionsFp16Snippet;
4424                 }
4425                 else
4426                 {
4427                         fragIODefinitions       += inTypeSnippets->inputDefinitionsSnippet;
4428                 }
4429
4430                 if (outFp16WithoutStorage)
4431                 {
4432                         if (testOperation.isInputTypeRestricted)
4433                         {
4434                                 fragAnnotations         += outTypeSnippets->typeAnnotationsFp16Snippet;
4435                         }
4436                         fragTypes                       += outTypeSnippets->typeDefinitionsFp16Snippet;
4437                         fragIODefinitions       += outTypeSnippets->outputDefinitionsFp16Snippet;
4438                         fragProcessResult       = outTypeSnippets->storeResultsFp16Snippet;
4439                 }
4440                 else
4441                 {
4442                         fragIODefinitions       += outTypeSnippets->outputDefinitionsSnippet;
4443                 }
4444
4445                 if (!testCaseInfo.argumentsFromInput)
4446                 {
4447                         switch(testCaseInfo.testCase.operationId)
4448                         {
4449                                 case O_CONV_FROM_FP32:
4450                                 case O_CONV_FROM_FP64:
4451                                         needsSpecialConstants = true;
4452                                         break;
4453                                 default:
4454                                         break;
4455                         }
4456                 }
4457         }
4458
4459         // Another reason we need shaderFloat16 is the executable instructions uses fp16
4460         // in a way not supported by the 16bit storage extension.
4461         needsShaderFloat16 |= float16FeatureRequired && testOperation.floatUsage == FLOAT_ARITHMETIC;
4462
4463         // Constants are only needed sometimes.  Drop them in the fp16 case if the code doesn't need
4464         // them, and if we don't otherwise need shaderFloat16.
4465         bool needsFP16Constants = needsShaderFloat16 || needsSpecialConstants || outFp16WithoutStorage;
4466
4467         if (!needsFP16Constants && float16FeatureRequired)
4468         {
4469                 // Check various code fragments
4470                 const FloatStatementUsageFlags  commandsFloatConstMask                          = B_STATEMENT_USAGE_COMMANDS_CONST_FLOAT | B_STATEMENT_USAGE_COMMANDS_CONST_FP16;
4471                 const bool                                              commandsUsesFloatConstant                       = (testCaseInfo.operation.statementUsageFlags & commandsFloatConstMask) != 0;;
4472                 const FloatStatementUsageFlags  argumentsFloatConstMask                         = B_STATEMENT_USAGE_ARGS_CONST_FLOAT | B_STATEMENT_USAGE_ARGS_CONST_FP16;
4473                 const bool                                              argumentsUsesFloatConstant                      = (specOpData.argumentsUsesFloatConstant & argumentsFloatConstMask) != 0;
4474                 bool                                                    hasFP16ConstsInCommandsOrArguments      = commandsUsesFloatConstant || argumentsUsesFloatConstant;
4475
4476                 needsFP16Constants |= hasFP16ConstsInCommandsOrArguments;
4477
4478                 if (!needsFP16Constants)
4479                 {
4480                         vertConstants = "";
4481                         fragConstants = "";
4482                 }
4483         }
4484         needsShaderFloat16 |= needsFP16Constants;
4485
4486         if (needsShaderFloat16)
4487         {
4488                 vertCapabilities += "OpCapability Float16\n";
4489                 fragCapabilities += "OpCapability Float16\n";
4490         }
4491
4492         map<string, string> specializations;
4493         specializations["vert_capabilities"]    = vertCapabilities;
4494         specializations["vert_extensions"]              = vertExtensions;
4495         specializations["vert_execution_mode"]  = vertExecutionMode;
4496         specializations["vert_annotations"]             = vertAnnotations;
4497         specializations["vert_types"]                   = vertTypes;
4498         specializations["vert_constants"]               = vertConstants;
4499         specializations["vert_io_definitions"]  = vertIODefinitions;
4500         specializations["vert_arguments"]               = vertArguments;
4501         specializations["vert_variables"]               = vertVariables;
4502         specializations["vert_functions"]               = vertFunctions;
4503         specializations["vert_commands"]                = vertCommands;
4504         specializations["vert_process_result"]  = vertProcessResult;
4505         specializations["frag_capabilities"]    = fragCapabilities;
4506         specializations["frag_extensions"]              = fragExtensions;
4507         specializations["frag_execution_mode"]  = fragExecutionMode;
4508         specializations["frag_annotations"]             = fragAnnotations;
4509         specializations["frag_types"]                   = fragTypes;
4510         specializations["frag_constants"]               = fragConstants;
4511         specializations["frag_functions"]               = fragFunctions;
4512         specializations["frag_io_definitions"]  = fragIODefinitions;
4513         specializations["frag_arguments"]               = fragArguments;
4514         specializations["frag_variables"]               = fragVariables;
4515         specializations["frag_commands"]                = fragCommands;
4516         specializations["frag_process_result"]  = fragProcessResult;
4517
4518         // colors are not used by the test - input is passed via uniform buffer
4519         RGBA defaultColors[4] = { RGBA::white(), RGBA::red(), RGBA::green(), RGBA::blue() };
4520
4521         // construct input and output buffers of proper types
4522         TypeValuesSP inTypeValues       = m_typeData.at(inFloatType).values;
4523         TypeValuesSP outTypeValues      = m_typeData.at(outFloatType).values;
4524         BufferSp inBufferSp                     = inTypeValues->constructInputBuffer(testCase.input);
4525         BufferSp outBufferSp            = outTypeValues->constructOutputBuffer(testCase.expectedOutput);
4526
4527         vkt::SpirVAssembly::GraphicsResources resources;
4528         resources.inputs.push_back( Resource(inBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4529         resources.outputs.push_back(Resource(outBufferSp, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
4530         resources.verifyIO = checkFloatsLUT[outFloatType];
4531
4532         StageToSpecConstantMap  noSpecConstants;
4533         PushConstants                   noPushConstants;
4534         GraphicsInterfaces              noInterfaces;
4535
4536         VulkanFeatures vulkanFeatures;
4537         setupVulkanFeatures(inFloatTypeForCaps,         // usualy same as inFloatType - different only for UnpackHalf2x16
4538                                                 outFloatType,
4539                                                 testCase.behaviorFlags,
4540                                                 float64FeatureRequired,
4541                                                 vulkanFeatures);
4542         vulkanFeatures.coreFeatures.fragmentStoresAndAtomics = true;
4543
4544         vector<string> extensions;
4545         extensions.push_back("VK_KHR_shader_float_controls");
4546         if (needsShaderFloat16)
4547         {
4548                 extensions.push_back("VK_KHR_shader_float16_int8");
4549                 vulkanFeatures.extFloat16Int8 = EXTFLOAT16INT8FEATURES_FLOAT16;
4550         }
4551         if (float16FeatureRequired && !testCase.fp16Without16BitStorage)
4552         {
4553                 extensions.push_back("VK_KHR_16bit_storage");
4554                 vulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
4555         }
4556
4557         InstanceContext ctx(defaultColors,
4558                                                 defaultColors,
4559                                                 specializations,
4560                                                 noSpecConstants,
4561                                                 noPushConstants,
4562                                                 resources,
4563                                                 noInterfaces,
4564                                                 extensions,
4565                                                 vulkanFeatures,
4566                                                 testedStage);
4567
4568         ctx.moduleMap["vert"].push_back(std::make_pair("main", VK_SHADER_STAGE_VERTEX_BIT));
4569         ctx.moduleMap["frag"].push_back(std::make_pair("main", VK_SHADER_STAGE_FRAGMENT_BIT));
4570
4571         ctx.requiredStages                      = static_cast<VkShaderStageFlagBits>(VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
4572         ctx.failResult                          = QP_TEST_RESULT_FAIL;
4573         ctx.failMessageTemplate         = "Output doesn't match with expected";
4574
4575         return ctx;
4576 }
4577
4578 } // anonymous
4579
4580 tcu::TestCaseGroup* createFloatControlsTestGroup (TestContext& testCtx, TestGroupBuilderBase* groupBuilder)
4581 {
4582         de::MovePtr<TestCaseGroup>      group(new TestCaseGroup(testCtx, "float_controls", "Tests for VK_KHR_shader_float_controls extension"));
4583
4584         struct TestGroup
4585         {
4586                 FloatType               floatType;
4587                 const char*             groupName;
4588         };
4589         TestGroup testGroups[] =
4590         {
4591                 { FP16, "fp16" },
4592                 { FP32, "fp32" },
4593                 { FP64, "fp64" },
4594         };
4595
4596         for (int i = 0 ; i < DE_LENGTH_OF_ARRAY(testGroups) ; ++i)
4597         {
4598                 const TestGroup& testGroup = testGroups[i];
4599                 TestCaseGroup* typeGroup = new TestCaseGroup(testCtx, testGroup.groupName, "");
4600                 group->addChild(typeGroup);
4601
4602                 groupBuilder->createOperationTests(typeGroup, "input_args", testGroup.floatType, true);
4603                 groupBuilder->createOperationTests(typeGroup, "generated_args", testGroup.floatType, false);
4604         }
4605
4606         groupBuilder->createSettingsTests(group.get());
4607
4608         return group.release();
4609 }
4610
4611 tcu::TestCaseGroup* createFloatControlsComputeGroup (TestContext& testCtx)
4612 {
4613         ComputeTestGroupBuilder computeTestGroupBuilder;
4614         computeTestGroupBuilder.init();
4615
4616         return createFloatControlsTestGroup(testCtx, &computeTestGroupBuilder);
4617 }
4618
4619 tcu::TestCaseGroup* createFloatControlsGraphicsGroup (TestContext& testCtx)
4620 {
4621         GraphicsTestGroupBuilder graphicsTestGroupBuilder;
4622         graphicsTestGroupBuilder.init();
4623
4624         return createFloatControlsTestGroup(testCtx, &graphicsTestGroupBuilder);
4625 }
4626
4627 } // SpirVAssembly
4628 } // vkt