Limit changes by xor to upper 8 bits in mixed atomic tests am: 6bc3c7a634 am: eef2e71...
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / spirv_assembly / vktSpvAsm16bitStorageTests.cpp
1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017 Google Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief SPIR-V Assembly Tests for the VK_KHR_16bit_storage
22  *//*--------------------------------------------------------------------*/
23
24 // VK_KHR_16bit_storage
25 //
26 // \todo [2017-02-08 antiagainst] Additional corner cases to check:
27 //
28 // * Test OpAccessChain with subword types
29 //  * For newly enabled types T:
30 //    * For composite types: vector, matrix, structures, array over T:
31 //      1. Use OpAccessChain to form a pointer to a subword type.
32 //      2. Load the subword value X16.
33 //      3. Convert X16 to X32.
34 //      4. Store X32 to BufferBlock.
35 //      5. Host inspects X32.
36 // * Test {StorageInputOutput16} 16-to-16:
37 //   * For newly enabled types T:
38 //     1. Host creates X16 stream values of type T.
39 //     2. Shaders have corresponding capability.
40 //     3. For each viable shader stage:
41 //       3a. Load X16 Input variable.
42 //       3b. Store X16 to Output variable.
43 //     4. Host inspects resulting values.
44 // * Test {StorageInputOutput16} 16-to-16 one value to two:
45 //     Like the previous test, but write X16 to two different output variables.
46 //     (Checks that the 16-bit intermediate value can be used twice.)
47
48 #include "vktSpvAsm16bitStorageTests.hpp"
49
50 #include "tcuFloat.hpp"
51 #include "tcuRGBA.hpp"
52 #include "tcuStringTemplate.hpp"
53 #include "tcuTestLog.hpp"
54 #include "tcuVectorUtil.hpp"
55
56 #include "vkDefs.hpp"
57 #include "vkDeviceUtil.hpp"
58 #include "vkMemUtil.hpp"
59 #include "vkPlatform.hpp"
60 #include "vkPrograms.hpp"
61 #include "vkQueryUtil.hpp"
62 #include "vkRef.hpp"
63 #include "vkRefUtil.hpp"
64 #include "vkStrUtil.hpp"
65 #include "vkTypeUtil.hpp"
66
67 #include "deRandom.hpp"
68 #include "deStringUtil.hpp"
69 #include "deUniquePtr.hpp"
70 #include "deMath.h"
71
72 #include "vktSpvAsmComputeShaderCase.hpp"
73 #include "vktSpvAsmComputeShaderTestUtil.hpp"
74 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
75 #include "vktTestCaseUtil.hpp"
76 #include "vktTestGroupUtil.hpp"
77
78 #include <limits>
79 #include <map>
80 #include <string>
81 #include <sstream>
82 #include <utility>
83
84 namespace vkt
85 {
86 namespace SpirVAssembly
87 {
88
89 using namespace vk;
90 using std::map;
91 using std::string;
92 using std::vector;
93 using tcu::IVec3;
94 using tcu::IVec4;
95 using tcu::RGBA;
96 using tcu::TestLog;
97 using tcu::TestStatus;
98 using tcu::Vec4;
99 using de::UniquePtr;
100 using tcu::StringTemplate;
101 using tcu::Vec4;
102
103 namespace
104 {
105
106 struct Capability
107 {
108         const char*                             name;
109         const char*                             cap;
110         const char*                             decor;
111         vk::VkDescriptorType    dtype;
112 };
113
114 static const Capability CAPABILITIES[]  =
115 {
116         {"uniform_buffer_block",        "StorageUniformBufferBlock16",  "BufferBlock",  VK_DESCRIPTOR_TYPE_STORAGE_BUFFER},
117         {"uniform",                                     "StorageUniform16",                             "Block",                VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER},
118 };
119
120 VulkanFeatures  get16BitStorageFeatures (const char* cap)
121 {
122         VulkanFeatures features;
123         if (string(cap) == "uniform_buffer_block")
124                 features.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
125         else if (string(cap) == "uniform")
126                 features.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM;
127         else
128                 DE_ASSERT(false && "not supported");
129
130         return features;
131 }
132
133
134 // Batch function to check arrays of 16-bit floats.
135 //
136 // For comparing 16-bit floats, we need to consider both RTZ and RTE. So we can only recalculate
137 // the expected values here instead of get the expected values directly from the test case.
138 // Thus we need original floats here but not expected outputs.
139 template<RoundingModeFlags RoundingMode>
140 bool graphicsCheck16BitFloats (const std::vector<Resource>&     originalFloats,
141                                                            const vector<AllocationSp>&  outputAllocs,
142                                                            const std::vector<Resource>& /* expectedOutputs */,
143                                                            tcu::TestLog&                                log)
144 {
145         if (outputAllocs.size() != originalFloats.size())
146                 return false;
147
148         for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
149         {
150                 const deUint16* returned        = static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
151                 const float*    original        = static_cast<const float*>(originalFloats[outputNdx].second->data());
152                 const deUint32  count           = static_cast<deUint32>(originalFloats[outputNdx].second->getNumBytes() / sizeof(float));
153
154                 for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
155                         if (!compare16BitFloat(original[numNdx], returned[numNdx], RoundingMode, log))
156                                 return false;
157         }
158
159         return true;
160 }
161
162 template<RoundingModeFlags RoundingMode>
163 bool computeCheck16BitFloats (const std::vector<BufferSp>&      originalFloats,
164                                                           const vector<AllocationSp>&   outputAllocs,
165                                                           const std::vector<BufferSp>&  /* expectedOutputs */,
166                                                           tcu::TestLog&                                 log)
167 {
168         if (outputAllocs.size() != originalFloats.size())
169                 return false;
170
171         for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
172         {
173                 const deUint16* returned        = static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
174                 const float*    original        = static_cast<const float*>(originalFloats[outputNdx]->data());
175                 const deUint32  count           = static_cast<deUint32>(originalFloats[outputNdx]->getNumBytes() / sizeof(float));
176
177                 for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
178                         if (!compare16BitFloat(original[numNdx], returned[numNdx], RoundingMode, log))
179                                 return false;
180         }
181
182         return true;
183 }
184
185
186 // Batch function to check arrays of 32-bit floats.
187 //
188 // For comparing 32-bit floats, we just need the expected value precomputed in the test case.
189 // So we need expected outputs here but not original floats.
190 bool check32BitFloats (const std::vector<Resource>&             /* originalFloats */,
191                                            const std::vector<AllocationSp>& outputAllocs,
192                                            const std::vector<Resource>&         expectedOutputs,
193                                            tcu::TestLog&                                        log)
194 {
195         if (outputAllocs.size() != expectedOutputs.size())
196                 return false;
197
198         for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
199         {
200                 const float*    returnedAsFloat = static_cast<const float*>(outputAllocs[outputNdx]->getHostPtr());
201                 const float*    expectedAsFloat = static_cast<const float*>(expectedOutputs[outputNdx].second->data());
202                 const deUint32  count                   = static_cast<deUint32>(expectedOutputs[outputNdx].second->getNumBytes() / sizeof(float));
203
204                 for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
205                         if (!compare32BitFloat(expectedAsFloat[numNdx], returnedAsFloat[numNdx], log))
206                                 return false;
207         }
208
209         return true;
210 }
211
212 // Overload for compute pipeline
213 bool check32BitFloats (const std::vector<BufferSp>&             /* originalFloats */,
214                                            const std::vector<AllocationSp>& outputAllocs,
215                                            const std::vector<BufferSp>&         expectedOutputs,
216                                            tcu::TestLog&                                        log)
217 {
218         if (outputAllocs.size() != expectedOutputs.size())
219                 return false;
220
221         for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
222         {
223                 const float*    returnedAsFloat = static_cast<const float*>(outputAllocs[outputNdx]->getHostPtr());
224                 const float*    expectedAsFloat = static_cast<const float*>(expectedOutputs[outputNdx]->data());
225                 const deUint32  count                   = static_cast<deUint32>(expectedOutputs[outputNdx]->getNumBytes() / sizeof(float));
226
227                 for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
228                         if (!compare32BitFloat(expectedAsFloat[numNdx], returnedAsFloat[numNdx], log))
229                                 return false;
230         }
231
232         return true;
233 }
234
235 // Generate and return 32-bit integers.
236 //
237 // Expected count to be at least 16.
238 vector<deInt32> getInt32s (de::Random& rnd, const deUint32 count)
239 {
240         vector<deInt32>         data;
241
242         data.reserve(count);
243
244         // Make sure we have boundary numbers.
245         data.push_back(deInt32(0x00000000));  // 0
246         data.push_back(deInt32(0x00000001));  // 1
247         data.push_back(deInt32(0x0000002a));  // 42
248         data.push_back(deInt32(0x00007fff));  // 32767
249         data.push_back(deInt32(0x00008000));  // 32768
250         data.push_back(deInt32(0x0000ffff));  // 65535
251         data.push_back(deInt32(0x00010000));  // 65536
252         data.push_back(deInt32(0x7fffffff));  // 2147483647
253         data.push_back(deInt32(0x80000000));  // -2147483648
254         data.push_back(deInt32(0x80000001));  // -2147483647
255         data.push_back(deInt32(0xffff0000));  // -65536
256         data.push_back(deInt32(0xffff0001));  // -65535
257         data.push_back(deInt32(0xffff8000));  // -32768
258         data.push_back(deInt32(0xffff8001));  // -32767
259         data.push_back(deInt32(0xffffffd6));  // -42
260         data.push_back(deInt32(0xffffffff));  // -1
261
262         DE_ASSERT(count >= data.size());
263
264         for (deUint32 numNdx = static_cast<deUint32>(data.size()); numNdx < count; ++numNdx)
265                 data.push_back(static_cast<deInt32>(rnd.getUint32()));
266
267         return data;
268 }
269
270 // Generate and return 16-bit integers.
271 //
272 // Expected count to be at least 8.
273 vector<deInt16> getInt16s (de::Random& rnd, const deUint32 count)
274 {
275         vector<deInt16>         data;
276
277         data.reserve(count);
278
279         // Make sure we have boundary numbers.
280         data.push_back(deInt16(0x0000));  // 0
281         data.push_back(deInt16(0x0001));  // 1
282         data.push_back(deInt16(0x002a));  // 42
283         data.push_back(deInt16(0x7fff));  // 32767
284         data.push_back(deInt16(0x8000));  // -32868
285         data.push_back(deInt16(0x8001));  // -32767
286         data.push_back(deInt16(0xffd6));  // -42
287         data.push_back(deInt16(0xffff));  // -1
288
289         DE_ASSERT(count >= data.size());
290
291         for (deUint32 numNdx = static_cast<deUint32>(data.size()); numNdx < count; ++numNdx)
292                 data.push_back(static_cast<deInt16>(rnd.getUint16()));
293
294         return data;
295 }
296
297 // IEEE-754 floating point numbers:
298 // +--------+------+----------+-------------+
299 // | binary | sign | exponent | significand |
300 // +--------+------+----------+-------------+
301 // | 16-bit |  1   |    5     |     10      |
302 // +--------+------+----------+-------------+
303 // | 32-bit |  1   |    8     |     23      |
304 // +--------+------+----------+-------------+
305 //
306 // 16-bit floats:
307 //
// 0   000 00   00 0000 0001 (0x0001: 2^-24:         minimum positive denormalized)
// 0   000 00   11 1111 1111 (0x03ff: 2^-14 - 2^-24: maximum positive denormalized)
// 0   000 01   00 0000 0000 (0x0400: 2^-14:         minimum positive normalized)
//
// 32-bit floats:
//
// 0   011 1110 1   001 0000 0000 0000 0000 0000 (0x3e900000: 0.28125: with exact match in 16-bit normalized)
// 0   011 1000 1   000 0000 0011 0000 0000 0000 (0x38803000: exact half way within two 16-bit normalized; round to zero: 0x0401)
// 1   011 1000 1   000 0000 0011 0000 0000 0000 (0xb8803000: exact half way within two 16-bit normalized; round to zero: 0x8401)
// 0   011 1000 1   000 0000 1111 1111 0000 0000 (0x3880ff00: not exact half way within two 16-bit normalized; round to zero: 0x0407)
// 1   011 1000 1   000 0000 1111 1111 0000 0000 (0xb880ff00: not exact half way within two 16-bit normalized; round to zero: 0x8407)
319
320
321 // Generate and return 32-bit floats
322 //
323 // The first 24 number pairs are manually picked, while the rest are randomly generated.
324 // Expected count to be at least 24 (numPicks).
325 vector<float> getFloat32s (de::Random& rnd, deUint32 count)
326 {
327         vector<float>           float32;
328
329         float32.reserve(count);
330
331         // Zero
332         float32.push_back(0.f);
333         float32.push_back(-0.f);
334         // Infinity
335         float32.push_back(std::numeric_limits<float>::infinity());
336         float32.push_back(-std::numeric_limits<float>::infinity());
337         // SNaN
338         float32.push_back(std::numeric_limits<float>::signaling_NaN());
339         float32.push_back(-std::numeric_limits<float>::signaling_NaN());
340         // QNaN
341         float32.push_back(std::numeric_limits<float>::quiet_NaN());
342         float32.push_back(-std::numeric_limits<float>::quiet_NaN());
343
344         // Denormalized 32-bit float matching 0 in 16-bit
345         float32.push_back(deFloatLdExp(1.f, -127));
346         float32.push_back(-deFloatLdExp(1.f, -127));
347
348         // Normalized 32-bit float matching 0 in 16-bit
349         float32.push_back(deFloatLdExp(1.f, -100));
350         float32.push_back(-deFloatLdExp(1.f, -100));
351         // Normalized 32-bit float with exact denormalized match in 16-bit
352         float32.push_back(deFloatLdExp(1.f, -24));  // 2e-24: minimum 16-bit positive denormalized
353         float32.push_back(-deFloatLdExp(1.f, -24)); // 2e-24: maximum 16-bit negative denormalized
354         // Normalized 32-bit float with exact normalized match in 16-bit
355         float32.push_back(deFloatLdExp(1.f, -14));  // 2e-14: minimum 16-bit positive normalized
356         float32.push_back(-deFloatLdExp(1.f, -14)); // 2e-14: maximum 16-bit negative normalized
357         // Normalized 32-bit float falling above half way within two 16-bit normalized
358         float32.push_back(bitwiseCast<float>(deUint32(0x3880ff00)));
359         float32.push_back(bitwiseCast<float>(deUint32(0xb880ff00)));
360         // Normalized 32-bit float falling exact half way within two 16-bit normalized
361         float32.push_back(bitwiseCast<float>(deUint32(0x38803000)));
362         float32.push_back(bitwiseCast<float>(deUint32(0xb8803000)));
363         // Some number
364         float32.push_back(0.28125f);
365         float32.push_back(-0.28125f);
366         // Normalized 32-bit float matching infinity in 16-bit
367         float32.push_back(deFloatLdExp(1.f, 100));
368         float32.push_back(-deFloatLdExp(1.f, 100));
369
370         const deUint32          numPicks        = static_cast<deUint32>(float32.size());
371
372         DE_ASSERT(count >= numPicks);
373         count -= numPicks;
374
375         for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
376                 float32.push_back(rnd.getFloat());
377
378         return float32;
379 }
380
381 // IEEE-754 floating point numbers:
382 // +--------+------+----------+-------------+
383 // | binary | sign | exponent | significand |
384 // +--------+------+----------+-------------+
385 // | 16-bit |  1   |    5     |     10      |
386 // +--------+------+----------+-------------+
387 // | 32-bit |  1   |    8     |     23      |
388 // +--------+------+----------+-------------+
389 //
390 // 16-bit floats:
391 //
// 0   000 00   00 0000 0001 (0x0001: 2^-24:         minimum positive denormalized)
// 0   000 00   11 1111 1111 (0x03ff: 2^-14 - 2^-24: maximum positive denormalized)
// 0   000 01   00 0000 0000 (0x0400: 2^-14:         minimum positive normalized)
//
// 0   000 00   00 0000 0000 (0x0000: +0)
// 0   111 11   00 0000 0000 (0x7c00: +Inf)
// 0   000 00   11 1111 0000 (0x03f0: +Denorm)
// 0   000 01   00 0000 0001 (0x0401: +Norm)
// 0   111 11   00 0000 1111 (0x7c0f: +SNaN)
// 0   111 11   00 1111 0000 (0x7cf0: +QNaN)
402
403
404 // Generate and return 16-bit floats and their corresponding 32-bit values.
405 //
406 // The first 14 number pairs are manually picked, while the rest are randomly generated.
407 // Expected count to be at least 14 (numPicks).
408 vector<deFloat16> getFloat16s (de::Random& rnd, deUint32 count)
409 {
410         vector<deFloat16>       float16;
411
412         float16.reserve(count);
413
414         // Zero
415         float16.push_back(deUint16(0x0000));
416         float16.push_back(deUint16(0x8000));
417         // Infinity
418         float16.push_back(deUint16(0x7c00));
419         float16.push_back(deUint16(0xfc00));
420         // SNaN
421         float16.push_back(deUint16(0x7c0f));
422         float16.push_back(deUint16(0xfc0f));
423         // QNaN
424         float16.push_back(deUint16(0x7cf0));
425         float16.push_back(deUint16(0xfcf0));
426
427         // Denormalized
428         float16.push_back(deUint16(0x03f0));
429         float16.push_back(deUint16(0x83f0));
430         // Normalized
431         float16.push_back(deUint16(0x0401));
432         float16.push_back(deUint16(0x8401));
433         // Some normal number
434         float16.push_back(deUint16(0x14cb));
435         float16.push_back(deUint16(0x94cb));
436
437         const deUint32          numPicks        = static_cast<deUint32>(float16.size());
438
439         DE_ASSERT(count >= numPicks);
440         count -= numPicks;
441
442         for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
443                 float16.push_back(rnd.getUint16());
444
445         return float16;
446 }
447
448 void addCompute16bitStorageUniform16To32Group (tcu::TestCaseGroup* group)
449 {
450         tcu::TestContext&                               testCtx                 = group->getTestContext();
451         de::Random                                              rnd                             (deStringHash(group->getName()));
452         const int                                               numElements             = 128;
453
454         const StringTemplate                    shaderTemplate  (
455                 "OpCapability Shader\n"
456                 "OpCapability ${capability}\n"
457                 "OpExtension \"SPV_KHR_16bit_storage\"\n"
458                 "OpMemoryModel Logical GLSL450\n"
459                 "OpEntryPoint GLCompute %main \"main\" %id\n"
460                 "OpExecutionMode %main LocalSize 1 1 1\n"
461                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
462
463                 "${stride}"
464
465                 "OpMemberDecorate %SSBO32 0 Offset 0\n"
466                 "OpMemberDecorate %SSBO16 0 Offset 0\n"
467                 "OpDecorate %SSBO32 BufferBlock\n"
468                 "OpDecorate %SSBO16 ${storage}\n"
469                 "OpDecorate %ssbo32 DescriptorSet 0\n"
470                 "OpDecorate %ssbo16 DescriptorSet 0\n"
471                 "OpDecorate %ssbo32 Binding 1\n"
472                 "OpDecorate %ssbo16 Binding 0\n"
473
474                 "${matrix_decor:opt}\n"
475
476                 "%bool      = OpTypeBool\n"
477                 "%void      = OpTypeVoid\n"
478                 "%voidf     = OpTypeFunction %void\n"
479                 "%u32       = OpTypeInt 32 0\n"
480                 "%i32       = OpTypeInt 32 1\n"
481                 "%f32       = OpTypeFloat 32\n"
482                 "%uvec3     = OpTypeVector %u32 3\n"
483                 "%fvec3     = OpTypeVector %f32 3\n"
484                 "%uvec3ptr  = OpTypePointer Input %uvec3\n"
485                 "%i32ptr    = OpTypePointer Uniform %i32\n"
486                 "%f32ptr    = OpTypePointer Uniform %f32\n"
487
488                 "%zero      = OpConstant %i32 0\n"
489                 "%c_i32_1   = OpConstant %i32 1\n"
490                 "%c_i32_2   = OpConstant %i32 2\n"
491                 "%c_i32_3   = OpConstant %i32 3\n"
492                 "%c_i32_16  = OpConstant %i32 16\n"
493                 "%c_i32_32  = OpConstant %i32 32\n"
494                 "%c_i32_64  = OpConstant %i32 64\n"
495                 "%c_i32_128 = OpConstant %i32 128\n"
496
497                 "%i32arr    = OpTypeArray %i32 %c_i32_128\n"
498                 "%f32arr    = OpTypeArray %f32 %c_i32_128\n"
499
500                 "${types}\n"
501                 "${matrix_types:opt}\n"
502
503                 "%SSBO32    = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
504                 "%SSBO16    = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
505                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
506                 "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
507                 "%ssbo32    = OpVariable %up_SSBO32 Uniform\n"
508                 "%ssbo16    = OpVariable %up_SSBO16 Uniform\n"
509
510                 "%id        = OpVariable %uvec3ptr Input\n"
511
512                 "%main      = OpFunction %void None %voidf\n"
513                 "%label     = OpLabel\n"
514                 "%idval     = OpLoad %uvec3 %id\n"
515                 "%x         = OpCompositeExtract %u32 %idval 0\n"
516                 "%inloc     = OpAccessChain %${base16}ptr %ssbo16 %zero %x ${index0:opt}\n"
517                 "%val16     = OpLoad %${base16} %inloc\n"
518                 "%val32     = ${convert} %${base32} %val16\n"
519                 "%outloc    = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
520                 "             OpStore %outloc %val32\n"
521                 "${matrix_store:opt}\n"
522                 "             OpReturn\n"
523                 "             OpFunctionEnd\n");
524
525         {  // floats
526                 const char                                                                              floatTypes[]    =
527                         "%f16       = OpTypeFloat 16\n"
528                         "%f16ptr    = OpTypePointer Uniform %f16\n"
529                         "%f16arr    = OpTypeArray %f16 %c_i32_128\n"
530                         "%v2f16     = OpTypeVector %f16 2\n"
531                         "%v2f32     = OpTypeVector %f32 2\n"
532                         "%v2f16ptr  = OpTypePointer Uniform %v2f16\n"
533                         "%v2f32ptr  = OpTypePointer Uniform %v2f32\n"
534                         "%v2f16arr  = OpTypeArray %v2f16 %c_i32_64\n"
535                         "%v2f32arr  = OpTypeArray %v2f32 %c_i32_64\n";
536
537                 struct CompositeType
538                 {
539                         const char*     name;
540                         const char*     base32;
541                         const char*     base16;
542                         const char*     stride;
543                         unsigned        count;
544                 };
545
546                 const CompositeType     cTypes[]        =
547                 {
548                         {"scalar",      "f32",          "f16",          "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",                         numElements},
549                         {"vector",      "v2f32",        "v2f16",        "OpDecorate %v2f32arr ArrayStride 8\nOpDecorate %v2f16arr ArrayStride 4\n",                     numElements / 2},
550                         {"matrix",      "v2f32",        "v2f16",        "OpDecorate %m4v2f32arr ArrayStride 32\nOpDecorate %m4v2f16arr ArrayStride 16\n",       numElements / 8},
551                 };
552
553                 vector<deFloat16>       float16Data                     = getFloat16s(rnd, numElements);
554                 vector<float>           float32Data;
555
556                 float32Data.reserve(numElements);
557                 for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
558                         float32Data.push_back(deFloat16To32(float16Data[numIdx]));
559
560                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
561                         for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
562                         {
563                                 ComputeShaderSpec               spec;
564                                 map<string, string>             specs;
565                                 string                                  testName        = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name + "_float";
566
567                                 specs["capability"]             = CAPABILITIES[capIdx].cap;
568                                 specs["storage"]                = CAPABILITIES[capIdx].decor;
569                                 specs["stride"]                 = cTypes[tyIdx].stride;
570                                 specs["base32"]                 = cTypes[tyIdx].base32;
571                                 specs["base16"]                 = cTypes[tyIdx].base16;
572                                 specs["types"]                  = floatTypes;
573                                 specs["convert"]                = "OpFConvert";
574
575                                 if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
576                                 {
577                                         specs["index0"]                 = "%zero";
578                                         specs["matrix_prefix"]  = "m4";
579                                         specs["matrix_types"]   =
580                                                 "%m4v2f16 = OpTypeMatrix %v2f16 4\n"
581                                                 "%m4v2f32 = OpTypeMatrix %v2f32 4\n"
582                                                 "%m4v2f16arr = OpTypeArray %m4v2f16 %c_i32_16\n"
583                                                 "%m4v2f32arr = OpTypeArray %m4v2f32 %c_i32_16\n";
584                                         specs["matrix_decor"]   =
585                                                 "OpMemberDecorate %SSBO32 0 ColMajor\n"
586                                                 "OpMemberDecorate %SSBO32 0 MatrixStride 8\n"
587                                                 "OpMemberDecorate %SSBO16 0 ColMajor\n"
588                                                 "OpMemberDecorate %SSBO16 0 MatrixStride 4\n";
589                                         specs["matrix_store"]   =
590                                                 "%inloc_1  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_1\n"
591                                                 "%val16_1  = OpLoad %v2f16 %inloc_1\n"
592                                                 "%val32_1  = OpFConvert %v2f32 %val16_1\n"
593                                                 "%outloc_1 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_1\n"
594                                                 "            OpStore %outloc_1 %val32_1\n"
595
596                                                 "%inloc_2  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_2\n"
597                                                 "%val16_2  = OpLoad %v2f16 %inloc_2\n"
598                                                 "%val32_2  = OpFConvert %v2f32 %val16_2\n"
599                                                 "%outloc_2 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_2\n"
600                                                 "            OpStore %outloc_2 %val32_2\n"
601
602                                                 "%inloc_3  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_3\n"
603                                                 "%val16_3  = OpLoad %v2f16 %inloc_3\n"
604                                                 "%val32_3  = OpFConvert %v2f32 %val16_3\n"
605                                                 "%outloc_3 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_3\n"
606                                                 "            OpStore %outloc_3 %val32_3\n";
607                                 }
608
609                                 spec.assembly                   = shaderTemplate.specialize(specs);
610                                 spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
611                                 spec.verifyIO                   = check32BitFloats;
612                                 spec.inputTypes[0]              = CAPABILITIES[capIdx].dtype;
613
614                                 spec.inputs.push_back(BufferSp(new Float16Buffer(float16Data)));
615                                 spec.outputs.push_back(BufferSp(new Float32Buffer(float32Data)));
616                                 spec.extensions.push_back("VK_KHR_16bit_storage");
617                                 spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
618
619                                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
620                         }
621         }
622
623         {  // Integers
624                 const char              sintTypes[]             =
625                         "%i16       = OpTypeInt 16 1\n"
626                         "%i16ptr    = OpTypePointer Uniform %i16\n"
627                         "%i16arr    = OpTypeArray %i16 %c_i32_128\n"
628                         "%v4i16     = OpTypeVector %i16 4\n"
629                         "%v4i32     = OpTypeVector %i32 4\n"
630                         "%v4i16ptr  = OpTypePointer Uniform %v4i16\n"
631                         "%v4i32ptr  = OpTypePointer Uniform %v4i32\n"
632                         "%v4i16arr  = OpTypeArray %v4i16 %c_i32_32\n"
633                         "%v4i32arr  = OpTypeArray %v4i32 %c_i32_32\n";
634
635                 const char              uintTypes[]             =
636                         "%u16       = OpTypeInt 16 0\n"
637                         "%u16ptr    = OpTypePointer Uniform %u16\n"
638                         "%u32ptr    = OpTypePointer Uniform %u32\n"
639                         "%u16arr    = OpTypeArray %u16 %c_i32_128\n"
640                         "%u32arr    = OpTypeArray %u32 %c_i32_128\n"
641                         "%v4u16     = OpTypeVector %u16 4\n"
642                         "%v4u32     = OpTypeVector %u32 4\n"
643                         "%v4u16ptr  = OpTypePointer Uniform %v4u16\n"
644                         "%v4u32ptr  = OpTypePointer Uniform %v4u32\n"
645                         "%v4u16arr  = OpTypeArray %v4u16 %c_i32_32\n"
646                         "%v4u32arr  = OpTypeArray %v4u32 %c_i32_32\n";
647
648                 struct CompositeType
649                 {
650                         const char*     name;
651                         bool            isSigned;
652                         const char* types;
653                         const char*     base32;
654                         const char*     base16;
655                         const char* opcode;
656                         const char*     stride;
657                         unsigned        count;
658                 };
659
660                 const CompositeType     cTypes[]        =
661                 {
662                         {"scalar_sint", true,   sintTypes,      "i32",          "i16",          "OpSConvert",   "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",                 numElements},
663                         {"scalar_uint", false,  uintTypes,      "u32",          "u16",          "OpUConvert",   "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",                 numElements},
664                         {"vector_sint", true,   sintTypes,      "v4i32",        "v4i16",        "OpSConvert",   "OpDecorate %v4i32arr ArrayStride 16\nOpDecorate %v4i16arr ArrayStride 8\n",    numElements / 4},
665                         {"vector_uint", false,  uintTypes,      "v4u32",        "v4u16",        "OpUConvert",   "OpDecorate %v4u32arr ArrayStride 16\nOpDecorate %v4u16arr ArrayStride 8\n",    numElements / 4},
666                 };
667
668                 vector<deInt16> inputs                  = getInt16s(rnd, numElements);
669                 vector<deInt32> sOutputs;
670                 vector<deInt32> uOutputs;
671                 const deUint16  signBitMask             = 0x8000;
672                 const deUint32  signExtendMask  = 0xffff0000;
673
674                 sOutputs.reserve(inputs.size());
675                 uOutputs.reserve(inputs.size());
676
677                 for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
678                 {
679                         uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
680                         if (inputs[numNdx] & signBitMask)
681                                 sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
682                         else
683                                 sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
684                 }
685
686                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
687                         for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
688                         {
689                                 ComputeShaderSpec               spec;
690                                 map<string, string>             specs;
691                                 string                                  testName        = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name;
692
693                                 specs["capability"]             = CAPABILITIES[capIdx].cap;
694                                 specs["storage"]                = CAPABILITIES[capIdx].decor;
695                                 specs["stride"]                 = cTypes[tyIdx].stride;
696                                 specs["base32"]                 = cTypes[tyIdx].base32;
697                                 specs["base16"]                 = cTypes[tyIdx].base16;
698                                 specs["types"]                  = cTypes[tyIdx].types;
699                                 specs["convert"]                = cTypes[tyIdx].opcode;
700
701                                 spec.assembly                   = shaderTemplate.specialize(specs);
702                                 spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
703                                 spec.inputTypes[0]              = CAPABILITIES[capIdx].dtype;
704
705                                 spec.inputs.push_back(BufferSp(new Int16Buffer(inputs)));
706                                 if (cTypes[tyIdx].isSigned)
707                                         spec.outputs.push_back(BufferSp(new Int32Buffer(sOutputs)));
708                                 else
709                                         spec.outputs.push_back(BufferSp(new Int32Buffer(uOutputs)));
710                                 spec.extensions.push_back("VK_KHR_16bit_storage");
711                                 spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
712
713                                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
714                         }
715         }
716 }
717
// Registers compute-shader test cases on `group` that read 16-bit values from
// a push-constant block (%pc16), widen them to 32 bits with ${convert}
// (OpFConvert / OpSConvert / OpUConvert) and write the results to a storage
// buffer (%ssbo32).
//
// Float cases:   scalar, vector (4 components) and matrix (2 columns of 4).
// Integer cases: scalar and vector (2 components), each signed and unsigned.
//
// Every case lists the "VK_KHR_16bit_storage" extension and requests the
// EXT16BITSTORAGEFEATURES_PUSH_CONSTANT feature.
718 void addCompute16bitStoragePushConstant16To32Group (tcu::TestCaseGroup* group)
719 {
720         tcu::TestContext&                               testCtx                 = group->getTestContext();
            // The random generator is seeded from the group name.
721         de::Random                                              rnd                             (deStringHash(group->getName()));
            // Number of 16-bit input values; the scalar arrays in the shader are sized with %c_i32_64 to match.
            // NOTE(review): 64 elements at ArrayStride 2 is 128 bytes, presumably chosen to fit the
            // minimum guaranteed push-constant size -- confirm.
722         const int                                               numElements             = 64;
723
            // Shader skeleton shared by all cases in this group. Each invocation (LocalSize 1 1 1)
            // uses GlobalInvocationId.x as the array index: it loads one element from the
            // push-constant array, converts it and stores it at the same index of the output array.
            //
            // Placeholders ${stride}, ${types}, ${base16}, ${base32} and ${convert} are set for every
            // case below. ${matrix_decor}, ${matrix_types}, ${matrix_prefix}, ${index0} and
            // ${matrix_store} are only assigned for the float matrix case.
724         const StringTemplate                    shaderTemplate  (
725                 "OpCapability Shader\n"
726                 "OpCapability StoragePushConstant16\n"
727                 "OpExtension \"SPV_KHR_16bit_storage\"\n"
728                 "OpMemoryModel Logical GLSL450\n"
729                 "OpEntryPoint GLCompute %main \"main\" %id\n"
730                 "OpExecutionMode %main LocalSize 1 1 1\n"
731                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
732
733                 "${stride}"
734
735                 "OpDecorate %PC16 Block\n"
736                 "OpMemberDecorate %PC16 0 Offset 0\n"
737                 "OpMemberDecorate %SSBO32 0 Offset 0\n"
738                 "OpDecorate %SSBO32 BufferBlock\n"
739                 "OpDecorate %ssbo32 DescriptorSet 0\n"
740                 "OpDecorate %ssbo32 Binding 0\n"
741
742                 "${matrix_decor:opt}\n"
743
744                 "%bool      = OpTypeBool\n"
745                 "%void      = OpTypeVoid\n"
746                 "%voidf     = OpTypeFunction %void\n"
747                 "%u32       = OpTypeInt 32 0\n"
748                 "%i32       = OpTypeInt 32 1\n"
749                 "%f32       = OpTypeFloat 32\n"
750                 "%uvec3     = OpTypeVector %u32 3\n"
751                 "%fvec3     = OpTypeVector %f32 3\n"
752                 "%uvec3ptr  = OpTypePointer Input %uvec3\n"
753                 "%i32ptr    = OpTypePointer Uniform %i32\n"
754                 "%f32ptr    = OpTypePointer Uniform %f32\n"
755
756                 "%zero      = OpConstant %i32 0\n"
757                 "%c_i32_1   = OpConstant %i32 1\n"
758                 "%c_i32_8   = OpConstant %i32 8\n"
759                 "%c_i32_16  = OpConstant %i32 16\n"
760                 "%c_i32_32  = OpConstant %i32 32\n"
761                 "%c_i32_64  = OpConstant %i32 64\n"
762
763                 "%i32arr    = OpTypeArray %i32 %c_i32_64\n"
764                 "%f32arr    = OpTypeArray %f32 %c_i32_64\n"
765
766                 "${types}\n"
767                 "${matrix_types:opt}\n"
768
769                 "%PC16      = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
770                 "%pp_PC16   = OpTypePointer PushConstant %PC16\n"
771                 "%pc16      = OpVariable %pp_PC16 PushConstant\n"
772                 "%SSBO32    = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
773                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
774                 "%ssbo32    = OpVariable %up_SSBO32 Uniform\n"
775
776                 "%id        = OpVariable %uvec3ptr Input\n"
777
778                 "%main      = OpFunction %void None %voidf\n"
779                 "%label     = OpLabel\n"
780                 "%idval     = OpLoad %uvec3 %id\n"
781                 "%x         = OpCompositeExtract %u32 %idval 0\n"
782                 "%inloc     = OpAccessChain %${base16}ptr %pc16 %zero %x ${index0:opt}\n"
783                 "%val16     = OpLoad %${base16} %inloc\n"
784                 "%val32     = ${convert} %${base32} %val16\n"
785                 "%outloc    = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
786                 "             OpStore %outloc %val32\n"
787                 "${matrix_store:opt}\n"
788                 "             OpReturn\n"
789                 "             OpFunctionEnd\n");
790
791         {  // floats
                    // 16-bit float types plus the 4-component vector types used by the vector and matrix cases.
                    // The 16-bit pointers live in the PushConstant storage class, the 32-bit ones in Uniform.
792                 const char                                                                              floatTypes[]    =
793                         "%f16       = OpTypeFloat 16\n"
794                         "%f16ptr    = OpTypePointer PushConstant %f16\n"
795                         "%f16arr    = OpTypeArray %f16 %c_i32_64\n"
796                         "%v4f16     = OpTypeVector %f16 4\n"
797                         "%v4f32     = OpTypeVector %f32 4\n"
798                         "%v4f16ptr  = OpTypePointer PushConstant %v4f16\n"
799                         "%v4f32ptr  = OpTypePointer Uniform %v4f32\n"
800                         "%v4f16arr  = OpTypeArray %v4f16 %c_i32_16\n"
801                         "%v4f32arr  = OpTypeArray %v4f32 %c_i32_16\n";
802
                    // Per-case parameters:
                    //   name   - test name prefix ("_float" is appended below)
                    //   base32 - 32-bit type name (without '%') substituted into the template
                    //   base16 - 16-bit type name (without '%') substituted into the template
                    //   stride - ArrayStride decorations for the input and output arrays
                    //   count  - number of workgroups dispatched, one array element each
803                 struct CompositeType
804                 {
805                         const char*     name;
806                         const char*     base32;
807                         const char*     base16;
808                         const char*     stride;
809                         unsigned        count;
810                 };
811
                    // The matrix case uses the v4 types as base types because its access chains address
                    // individual columns; the array types get the "m2" prefix via ${matrix_prefix}.
812                 const CompositeType     cTypes[]        =
813                 {
814                         {"scalar",      "f32",          "f16",          "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",                         numElements},
815                         {"vector",      "v4f32",        "v4f16",        "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n",            numElements / 4},
816                         {"matrix",      "v4f32",        "v4f16",        "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n",       numElements / 8},
817                 };
818
819                 vector<deFloat16>       float16Data                     = getFloat16s(rnd, numElements);
820                 vector<float>           float32Data;
821
                    // Host-side reference: widen every half-float input with deFloat16To32.
822                 float32Data.reserve(numElements);
823                 for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
824                         float32Data.push_back(deFloat16To32(float16Data[numIdx]));
825
826                 for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
827                 {
828                         ComputeShaderSpec               spec;
829                         map<string, string>             specs;
830                         string                                  testName        = string(cTypes[tyIdx].name) + "_float";
831
832                         specs["stride"]                 = cTypes[tyIdx].stride;
833                         specs["base32"]                 = cTypes[tyIdx].base32;
834                         specs["base16"]                 = cTypes[tyIdx].base16;
835                         specs["types"]                  = floatTypes;
836                         specs["convert"]                = "OpFConvert";
837
                            // The matrix case handles two columns per invocation: the template's access
                            // chains get an extra %zero index (column 0), and matrix_store repeats the
                            // load / convert / store sequence for column 1 (%c_i32_1).
838                         if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
839                         {
840                                 specs["index0"]                 = "%zero";
841                                 specs["matrix_prefix"]  = "m2";
842                                 specs["matrix_types"]   =
843                                         "%m2v4f16 = OpTypeMatrix %v4f16 2\n"
844                                         "%m2v4f32 = OpTypeMatrix %v4f32 2\n"
845                                         "%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_8\n"
846                                         "%m2v4f32arr = OpTypeArray %m2v4f32 %c_i32_8\n";
847                                 specs["matrix_decor"]   =
848                                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
849                                         "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
850                                         "OpMemberDecorate %PC16 0 ColMajor\n"
851                                         "OpMemberDecorate %PC16 0 MatrixStride 8\n";
852                                 specs["matrix_store"]   =
853                                         "%inloc_1  = OpAccessChain %v4f16ptr %pc16 %zero %x %c_i32_1\n"
854                                         "%val16_1  = OpLoad %v4f16 %inloc_1\n"
855                                         "%val32_1  = OpFConvert %v4f32 %val16_1\n"
856                                         "%outloc_1 = OpAccessChain %v4f32ptr %ssbo32 %zero %x %c_i32_1\n"
857                                         "            OpStore %outloc_1 %val32_1\n";
858                         }
859
860                         spec.assembly                   = shaderTemplate.specialize(specs);
861                         spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
                            // Float results are checked with check32BitFloats instead of the default verification.
862                         spec.verifyIO                   = check32BitFloats;
                            // The 16-bit inputs are supplied as push constants rather than as an input buffer.
863                         spec.pushConstants              = BufferSp(new Float16Buffer(float16Data));
864
865                         spec.outputs.push_back(BufferSp(new Float32Buffer(float32Data)));
866                         spec.extensions.push_back("VK_KHR_16bit_storage");
867                         spec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
868
869                         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
870                 }
871         }
872         {  // integers
                    // Signed 16-bit types and 2-component vectors. %i32ptr and %i32arr come from the shared template.
873                 const char              sintTypes[]             =
874                         "%i16       = OpTypeInt 16 1\n"
875                         "%i16ptr    = OpTypePointer PushConstant %i16\n"
876                         "%i16arr    = OpTypeArray %i16 %c_i32_64\n"
877                         "%v2i16     = OpTypeVector %i16 2\n"
878                         "%v2i32     = OpTypeVector %i32 2\n"
879                         "%v2i16ptr  = OpTypePointer PushConstant %v2i16\n"
880                         "%v2i32ptr  = OpTypePointer Uniform %v2i32\n"
881                         "%v2i16arr  = OpTypeArray %v2i16 %c_i32_32\n"
882                         "%v2i32arr  = OpTypeArray %v2i32 %c_i32_32\n";
883
                    // Unsigned counterparts. The shared template declares no %u32 pointer or array type,
                    // so they are declared here.
884                 const char              uintTypes[]             =
885                         "%u16       = OpTypeInt 16 0\n"
886                         "%u16ptr    = OpTypePointer PushConstant %u16\n"
887                         "%u32ptr    = OpTypePointer Uniform %u32\n"
888                         "%u16arr    = OpTypeArray %u16 %c_i32_64\n"
889                         "%u32arr    = OpTypeArray %u32 %c_i32_64\n"
890                         "%v2u16     = OpTypeVector %u16 2\n"
891                         "%v2u32     = OpTypeVector %u32 2\n"
892                         "%v2u16ptr  = OpTypePointer PushConstant %v2u16\n"
893                         "%v2u32ptr  = OpTypePointer Uniform %v2u32\n"
894                         "%v2u16arr  = OpTypeArray %v2u16 %c_i32_32\n"
895                         "%v2u32arr  = OpTypeArray %v2u32 %c_i32_32\n";
896
                    // Per-case parameters:
                    //   name     - test name
                    //   isSigned - selects the sign-extended (true) or zero-extended (false) reference output
                    //   types    - SPIR-V type declarations substituted for ${types}
                    //   base32   - 32-bit type name (without '%')
                    //   base16   - 16-bit type name (without '%')
                    //   opcode   - conversion instruction substituted for ${convert}
                    //   stride   - ArrayStride decorations for the input and output arrays
                    //   count    - number of workgroups dispatched, one array element each
897                 struct CompositeType
898                 {
899                         const char*     name;
900                         bool            isSigned;
901                         const char* types;
902                         const char*     base32;
903                         const char*     base16;
904                         const char* opcode;
905                         const char*     stride;
906                         unsigned        count;
907                 };
908
909                 const CompositeType     cTypes[]        =
910                 {
911                         {"scalar_sint", true,   sintTypes,      "i32",          "i16",          "OpSConvert",   "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",         numElements},
912                         {"scalar_uint", false,  uintTypes,      "u32",          "u16",          "OpUConvert",   "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",         numElements},
913                         {"vector_sint", true,   sintTypes,      "v2i32",        "v2i16",        "OpSConvert",   "OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n",     numElements / 2},
914                         {"vector_uint", false,  uintTypes,      "v2u32",        "v2u16",        "OpUConvert",   "OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n",     numElements / 2},
915                 };
916
917                 vector<deInt16> inputs                  = getInt16s(rnd, numElements);
918                 vector<deInt32> sOutputs;
919                 vector<deInt32> uOutputs;
920                 const deUint16  signBitMask             = 0x8000;
921                 const deUint32  signExtendMask  = 0xffff0000;
922
923                 sOutputs.reserve(inputs.size());
924                 uOutputs.reserve(inputs.size());
925
                    // Host-side references for the same input bits:
                    //   uOutputs - zero-extended (the value is first cast to deUint16)
                    //   sOutputs - sign-extended (the upper 16 bits are set when bit 15 of the input is set)
926                 for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
927                 {
928                         uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
929                         if (inputs[numNdx] & signBitMask)
930                                 sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
931                         else
932                                 sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
933                 }
934
935                 for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
936                 {
937                         ComputeShaderSpec               spec;
938                         map<string, string>             specs;
939                         const char*                             testName        = cTypes[tyIdx].name;
940
941                         specs["stride"]                 = cTypes[tyIdx].stride;
942                         specs["base32"]                 = cTypes[tyIdx].base32;
943                         specs["base16"]                 = cTypes[tyIdx].base16;
944                         specs["types"]                  = cTypes[tyIdx].types;
945                         specs["convert"]                = cTypes[tyIdx].opcode;
946
947                         spec.assembly                   = shaderTemplate.specialize(specs);
948                         spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
                            // Signed and unsigned cases share the same push-constant bits; only the expected output differs.
949                         spec.pushConstants              = BufferSp(new Int16Buffer(inputs));
950
951                         if (cTypes[tyIdx].isSigned)
952                                 spec.outputs.push_back(BufferSp(new Int32Buffer(sOutputs)));
953                         else
954                                 spec.outputs.push_back(BufferSp(new Int32Buffer(uOutputs)));
955                         spec.extensions.push_back("VK_KHR_16bit_storage");
956                         spec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
957
958                         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName, testName, spec));
959                 }
960         }
961 }
962
// Registers graphics-pipeline test cases on `testGroup` that read 32-bit
// integers from one buffer (%ssbo32, binding 0), narrow them to 16 bits with
// OpSConvert / OpUConvert and write them to a second buffer (%ssbo16,
// binding 1).
//
// One set of cases is created, via createTestsForAllStages, for every
// combination of:
//   - category:   "scalar" (256 scalars) or "vector" (64 4-component vectors)
//   - capability: each entry of CAPABILITIES (declared outside this function)
//   - signedness: "sint" or "uint"
963 void addGraphics16BitStorageUniformInt32To16Group (tcu::TestCaseGroup* testGroup)
964 {
            // The random generator is seeded from the group name.
965         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
            // Shader fragments handed to createTestsForAllStages; the per-case entries are
            // overwritten on every loop iteration below, "extension" is set once.
966         map<string, string>                                     fragments;
            // Total number of 32-bit input values; matches %c_i32_256 (scalar) and 64 x 4 (vector).
967         const deUint32                                          numDataPoints           = 256;
968         RGBA                                                            defaultColors[4];
969         GraphicsResources                                       resources;
970         vector<string>                                          extensions;
971         const StringTemplate                            capabilities            ("OpCapability ${cap}\n");
972         // inputs and outputs are declared to be vectors of signed integers.
973         // However, depending on the test, they may be interpreted as unsigned
974         // integers. That won't be a problem as long as we passed the bits
975         // in faithfully to the pipeline.
976         vector<deInt32>                                         inputs                          = getInt32s(rnd, numDataPoints);
977         vector<deInt16>                                         outputs;
978
            // Host-side reference: keep only the low 16 bits of each 32-bit input.
979         outputs.reserve(inputs.size());
980         for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
981                 outputs.push_back(static_cast<deInt16>(0xffff & inputs[numNdx]));
982
            // The input descriptor type set here is a placeholder; it is replaced per capability
            // inside the loop at the end of this function.
983         resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(inputs))));
984         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(outputs))));
985
986         extensions.push_back("VK_KHR_16bit_storage");
987         fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"";
988
989         getDefaultColors(defaultColors);
990
            // Per-signedness parameters:
            //   name     - test name suffix
            //   type32   - 32-bit integer type id substituted for ${itype32}
            //   type16   - 16-bit integer type id substituted for ${itype16}
            //   opcode   - conversion instruction substituted for ${convert}
            //   isSigned - signedness operand of the 16-bit OpTypeInt (${signed})
991         struct IntegerFacts
992         {
993                 const char*     name;
994                 const char*     type32;
995                 const char*     type16;
996                 const char* opcode;
997                 const char*     isSigned;
998         };
999
1000         const IntegerFacts      intFacts[]              =
1001         {
1002                 {"sint",        "%i32",         "%i16",         "OpSConvert",   "1"},
1003                 {"uint",        "%u32",         "%u16",         "OpUConvert",   "0"},
1004         };
1005
             // Scalar category: type, constant and variable declarations for two 256-element arrays.
1006         const StringTemplate    scalarPreMain(
1007                         "${itype16} = OpTypeInt 16 ${signed}\n"
1008                         "%c_i32_256 = OpConstant %i32 256\n"
1009                         "   %up_i32 = OpTypePointer Uniform ${itype32}\n"
1010                         "   %up_i16 = OpTypePointer Uniform ${itype16}\n"
1011                         "   %ra_i32 = OpTypeArray ${itype32} %c_i32_256\n"
1012                         "   %ra_i16 = OpTypeArray ${itype16} %c_i32_256\n"
1013                         "   %SSBO32 = OpTypeStruct %ra_i32\n"
1014                         "   %SSBO16 = OpTypeStruct %ra_i16\n"
1015                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1016                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1017                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
1018                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
1019
             // Scalar category: layout and binding decorations. The input block decoration
             // (${indecor}) depends on the capability; the output block is always a BufferBlock.
1020         const StringTemplate    scalarDecoration(
1021                         "OpDecorate %ra_i32 ArrayStride 4\n"
1022                         "OpDecorate %ra_i16 ArrayStride 2\n"
1023                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1024                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1025                         "OpDecorate %SSBO32 ${indecor}\n"
1026                         "OpDecorate %SSBO16 BufferBlock\n"
1027                         "OpDecorate %ssbo32 DescriptorSet 0\n"
1028                         "OpDecorate %ssbo16 DescriptorSet 0\n"
1029                         "OpDecorate %ssbo32 Binding 0\n"
1030                         "OpDecorate %ssbo16 Binding 1\n");
1031
             // Scalar category: %test_code loops i = 0..255, converting element i of %ssbo32 and
             // storing it to element i of %ssbo16, then returns its parameter unchanged.
1032         const StringTemplate    scalarTestFunc(
1033                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
1034                         "    %param = OpFunctionParameter %v4f32\n"
1035
1036                         "%entry = OpLabel\n"
1037                         "    %i = OpVariable %fp_i32 Function\n"
1038                         "         OpStore %i %c_i32_0\n"
1039                         "         OpBranch %loop\n"
1040
1041                         " %loop = OpLabel\n"
1042                         "   %15 = OpLoad %i32 %i\n"
1043                         "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
1044                         "         OpLoopMerge %merge %inc None\n"
1045                         "         OpBranchConditional %lt %write %merge\n"
1046
1047                         "%write = OpLabel\n"
1048                         "   %30 = OpLoad %i32 %i\n"
1049                         "  %src = OpAccessChain %up_i32 %ssbo32 %c_i32_0 %30\n"
1050                         "%val32 = OpLoad ${itype32} %src\n"
1051                         "%val16 = ${convert} ${itype16} %val32\n"
1052                         "  %dst = OpAccessChain %up_i16 %ssbo16 %c_i32_0 %30\n"
1053                         "         OpStore %dst %val16\n"
1054                         "         OpBranch %inc\n"
1055
1056                         "  %inc = OpLabel\n"
1057                         "   %37 = OpLoad %i32 %i\n"
1058                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
1059                         "         OpStore %i %39\n"
1060                         "         OpBranch %loop\n"
1061
1062                         "%merge = OpLabel\n"
1063                         "         OpReturnValue %param\n"
1064
1065                         "OpFunctionEnd\n");
1066
             // Vector category: same declarations as the scalar category, but for arrays of
             // 64 4-component vectors.
1067         const StringTemplate    vecPreMain(
1068                         "${itype16} = OpTypeInt 16 ${signed}\n"
1069                         " %c_i32_64 = OpConstant %i32 64\n"
1070                         "%v4itype32 = OpTypeVector ${itype32} 4\n"
1071                         "%v4itype16 = OpTypeVector ${itype16} 4\n"
1072                         " %up_v4i32 = OpTypePointer Uniform %v4itype32\n"
1073                         " %up_v4i16 = OpTypePointer Uniform %v4itype16\n"
1074                         " %ra_v4i32 = OpTypeArray %v4itype32 %c_i32_64\n"
1075                         " %ra_v4i16 = OpTypeArray %v4itype16 %c_i32_64\n"
1076                         "   %SSBO32 = OpTypeStruct %ra_v4i32\n"
1077                         "   %SSBO16 = OpTypeStruct %ra_v4i16\n"
1078                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1079                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1080                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
1081                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
1082
             // Vector category: decorations, with array strides of 16 and 8 bytes.
1083         const StringTemplate    vecDecoration(
1084                         "OpDecorate %ra_v4i32 ArrayStride 16\n"
1085                         "OpDecorate %ra_v4i16 ArrayStride 8\n"
1086                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1087                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1088                         "OpDecorate %SSBO32 ${indecor}\n"
1089                         "OpDecorate %SSBO16 BufferBlock\n"
1090                         "OpDecorate %ssbo32 DescriptorSet 0\n"
1091                         "OpDecorate %ssbo16 DescriptorSet 0\n"
1092                         "OpDecorate %ssbo32 Binding 0\n"
1093                         "OpDecorate %ssbo16 Binding 1\n");
1094
             // Vector category: %test_code loops i = 0..63, converting one 4-component vector per
             // iteration, then returns its parameter unchanged.
1095         const StringTemplate    vecTestFunc(
1096                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
1097                         "    %param = OpFunctionParameter %v4f32\n"
1098
1099                         "%entry = OpLabel\n"
1100                         "    %i = OpVariable %fp_i32 Function\n"
1101                         "         OpStore %i %c_i32_0\n"
1102                         "         OpBranch %loop\n"
1103
1104                         " %loop = OpLabel\n"
1105                         "   %15 = OpLoad %i32 %i\n"
1106                         "   %lt = OpSLessThan %bool %15 %c_i32_64\n"
1107                         "         OpLoopMerge %merge %inc None\n"
1108                         "         OpBranchConditional %lt %write %merge\n"
1109
1110                         "%write = OpLabel\n"
1111                         "   %30 = OpLoad %i32 %i\n"
1112                         "  %src = OpAccessChain %up_v4i32 %ssbo32 %c_i32_0 %30\n"
1113                         "%val32 = OpLoad %v4itype32 %src\n"
1114                         "%val16 = ${convert} %v4itype16 %val32\n"
1115                         "  %dst = OpAccessChain %up_v4i16 %ssbo16 %c_i32_0 %30\n"
1116                         "         OpStore %dst %val16\n"
1117                         "         OpBranch %inc\n"
1118
1119                         "  %inc = OpLabel\n"
1120                         "   %37 = OpLoad %i32 %i\n"
1121                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
1122                         "         OpStore %i %39\n"
1123                         "         OpBranch %loop\n"
1124
1125                         "%merge = OpLabel\n"
1126                         "         OpReturnValue %param\n"
1127
1128                         "OpFunctionEnd\n");
1129
             // Bundles the three templates of one category. The members are references to the
             // StringTemplate objects declared above in this function.
1130         struct Category
1131         {
1132                 const char*                             name;
1133                 const StringTemplate&   preMain;
1134                 const StringTemplate&   decoration;
1135                 const StringTemplate&   testFunction;
1136         };
1137
1138         const Category          categories[]    =
1139         {
1140                 {"scalar",      scalarPreMain,  scalarDecoration,       scalarTestFunc},
1141                 {"vector",      vecPreMain,             vecDecoration,          vecTestFunc},
1142         };
1143
             // Test names have the form <capability name>_<category>_<sint|uint>.
1144         for (deUint32 catIdx = 0; catIdx < DE_LENGTH_OF_ARRAY(categories); ++catIdx)
1145                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1146                         for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
1147                         {
1148                                 map<string, string>     specs;
1149                                 string                          name            = string(CAPABILITIES[capIdx].name) + "_" + categories[catIdx].name + "_" + intFacts[factIdx].name;
1150
1151                                 specs["cap"]                                    = CAPABILITIES[capIdx].cap;
1152                                 specs["indecor"]                                = CAPABILITIES[capIdx].decor;
1153                                 specs["itype32"]                                = intFacts[factIdx].type32;
1154                                 specs["itype16"]                                = intFacts[factIdx].type16;
1155                                 specs["signed"]                                 = intFacts[factIdx].isSigned;
1156                                 specs["convert"]                                = intFacts[factIdx].opcode;
1157
1158                                 fragments["pre_main"]                   = categories[catIdx].preMain.specialize(specs);
1159                                 fragments["testfun"]                    = categories[catIdx].testFunction.specialize(specs);
1160                                 fragments["capability"]                 = capabilities.specialize(specs);
1161                                 fragments["decoration"]                 = categories[catIdx].decoration.specialize(specs);
1162
                                     // Bind the 32-bit input buffer with the descriptor type of the current capability.
1163                                 resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
1164
                                     // defaultColors is passed for both colour arguments; the shader's test function
                                     // returns its parameter unchanged.
1165                                 createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
1166                         }
1167 }
1168
// Registers compute-shader cases on `group` that read 32-bit values from the
// input buffer (%ssbo32, binding 0), narrow them to 16 bits with ${convert}
// and write the result to a BufferBlock-decorated output buffer (%ssbo16,
// binding 1).
//
// Two families of cases are generated, each iterated over every entry of
// CAPABILITIES (which supplies the capability name, the decoration applied to
// %SSBO32, the input descriptor type and the feature set to request):
//   * Floats:   scalar / vector / matrix x {rtz, rte, unspecified} using
//               OpFConvert; results are checked by a custom verifyIO callback.
//   * Integers: scalar / vector x {signed, unsigned} using OpSConvert or
//               OpUConvert; results are compared with a precomputed buffer.
//
// The random generator is seeded from the hash of the group name, and the
// float inputs are drawn from it before the integer inputs.
// NOTE(review): presumably this draw order must be kept for the generated
// data to stay the same across runs - confirm before reordering.
1169 void addCompute16bitStorageUniform32To16Group (tcu::TestCaseGroup* group)
1170 {
1171         tcu::TestContext&                               testCtx                 = group->getTestContext();
1172         de::Random                                              rnd                             (deStringHash(group->getName()));
1173         const int                                               numElements             = 128;
1174
             // Shader shared by all cases below. The local size is 1x1x1 and the x
             // component of GlobalInvocationId is used as the array index, so each
             // work group converts exactly one array element.
             // Placeholders carrying ":opt" are not set by every case: index0 and the
             // matrix_* keys are only set by the float matrix case, and rounding is
             // only set by the float cases.
1175         const StringTemplate                    shaderTemplate  (
1176                 "OpCapability Shader\n"
1177                 "OpCapability ${capability}\n"
1178                 "OpExtension \"SPV_KHR_16bit_storage\"\n"
1179                 "OpMemoryModel Logical GLSL450\n"
1180                 "OpEntryPoint GLCompute %main \"main\" %id\n"
1181                 "OpExecutionMode %main LocalSize 1 1 1\n"
1182                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1183
1184                 "${stride}"
1185
1186                 "OpMemberDecorate %SSBO32 0 Offset 0\n"
1187                 "OpMemberDecorate %SSBO16 0 Offset 0\n"
1188                 "OpDecorate %SSBO32 ${storage}\n"
1189                 "OpDecorate %SSBO16 BufferBlock\n"
1190                 "OpDecorate %ssbo32 DescriptorSet 0\n"
1191                 "OpDecorate %ssbo16 DescriptorSet 0\n"
1192                 "OpDecorate %ssbo32 Binding 0\n"
1193                 "OpDecorate %ssbo16 Binding 1\n"
1194
1195                 "${matrix_decor:opt}\n"
1196
1197                 "${rounding:opt}\n"
1198
1199                 "%bool      = OpTypeBool\n"
1200                 "%void      = OpTypeVoid\n"
1201                 "%voidf     = OpTypeFunction %void\n"
1202                 "%u32       = OpTypeInt 32 0\n"
1203                 "%i32       = OpTypeInt 32 1\n"
1204                 "%f32       = OpTypeFloat 32\n"
1205                 "%uvec3     = OpTypeVector %u32 3\n"
1206                 "%fvec3     = OpTypeVector %f32 3\n"
1207                 "%uvec3ptr  = OpTypePointer Input %uvec3\n"
1208                 "%i32ptr    = OpTypePointer Uniform %i32\n"
1209                 "%f32ptr    = OpTypePointer Uniform %f32\n"
1210
1211                 "%zero      = OpConstant %i32 0\n"
1212                 "%c_i32_1   = OpConstant %i32 1\n"
1213                 "%c_i32_16  = OpConstant %i32 16\n"
1214                 "%c_i32_32  = OpConstant %i32 32\n"
1215                 "%c_i32_64  = OpConstant %i32 64\n"
1216                 "%c_i32_128 = OpConstant %i32 128\n"
1217
1218                 "%i32arr    = OpTypeArray %i32 %c_i32_128\n"
1219                 "%f32arr    = OpTypeArray %f32 %c_i32_128\n"
1220
1221                 "${types}\n"
1222                 "${matrix_types:opt}\n"
1223
1224                 "%SSBO32    = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
1225                 "%SSBO16    = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
1226                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1227                 "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1228                 "%ssbo32    = OpVariable %up_SSBO32 Uniform\n"
1229                 "%ssbo16    = OpVariable %up_SSBO16 Uniform\n"
1230
1231                 "%id        = OpVariable %uvec3ptr Input\n"
1232
1233                 "%main      = OpFunction %void None %voidf\n"
1234                 "%label     = OpLabel\n"
1235                 "%idval     = OpLoad %uvec3 %id\n"
1236                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1237                 "%inloc     = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
1238                 "%val32     = OpLoad %${base32} %inloc\n"
1239                 "%val16     = ${convert} %${base16} %val32\n"
1240                 "%outloc    = OpAccessChain %${base16}ptr %ssbo16 %zero %x ${index0:opt}\n"
1241                 "             OpStore %outloc %val16\n"
1242                 "${matrix_store:opt}\n"
1243                 "             OpReturn\n"
1244                 "             OpFunctionEnd\n");
1245
1246         {  // Floats
1247                 const char                                              floatTypes[]    =
1248                         "%f16       = OpTypeFloat 16\n"
1249                         "%f16ptr    = OpTypePointer Uniform %f16\n"
1250                         "%f16arr    = OpTypeArray %f16 %c_i32_128\n"
1251                         "%v4f16     = OpTypeVector %f16 4\n"
1252                         "%v4f32     = OpTypeVector %f32 4\n"
1253                         "%v4f16ptr  = OpTypePointer Uniform %v4f16\n"
1254                         "%v4f32ptr  = OpTypePointer Uniform %v4f32\n"
1255                         "%v4f16arr  = OpTypeArray %v4f16 %c_i32_32\n"
1256                         "%v4f32arr  = OpTypeArray %v4f32 %c_i32_32\n";
1257
                     // One rounding-mode variant: test-name suffix, the FPRoundingMode
                     // decoration placed on %val16 (empty when unspecified) and the
                     // verification callback instantiated for the accepted mode(s).
1258                 struct RndMode
1259                 {
1260                         const char*                             name;
1261                         const char*                             decor;
1262                         ComputeVerifyIOFunc             func;
1263                 };
1264
1265                 const RndMode           rndModes[]              =
1266                 {
1267                         {"rtz",                                         "OpDecorate %val16  FPRoundingMode RTZ",        computeCheck16BitFloats<ROUNDINGMODE_RTZ>},
1268                         {"rte",                                         "OpDecorate %val16  FPRoundingMode RTE",        computeCheck16BitFloats<ROUNDINGMODE_RTE>},
1269                         {"unspecified_rnd_mode",        "",                                                                                     computeCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
1270                 };
1271
                     // One data layout variant. `count` is the number of work groups to
                     // dispatch, i.e. the number of array elements of that layout which
                     // together cover the numElements input floats.
1272                 struct CompositeType
1273                 {
1274                         const char*     name;
1275                         const char*     base32;
1276                         const char*     base16;
1277                         const char*     stride;
1278                         unsigned        count;
1279                 };
1280
1281                 const CompositeType     cTypes[]        =
1282                 {
1283                         {"scalar",      "f32",          "f16",          "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",                         numElements},
1284                         {"vector",      "v4f32",        "v4f16",        "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n",            numElements / 4},
1285                         {"matrix",      "v4f32",        "v4f16",        "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n",       numElements / 8},
1286                 };
1287
1288                 vector<float>           float32Data                     = getFloat32s(rnd, numElements);
1289                 vector<deFloat16>       float16DummyData        (numElements, 0);
1290
1291                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1292                         for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
1293                                 for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
1294                                 {
1295                                         ComputeShaderSpec               spec;
1296                                         map<string, string>             specs;
1297                                         string                                  testName        = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name + "_float_" + rndModes[rndModeIdx].name;
1298
1299                                         specs["capability"]             = CAPABILITIES[capIdx].cap;
1300                                         specs["storage"]                = CAPABILITIES[capIdx].decor;
1301                                         specs["stride"]                 = cTypes[tyIdx].stride;
1302                                         specs["base32"]                 = cTypes[tyIdx].base32;
1303                                         specs["base16"]                 = cTypes[tyIdx].base16;
1304                                         specs["rounding"]               = rndModes[rndModeIdx].decor;
1305                                         specs["types"]                  = floatTypes;
1306                                         specs["convert"]                = "OpFConvert";
1307
                                             // Matrices have two v4 columns and are converted column by
                                             // column: the template's main load/convert/store sequence
                                             // handles column 0 (index0 = %zero) and matrix_store appends
                                             // the same sequence for column 1. The second conversion
                                             // result (%val16_1) gets its own rounding decoration when a
                                             // rounding mode is specified.
1308                                         if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
1309                                         {
1310                                                 if (strcmp(rndModes[rndModeIdx].name, "rtz") == 0)
1311                                                         specs["rounding"] += "\nOpDecorate %val16_1  FPRoundingMode RTZ\n";
1312                                                 else if (strcmp(rndModes[rndModeIdx].name, "rte") == 0)
1313                                                         specs["rounding"] += "\nOpDecorate %val16_1  FPRoundingMode RTE\n";
1314
1315                                                 specs["index0"]                 = "%zero";
1316                                                 specs["matrix_prefix"]  = "m2";
1317                                                 specs["matrix_types"]   =
1318                                                         "%m2v4f16 = OpTypeMatrix %v4f16 2\n"
1319                                                         "%m2v4f32 = OpTypeMatrix %v4f32 2\n"
1320                                                         "%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_16\n"
1321                                                         "%m2v4f32arr = OpTypeArray %m2v4f32 %c_i32_16\n";
1322                                                 specs["matrix_decor"]   =
1323                                                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
1324                                                         "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
1325                                                         "OpMemberDecorate %SSBO16 0 ColMajor\n"
1326                                                         "OpMemberDecorate %SSBO16 0 MatrixStride 8\n";
1327                                                 specs["matrix_store"]   =
1328                                                         "%inloc_1  = OpAccessChain %v4f32ptr %ssbo32 %zero %x %c_i32_1\n"
1329                                                         "%val32_1  = OpLoad %v4f32 %inloc_1\n"
1330                                                         "%val16_1  = OpFConvert %v4f16 %val32_1\n"
1331                                                         "%outloc_1 = OpAccessChain %v4f16ptr %ssbo16 %zero %x %c_i32_1\n"
1332                                                         "            OpStore %outloc_1 %val16_1\n";
1333                                         }
1334
1335                                         spec.assembly                   = shaderTemplate.specialize(specs);
1336                                         spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
1337                                         spec.verifyIO                   = rndModes[rndModeIdx].func;
1338                                         spec.inputTypes[0]              = CAPABILITIES[capIdx].dtype;
1339
1340                                         spec.inputs.push_back(BufferSp(new Float32Buffer(float32Data)));
1341                                         // We provided a custom verifyIO in the above in which inputs will be used for checking.
1342                                         // So put dummy data in the expected values.
1343                                         spec.outputs.push_back(BufferSp(new Float16Buffer(float16DummyData)));
1344                                         spec.extensions.push_back("VK_KHR_16bit_storage");
1345                                         spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
1346
1347                                         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
1348                                 }
1349         }
1350
1351         {  // Integers
1352                 const char              sintTypes[]     =
1353                         "%i16       = OpTypeInt 16 1\n"
1354                         "%i16ptr    = OpTypePointer Uniform %i16\n"
1355                         "%i16arr    = OpTypeArray %i16 %c_i32_128\n"
1356                         "%v2i16     = OpTypeVector %i16 2\n"
1357                         "%v2i32     = OpTypeVector %i32 2\n"
1358                         "%v2i16ptr  = OpTypePointer Uniform %v2i16\n"
1359                         "%v2i32ptr  = OpTypePointer Uniform %v2i32\n"
1360                         "%v2i16arr  = OpTypeArray %v2i16 %c_i32_64\n"
1361                         "%v2i32arr  = OpTypeArray %v2i32 %c_i32_64\n";
1362
                     // Unlike the signed set, the unsigned set also declares %u32ptr and
                     // %u32arr because the shared template only provides the i32/f32 ones.
1363                 const char              uintTypes[]     =
1364                         "%u16       = OpTypeInt 16 0\n"
1365                         "%u16ptr    = OpTypePointer Uniform %u16\n"
1366                         "%u32ptr    = OpTypePointer Uniform %u32\n"
1367                         "%u16arr    = OpTypeArray %u16 %c_i32_128\n"
1368                         "%u32arr    = OpTypeArray %u32 %c_i32_128\n"
1369                         "%v2u16     = OpTypeVector %u16 2\n"
1370                         "%v2u32     = OpTypeVector %u32 2\n"
1371                         "%v2u16ptr  = OpTypePointer Uniform %v2u16\n"
1372                         "%v2u32ptr  = OpTypePointer Uniform %v2u32\n"
1373                         "%v2u16arr  = OpTypeArray %v2u16 %c_i32_64\n"
1374                         "%v2u32arr  = OpTypeArray %v2u32 %c_i32_64\n";
1375
                     // One integer variant: test-name suffix, the extra type declarations
                     // to splice in, the 32-/16-bit element type ids, the conversion
                     // opcode, the ArrayStride decorations and the work group count.
1376                 struct CompositeType
1377                 {
1378                         const char*     name;
1379                         const char* types;
1380                         const char*     base32;
1381                         const char*     base16;
1382                         const char* opcode;
1383                         const char*     stride;
1384                         unsigned        count;
1385                 };
1386
1387                 const CompositeType     cTypes[]        =
1388                 {
1389                         {"scalar_sint", sintTypes,      "i32",          "i16",          "OpSConvert",   "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",         numElements},
1390                         {"scalar_uint", uintTypes,      "u32",          "u16",          "OpUConvert",   "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",         numElements},
1391                         {"vector_sint", sintTypes,      "v2i32",        "v2i16",        "OpSConvert",   "OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n",     numElements / 2},
1392                         {"vector_uint", uintTypes,      "v2u32",        "v2u16",        "OpUConvert",   "OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n",     numElements / 2},
1393                 };
1394
                     // The expected value of every element is the low 16 bits of the
                     // corresponding 32-bit input. The same input and expected buffers are
                     // used for the signed and the unsigned variants.
1395                 vector<deInt32> inputs                  = getInt32s(rnd, numElements);
1396                 vector<deInt16> outputs;
1397
1398                 outputs.reserve(inputs.size());
1399                 for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
1400                         outputs.push_back(static_cast<deInt16>(0xffff & inputs[numNdx]));
1401
1402                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1403                         for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
1404                         {
1405                                 ComputeShaderSpec               spec;
1406                                 map<string, string>             specs;
1407                                 string                                  testName        = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name;
1408
1409                                 specs["capability"]             = CAPABILITIES[capIdx].cap;
1410                                 specs["storage"]                = CAPABILITIES[capIdx].decor;
1411                                 specs["stride"]                 = cTypes[tyIdx].stride;
1412                                 specs["base32"]                 = cTypes[tyIdx].base32;
1413                                 specs["base16"]                 = cTypes[tyIdx].base16;
1414                                 specs["types"]                  = cTypes[tyIdx].types;
1415                                 specs["convert"]                = cTypes[tyIdx].opcode;
1416
1417                                 spec.assembly                   = shaderTemplate.specialize(specs);
1418                                 spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
1419                                 spec.inputTypes[0]              = CAPABILITIES[capIdx].dtype;
1420
1421                                 spec.inputs.push_back(BufferSp(new Int32Buffer(inputs)));
1422                                 spec.outputs.push_back(BufferSp(new Int16Buffer(outputs)));
1423                                 spec.extensions.push_back("VK_KHR_16bit_storage");
1424                                 spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
1425
1426                                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
1427                         }
1428         }
1429 }
1430
1431 void addGraphics16BitStorageUniformFloat32To16Group (tcu::TestCaseGroup* testGroup)
1432 {
1433         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
1434         map<string, string>                                     fragments;
1435         GraphicsResources                                       resources;
1436         vector<string>                                          extensions;
1437         const deUint32                                          numDataPoints           = 256;
1438         RGBA                                                            defaultColors[4];
1439         vector<float>                                           float32Data                     = getFloat32s(rnd, numDataPoints);
1440         vector<deFloat16>                                       float16DummyData        (numDataPoints, 0);
1441         const StringTemplate                            capabilities            ("OpCapability ${cap}\n");
1442
1443         resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data))));
1444         // We use a custom verifyIO to check the result via computing directly from inputs; the contents in outputs do not matter.
1445         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16DummyData))));
1446
1447         extensions.push_back("VK_KHR_16bit_storage");
1448         fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"";
1449
1450         struct RndMode
1451         {
1452                 const char*                             name;
1453                 const char*                             decor;
1454                 GraphicsVerifyIOFunc    f;
1455         };
1456
1457         getDefaultColors(defaultColors);
1458
1459         {  // scalar cases
1460                 fragments["pre_main"]                           =
1461                         "      %f16 = OpTypeFloat 16\n"
1462                         "%c_i32_256 = OpConstant %i32 256\n"
1463                         "   %up_f32 = OpTypePointer Uniform %f32\n"
1464                         "   %up_f16 = OpTypePointer Uniform %f16\n"
1465                         "   %ra_f32 = OpTypeArray %f32 %c_i32_256\n"
1466                         "   %ra_f16 = OpTypeArray %f16 %c_i32_256\n"
1467                         "   %SSBO32 = OpTypeStruct %ra_f32\n"
1468                         "   %SSBO16 = OpTypeStruct %ra_f16\n"
1469                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1470                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1471                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
1472                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
1473
1474                 const StringTemplate decoration         (
1475                         "OpDecorate %ra_f32 ArrayStride 4\n"
1476                         "OpDecorate %ra_f16 ArrayStride 2\n"
1477                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1478                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1479                         "OpDecorate %SSBO32 ${indecor}\n"
1480                         "OpDecorate %SSBO16 BufferBlock\n"
1481                         "OpDecorate %ssbo32 DescriptorSet 0\n"
1482                         "OpDecorate %ssbo16 DescriptorSet 0\n"
1483                         "OpDecorate %ssbo32 Binding 0\n"
1484                         "OpDecorate %ssbo16 Binding 1\n"
1485                         "${rounddecor}\n");
1486
1487                 fragments["testfun"]                            =
1488                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
1489                         "    %param = OpFunctionParameter %v4f32\n"
1490
1491                         "%entry = OpLabel\n"
1492                         "    %i = OpVariable %fp_i32 Function\n"
1493                         "         OpStore %i %c_i32_0\n"
1494                         "         OpBranch %loop\n"
1495
1496                         " %loop = OpLabel\n"
1497                         "   %15 = OpLoad %i32 %i\n"
1498                         "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
1499                         "         OpLoopMerge %merge %inc None\n"
1500                         "         OpBranchConditional %lt %write %merge\n"
1501
1502                         "%write = OpLabel\n"
1503                         "   %30 = OpLoad %i32 %i\n"
1504                         "  %src = OpAccessChain %up_f32 %ssbo32 %c_i32_0 %30\n"
1505                         "%val32 = OpLoad %f32 %src\n"
1506                         "%val16 = OpFConvert %f16 %val32\n"
1507                         "  %dst = OpAccessChain %up_f16 %ssbo16 %c_i32_0 %30\n"
1508                         "         OpStore %dst %val16\n"
1509                         "         OpBranch %inc\n"
1510
1511                         "  %inc = OpLabel\n"
1512                         "   %37 = OpLoad %i32 %i\n"
1513                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
1514                         "         OpStore %i %39\n"
1515                         "         OpBranch %loop\n"
1516
1517                         "%merge = OpLabel\n"
1518                         "         OpReturnValue %param\n"
1519
1520                         "OpFunctionEnd\n";
1521
1522                 const RndMode   rndModes[] =
1523                 {
1524                         {"rtz",                                         "OpDecorate %val16  FPRoundingMode RTZ",        graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
1525                         {"rte",                                         "OpDecorate %val16  FPRoundingMode RTE",        graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
1526                         {"unspecified_rnd_mode",        "",                                                                                     graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
1527                 };
1528
1529                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1530                         for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
1531                         {
1532                                 map<string, string>     specs;
1533                                 string                          testName        = string(CAPABILITIES[capIdx].name) + "_scalar_float_" + rndModes[rndModeIdx].name;
1534
1535                                 specs["cap"]                                    = CAPABILITIES[capIdx].cap;
1536                                 specs["indecor"]                                = CAPABILITIES[capIdx].decor;
1537                                 specs["rounddecor"]                             = rndModes[rndModeIdx].decor;
1538
1539                                 fragments["capability"]                 = capabilities.specialize(specs);
1540                                 fragments["decoration"]                 = decoration.specialize(specs);
1541
1542                                 resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
1543                                 resources.verifyIO                              = rndModes[rndModeIdx].f;
1544
1545
1546                                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
1547                         }
1548         }
1549
1550         {  // vector cases
1551                 fragments["pre_main"]                           =
1552                         "      %f16 = OpTypeFloat 16\n"
1553                         " %c_i32_64 = OpConstant %i32 64\n"
1554                         "        %v4f16 = OpTypeVector %f16 4\n"
1555                         " %up_v4f32 = OpTypePointer Uniform %v4f32\n"
1556                         " %up_v4f16 = OpTypePointer Uniform %v4f16\n"
1557                         " %ra_v4f32 = OpTypeArray %v4f32 %c_i32_64\n"
1558                         " %ra_v4f16 = OpTypeArray %v4f16 %c_i32_64\n"
1559                         "   %SSBO32 = OpTypeStruct %ra_v4f32\n"
1560                         "   %SSBO16 = OpTypeStruct %ra_v4f16\n"
1561                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1562                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1563                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
1564                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
1565
1566                 const StringTemplate decoration         (
1567                         "OpDecorate %ra_v4f32 ArrayStride 16\n"
1568                         "OpDecorate %ra_v4f16 ArrayStride 8\n"
1569                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1570                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1571                         "OpDecorate %SSBO32 ${indecor}\n"
1572                         "OpDecorate %SSBO16 BufferBlock\n"
1573                         "OpDecorate %ssbo32 DescriptorSet 0\n"
1574                         "OpDecorate %ssbo16 DescriptorSet 0\n"
1575                         "OpDecorate %ssbo32 Binding 0\n"
1576                         "OpDecorate %ssbo16 Binding 1\n"
1577                         "${rounddecor}\n");
1578
1579                 // ssbo16[] <- convert ssbo32[] to 16bit float
1580                 fragments["testfun"]                            =
1581                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
1582                         "    %param = OpFunctionParameter %v4f32\n"
1583
1584                         "%entry = OpLabel\n"
1585                         "    %i = OpVariable %fp_i32 Function\n"
1586                         "         OpStore %i %c_i32_0\n"
1587                         "         OpBranch %loop\n"
1588
1589                         " %loop = OpLabel\n"
1590                         "   %15 = OpLoad %i32 %i\n"
1591                         "   %lt = OpSLessThan %bool %15 %c_i32_64\n"
1592                         "         OpLoopMerge %merge %inc None\n"
1593                         "         OpBranchConditional %lt %write %merge\n"
1594
1595                         "%write = OpLabel\n"
1596                         "   %30 = OpLoad %i32 %i\n"
1597                         "  %src = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30\n"
1598                         "%val32 = OpLoad %v4f32 %src\n"
1599                         "%val16 = OpFConvert %v4f16 %val32\n"
1600                         "  %dst = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30\n"
1601                         "         OpStore %dst %val16\n"
1602                         "         OpBranch %inc\n"
1603
1604                         "  %inc = OpLabel\n"
1605                         "   %37 = OpLoad %i32 %i\n"
1606                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
1607                         "         OpStore %i %39\n"
1608                         "         OpBranch %loop\n"
1609
1610                         "%merge = OpLabel\n"
1611                         "         OpReturnValue %param\n"
1612
1613                         "OpFunctionEnd\n";
1614
1615                 const RndMode   rndModes[] =
1616                 {
1617                         {"rtz",                                         "OpDecorate %val16  FPRoundingMode RTZ",        graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
1618                         {"rte",                                         "OpDecorate %val16  FPRoundingMode RTE",        graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
1619                         {"unspecified_rnd_mode",        "",                                                                                     graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
1620                 };
1621
1622                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1623                         for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
1624                         {
1625                                 map<string, string>     specs;
1626                                 string                          testName        = string(CAPABILITIES[capIdx].name) + "_vector_float_" + rndModes[rndModeIdx].name;
1627
1628                                 specs["cap"]                                    = CAPABILITIES[capIdx].cap;
1629                                 specs["indecor"]                                = CAPABILITIES[capIdx].decor;
1630                                 specs["rounddecor"]                             = rndModes[rndModeIdx].decor;
1631
1632                                 fragments["capability"]                 = capabilities.specialize(specs);
1633                                 fragments["decoration"]                 = decoration.specialize(specs);
1634
1635                                 resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
1636                                 resources.verifyIO                              = rndModes[rndModeIdx].f;
1637
1638
1639                                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
1640                         }
1641         }
1642
1643         {  // matrix cases
1644                 fragments["pre_main"]                           =
1645                         "       %f16 = OpTypeFloat 16\n"
1646                         "  %c_i32_16 = OpConstant %i32 16\n"
1647                         "     %v4f16 = OpTypeVector %f16 4\n"
1648                         "   %m4x4f32 = OpTypeMatrix %v4f32 4\n"
1649                         "   %m4x4f16 = OpTypeMatrix %v4f16 4\n"
1650                         "  %up_v4f32 = OpTypePointer Uniform %v4f32\n"
1651                         "  %up_v4f16 = OpTypePointer Uniform %v4f16\n"
1652                         "%a16m4x4f32 = OpTypeArray %m4x4f32 %c_i32_16\n"
1653                         "%a16m4x4f16 = OpTypeArray %m4x4f16 %c_i32_16\n"
1654                         "    %SSBO32 = OpTypeStruct %a16m4x4f32\n"
1655                         "    %SSBO16 = OpTypeStruct %a16m4x4f16\n"
1656                         " %up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1657                         " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1658                         "    %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
1659                         "    %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
1660
1661                 const StringTemplate decoration         (
1662                         "OpDecorate %a16m4x4f32 ArrayStride 64\n"
1663                         "OpDecorate %a16m4x4f16 ArrayStride 32\n"
1664                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1665                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
1666                         "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
1667                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1668                         "OpMemberDecorate %SSBO16 0 ColMajor\n"
1669                         "OpMemberDecorate %SSBO16 0 MatrixStride 8\n"
1670                         "OpDecorate %SSBO32 ${indecor}\n"
1671                         "OpDecorate %SSBO16 BufferBlock\n"
1672                         "OpDecorate %ssbo32 DescriptorSet 0\n"
1673                         "OpDecorate %ssbo16 DescriptorSet 0\n"
1674                         "OpDecorate %ssbo32 Binding 0\n"
1675                         "OpDecorate %ssbo16 Binding 1\n"
1676                         "${rounddecor}\n");
1677
1678                 fragments["testfun"]                            =
1679                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
1680                         "    %param = OpFunctionParameter %v4f32\n"
1681
1682                         "%entry = OpLabel\n"
1683                         "    %i = OpVariable %fp_i32 Function\n"
1684                         "         OpStore %i %c_i32_0\n"
1685                         "         OpBranch %loop\n"
1686
1687                         " %loop = OpLabel\n"
1688                         "   %15 = OpLoad %i32 %i\n"
1689                         "   %lt = OpSLessThan %bool %15 %c_i32_16\n"
1690                         "         OpLoopMerge %merge %inc None\n"
1691                         "         OpBranchConditional %lt %write %merge\n"
1692
1693                         "  %write = OpLabel\n"
1694                         "     %30 = OpLoad %i32 %i\n"
1695                         "  %src_0 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_0\n"
1696                         "  %src_1 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_1\n"
1697                         "  %src_2 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_2\n"
1698                         "  %src_3 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_3\n"
1699                         "%val32_0 = OpLoad %v4f32 %src_0\n"
1700                         "%val32_1 = OpLoad %v4f32 %src_1\n"
1701                         "%val32_2 = OpLoad %v4f32 %src_2\n"
1702                         "%val32_3 = OpLoad %v4f32 %src_3\n"
1703                         "%val16_0 = OpFConvert %v4f16 %val32_0\n"
1704                         "%val16_1 = OpFConvert %v4f16 %val32_1\n"
1705                         "%val16_2 = OpFConvert %v4f16 %val32_2\n"
1706                         "%val16_3 = OpFConvert %v4f16 %val32_3\n"
1707                         "  %dst_0 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_0\n"
1708                         "  %dst_1 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_1\n"
1709                         "  %dst_2 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_2\n"
1710                         "  %dst_3 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_3\n"
1711                         "           OpStore %dst_0 %val16_0\n"
1712                         "           OpStore %dst_1 %val16_1\n"
1713                         "           OpStore %dst_2 %val16_2\n"
1714                         "           OpStore %dst_3 %val16_3\n"
1715                         "           OpBranch %inc\n"
1716
1717                         "  %inc = OpLabel\n"
1718                         "   %37 = OpLoad %i32 %i\n"
1719                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
1720                         "         OpStore %i %39\n"
1721                         "         OpBranch %loop\n"
1722
1723                         "%merge = OpLabel\n"
1724                         "         OpReturnValue %param\n"
1725
1726                         "OpFunctionEnd\n";
1727
1728                 const RndMode   rndModes[] =
1729                 {
1730                         {"rte",                                         "OpDecorate %val16_0  FPRoundingMode RTE\nOpDecorate %val16_1  FPRoundingMode RTE\nOpDecorate %val16_2  FPRoundingMode RTE\nOpDecorate %val16_3  FPRoundingMode RTE",   graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
1731                         {"rtz",                                         "OpDecorate %val16_0  FPRoundingMode RTZ\nOpDecorate %val16_1  FPRoundingMode RTZ\nOpDecorate %val16_2  FPRoundingMode RTZ\nOpDecorate %val16_3  FPRoundingMode RTZ",   graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
1732                         {"unspecified_rnd_mode",        "",                                                                                                                                                                                                                                                                                                                                             graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
1733                 };
1734
1735                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1736                         for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
1737                         {
1738                                 map<string, string>     specs;
1739                                 string                          testName        = string(CAPABILITIES[capIdx].name) + "_matrix_float_" + rndModes[rndModeIdx].name;
1740
1741                                 specs["cap"]                                    = CAPABILITIES[capIdx].cap;
1742                                 specs["indecor"]                                = CAPABILITIES[capIdx].decor;
1743                                 specs["rounddecor"]                             = rndModes[rndModeIdx].decor;
1744
1745                                 fragments["capability"]                 = capabilities.specialize(specs);
1746                                 fragments["decoration"]                 = decoration.specialize(specs);
1747
1748                                 resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
1749                                 resources.verifyIO                              = rndModes[rndModeIdx].f;
1750
1751
1752                                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
1753                         }
1754         }
1755 }
1756
1757 void addGraphics16BitStorageInputOutputFloat32To16Group (tcu::TestCaseGroup* testGroup)
1758 {
1759         de::Random                      rnd                                     (deStringHash(testGroup->getName()));
1760         RGBA                            defaultColors[4];
1761         vector<string>          extensions;
1762         map<string, string>     fragments                       = passthruFragments();
1763         const deUint32          numDataPoints           = 64;
1764         vector<float>           float32Data                     = getFloat32s(rnd, numDataPoints);
1765
1766         extensions.push_back("VK_KHR_16bit_storage");
1767
1768         fragments["capability"]                         = "OpCapability StorageInputOutput16\n";
1769         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"\n";
1770
1771         getDefaultColors(defaultColors);
1772
1773         struct RndMode
1774         {
1775                 const char*                             name;
1776                 const char*                             decor;
1777                 RoundingModeFlags               flags;
1778         };
1779
1780         const RndMode           rndModes[]              =
1781         {
1782                 {"rtz",                                         "OpDecorate %ret  FPRoundingMode RTZ",  ROUNDINGMODE_RTZ},
1783                 {"rte",                                         "OpDecorate %ret  FPRoundingMode RTE",  ROUNDINGMODE_RTE},
1784                 {"unspecified_rnd_mode",        "",                                                                             RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)},
1785         };
1786
1787         struct Case
1788         {
1789                 const char*     name;
1790                 const char*     interfaceOpFunc;
1791                 const char*     preMain;
1792                 const char*     inputType;
1793                 const char*     outputType;
1794                 deUint32        numPerCase;
1795                 deUint32        numElements;
1796         };
1797
1798         const Case      cases[]         =
1799         {
1800                 { // Scalar cases
1801                         "scalar",
1802
1803                         "%interface_op_func = OpFunction %f16 None %f16_f32_function\n"
1804                         "        %io_param1 = OpFunctionParameter %f32\n"
1805                         "            %entry = OpLabel\n"
1806                         "                          %ret = OpFConvert %f16 %io_param1\n"
1807                         "                     OpReturnValue %ret\n"
1808                         "                     OpFunctionEnd\n",
1809
1810                         "             %f16 = OpTypeFloat 16\n"
1811                         "          %op_f16 = OpTypePointer Output %f16\n"
1812                         "           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
1813                         "        %op_a3f16 = OpTypePointer Output %a3f16\n"
1814                         "%f16_f32_function = OpTypeFunction %f16 %f32\n"
1815                         "           %a3f32 = OpTypeArray %f32 %c_i32_3\n"
1816                         "        %ip_a3f32 = OpTypePointer Input %a3f32\n",
1817
1818                         "f32",
1819                         "f16",
1820                         4,
1821                         1,
1822                 },
1823                 { // Vector cases
1824                         "vector",
1825
1826                         "%interface_op_func = OpFunction %v2f16 None %v2f16_v2f32_function\n"
1827                         "        %io_param1 = OpFunctionParameter %v2f32\n"
1828                         "            %entry = OpLabel\n"
1829                         "                          %ret = OpFConvert %v2f16 %io_param1\n"
1830                         "                     OpReturnValue %ret\n"
1831                         "                     OpFunctionEnd\n",
1832
1833                         "                 %f16 = OpTypeFloat 16\n"
1834                         "               %v2f16 = OpTypeVector %f16 2\n"
1835                         "            %op_v2f16 = OpTypePointer Output %v2f16\n"
1836                         "             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
1837                         "          %op_a3v2f16 = OpTypePointer Output %a3v2f16\n"
1838                         "%v2f16_v2f32_function = OpTypeFunction %v2f16 %v2f32\n"
1839                         "             %a3v2f32 = OpTypeArray %v2f32 %c_i32_3\n"
1840                         "          %ip_a3v2f32 = OpTypePointer Input %a3v2f32\n",
1841
1842                         "v2f32",
1843                         "v2f16",
1844                         2 * 4,
1845                         2,
1846                 }
1847         };
1848
1849         VulkanFeatures  requiredFeatures;
1850         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
1851
1852         for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
1853                 for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
1854                 {
1855                         fragments["interface_op_func"]  = cases[caseIdx].interfaceOpFunc;
1856                         fragments["pre_main"]                   = cases[caseIdx].preMain;
1857                         fragments["decoration"]                 = rndModes[rndModeIdx].decor;
1858
1859                         fragments["input_type"]                 = cases[caseIdx].inputType;
1860                         fragments["output_type"]                = cases[caseIdx].outputType;
1861
1862                         GraphicsInterfaces      interfaces;
1863                         const deUint32          numPerCase      = cases[caseIdx].numPerCase;
1864                         vector<float>           subInputs       (numPerCase);
1865                         vector<deFloat16>       subOutputs      (numPerCase);
1866
1867                         // The pipeline need this to call compare16BitFloat() when checking the result.
1868                         interfaces.setRoundingMode(rndModes[rndModeIdx].flags);
1869
1870                         for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
1871                         {
1872                                 string                  testName        = string(cases[caseIdx].name) + numberToString(caseNdx) + "_" + rndModes[rndModeIdx].name;
1873
1874                                 for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
1875                                 {
1876                                         subInputs[numNdx]       = float32Data[caseNdx * numPerCase + numNdx];
1877                                         // We derive the expected result from inputs directly in the graphics pipeline.
1878                                         subOutputs[numNdx]      = 0;
1879                                 }
1880                                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT32), BufferSp(new Float32Buffer(subInputs))),
1881                                                                                   std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16), BufferSp(new Float16Buffer(subOutputs))));
1882                                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
1883                         }
1884                 }
1885 }
1886
1887 void addGraphics16BitStorageInputOutputFloat16To32Group (tcu::TestCaseGroup* testGroup)
1888 {
1889         de::Random                              rnd                                     (deStringHash(testGroup->getName()));
1890         RGBA                                    defaultColors[4];
1891         vector<string>                  extensions;
1892         map<string, string>             fragments                       = passthruFragments();
1893         const deUint32                  numDataPoints           = 64;
1894         vector<deFloat16>               float16Data                     (getFloat16s(rnd, numDataPoints));
1895         vector<float>                   float32Data;
1896
1897         float32Data.reserve(numDataPoints);
1898         for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
1899                 float32Data.push_back(deFloat16To32(float16Data[numIdx]));
1900
1901         extensions.push_back("VK_KHR_16bit_storage");
1902
1903         fragments["capability"]                         = "OpCapability StorageInputOutput16\n";
1904         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"\n";
1905
1906         getDefaultColors(defaultColors);
1907
1908         struct Case
1909         {
1910                 const char*     name;
1911                 const char*     interfaceOpFunc;
1912                 const char*     preMain;
1913                 const char*     inputType;
1914                 const char*     outputType;
1915                 deUint32        numPerCase;
1916                 deUint32        numElements;
1917         };
1918
1919         Case    cases[]         =
1920         {
1921                 { // Scalar cases
1922                         "scalar",
1923
1924                         "%interface_op_func = OpFunction %f32 None %f32_f16_function\n"
1925                         "        %io_param1 = OpFunctionParameter %f16\n"
1926                         "            %entry = OpLabel\n"
1927                         "                          %ret = OpFConvert %f32 %io_param1\n"
1928                         "                     OpReturnValue %ret\n"
1929                         "                     OpFunctionEnd\n",
1930
1931                         "             %f16 = OpTypeFloat 16\n"
1932                         "          %ip_f16 = OpTypePointer Input %f16\n"
1933                         "           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
1934                         "        %ip_a3f16 = OpTypePointer Input %a3f16\n"
1935                         "%f32_f16_function = OpTypeFunction %f32 %f16\n"
1936                         "           %a3f32 = OpTypeArray %f32 %c_i32_3\n"
1937                         "        %op_a3f32 = OpTypePointer Output %a3f32\n",
1938
1939                         "f16",
1940                         "f32",
1941                         4,
1942                         1,
1943                 },
1944                 { // Vector cases
1945                         "vector",
1946
1947                         "%interface_op_func = OpFunction %v2f32 None %v2f32_v2f16_function\n"
1948                         "        %io_param1 = OpFunctionParameter %v2f16\n"
1949                         "            %entry = OpLabel\n"
1950                         "                          %ret = OpFConvert %v2f32 %io_param1\n"
1951                         "                     OpReturnValue %ret\n"
1952                         "                     OpFunctionEnd\n",
1953
1954                         "                 %f16 = OpTypeFloat 16\n"
1955                         "                       %v2f16 = OpTypeVector %f16 2\n"
1956                         "            %ip_v2f16 = OpTypePointer Input %v2f16\n"
1957                         "             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
1958                         "          %ip_a3v2f16 = OpTypePointer Input %a3v2f16\n"
1959                         "%v2f32_v2f16_function = OpTypeFunction %v2f32 %v2f16\n"
1960                         "             %a3v2f32 = OpTypeArray %v2f32 %c_i32_3\n"
1961                         "          %op_a3v2f32 = OpTypePointer Output %a3v2f32\n",
1962
1963                         "v2f16",
1964                         "v2f32",
1965                         2 * 4,
1966                         2,
1967                 }
1968         };
1969
1970         VulkanFeatures  requiredFeatures;
1971         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
1972
1973         for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
1974         {
1975                 fragments["interface_op_func"]  = cases[caseIdx].interfaceOpFunc;
1976                 fragments["pre_main"]                   = cases[caseIdx].preMain;
1977
1978                 fragments["input_type"]                 = cases[caseIdx].inputType;
1979                 fragments["output_type"]                = cases[caseIdx].outputType;
1980
1981                 GraphicsInterfaces      interfaces;
1982                 const deUint32          numPerCase      = cases[caseIdx].numPerCase;
1983                 vector<deFloat16>       subInputs       (numPerCase);
1984                 vector<float>           subOutputs      (numPerCase);
1985
1986                 for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
1987                 {
1988                         string                  testName        = string(cases[caseIdx].name) + numberToString(caseNdx);
1989
1990                         for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
1991                         {
1992                                 subInputs[numNdx]       = float16Data[caseNdx * numPerCase + numNdx];
1993                                 subOutputs[numNdx]      = float32Data[caseNdx * numPerCase + numNdx];
1994                         }
1995                         interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16), BufferSp(new Float16Buffer(subInputs))),
1996                                                                           std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT32), BufferSp(new Float32Buffer(subOutputs))));
1997                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
1998                 }
1999         }
2000 }
2001
2002 void addGraphics16BitStorageInputOutputInt32To16Group (tcu::TestCaseGroup* testGroup)
2003 {
2004         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2005         RGBA                                                            defaultColors[4];
2006         vector<string>                                          extensions;
2007         map<string, string>                                     fragments                       = passthruFragments();
2008         const deUint32                                          numDataPoints           = 64;
2009         // inputs and outputs are declared to be vectors of signed integers.
2010         // However, depending on the test, they may be interpreted as unsiged
2011         // integers. That won't be a problem as long as we passed the bits
2012         // in faithfully to the pipeline.
2013         vector<deInt32>                                         inputs                          = getInt32s(rnd, numDataPoints);
2014         vector<deInt16>                                         outputs;
2015
2016         outputs.reserve(inputs.size());
2017         for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
2018                 outputs.push_back(static_cast<deInt16>(0xffff & inputs[numNdx]));
2019
2020         extensions.push_back("VK_KHR_16bit_storage");
2021
2022         fragments["capability"]                         = "OpCapability StorageInputOutput16\n";
2023         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"\n";
2024
2025         getDefaultColors(defaultColors);
2026
2027         const StringTemplate    scalarInterfaceOpFunc(
2028                         "%interface_op_func = OpFunction %${type16} None %${type16}_${type32}_function\n"
2029                         "        %io_param1 = OpFunctionParameter %${type32}\n"
2030                         "            %entry = OpLabel\n"
2031                         "                          %ret = ${convert} %${type16} %io_param1\n"
2032                         "                     OpReturnValue %ret\n"
2033                         "                     OpFunctionEnd\n");
2034
2035         const StringTemplate    scalarPreMain(
2036                         "             %${type16} = OpTypeInt 16 ${signed}\n"
2037                         "          %op_${type16} = OpTypePointer Output %${type16}\n"
2038                         "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
2039                         "        %op_a3${type16} = OpTypePointer Output %a3${type16}\n"
2040                         "%${type16}_${type32}_function = OpTypeFunction %${type16} %${type32}\n"
2041                         "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
2042                         "        %ip_a3${type32} = OpTypePointer Input %a3${type32}\n");
2043
2044         const StringTemplate    vecInterfaceOpFunc(
2045                         "%interface_op_func = OpFunction %${type16} None %${type16}_${type32}_function\n"
2046                         "        %io_param1 = OpFunctionParameter %${type32}\n"
2047                         "            %entry = OpLabel\n"
2048                         "                          %ret = ${convert} %${type16} %io_param1\n"
2049                         "                     OpReturnValue %ret\n"
2050                         "                     OpFunctionEnd\n");
2051
2052         const StringTemplate    vecPreMain(
2053                         "                       %i16 = OpTypeInt 16 1\n"
2054                         "                       %u16 = OpTypeInt 16 0\n"
2055                         "                 %v4i16 = OpTypeVector %i16 4\n"
2056                         "                 %v4u16 = OpTypeVector %u16 4\n"
2057                         "          %op_${type16} = OpTypePointer Output %${type16}\n"
2058                         "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
2059                         "        %op_a3${type16} = OpTypePointer Output %a3${type16}\n"
2060                         "%${type16}_${type32}_function = OpTypeFunction %${type16} %${type32}\n"
2061                         "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
2062                         "        %ip_a3${type32} = OpTypePointer Input %a3${type32}\n");
2063
2064         struct Case
2065         {
2066                 const char*                             name;
2067                 const StringTemplate&   interfaceOpFunc;
2068                 const StringTemplate&   preMain;
2069                 const char*                             type32;
2070                 const char*                             type16;
2071                 const char*                             sign;
2072                 const char*                             opcode;
2073                 deUint32                                numPerCase;
2074                 deUint32                                numElements;
2075         };
2076
2077         Case    cases[]         =
2078         {
2079                 {"scalar_sint", scalarInterfaceOpFunc,  scalarPreMain,  "i32",          "i16",          "1",    "OpSConvert",   4,              1},
2080                 {"scalar_uint", scalarInterfaceOpFunc,  scalarPreMain,  "u32",          "u16",          "0",    "OpUConvert",   4,              1},
2081                 {"vector_sint", vecInterfaceOpFunc,             vecPreMain,             "v4i32",        "v4i16",        "1",    "OpSConvert",   4 * 4,  4},
2082                 {"vector_uint", vecInterfaceOpFunc,             vecPreMain,             "v4u32",        "v4u16",        "0",    "OpUConvert",   4 * 4,  4},
2083         };
2084
2085         VulkanFeatures  requiredFeatures;
2086         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
2087
2088         for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
2089         {
2090                 map<string, string>                             specs;
2091
2092                 specs["type32"]                                 = cases[caseIdx].type32;
2093                 specs["type16"]                                 = cases[caseIdx].type16;
2094                 specs["signed"]                                 = cases[caseIdx].sign;
2095                 specs["convert"]                                = cases[caseIdx].opcode;
2096
2097                 fragments["pre_main"]                   = cases[caseIdx].preMain.specialize(specs);
2098                 fragments["interface_op_func"]  = cases[caseIdx].interfaceOpFunc.specialize(specs);
2099                 fragments["input_type"]                 = cases[caseIdx].type32;
2100                 fragments["output_type"]                = cases[caseIdx].type16;
2101
2102                 GraphicsInterfaces                              interfaces;
2103                 const deUint32                                  numPerCase      = cases[caseIdx].numPerCase;
2104                 vector<deInt32>                                 subInputs       (numPerCase);
2105                 vector<deInt16>                                 subOutputs      (numPerCase);
2106
2107                 for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
2108                 {
2109                         string                  testName        = string(cases[caseIdx].name) + numberToString(caseNdx);
2110
2111                         for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
2112                         {
2113                                 subInputs[numNdx]       = inputs[caseNdx * numPerCase + numNdx];
2114                                 subOutputs[numNdx]      = outputs[caseNdx * numPerCase + numNdx];
2115                         }
2116                         if (strcmp(cases[caseIdx].sign, "1") == 0)
2117                         {
2118                                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT32), BufferSp(new Int32Buffer(subInputs))),
2119                                                                                   std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT16), BufferSp(new Int16Buffer(subOutputs))));
2120                         }
2121                         else
2122                         {
2123                                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT32), BufferSp(new Int32Buffer(subInputs))),
2124                                                                                   std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT16), BufferSp(new Int16Buffer(subOutputs))));
2125                         }
2126                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
2127                 }
2128         }
2129 }
2130
// Adds graphics tests to testGroup that hand 16-bit integers to a shader as
// interface input (capability StorageInputOutput16), convert them to 32-bit
// integers with OpSConvert / OpUConvert and expose the result as interface
// output.
//
// The 64 input values come from getInt16s(), driven by an RNG seeded with the
// hash of the group name. Expected outputs are precomputed on the host for
// both the sign-extended and the zero-extended interpretation. Every entry of
// cases[] produces numDataPoints / numPerCase tests named <name><index>, each
// handed to createTestsForAllStages().
2131 void addGraphics16BitStorageInputOutputInt16To32Group (tcu::TestCaseGroup* testGroup)
2132 {
2133         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2134         RGBA                                                            defaultColors[4];
2135         vector<string>                                          extensions;
2136         map<string, string>                                     fragments                       = passthruFragments();
2137         const deUint32                                          numDataPoints           = 64;
2138         // inputs and outputs are declared to be vectors of signed integers.
2139         // However, depending on the test, they may be interpreted as unsigned
2140         // integers. That won't be a problem as long as we passed the bits
2141         // in faithfully to the pipeline.
2142         vector<deInt16>                                         inputs                          = getInt16s(rnd, numDataPoints);
2143         vector<deInt32>                                         sOutputs;
2144         vector<deInt32>                                         uOutputs;
2145         const deUint16                                          signBitMask                     = 0x8000;
2146         const deUint32                                          signExtendMask          = 0xffff0000;
2147
2148         sOutputs.reserve(inputs.size());
2149         uOutputs.reserve(inputs.size());
2150
             // Host-side reference values: uOutputs holds the zero-extended input,
             // sOutputs the sign-extended input (upper 16 bits set when bit 15 is set).
2151         for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
2152         {
2153                 uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
2154                 if (inputs[numNdx] & signBitMask)
2155                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
2156                 else
2157                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
2158         }
2159
2160         extensions.push_back("VK_KHR_16bit_storage");
2161
2162         fragments["capability"]                         = "OpCapability StorageInputOutput16\n";
2163         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"\n";
2164
2165         getDefaultColors(defaultColors);
2166
             // Function applied to the interface value: converts its 16-bit parameter
             // to the 32-bit type using the opcode substituted for ${convert}.
2167         const StringTemplate scalarIfOpFunc     (
2168                         "%interface_op_func = OpFunction %${type32} None %${type32}_${type16}_function\n"
2169                         "        %io_param1 = OpFunctionParameter %${type16}\n"
2170                         "            %entry = OpLabel\n"
2171                         "                          %ret = ${convert} %${type32} %io_param1\n"
2172                         "                     OpReturnValue %ret\n"
2173                         "                     OpFunctionEnd\n");
2174
             // Declarations for the scalar cases: the 16-bit integer type itself, Input
             // pointer/array types over it, the conversion function type, and the
             // Output array/pointer types over the 32-bit type.
2175         const StringTemplate scalarPreMain      (
2176                         "             %${type16} = OpTypeInt 16 ${signed}\n"
2177                         "          %ip_${type16} = OpTypePointer Input %${type16}\n"
2178                         "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
2179                         "        %ip_a3${type16} = OpTypePointer Input %a3${type16}\n"
2180                         "%${type32}_${type16}_function = OpTypeFunction %${type32} %${type16}\n"
2181                         "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
2182                         "        %op_a3${type32} = OpTypePointer Output %a3${type32}\n");
2183
             // Same text as scalarIfOpFunc; kept as a separate template for the vector cases.
2184         const StringTemplate vecIfOpFunc        (
2185                         "%interface_op_func = OpFunction %${type32} None %${type32}_${type16}_function\n"
2186                         "        %io_param1 = OpFunctionParameter %${type16}\n"
2187                         "            %entry = OpLabel\n"
2188                         "                          %ret = ${convert} %${type32} %io_param1\n"
2189                         "                     OpReturnValue %ret\n"
2190                         "                     OpFunctionEnd\n");
2191
             // Declarations for the vector cases. Both the signed and the unsigned
             // 16-bit scalar and 4-component vector types are declared unconditionally;
             // ${type16} then selects which vector type the interface uses.
2192         const StringTemplate vecPreMain (
2193                         "                       %i16 = OpTypeInt 16 1\n"
2194                         "                       %u16 = OpTypeInt 16 0\n"
2195                         "                 %v4i16 = OpTypeVector %i16 4\n"
2196                         "                 %v4u16 = OpTypeVector %u16 4\n"
2197                         "          %ip_${type16} = OpTypePointer Input %${type16}\n"
2198                         "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
2199                         "        %ip_a3${type16} = OpTypePointer Input %a3${type16}\n"
2200                         "%${type32}_${type16}_function = OpTypeFunction %${type32} %${type16}\n"
2201                         "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
2202                         "        %op_a3${type32} = OpTypePointer Output %a3${type32}\n");
2203
             // Description of one test variant:
             //   name            - prefix of the generated test names
             //   interfaceOpFunc - template for the "interface_op_func" fragment
             //   preMain         - template for the "pre_main" fragment
             //   type32 / type16 - SPIR-V type names (without '%') of output / input
             //   sign            - "1" for signed, "0" for unsigned; substituted as
             //                     ${signed} and used to pick the expected outputs
             //   opcode          - conversion instruction substituted as ${convert}
             //   numPerCase      - number of data points consumed by a single test
             //   numElements     - element count passed to IFDataType
2204         struct Case
2205         {
2206                 const char*                             name;
2207                 const StringTemplate&   interfaceOpFunc;
2208                 const StringTemplate&   preMain;
2209                 const char*                             type32;
2210                 const char*                             type16;
2211                 const char*                             sign;
2212                 const char*                             opcode;
2213                 deUint32                                numPerCase;
2214                 deUint32                                numElements;
2215         };
2216
2217         Case    cases[]         =
2218         {
2219                 {"scalar_sint", scalarIfOpFunc, scalarPreMain,  "i32",          "i16",          "1",    "OpSConvert",   4,              1},
2220                 {"scalar_uint", scalarIfOpFunc, scalarPreMain,  "u32",          "u16",          "0",    "OpUConvert",   4,              1},
2221                 {"vector_sint", vecIfOpFunc,    vecPreMain,             "v4i32",        "v4i16",        "1",    "OpSConvert",   4 * 4,  4},
2222                 {"vector_uint", vecIfOpFunc,    vecPreMain,             "v4u32",        "v4u16",        "0",    "OpUConvert",   4 * 4,  4},
2223         };
2224
2225         VulkanFeatures  requiredFeatures;
2226         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
2227
2228         for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
2229         {
2230                 map<string, string>                             specs;
2231
2232                 specs["type32"]                                 = cases[caseIdx].type32;
2233                 specs["type16"]                                 = cases[caseIdx].type16;
2234                 specs["signed"]                                 = cases[caseIdx].sign;
2235                 specs["convert"]                                = cases[caseIdx].opcode;
2236
                     // The shader fragments are overwritten for every variant; the
                     // capability/extension fragments set above stay in place.
2237                 fragments["pre_main"]                   = cases[caseIdx].preMain.specialize(specs);
2238                 fragments["interface_op_func"]  = cases[caseIdx].interfaceOpFunc.specialize(specs);
2239                 fragments["input_type"]                 = cases[caseIdx].type16;
2240                 fragments["output_type"]                = cases[caseIdx].type32;
2241
2242                 GraphicsInterfaces                              interfaces;
2243                 const deUint32                                  numPerCase      = cases[caseIdx].numPerCase;
2244                 vector<deInt16>                                 subInputs       (numPerCase);
2245                 vector<deInt32>                                 subOutputs      (numPerCase);
2246
                     // Split the data points into consecutive chunks of numPerCase values;
                     // each chunk becomes its own test.
2247                 for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
2248                 {
2249                         string                  testName        = string(cases[caseIdx].name) + numberToString(caseNdx);
2250
                             // Copy this chunk's inputs and the expected outputs matching
                             // the variant's signedness.
2251                         for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
2252                         {
2253                                 subInputs[numNdx]       = inputs[caseNdx * numPerCase + numNdx];
2254                                 if (cases[caseIdx].sign[0] == '1')
2255                                         subOutputs[numNdx]      = sOutputs[caseNdx * numPerCase + numNdx];
2256                                 else
2257                                         subOutputs[numNdx]      = uOutputs[caseNdx * numPerCase + numNdx];
2258                         }
                             // Signedness only changes the declared interface number types;
                             // the data is carried in Int16Buffer / Int32Buffer either way.
2259                         if (strcmp(cases[caseIdx].sign, "1") == 0)
2260                         {
2261                                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT16), BufferSp(new Int16Buffer(subInputs))),
2262                                                                                   std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT32), BufferSp(new Int32Buffer(subOutputs))));
2263                         }
2264                         else
2265                         {
2266                                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT16), BufferSp(new Int16Buffer(subInputs))),
2267                                                                                   std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT32), BufferSp(new Int32Buffer(subOutputs))));
2268                         }
2269                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
2270                 }
2271         }
2272 }
2273
// Adds graphics tests to testGroup that read 16-bit floats from a push constant
// block (capability StoragePushConstant16), widen them with OpFConvert and store
// the 32-bit results into a storage buffer decorated DescriptorSet 0, Binding 0.
//
// Three variants ("scalar", "vector", "matrix") are registered. They share the
// same 64 input values, the same expected outputs and the same test function
// template; only the type declarations, decorations and loop count differ.
2274 void addGraphics16BitStoragePushConstantFloat16To32Group (tcu::TestCaseGroup* testGroup)
2275 {
2276         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2277         map<string, string>                                     fragments;
2278         RGBA                                                            defaultColors[4];
2279         vector<string>                                          extensions;
2280         GraphicsResources                                       resources;
2281         PushConstants                                           pcs;
2282         const deUint32                                          numDataPoints           = 64;
2283         vector<deFloat16>                                       float16Data                     (getFloat16s(rnd, numDataPoints));
2284         vector<float>                                           float32Data;
2285         VulkanFeatures                                          requiredFeatures;
2286
             // Expected outputs: every 16-bit input converted on the host with deFloat16To32().
2287         float32Data.reserve(numDataPoints);
2288         for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
2289                 float32Data.push_back(deFloat16To32(float16Data[numIdx]));
2290
2291         extensions.push_back("VK_KHR_16bit_storage");
2292         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
2293
2294         fragments["capability"]                         = "OpCapability StoragePushConstant16\n";
2295         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"";
2296
             // Inputs travel as push constants; the expected 32-bit values are attached
             // as a storage-buffer output and verification is delegated to check32BitFloats.
2297         pcs.setPushConstant(BufferSp(new Float16Buffer(float16Data)));
2298         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data))));
2299         resources.verifyIO = check32BitFloats;
2300
2301         getDefaultColors(defaultColors);
2302
             // Shared test function. It loops %i from 0 while %i < ${count}; each
             // iteration loads element %i of the push constant array, converts it with
             // OpFConvert and stores it to element %i of the SSBO array. ${index0}
             // optionally appends one more access-chain index and ${store} optionally
             // injects extra instructions after the store (both are only set by the
             // matrix variant). The function returns its %param argument unchanged.
2303         const StringTemplate    testFun         (
2304                 "%test_code = OpFunction %v4f32 None %v4f32_function\n"
2305                 "    %param = OpFunctionParameter %v4f32\n"
2306
2307                 "%entry = OpLabel\n"
2308                 "    %i = OpVariable %fp_i32 Function\n"
2309                 "         OpStore %i %c_i32_0\n"
2310                 "         OpBranch %loop\n"
2311
2312                 " %loop = OpLabel\n"
2313                 "   %15 = OpLoad %i32 %i\n"
2314                 "   %lt = OpSLessThan %bool %15 ${count}\n"
2315                 "         OpLoopMerge %merge %inc None\n"
2316                 "         OpBranchConditional %lt %write %merge\n"
2317
2318                 "%write = OpLabel\n"
2319                 "   %30 = OpLoad %i32 %i\n"
2320                 "  %src = OpAccessChain ${pp_type16} %pc16 %c_i32_0 %30 ${index0:opt}\n"
2321                 "%val16 = OpLoad ${f_type16} %src\n"
2322                 "%val32 = OpFConvert ${f_type32} %val16\n"
2323                 "  %dst = OpAccessChain ${up_type32} %ssbo32 %c_i32_0 %30 ${index0:opt}\n"
2324                 "         OpStore %dst %val32\n"
2325
2326                 "${store:opt}\n"
2327
2328                 "         OpBranch %inc\n"
2329
2330                 "  %inc = OpLabel\n"
2331                 "   %37 = OpLoad %i32 %i\n"
2332                 "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2333                 "         OpStore %i %39\n"
2334                 "         OpBranch %loop\n"
2335
2336                 "%merge = OpLabel\n"
2337                 "         OpReturnValue %param\n"
2338
2339                 "OpFunctionEnd\n");
2340
             // Scalar variant: an array of 64 f16 (ArrayStride 2) is converted element
             // by element into an array of 64 f32 (ArrayStride 4).
2341         {  // Scalar cases
2342                 fragments["pre_main"]                           =
2343                         "      %f16 = OpTypeFloat 16\n"
2344                         " %c_i32_64 = OpConstant %i32 64\n"                                     // Should be the same as numDataPoints
2345                         "   %a64f16 = OpTypeArray %f16 %c_i32_64\n"
2346                         "   %a64f32 = OpTypeArray %f32 %c_i32_64\n"
2347                         "   %pp_f16 = OpTypePointer PushConstant %f16\n"
2348                         "   %up_f32 = OpTypePointer Uniform %f32\n"
2349                         "   %SSBO32 = OpTypeStruct %a64f32\n"
2350                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2351                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2352                         "     %PC16 = OpTypeStruct %a64f16\n"
2353                         "  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
2354                         "     %pc16 = OpVariable %pp_PC16 PushConstant\n";
2355
2356                 fragments["decoration"]                         =
2357                         "OpDecorate %a64f16 ArrayStride 2\n"
2358                         "OpDecorate %a64f32 ArrayStride 4\n"
2359                         "OpDecorate %SSBO32 BufferBlock\n"
2360                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2361                         "OpDecorate %PC16 Block\n"
2362                         "OpMemberDecorate %PC16 0 Offset 0\n"
2363                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2364                         "OpDecorate %ssbo32 Binding 0\n";
2365
2366                 map<string, string>             specs;
2367
2368                 specs["count"]                  = "%c_i32_64";
2369                 specs["pp_type16"]              = "%pp_f16";
2370                 specs["f_type16"]               = "%f16";
2371                 specs["f_type32"]               = "%f32";
2372                 specs["up_type32"]              = "%up_f32";
2373
2374                 fragments["testfun"]    = testFun.specialize(specs);
2375
2376                 createTestsForAllStages("scalar", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2377         }
2378
             // Vector variant: the same buffers are declared as 16 v4f16 elements
             // (ArrayStride 8) and 16 v4f32 elements (ArrayStride 16), so the loop
             // runs 16 times and converts four components per iteration.
2379         {  // Vector cases
2380                 fragments["pre_main"]                           =
2381                         "      %f16 = OpTypeFloat 16\n"
2382                         "    %v4f16 = OpTypeVector %f16 4\n"
2383                         " %c_i32_16 = OpConstant %i32 16\n"
2384                         " %a16v4f16 = OpTypeArray %v4f16 %c_i32_16\n"
2385                         " %a16v4f32 = OpTypeArray %v4f32 %c_i32_16\n"
2386                         " %pp_v4f16 = OpTypePointer PushConstant %v4f16\n"
2387                         " %up_v4f32 = OpTypePointer Uniform %v4f32\n"
2388                         "   %SSBO32 = OpTypeStruct %a16v4f32\n"
2389                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2390                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2391                         "     %PC16 = OpTypeStruct %a16v4f16\n"
2392                         "  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
2393                         "     %pc16 = OpVariable %pp_PC16 PushConstant\n";
2394
2395                 fragments["decoration"]                         =
2396                         "OpDecorate %a16v4f16 ArrayStride 8\n"
2397                         "OpDecorate %a16v4f32 ArrayStride 16\n"
2398                         "OpDecorate %SSBO32 BufferBlock\n"
2399                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2400                         "OpDecorate %PC16 Block\n"
2401                         "OpMemberDecorate %PC16 0 Offset 0\n"
2402                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2403                         "OpDecorate %ssbo32 Binding 0\n";
2404
2405                 map<string, string>             specs;
2406
2407                 specs["count"]                  = "%c_i32_16";
2408                 specs["pp_type16"]              = "%pp_v4f16";
2409                 specs["f_type16"]               = "%v4f16";
2410                 specs["f_type32"]               = "%v4f32";
2411                 specs["up_type32"]              = "%up_v4f32";
2412
2413                 fragments["testfun"]    = testFun.specialize(specs);
2414
2415                 createTestsForAllStages("vector", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2416         }
2417
             // Matrix variant: the same buffers are declared as 8 two-column matrices
             // of 4-component vectors. Each loop iteration converts column 0 through
             // the shared template (index0 = %c_i32_0) and column 1 through the extra
             // instructions supplied as ${store}.
2418         {  // Matrix cases
2419                 fragments["pre_main"]                           =
2420                         "  %c_i32_8 = OpConstant %i32 8\n"
2421                         "      %f16 = OpTypeFloat 16\n"
2422                         "    %v4f16 = OpTypeVector %f16 4\n"
2423                         "  %m2v4f16 = OpTypeMatrix %v4f16 2\n"
2424                         "  %m2v4f32 = OpTypeMatrix %v4f32 2\n"
2425                         "%a8m2v4f16 = OpTypeArray %m2v4f16 %c_i32_8\n"
2426                         "%a8m2v4f32 = OpTypeArray %m2v4f32 %c_i32_8\n"
2427                         " %pp_v4f16 = OpTypePointer PushConstant %v4f16\n"
2428                         " %up_v4f32 = OpTypePointer Uniform %v4f32\n"
2429                         "   %SSBO32 = OpTypeStruct %a8m2v4f32\n"
2430                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2431                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2432                         "     %PC16 = OpTypeStruct %a8m2v4f16\n"
2433                         "  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
2434                         "     %pc16 = OpVariable %pp_PC16 PushConstant\n";
2435
                     // Column-major layout; MatrixStride equals the size of one column
                     // (8 bytes for v4f16, 16 bytes for v4f32) and ArrayStride covers
                     // both columns.
2436                 fragments["decoration"]                         =
2437                         "OpDecorate %a8m2v4f16 ArrayStride 16\n"
2438                         "OpDecorate %a8m2v4f32 ArrayStride 32\n"
2439                         "OpDecorate %SSBO32 BufferBlock\n"
2440                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2441                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
2442                         "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
2443                         "OpDecorate %PC16 Block\n"
2444                         "OpMemberDecorate %PC16 0 Offset 0\n"
2445                         "OpMemberDecorate %PC16 0 ColMajor\n"
2446                         "OpMemberDecorate %PC16 0 MatrixStride 8\n"
2447                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2448                         "OpDecorate %ssbo32 Binding 0\n";
2449
2450                 map<string, string>             specs;
2451
2452                 specs["count"]                  = "%c_i32_8";
2453                 specs["pp_type16"]              = "%pp_v4f16";
2454                 specs["up_type32"]              = "%up_v4f32";
2455                 specs["f_type16"]               = "%v4f16";
2456                 specs["f_type32"]               = "%v4f32";
2457                 specs["index0"]                 = "%c_i32_0";
2458                 specs["store"]                  =
2459                         "  %src_1 = OpAccessChain %pp_v4f16 %pc16 %c_i32_0 %30 %c_i32_1\n"
2460                         "%val16_1 = OpLoad %v4f16 %src_1\n"
2461                         "%val32_1 = OpFConvert %v4f32 %val16_1\n"
2462                         "  %dst_1 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_1\n"
2463                         "           OpStore %dst_1 %val32_1\n";
2464
2465                 fragments["testfun"]    = testFun.specialize(specs);
2466
2467                 createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2468         }
2469 }
2470
// Adds graphics tests to testGroup that read 16-bit integers from a push
// constant block (capability StoragePushConstant16), convert them to 32-bit
// integers with OpSConvert / OpUConvert and store the results into a storage
// buffer decorated DescriptorSet 0, Binding 0.
//
// Four variants are registered: "sint_scalar", "uint_scalar", "sint_vector" and
// "uint_vector". All of them use the same 64 input values; the signed variants
// expect the sign-extended values, the unsigned variants the zero-extended ones.
2471 void addGraphics16BitStoragePushConstantInt16To32Group (tcu::TestCaseGroup* testGroup)
2472 {
2473         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2474         map<string, string>                                     fragments;
2475         RGBA                                                            defaultColors[4];
2476         const deUint32                                          numDataPoints           = 64;
2477         vector<deInt16>                                         inputs                          = getInt16s(rnd, numDataPoints);
2478         vector<deInt32>                                         sOutputs;
2479         vector<deInt32>                                         uOutputs;
2480         PushConstants                                           pcs;
2481         GraphicsResources                                       resources;
2482         vector<string>                                          extensions;
2483         const deUint16                                          signBitMask                     = 0x8000;
2484         const deUint32                                          signExtendMask          = 0xffff0000;
2485         VulkanFeatures                                          requiredFeatures;
2486
2487         sOutputs.reserve(inputs.size());
2488         uOutputs.reserve(inputs.size());
2489
             // Host-side reference values: uOutputs holds the zero-extended input,
             // sOutputs the sign-extended input (upper 16 bits set when bit 15 is set).
2490         for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
2491         {
2492                 uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
2493                 if (inputs[numNdx] & signBitMask)
2494                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
2495                 else
2496                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
2497         }
2498
2499         extensions.push_back("VK_KHR_16bit_storage");
2500         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
2501
2502         fragments["capability"]                         = "OpCapability StoragePushConstant16\n";
2503         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"";
2504
             // The same push constant data is used by every variant below.
2505         pcs.setPushConstant(BufferSp(new Int16Buffer(inputs)));
2506
2507         getDefaultColors(defaultColors);
2508
             // Shared test function. It loops %i from 0 while %i < %c_i32_${count};
             // each iteration loads element %i of the push constant array, converts it
             // with ${convert} and stores it to element %i of the SSBO array. Unlike the
             // type names in the pre_main templates, ${count} is a bare number here and
             // the "%c_i32_" prefix is part of the template. The function returns its
             // %param argument unchanged.
2509         const StringTemplate    testFun         (
2510                 "%test_code = OpFunction %v4f32 None %v4f32_function\n"
2511                 "    %param = OpFunctionParameter %v4f32\n"
2512
2513                 "%entry = OpLabel\n"
2514                 "    %i = OpVariable %fp_i32 Function\n"
2515                 "         OpStore %i %c_i32_0\n"
2516                 "         OpBranch %loop\n"
2517
2518                 " %loop = OpLabel\n"
2519                 "   %15 = OpLoad %i32 %i\n"
2520                 "   %lt = OpSLessThan %bool %15 %c_i32_${count}\n"
2521                 "         OpLoopMerge %merge %inc None\n"
2522                 "         OpBranchConditional %lt %write %merge\n"
2523
2524                 "%write = OpLabel\n"
2525                 "   %30 = OpLoad %i32 %i\n"
2526                 "  %src = OpAccessChain %pp_${type16} %pc16 %c_i32_0 %30\n"
2527                 "%val16 = OpLoad %${type16} %src\n"
2528                 "%val32 = ${convert} %${type32} %val16\n"
2529                 "  %dst = OpAccessChain %up_${type32} %ssbo32 %c_i32_0 %30\n"
2530                 "         OpStore %dst %val32\n"
2531                 "         OpBranch %inc\n"
2532
2533                 "  %inc = OpLabel\n"
2534                 "   %37 = OpLoad %i32 %i\n"
2535                 "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2536                 "         OpStore %i %39\n"
2537                 "         OpBranch %loop\n"
2538
2539                 "%merge = OpLabel\n"
2540                 "         OpReturnValue %param\n"
2541
2542                 "OpFunctionEnd\n");
2543
             // Scalar variants: an array of ${count} 16-bit integers (ArrayStride 2) is
             // converted element by element into an array of 32-bit integers (ArrayStride 4).
2544         {  // Scalar cases
2545                 const StringTemplate    preMain         (
2546                         "         %${type16} = OpTypeInt 16 ${signed}\n"
2547                         "    %c_i32_${count} = OpConstant %i32 ${count}\n"                                      // Should be the same as numDataPoints
2548                         "%a${count}${type16} = OpTypeArray %${type16} %c_i32_${count}\n"
2549                         "%a${count}${type32} = OpTypeArray %${type32} %c_i32_${count}\n"
2550                         "      %pp_${type16} = OpTypePointer PushConstant %${type16}\n"
2551                         "      %up_${type32} = OpTypePointer Uniform      %${type32}\n"
2552                         "            %SSBO32 = OpTypeStruct %a${count}${type32}\n"
2553                         "         %up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2554                         "            %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2555                         "              %PC16 = OpTypeStruct %a${count}${type16}\n"
2556                         "           %pp_PC16 = OpTypePointer PushConstant %PC16\n"
2557                         "              %pc16 = OpVariable %pp_PC16 PushConstant\n");
2558
2559                 const StringTemplate    decoration      (
2560                         "OpDecorate %a${count}${type16} ArrayStride 2\n"
2561                         "OpDecorate %a${count}${type32} ArrayStride 4\n"
2562                         "OpDecorate %SSBO32 BufferBlock\n"
2563                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2564                         "OpDecorate %PC16 Block\n"
2565                         "OpMemberDecorate %PC16 0 Offset 0\n"
2566                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2567                         "OpDecorate %ssbo32 Binding 0\n");
2568
2569                 {  // signed int
2570                         map<string, string>             specs;
2571
2572                         specs["type16"]                 = "i16";
2573                         specs["type32"]                 = "i32";
2574                         specs["signed"]                 = "1";
2575                         specs["count"]                  = "64";
2576                         specs["convert"]                = "OpSConvert";
2577
2578                         fragments["testfun"]    = testFun.specialize(specs);
2579                         fragments["pre_main"]   = preMain.specialize(specs);
2580                         fragments["decoration"] = decoration.specialize(specs);
2581
                             // resources is shared between variants, so drop any earlier
                             // expected output before attaching the sign-extended values.
2582                         resources.outputs.clear();
2583                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(sOutputs))));
2584                         createTestsForAllStages("sint_scalar", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2585                 }
2586                 {  // unsigned int
2587                         map<string, string>             specs;
2588
2589                         specs["type16"]                 = "u16";
2590                         specs["type32"]                 = "u32";
2591                         specs["signed"]                 = "0";
2592                         specs["count"]                  = "64";
2593                         specs["convert"]                = "OpUConvert";
2594
2595                         fragments["testfun"]    = testFun.specialize(specs);
2596                         fragments["pre_main"]   = preMain.specialize(specs);
2597                         fragments["decoration"] = decoration.specialize(specs);
2598
                             // Replace the signed expectation with the zero-extended values.
2599                         resources.outputs.clear();
2600                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(uOutputs))));
2601                         createTestsForAllStages("uint_scalar", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2602                 }
2603         }
2604
             // Vector variants: the same 64 values are declared as 32 two-component
             // vectors, with ArrayStride 4 on the 16-bit side and 8 on the 32-bit side.
             // NOTE(review): %${type32} (v2i32 / v2u32, and i32 / u32 in the scalar
             // variants) is not declared in these templates - presumably it comes from
             // the common shader prologue; confirm.
2605         {  // Vector cases
2606                 const StringTemplate    preMain         (
2607                         "    %${base_type16} = OpTypeInt 16 ${signed}\n"
2608                         "         %${type16} = OpTypeVector %${base_type16} 2\n"
2609                         "    %c_i32_${count} = OpConstant %i32 ${count}\n"
2610                         "%a${count}${type16} = OpTypeArray %${type16} %c_i32_${count}\n"
2611                         "%a${count}${type32} = OpTypeArray %${type32} %c_i32_${count}\n"
2612                         "      %pp_${type16} = OpTypePointer PushConstant %${type16}\n"
2613                         "      %up_${type32} = OpTypePointer Uniform      %${type32}\n"
2614                         "            %SSBO32 = OpTypeStruct %a${count}${type32}\n"
2615                         "         %up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2616                         "            %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2617                         "              %PC16 = OpTypeStruct %a${count}${type16}\n"
2618                         "           %pp_PC16 = OpTypePointer PushConstant %PC16\n"
2619                         "              %pc16 = OpVariable %pp_PC16 PushConstant\n");
2620
2621                 const StringTemplate    decoration      (
2622                         "OpDecorate %a${count}${type16} ArrayStride 4\n"
2623                         "OpDecorate %a${count}${type32} ArrayStride 8\n"
2624                         "OpDecorate %SSBO32 BufferBlock\n"
2625                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2626                         "OpDecorate %PC16 Block\n"
2627                         "OpMemberDecorate %PC16 0 Offset 0\n"
2628                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2629                         "OpDecorate %ssbo32 Binding 0\n");
2630
2631                 {  // signed int
2632                         map<string, string>             specs;
2633
2634                         specs["base_type16"]    = "i16";
2635                         specs["type16"]                 = "v2i16";
2636                         specs["type32"]                 = "v2i32";
2637                         specs["signed"]                 = "1";
2638                         specs["count"]                  = "32";                         // 64 / 2
2639                         specs["convert"]                = "OpSConvert";
2640
2641                         fragments["testfun"]    = testFun.specialize(specs);
2642                         fragments["pre_main"]   = preMain.specialize(specs);
2643                         fragments["decoration"] = decoration.specialize(specs);
2644
2645                         resources.outputs.clear();
2646                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(sOutputs))));
2647                         createTestsForAllStages("sint_vector", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2648                 }
2649                 {  // unsigned int
2650                         map<string, string>             specs;
2651
2652                         specs["base_type16"]    = "u16";
2653                         specs["type16"]                 = "v2u16";
2654                         specs["type32"]                 = "v2u32";
2655                         specs["signed"]                 = "0";
2656                         specs["count"]                  = "32";
2657                         specs["convert"]                = "OpUConvert";
2658
2659                         fragments["testfun"]    = testFun.specialize(specs);
2660                         fragments["pre_main"]   = preMain.specialize(specs);
2661                         fragments["decoration"] = decoration.specialize(specs);
2662
2663                         resources.outputs.clear();
2664                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(uOutputs))));
2665                         createTestsForAllStages("uint_vector", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2666                 }
2667         }
2668 }
2669
2670 void addGraphics16BitStorageUniformInt16To32Group (tcu::TestCaseGroup* testGroup)
2671 {
2672         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2673         map<string, string>                                     fragments;
2674         const deUint32                                          numDataPoints           = 256;
2675         RGBA                                                            defaultColors[4];
2676         vector<deInt16>                                         inputs                          = getInt16s(rnd, numDataPoints);
2677         vector<deInt32>                                         sOutputs;
2678         vector<deInt32>                                         uOutputs;
2679         GraphicsResources                                       resources;
2680         vector<string>                                          extensions;
2681         const deUint16                                          signBitMask                     = 0x8000;
2682         const deUint32                                          signExtendMask          = 0xffff0000;
2683         const StringTemplate                            capabilities            ("OpCapability ${cap}\n");
2684
2685         sOutputs.reserve(inputs.size());
2686         uOutputs.reserve(inputs.size());
2687
2688         for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
2689         {
2690                 uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
2691                 if (inputs[numNdx] & signBitMask)
2692                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
2693                 else
2694                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
2695         }
2696
2697         resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(inputs))));
2698
2699         extensions.push_back("VK_KHR_16bit_storage");
2700         fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"";
2701
2702         getDefaultColors(defaultColors);
2703
2704         struct IntegerFacts
2705         {
2706                 const char*     name;
2707                 const char*     type32;
2708                 const char*     type16;
2709                 const char* opcode;
2710                 bool            isSigned;
2711         };
2712
2713         const IntegerFacts      intFacts[]      =
2714         {
2715                 {"sint",        "%i32",         "%i16",         "OpSConvert",   true},
2716                 {"uint",        "%u32",         "%u16",         "OpUConvert",   false},
2717         };
2718
2719         const StringTemplate scalarPreMain              (
2720                         "${itype16} = OpTypeInt 16 ${signed}\n"
2721                         " %c_i32_256 = OpConstant %i32 256\n"
2722                         "   %up_i32 = OpTypePointer Uniform ${itype32}\n"
2723                         "   %up_i16 = OpTypePointer Uniform ${itype16}\n"
2724                         "   %ra_i32 = OpTypeArray ${itype32} %c_i32_256\n"
2725                         "   %ra_i16 = OpTypeArray ${itype16} %c_i32_256\n"
2726                         "   %SSBO32 = OpTypeStruct %ra_i32\n"
2727                         "   %SSBO16 = OpTypeStruct %ra_i16\n"
2728                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2729                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
2730                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2731                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
2732
2733         const StringTemplate scalarDecoration           (
2734                         "OpDecorate %ra_i32 ArrayStride 4\n"
2735                         "OpDecorate %ra_i16 ArrayStride 2\n"
2736                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2737                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
2738                         "OpDecorate %SSBO32 BufferBlock\n"
2739                         "OpDecorate %SSBO16 ${indecor}\n"
2740                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2741                         "OpDecorate %ssbo16 DescriptorSet 0\n"
2742                         "OpDecorate %ssbo32 Binding 1\n"
2743                         "OpDecorate %ssbo16 Binding 0\n");
2744
2745         const StringTemplate scalarTestFunc     (
2746                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
2747                         "    %param = OpFunctionParameter %v4f32\n"
2748
2749                         "%entry = OpLabel\n"
2750                         "    %i = OpVariable %fp_i32 Function\n"
2751                         "         OpStore %i %c_i32_0\n"
2752                         "         OpBranch %loop\n"
2753
2754                         " %loop = OpLabel\n"
2755                         "   %15 = OpLoad %i32 %i\n"
2756                         "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
2757                         "         OpLoopMerge %merge %inc None\n"
2758                         "         OpBranchConditional %lt %write %merge\n"
2759
2760                         "%write = OpLabel\n"
2761                         "   %30 = OpLoad %i32 %i\n"
2762                         "  %src = OpAccessChain %up_i16 %ssbo16 %c_i32_0 %30\n"
2763                         "%val16 = OpLoad ${itype16} %src\n"
2764                         "%val32 = ${convert} ${itype32} %val16\n"
2765                         "  %dst = OpAccessChain %up_i32 %ssbo32 %c_i32_0 %30\n"
2766                         "         OpStore %dst %val32\n"
2767                         "         OpBranch %inc\n"
2768
2769                         "  %inc = OpLabel\n"
2770                         "   %37 = OpLoad %i32 %i\n"
2771                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2772                         "         OpStore %i %39\n"
2773                         "         OpBranch %loop\n"
2774                         "%merge = OpLabel\n"
2775                         "         OpReturnValue %param\n"
2776
2777                         "OpFunctionEnd\n");
2778
2779         const StringTemplate vecPreMain         (
2780                         "${itype16} = OpTypeInt 16 ${signed}\n"
2781                         "%c_i32_128 = OpConstant %i32 128\n"
2782                         "%v2itype16 = OpTypeVector ${itype16} 2\n"
2783                         "%v2itype32 = OpTypeVector ${itype32} 2\n"
2784                         " %up_v2i32 = OpTypePointer Uniform %v2itype32\n"
2785                         " %up_v2i16 = OpTypePointer Uniform %v2itype16\n"
2786                         " %ra_v2i32 = OpTypeArray %v2itype32 %c_i32_128\n"
2787                         " %ra_v2i16 = OpTypeArray %v2itype16 %c_i32_128\n"
2788                         "   %SSBO32 = OpTypeStruct %ra_v2i32\n"
2789                         "   %SSBO16 = OpTypeStruct %ra_v2i16\n"
2790                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2791                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
2792                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2793                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
2794
2795         const StringTemplate vecDecoration              (
2796                         "OpDecorate %ra_v2i32 ArrayStride 8\n"
2797                         "OpDecorate %ra_v2i16 ArrayStride 4\n"
2798                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2799                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
2800                         "OpDecorate %SSBO32 BufferBlock\n"
2801                         "OpDecorate %SSBO16 ${indecor}\n"
2802                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2803                         "OpDecorate %ssbo16 DescriptorSet 0\n"
2804                         "OpDecorate %ssbo32 Binding 1\n"
2805                         "OpDecorate %ssbo16 Binding 0\n");
2806
2807         const StringTemplate vecTestFunc        (
2808                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
2809                         "    %param = OpFunctionParameter %v4f32\n"
2810
2811                         "%entry = OpLabel\n"
2812                         "    %i = OpVariable %fp_i32 Function\n"
2813                         "         OpStore %i %c_i32_0\n"
2814                         "         OpBranch %loop\n"
2815
2816                         " %loop = OpLabel\n"
2817                         "   %15 = OpLoad %i32 %i\n"
2818                         "   %lt = OpSLessThan %bool %15 %c_i32_128\n"
2819                         "         OpLoopMerge %merge %inc None\n"
2820                         "         OpBranchConditional %lt %write %merge\n"
2821
2822                         "%write = OpLabel\n"
2823                         "   %30 = OpLoad %i32 %i\n"
2824                         "  %src = OpAccessChain %up_v2i16 %ssbo16 %c_i32_0 %30\n"
2825                         "%val16 = OpLoad %v2itype16 %src\n"
2826                         "%val32 = ${convert} %v2itype32 %val16\n"
2827                         "  %dst = OpAccessChain %up_v2i32 %ssbo32 %c_i32_0 %30\n"
2828                         "         OpStore %dst %val32\n"
2829                         "         OpBranch %inc\n"
2830
2831                         "  %inc = OpLabel\n"
2832                         "   %37 = OpLoad %i32 %i\n"
2833                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2834                         "         OpStore %i %39\n"
2835                         "         OpBranch %loop\n"
2836                         "%merge = OpLabel\n"
2837                         "         OpReturnValue %param\n"
2838
2839                         "OpFunctionEnd\n");
2840
2841         struct Category
2842         {
2843                 const char*                             name;
2844                 const StringTemplate&   preMain;
2845                 const StringTemplate&   decoration;
2846                 const StringTemplate&   testFunction;
2847         };
2848
2849         const Category          categories[]    =
2850         {
2851                 {"scalar", scalarPreMain, scalarDecoration, scalarTestFunc},
2852                 {"vector", vecPreMain, vecDecoration, vecTestFunc},
2853         };
2854
2855         for (deUint32 catIdx = 0; catIdx < DE_LENGTH_OF_ARRAY(categories); ++catIdx)
2856                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2857                         for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
2858                         {
2859                                 map<string, string>     specs;
2860                                 string                          name            = string(CAPABILITIES[capIdx].name) + "_" + categories[catIdx].name + "_" + intFacts[factIdx].name;
2861
2862                                 specs["cap"]                                    = CAPABILITIES[capIdx].cap;
2863                                 specs["indecor"]                                = CAPABILITIES[capIdx].decor;
2864                                 specs["itype32"]                                = intFacts[factIdx].type32;
2865                                 specs["itype16"]                                = intFacts[factIdx].type16;
2866                                 if (intFacts[factIdx].isSigned)
2867                                         specs["signed"]                         = "1";
2868                                 else
2869                                         specs["signed"]                         = "0";
2870                                 specs["convert"]                                = intFacts[factIdx].opcode;
2871
2872                                 fragments["pre_main"]                   = categories[catIdx].preMain.specialize(specs);
2873                                 fragments["testfun"]                    = categories[catIdx].testFunction.specialize(specs);
2874                                 fragments["capability"]                 = capabilities.specialize(specs);
2875                                 fragments["decoration"]                 = categories[catIdx].decoration.specialize(specs);
2876
2877                                 resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
2878                                 resources.outputs.clear();
2879                                 if (intFacts[factIdx].isSigned)
2880                                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(sOutputs))));
2881                                 else
2882                                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(uOutputs))));
2883
2884                                 createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
2885                         }
2886 }
2887
2888 void addGraphics16BitStorageUniformFloat16To32Group (tcu::TestCaseGroup* testGroup)
2889 {
2890         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2891         map<string, string>                                     fragments;
2892         GraphicsResources                                       resources;
2893         vector<string>                                          extensions;
2894         const deUint32                                          numDataPoints           = 256;
2895         RGBA                                                            defaultColors[4];
2896         const StringTemplate                            capabilities            ("OpCapability ${cap}\n");
2897         vector<deFloat16>                                       float16Data                     = getFloat16s(rnd, numDataPoints);
2898         vector<float>                                           float32Data;
2899
2900         float32Data.reserve(numDataPoints);
2901         for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
2902                 float32Data.push_back(deFloat16To32(float16Data[numIdx]));
2903
2904         resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16Data))));
2905         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data))));
2906         resources.verifyIO = check32BitFloats;
2907
2908         extensions.push_back("VK_KHR_16bit_storage");
2909         fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"";
2910
2911         getDefaultColors(defaultColors);
2912
2913         { // scalar cases
2914                 fragments["pre_main"]                           =
2915                         "      %f16 = OpTypeFloat 16\n"
2916                         "%c_i32_256 = OpConstant %i32 256\n"
2917                         "   %up_f32 = OpTypePointer Uniform %f32\n"
2918                         "   %up_f16 = OpTypePointer Uniform %f16\n"
2919                         "   %ra_f32 = OpTypeArray %f32 %c_i32_256\n"
2920                         "   %ra_f16 = OpTypeArray %f16 %c_i32_256\n"
2921                         "   %SSBO32 = OpTypeStruct %ra_f32\n"
2922                         "   %SSBO16 = OpTypeStruct %ra_f16\n"
2923                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2924                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
2925                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2926                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
2927
2928                 const StringTemplate decoration         (
2929                         "OpDecorate %ra_f32 ArrayStride 4\n"
2930                         "OpDecorate %ra_f16 ArrayStride 2\n"
2931                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2932                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
2933                         "OpDecorate %SSBO32 BufferBlock\n"
2934                         "OpDecorate %SSBO16 ${indecor}\n"
2935                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2936                         "OpDecorate %ssbo16 DescriptorSet 0\n"
2937                         "OpDecorate %ssbo32 Binding 1\n"
2938                         "OpDecorate %ssbo16 Binding 0\n");
2939
2940                 // ssbo32[] <- convert ssbo16[] to 32bit float
2941                 fragments["testfun"]                            =
2942                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
2943                         "    %param = OpFunctionParameter %v4f32\n"
2944
2945                         "%entry = OpLabel\n"
2946                         "    %i = OpVariable %fp_i32 Function\n"
2947                         "         OpStore %i %c_i32_0\n"
2948                         "         OpBranch %loop\n"
2949
2950                         " %loop = OpLabel\n"
2951                         "   %15 = OpLoad %i32 %i\n"
2952                         "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
2953                         "         OpLoopMerge %merge %inc None\n"
2954                         "         OpBranchConditional %lt %write %merge\n"
2955
2956                         "%write = OpLabel\n"
2957                         "   %30 = OpLoad %i32 %i\n"
2958                         "  %src = OpAccessChain %up_f16 %ssbo16 %c_i32_0 %30\n"
2959                         "%val16 = OpLoad %f16 %src\n"
2960                         "%val32 = OpFConvert %f32 %val16\n"
2961                         "  %dst = OpAccessChain %up_f32 %ssbo32 %c_i32_0 %30\n"
2962                         "         OpStore %dst %val32\n"
2963                         "         OpBranch %inc\n"
2964
2965                         "  %inc = OpLabel\n"
2966                         "   %37 = OpLoad %i32 %i\n"
2967                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2968                         "         OpStore %i %39\n"
2969                         "         OpBranch %loop\n"
2970
2971                         "%merge = OpLabel\n"
2972                         "         OpReturnValue %param\n"
2973
2974                         "OpFunctionEnd\n";
2975
2976                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2977                 {
2978                         map<string, string>     specs;
2979                         string                          testName        = string(CAPABILITIES[capIdx].name) + "_scalar_float";
2980
2981                         specs["cap"]                                    = CAPABILITIES[capIdx].cap;
2982                         specs["indecor"]                                = CAPABILITIES[capIdx].decor;
2983
2984                         fragments["capability"]                 = capabilities.specialize(specs);
2985                         fragments["decoration"]                 = decoration.specialize(specs);
2986
2987                         resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
2988
2989                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
2990                 }
2991         }
2992
2993         { // vector cases
2994                 fragments["pre_main"]                           =
2995                         "      %f16 = OpTypeFloat 16\n"
2996                         "%c_i32_128 = OpConstant %i32 128\n"
2997                         "        %v2f16 = OpTypeVector %f16 2\n"
2998                         " %up_v2f32 = OpTypePointer Uniform %v2f32\n"
2999                         " %up_v2f16 = OpTypePointer Uniform %v2f16\n"
3000                         " %ra_v2f32 = OpTypeArray %v2f32 %c_i32_128\n"
3001                         " %ra_v2f16 = OpTypeArray %v2f16 %c_i32_128\n"
3002                         "   %SSBO32 = OpTypeStruct %ra_v2f32\n"
3003                         "   %SSBO16 = OpTypeStruct %ra_v2f16\n"
3004                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
3005                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
3006                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
3007                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
3008
3009                 const StringTemplate decoration         (
3010                         "OpDecorate %ra_v2f32 ArrayStride 8\n"
3011                         "OpDecorate %ra_v2f16 ArrayStride 4\n"
3012                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
3013                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
3014                         "OpDecorate %SSBO32 BufferBlock\n"
3015                         "OpDecorate %SSBO16 ${indecor}\n"
3016                         "OpDecorate %ssbo32 DescriptorSet 0\n"
3017                         "OpDecorate %ssbo16 DescriptorSet 0\n"
3018                         "OpDecorate %ssbo32 Binding 1\n"
3019                         "OpDecorate %ssbo16 Binding 0\n");
3020
3021                 // ssbo32[] <- convert ssbo16[] to 32bit float
3022                 fragments["testfun"]                            =
3023                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
3024                         "    %param = OpFunctionParameter %v4f32\n"
3025
3026                         "%entry = OpLabel\n"
3027                         "    %i = OpVariable %fp_i32 Function\n"
3028                         "         OpStore %i %c_i32_0\n"
3029                         "         OpBranch %loop\n"
3030
3031                         " %loop = OpLabel\n"
3032                         "   %15 = OpLoad %i32 %i\n"
3033                         "   %lt = OpSLessThan %bool %15 %c_i32_128\n"
3034                         "         OpLoopMerge %merge %inc None\n"
3035                         "         OpBranchConditional %lt %write %merge\n"
3036
3037                         "%write = OpLabel\n"
3038                         "   %30 = OpLoad %i32 %i\n"
3039                         "  %src = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30\n"
3040                         "%val16 = OpLoad %v2f16 %src\n"
3041                         "%val32 = OpFConvert %v2f32 %val16\n"
3042                         "  %dst = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30\n"
3043                         "         OpStore %dst %val32\n"
3044                         "         OpBranch %inc\n"
3045
3046                         "  %inc = OpLabel\n"
3047                         "   %37 = OpLoad %i32 %i\n"
3048                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
3049                         "         OpStore %i %39\n"
3050                         "         OpBranch %loop\n"
3051
3052                         "%merge = OpLabel\n"
3053                         "         OpReturnValue %param\n"
3054
3055                         "OpFunctionEnd\n";
3056
3057                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3058                 {
3059                         map<string, string>     specs;
3060                         string                          testName        = string(CAPABILITIES[capIdx].name) + "_vector_float";
3061
3062                         specs["cap"]                                    = CAPABILITIES[capIdx].cap;
3063                         specs["indecor"]                                = CAPABILITIES[capIdx].decor;
3064
3065                         fragments["capability"]                 = capabilities.specialize(specs);
3066                         fragments["decoration"]                 = decoration.specialize(specs);
3067
3068                         resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
3069
3070                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
3071                 }
3072         }
3073
3074         { // matrix cases
3075                 fragments["pre_main"]                           =
3076                         " %c_i32_32 = OpConstant %i32 32\n"
3077                         "      %f16 = OpTypeFloat 16\n"
3078                         "    %v2f16 = OpTypeVector %f16 2\n"
3079                         "  %m4x2f32 = OpTypeMatrix %v2f32 4\n"
3080                         "  %m4x2f16 = OpTypeMatrix %v2f16 4\n"
3081                         " %up_v2f32 = OpTypePointer Uniform %v2f32\n"
3082                         " %up_v2f16 = OpTypePointer Uniform %v2f16\n"
3083                         "%a8m4x2f32 = OpTypeArray %m4x2f32 %c_i32_32\n"
3084                         "%a8m4x2f16 = OpTypeArray %m4x2f16 %c_i32_32\n"
3085                         "   %SSBO32 = OpTypeStruct %a8m4x2f32\n"
3086                         "   %SSBO16 = OpTypeStruct %a8m4x2f16\n"
3087                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
3088                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
3089                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
3090                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
3091
3092                 const StringTemplate decoration         (
3093                         "OpDecorate %a8m4x2f32 ArrayStride 32\n"
3094                         "OpDecorate %a8m4x2f16 ArrayStride 16\n"
3095                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
3096                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
3097                         "OpMemberDecorate %SSBO32 0 MatrixStride 8\n"
3098                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
3099                         "OpMemberDecorate %SSBO16 0 ColMajor\n"
3100                         "OpMemberDecorate %SSBO16 0 MatrixStride 4\n"
3101                         "OpDecorate %SSBO32 BufferBlock\n"
3102                         "OpDecorate %SSBO16 ${indecor}\n"
3103                         "OpDecorate %ssbo32 DescriptorSet 0\n"
3104                         "OpDecorate %ssbo16 DescriptorSet 0\n"
3105                         "OpDecorate %ssbo32 Binding 1\n"
3106                         "OpDecorate %ssbo16 Binding 0\n");
3107
3108                 fragments["testfun"]                            =
3109                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
3110                         "    %param = OpFunctionParameter %v4f32\n"
3111
3112                         "%entry = OpLabel\n"
3113                         "    %i = OpVariable %fp_i32 Function\n"
3114                         "         OpStore %i %c_i32_0\n"
3115                         "         OpBranch %loop\n"
3116
3117                         " %loop = OpLabel\n"
3118                         "   %15 = OpLoad %i32 %i\n"
3119                         "   %lt = OpSLessThan %bool %15 %c_i32_32\n"
3120                         "         OpLoopMerge %merge %inc None\n"
3121                         "         OpBranchConditional %lt %write %merge\n"
3122
3123                         "  %write = OpLabel\n"
3124                         "     %30 = OpLoad %i32 %i\n"
3125                         "  %src_0 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_0\n"
3126                         "  %src_1 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_1\n"
3127                         "  %src_2 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_2\n"
3128                         "  %src_3 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_3\n"
3129                         "%val16_0 = OpLoad %v2f16 %src_0\n"
3130                         "%val16_1 = OpLoad %v2f16 %src_1\n"
3131                         "%val16_2 = OpLoad %v2f16 %src_2\n"
3132                         "%val16_3 = OpLoad %v2f16 %src_3\n"
3133                         "%val32_0 = OpFConvert %v2f32 %val16_0\n"
3134                         "%val32_1 = OpFConvert %v2f32 %val16_1\n"
3135                         "%val32_2 = OpFConvert %v2f32 %val16_2\n"
3136                         "%val32_3 = OpFConvert %v2f32 %val16_3\n"
3137                         "  %dst_0 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_0\n"
3138                         "  %dst_1 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_1\n"
3139                         "  %dst_2 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_2\n"
3140                         "  %dst_3 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_3\n"
3141                         "           OpStore %dst_0 %val32_0\n"
3142                         "           OpStore %dst_1 %val32_1\n"
3143                         "           OpStore %dst_2 %val32_2\n"
3144                         "           OpStore %dst_3 %val32_3\n"
3145                         "           OpBranch %inc\n"
3146
3147                         "  %inc = OpLabel\n"
3148                         "   %37 = OpLoad %i32 %i\n"
3149                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
3150                         "         OpStore %i %39\n"
3151                         "         OpBranch %loop\n"
3152
3153                         "%merge = OpLabel\n"
3154                         "         OpReturnValue %param\n"
3155
3156                         "OpFunctionEnd\n";
3157
3158                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3159                 {
3160                         map<string, string>     specs;
3161                         string                          testName        = string(CAPABILITIES[capIdx].name) + "_matrix_float";
3162
3163                         specs["cap"]                                    = CAPABILITIES[capIdx].cap;
3164                         specs["indecor"]                                = CAPABILITIES[capIdx].decor;
3165
3166                         fragments["capability"]                 = capabilities.specialize(specs);
3167                         fragments["decoration"]                 = decoration.specialize(specs);
3168
3169                         resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
3170
3171                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
3172                 }
3173         }
3174 }
3175
3176 } // anonymous
3177
3178 tcu::TestCaseGroup* create16BitStorageComputeGroup (tcu::TestContext& testCtx)
3179 {
3180         de::MovePtr<tcu::TestCaseGroup> group           (new tcu::TestCaseGroup(testCtx, "16bit_storage", "Compute tests for VK_KHR_16bit_storage extension"));
3181         addTestGroup(group.get(), "uniform_32_to_16", "32bit floats/ints to 16bit tests under capability StorageUniform{|BufferBlock}", addCompute16bitStorageUniform32To16Group);
3182         addTestGroup(group.get(), "uniform_16_to_32", "16bit floats/ints to 32bit tests under capability StorageUniform{|BufferBlock}", addCompute16bitStorageUniform16To32Group);
3183         addTestGroup(group.get(), "push_constant_16_to_32", "16bit floats/ints to 32bit tests under capability StoragePushConstant16", addCompute16bitStoragePushConstant16To32Group);
3184
3185         return group.release();
3186 }
3187
3188 tcu::TestCaseGroup* create16BitStorageGraphicsGroup (tcu::TestContext& testCtx)
3189 {
3190         de::MovePtr<tcu::TestCaseGroup> group           (new tcu::TestCaseGroup(testCtx, "16bit_storage", "Graphics tests for VK_KHR_16bit_storage extension"));
3191
3192         addTestGroup(group.get(), "uniform_float_32_to_16", "32-bit floats into 16-bit tests under capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformFloat32To16Group);
3193         addTestGroup(group.get(), "uniform_float_16_to_32", "16-bit floats into 32-bit testsunder capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformFloat16To32Group);
3194         addTestGroup(group.get(), "uniform_int_32_to_16", "32-bit int into 16-bit tests under capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformInt32To16Group);
3195         addTestGroup(group.get(), "uniform_int_16_to_32", "16-bit int into 32-bit tests under capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformInt16To32Group);
3196         addTestGroup(group.get(), "input_output_float_32_to_16", "32-bit floats into 16-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputFloat32To16Group);
3197         addTestGroup(group.get(), "input_output_float_16_to_32", "16-bit floats into 32-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputFloat16To32Group);
3198         addTestGroup(group.get(), "input_output_int_32_to_16", "32-bit int into 16-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputInt32To16Group);
3199         addTestGroup(group.get(), "input_output_int_16_to_32", "16-bit int into 32-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputInt16To32Group);
3200         addTestGroup(group.get(), "push_constant_float_16_to_32", "16-bit floats into 32-bit tests under capability StoragePushConstant16", addGraphics16BitStoragePushConstantFloat16To32Group);
3201         addTestGroup(group.get(), "push_constant_int_16_to_32", "16-bit int into 32-bit tests under capability StoragePushConstant16", addGraphics16BitStoragePushConstantInt16To32Group);
3202
3203         return group.release();
3204 }
3205
3206 } // SpirVAssembly
3207 } // vkt