external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsm16bitStorageTests.cpp

   1 /*-------------------------------------------------------------------------
   2  * Vulkan Conformance Tests
   3  * ------------------------
   4  *
   5  * Copyright (c) 2017 Google Inc.
   6  *
   7  * Licensed under the Apache License, Version 2.0 (the "License");
   8  * you may not use this file except in compliance with the License.
   9  * You may obtain a copy of the License at
  10  *
  11  *      http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  *
  19  *//*!
  20  * \file
  21  * \brief SPIR-V Assembly Tests for the VK_KHR_16bit_storage
  22  *//*--------------------------------------------------------------------*/
  23
  24 // VK_KHR_16bit_storage
  25 //
  26 // \todo [2017-02-08 antiagainst] Additional corner cases to check:
  27 //
  28 // * Test OpAccessChain with subword types
  29 //  * For newly enabled types T:
  30 //    * For composite types: vector, matrix, structures, array over T:
  31 //      1. Use OpAccessChain to form a pointer to a subword type.
  32 //      2. Load the subword value X16.
  33 //      3. Convert X16 to X32.
  34 //      4. Store X32 to BufferBlock.
  35 //      5. Host inspects X32.
  36 // * Test {StorageInputOutput16} 16-to-16:
  37 //   * For newly enabled types T:
  38 //     1. Host creates X16 stream values of type T.
  39 //     2. Shaders have corresponding capability.
  40 //     3. For each viable shader stage:
  41 //       3a. Load X16 Input variable.
  42 //       3b. Store X16 to Output variable.
  43 //     4. Host inspects resulting values.
  44 // * Test {StorageInputOutput16} 16-to-16 one value to two:
  45 //     Like the previous test, but write X16 to two different output variables.
  46 //     (Checks that the 16-bit intermediate value can be used twice.)
  47
  48 #include "vktSpvAsm16bitStorageTests.hpp"
  49
  50 #include "tcuFloat.hpp"
  51 #include "tcuRGBA.hpp"
  52 #include "tcuStringTemplate.hpp"
  53 #include "tcuTestLog.hpp"
  54 #include "tcuVectorUtil.hpp"
  55
  56 #include "vkDefs.hpp"
  57 #include "vkDeviceUtil.hpp"
  58 #include "vkMemUtil.hpp"
  59 #include "vkPlatform.hpp"
  60 #include "vkPrograms.hpp"
  61 #include "vkQueryUtil.hpp"
  62 #include "vkRef.hpp"
  63 #include "vkRefUtil.hpp"
  64 #include "vkStrUtil.hpp"
  65 #include "vkTypeUtil.hpp"
  66
  67 #include "deRandom.hpp"
  68 #include "deStringUtil.hpp"
  69 #include "deUniquePtr.hpp"
  70 #include "deMath.h"
  71
  72 #include "vktSpvAsmComputeShaderCase.hpp"
  73 #include "vktSpvAsmComputeShaderTestUtil.hpp"
  74 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
  75 #include "vktTestCaseUtil.hpp"
  76 #include "vktTestGroupUtil.hpp"
  77
  78 #include <limits>
  79 #include <map>
  80 #include <string>
  81 #include <sstream>
  82 #include <utility>
  83
  84 namespace vkt
  85 {
  86 namespace SpirVAssembly
  87 {
  88
  89 using namespace vk;
  90 using std::map;
  91 using std::string;
  92 using std::vector;
  93 using tcu::IVec3;
  94 using tcu::IVec4;
  95 using tcu::RGBA;
  96 using tcu::TestLog;
  97 using tcu::TestStatus;
  98 using tcu::Vec4;
  99 using de::UniquePtr;
 100 using tcu::StringTemplate;
 101 using tcu::Vec4;
 102
 103 namespace
 104 {
 105
 106 struct Capability
 107 {
 108         const char*                             name;
 109         const char*                             cap;
 110         const char*                             decor;
 111         vk::VkDescriptorType    dtype;
 112 };
 113
 114 static const Capability CAPABILITIES[]  =
 115 {
 116         {"uniform_buffer_block",        "StorageUniformBufferBlock16",  "BufferBlock",  VK_DESCRIPTOR_TYPE_STORAGE_BUFFER},
 117         {"uniform",                                     "StorageUniform16",                             "Block",                VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER},
 118 };
 119
 120 VulkanFeatures  get16BitStorageFeatures (const char* cap)
 121 {
 122         VulkanFeatures features;
 123         if (string(cap) == "uniform_buffer_block")
 124                 features.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
 125         else if (string(cap) == "uniform")
 126                 features.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM;
 127         else
 128                 DE_ASSERT(false && "not supported");
 129
 130         return features;
 131 }
 132
 133
 134 // Batch function to check arrays of 16-bit floats.
 135 //
 136 // For comparing 16-bit floats, we need to consider both RTZ and RTE. So we can only recalculate
 137 // the expected values here instead of get the expected values directly from the test case.
 138 // Thus we need original floats here but not expected outputs.
 139 template<RoundingModeFlags RoundingMode>
 140 bool graphicsCheck16BitFloats (const std::vector<Resource>&     originalFloats,
 141                                                            const vector<AllocationSp>&  outputAllocs,
 142                                                            const std::vector<Resource>& /* expectedOutputs */,
 143                                                            tcu::TestLog&                                log)
 144 {
 145         if (outputAllocs.size() != originalFloats.size())
 146                 return false;
 147
 148         for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
 149         {
 150                 vector<deUint8> originalBytes;
 151                 originalFloats[outputNdx].second->getBytes(originalBytes);
 152
 153                 const deUint16* returned        = static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
 154                 const float*    original        = reinterpret_cast<const float*>(&originalBytes.front());
 155                 const deUint32  count           = static_cast<deUint32>(originalBytes.size() / sizeof(float));
 156
 157                 for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
 158                         if (!compare16BitFloat(original[numNdx], returned[numNdx], RoundingMode, log))
 159                                 return false;
 160         }
 161
 162         return true;
 163 }
 164
 165 template<RoundingModeFlags RoundingMode>
 166 bool computeCheck16BitFloats (const std::vector<BufferSp>&      originalFloats,
 167                                                           const vector<AllocationSp>&   outputAllocs,
 168                                                           const std::vector<BufferSp>&  /* expectedOutputs */,
 169                                                           tcu::TestLog&                                 log)
 170 {
 171         if (outputAllocs.size() != originalFloats.size())
 172                 return false;
 173
 174         for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
 175         {
 176                 vector<deUint8> originalBytes;
 177                 originalFloats[outputNdx]->getBytes(originalBytes);
 178
 179                 const deUint16* returned        = static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
 180                 const float*    original        = reinterpret_cast<const float*>(&originalBytes.front());
 181                 const deUint32  count           = static_cast<deUint32>(originalBytes.size() / sizeof(float));
 182
 183                 for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
 184                         if (!compare16BitFloat(original[numNdx], returned[numNdx], RoundingMode, log))
 185                                 return false;
 186         }
 187
 188         return true;
 189 }
 190
 191
 192 // Batch function to check arrays of 32-bit floats.
 193 //
 194 // For comparing 32-bit floats, we just need the expected value precomputed in the test case.
 195 // So we need expected outputs here but not original floats.
 196 bool check32BitFloats (const std::vector<Resource>&             /* originalFloats */,
 197                                            const std::vector<AllocationSp>& outputAllocs,
 198                                            const std::vector<Resource>&         expectedOutputs,
 199                                            tcu::TestLog&                                        log)
 200 {
 201         if (outputAllocs.size() != expectedOutputs.size())
 202                 return false;
 203
 204         for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
 205         {
 206                 vector<deUint8> expectedBytes;
 207                 expectedOutputs[outputNdx].second->getBytes(expectedBytes);
 208
 209                 const float*    returnedAsFloat = static_cast<const float*>(outputAllocs[outputNdx]->getHostPtr());
 210                 const float*    expectedAsFloat = reinterpret_cast<const float*>(&expectedBytes.front());
 211                 const deUint32  count                   = static_cast<deUint32>(expectedBytes.size() / sizeof(float));
 212
 213                 for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
 214                         if (!compare32BitFloat(expectedAsFloat[numNdx], returnedAsFloat[numNdx], log))
 215                                 return false;
 216         }
 217
 218         return true;
 219 }
 220
 221 // Overload for compute pipeline
 222 bool check32BitFloats (const std::vector<BufferSp>&             /* originalFloats */,
 223                                            const std::vector<AllocationSp>& outputAllocs,
 224                                            const std::vector<BufferSp>&         expectedOutputs,
 225                                            tcu::TestLog&                                        log)
 226 {
 227         if (outputAllocs.size() != expectedOutputs.size())
 228                 return false;
 229
 230         for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
 231         {
 232                 vector<deUint8> expectedBytes;
 233                 expectedOutputs[outputNdx]->getBytes(expectedBytes);
 234
 235                 const float*    returnedAsFloat = static_cast<const float*>(outputAllocs[outputNdx]->getHostPtr());
 236                 const float*    expectedAsFloat = reinterpret_cast<const float*>(&expectedBytes.front());
 237                 const deUint32  count                   = static_cast<deUint32>(expectedBytes.size() / sizeof(float));
 238
 239                 for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
 240                         if (!compare32BitFloat(expectedAsFloat[numNdx], returnedAsFloat[numNdx], log))
 241                                 return false;
 242         }
 243
 244         return true;
 245 }
 246
 247 // Generate and return 32-bit integers.
 248 //
 249 // Expected count to be at least 16.
 250 vector<deInt32> getInt32s (de::Random& rnd, const deUint32 count)
 251 {
 252         vector<deInt32>         data;
 253
 254         data.reserve(count);
 255
 256         // Make sure we have boundary numbers.
 257         data.push_back(deInt32(0x00000000));  // 0
 258         data.push_back(deInt32(0x00000001));  // 1
 259         data.push_back(deInt32(0x0000002a));  // 42
 260         data.push_back(deInt32(0x00007fff));  // 32767
 261         data.push_back(deInt32(0x00008000));  // 32768
 262         data.push_back(deInt32(0x0000ffff));  // 65535
 263         data.push_back(deInt32(0x00010000));  // 65536
 264         data.push_back(deInt32(0x7fffffff));  // 2147483647
 265         data.push_back(deInt32(0x80000000));  // -2147483648
 266         data.push_back(deInt32(0x80000001));  // -2147483647
 267         data.push_back(deInt32(0xffff0000));  // -65536
 268         data.push_back(deInt32(0xffff0001));  // -65535
 269         data.push_back(deInt32(0xffff8000));  // -32768
 270         data.push_back(deInt32(0xffff8001));  // -32767
 271         data.push_back(deInt32(0xffffffd6));  // -42
 272         data.push_back(deInt32(0xffffffff));  // -1
 273
 274         DE_ASSERT(count >= data.size());
 275
 276         for (deUint32 numNdx = static_cast<deUint32>(data.size()); numNdx < count; ++numNdx)
 277                 data.push_back(static_cast<deInt32>(rnd.getUint32()));
 278
 279         return data;
 280 }
 281
 282 // Generate and return 16-bit integers.
 283 //
 284 // Expected count to be at least 8.
 285 vector<deInt16> getInt16s (de::Random& rnd, const deUint32 count)
 286 {
 287         vector<deInt16>         data;
 288
 289         data.reserve(count);
 290
 291         // Make sure we have boundary numbers.
 292         data.push_back(deInt16(0x0000));  // 0
 293         data.push_back(deInt16(0x0001));  // 1
 294         data.push_back(deInt16(0x002a));  // 42
 295         data.push_back(deInt16(0x7fff));  // 32767
 296         data.push_back(deInt16(0x8000));  // -32868
 297         data.push_back(deInt16(0x8001));  // -32767
 298         data.push_back(deInt16(0xffd6));  // -42
 299         data.push_back(deInt16(0xffff));  // -1
 300
 301         DE_ASSERT(count >= data.size());
 302
 303         for (deUint32 numNdx = static_cast<deUint32>(data.size()); numNdx < count; ++numNdx)
 304                 data.push_back(static_cast<deInt16>(rnd.getUint16()));
 305
 306         return data;
 307 }
 308
 309 // IEEE-754 floating point numbers:
 310 // +--------+------+----------+-------------+
 311 // | binary | sign | exponent | significand |
 312 // +--------+------+----------+-------------+
 313 // | 16-bit |  1   |    5     |     10      |
 314 // +--------+------+----------+-------------+
 315 // | 32-bit |  1   |    8     |     23      |
 316 // +--------+------+----------+-------------+
 317 //
 318 // 16-bit floats:
 319 //
  320 // 0   000 00   00 0000 0001 (0x0001: 2^-24:         minimum positive denormalized)
  321 // 0   000 00   11 1111 1111 (0x03ff: 2^-14 - 2^-24: maximum positive denormalized)
  322 // 0   000 01   00 0000 0000 (0x0400: 2^-14:         minimum positive normalized)
 323 //
 324 // 32-bit floats:
 325 //
 326 // 0   011 1110 1   001 0000 0000 0000 0000 0000 (0x3e900000: 0.28125: with exact match in 16-bit normalized)
 327 // 0   011 1000 1   000 0000 0011 0000 0000 0000 (0x38803000: exact half way within two 16-bit normalized; round to zero: 0x0401)
  328 // 1   011 1000 1   000 0000 0011 0000 0000 0000 (0xb8803000: exact half way within two 16-bit normalized; round to zero: 0x8401)
  329 // 0   011 1000 1   000 0000 1111 1111 0000 0000 (0x3880ff00: not exact half way within two 16-bit normalized; round to zero: 0x0407)
  330 // 1   011 1000 1   000 0000 1111 1111 0000 0000 (0xb880ff00: not exact half way within two 16-bit normalized; round to zero: 0x8407)
 331
 332
 333 // Generate and return 32-bit floats
 334 //
 335 // The first 24 number pairs are manually picked, while the rest are randomly generated.
 336 // Expected count to be at least 24 (numPicks).
 337 vector<float> getFloat32s (de::Random& rnd, deUint32 count)
 338 {
 339         vector<float>           float32;
 340
 341         float32.reserve(count);
 342
 343         // Zero
 344         float32.push_back(0.f);
 345         float32.push_back(-0.f);
 346         // Infinity
 347         float32.push_back(std::numeric_limits<float>::infinity());
 348         float32.push_back(-std::numeric_limits<float>::infinity());
 349         // SNaN
 350         float32.push_back(std::numeric_limits<float>::signaling_NaN());
 351         float32.push_back(-std::numeric_limits<float>::signaling_NaN());
 352         // QNaN
 353         float32.push_back(std::numeric_limits<float>::quiet_NaN());
 354         float32.push_back(-std::numeric_limits<float>::quiet_NaN());
 355
 356         // Denormalized 32-bit float matching 0 in 16-bit
 357         float32.push_back(deFloatLdExp(1.f, -127));
 358         float32.push_back(-deFloatLdExp(1.f, -127));
 359
 360         // Normalized 32-bit float matching 0 in 16-bit
 361         float32.push_back(deFloatLdExp(1.f, -100));
 362         float32.push_back(-deFloatLdExp(1.f, -100));
 363         // Normalized 32-bit float with exact denormalized match in 16-bit
 364         float32.push_back(deFloatLdExp(1.f, -24));  // 2e-24: minimum 16-bit positive denormalized
 365         float32.push_back(-deFloatLdExp(1.f, -24)); // 2e-24: maximum 16-bit negative denormalized
 366         // Normalized 32-bit float with exact normalized match in 16-bit
 367         float32.push_back(deFloatLdExp(1.f, -14));  // 2e-14: minimum 16-bit positive normalized
 368         float32.push_back(-deFloatLdExp(1.f, -14)); // 2e-14: maximum 16-bit negative normalized
 369         // Normalized 32-bit float falling above half way within two 16-bit normalized
 370         float32.push_back(bitwiseCast<float>(deUint32(0x3880ff00)));
 371         float32.push_back(bitwiseCast<float>(deUint32(0xb880ff00)));
 372         // Normalized 32-bit float falling exact half way within two 16-bit normalized
 373         float32.push_back(bitwiseCast<float>(deUint32(0x38803000)));
 374         float32.push_back(bitwiseCast<float>(deUint32(0xb8803000)));
 375         // Some number
 376         float32.push_back(0.28125f);
 377         float32.push_back(-0.28125f);
 378         // Normalized 32-bit float matching infinity in 16-bit
 379         float32.push_back(deFloatLdExp(1.f, 100));
 380         float32.push_back(-deFloatLdExp(1.f, 100));
 381
 382         const deUint32          numPicks        = static_cast<deUint32>(float32.size());
 383
 384         DE_ASSERT(count >= numPicks);
 385         count -= numPicks;
 386
 387         for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
 388                 float32.push_back(rnd.getFloat());
 389
 390         return float32;
 391 }
 392
 393 // IEEE-754 floating point numbers:
 394 // +--------+------+----------+-------------+
 395 // | binary | sign | exponent | significand |
 396 // +--------+------+----------+-------------+
 397 // | 16-bit |  1   |    5     |     10      |
 398 // +--------+------+----------+-------------+
 399 // | 32-bit |  1   |    8     |     23      |
 400 // +--------+------+----------+-------------+
 401 //
 402 // 16-bit floats:
 403 //
  404 // 0   000 00   00 0000 0001 (0x0001: 2^-24:         minimum positive denormalized)
  405 // 0   000 00   11 1111 1111 (0x03ff: 2^-14 - 2^-24: maximum positive denormalized)
  406 // 0   000 01   00 0000 0000 (0x0400: 2^-14:         minimum positive normalized)
 407 //
 408 // 0   000 00   00 0000 0000 (0x0000: +0)
 409 // 0   111 11   00 0000 0000 (0x7c00: +Inf)
 410 // 0   000 00   11 1111 0000 (0x03f0: +Denorm)
 411 // 0   000 01   00 0000 0001 (0x0401: +Norm)
 412 // 0   111 11   00 0000 1111 (0x7c0f: +SNaN)
  413 // 0   111 11   00 1111 0000 (0x7cf0: +QNaN)
 414
 415
 416 // Generate and return 16-bit floats and their corresponding 32-bit values.
 417 //
 418 // The first 14 number pairs are manually picked, while the rest are randomly generated.
 419 // Expected count to be at least 14 (numPicks).
 420 vector<deFloat16> getFloat16s (de::Random& rnd, deUint32 count)
 421 {
 422         vector<deFloat16>       float16;
 423
 424         float16.reserve(count);
 425
 426         // Zero
 427         float16.push_back(deUint16(0x0000));
 428         float16.push_back(deUint16(0x8000));
 429         // Infinity
 430         float16.push_back(deUint16(0x7c00));
 431         float16.push_back(deUint16(0xfc00));
 432         // SNaN
 433         float16.push_back(deUint16(0x7c0f));
 434         float16.push_back(deUint16(0xfc0f));
 435         // QNaN
 436         float16.push_back(deUint16(0x7cf0));
 437         float16.push_back(deUint16(0xfcf0));
 438
 439         // Denormalized
 440         float16.push_back(deUint16(0x03f0));
 441         float16.push_back(deUint16(0x83f0));
 442         // Normalized
 443         float16.push_back(deUint16(0x0401));
 444         float16.push_back(deUint16(0x8401));
 445         // Some normal number
 446         float16.push_back(deUint16(0x14cb));
 447         float16.push_back(deUint16(0x94cb));
 448
 449         const deUint32          numPicks        = static_cast<deUint32>(float16.size());
 450
 451         DE_ASSERT(count >= numPicks);
 452         count -= numPicks;
 453
 454         for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
 455                 float16.push_back(rnd.getUint16());
 456
 457         return float16;
 458 }
 459
 460 void addCompute16bitStorageUniform16To32Group (tcu::TestCaseGroup* group)
 461 {
 462         tcu::TestContext&                               testCtx                 = group->getTestContext();
 463         de::Random                                              rnd                             (deStringHash(group->getName()));
 464         const int                                               numElements             = 128;
 465
 466         const StringTemplate                    shaderTemplate  (
 467                 "OpCapability Shader\n"
 468                 "OpCapability ${capability}\n"
 469                 "OpExtension \"SPV_KHR_16bit_storage\"\n"
 470                 "OpMemoryModel Logical GLSL450\n"
 471                 "OpEntryPoint GLCompute %main \"main\" %id\n"
 472                 "OpExecutionMode %main LocalSize 1 1 1\n"
 473                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
 474
 475                 "${stride}"
 476
 477                 "OpMemberDecorate %SSBO32 0 Offset 0\n"
 478                 "OpMemberDecorate %SSBO16 0 Offset 0\n"
 479                 "OpDecorate %SSBO32 BufferBlock\n"
 480                 "OpDecorate %SSBO16 ${storage}\n"
 481                 "OpDecorate %ssbo32 DescriptorSet 0\n"
 482                 "OpDecorate %ssbo16 DescriptorSet 0\n"
 483                 "OpDecorate %ssbo32 Binding 1\n"
 484                 "OpDecorate %ssbo16 Binding 0\n"
 485
 486                 "${matrix_decor:opt}\n"
 487
 488                 "%bool      = OpTypeBool\n"
 489                 "%void      = OpTypeVoid\n"
 490                 "%voidf     = OpTypeFunction %void\n"
 491                 "%u32       = OpTypeInt 32 0\n"
 492                 "%i32       = OpTypeInt 32 1\n"
 493                 "%f32       = OpTypeFloat 32\n"
 494                 "%uvec3     = OpTypeVector %u32 3\n"
 495                 "%fvec3     = OpTypeVector %f32 3\n"
 496                 "%uvec3ptr  = OpTypePointer Input %uvec3\n"
 497                 "%i32ptr    = OpTypePointer Uniform %i32\n"
 498                 "%f32ptr    = OpTypePointer Uniform %f32\n"
 499
 500                 "%zero      = OpConstant %i32 0\n"
 501                 "%c_i32_1   = OpConstant %i32 1\n"
 502                 "%c_i32_2   = OpConstant %i32 2\n"
 503                 "%c_i32_3   = OpConstant %i32 3\n"
 504                 "%c_i32_16  = OpConstant %i32 16\n"
 505                 "%c_i32_32  = OpConstant %i32 32\n"
 506                 "%c_i32_64  = OpConstant %i32 64\n"
 507                 "%c_i32_128 = OpConstant %i32 128\n"
 508
 509                 "%i32arr    = OpTypeArray %i32 %c_i32_128\n"
 510                 "%f32arr    = OpTypeArray %f32 %c_i32_128\n"
 511
 512                 "${types}\n"
 513                 "${matrix_types:opt}\n"
 514
 515                 "%SSBO32    = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
 516                 "%SSBO16    = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
 517                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
 518                 "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
 519                 "%ssbo32    = OpVariable %up_SSBO32 Uniform\n"
 520                 "%ssbo16    = OpVariable %up_SSBO16 Uniform\n"
 521
 522                 "%id        = OpVariable %uvec3ptr Input\n"
 523
 524                 "%main      = OpFunction %void None %voidf\n"
 525                 "%label     = OpLabel\n"
 526                 "%idval     = OpLoad %uvec3 %id\n"
 527                 "%x         = OpCompositeExtract %u32 %idval 0\n"
 528                 "%inloc     = OpAccessChain %${base16}ptr %ssbo16 %zero %x ${index0:opt}\n"
 529                 "%val16     = OpLoad %${base16} %inloc\n"
 530                 "%val32     = ${convert} %${base32} %val16\n"
 531                 "%outloc    = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
 532                 "             OpStore %outloc %val32\n"
 533                 "${matrix_store:opt}\n"
 534                 "             OpReturn\n"
 535                 "             OpFunctionEnd\n");
 536
 537         {  // floats
 538                 const char                                                                              floatTypes[]    =
 539                         "%f16       = OpTypeFloat 16\n"
 540                         "%f16ptr    = OpTypePointer Uniform %f16\n"
 541                         "%f16arr    = OpTypeArray %f16 %c_i32_128\n"
 542                         "%v2f16     = OpTypeVector %f16 2\n"
 543                         "%v2f32     = OpTypeVector %f32 2\n"
 544                         "%v2f16ptr  = OpTypePointer Uniform %v2f16\n"
 545                         "%v2f32ptr  = OpTypePointer Uniform %v2f32\n"
 546                         "%v2f16arr  = OpTypeArray %v2f16 %c_i32_64\n"
 547                         "%v2f32arr  = OpTypeArray %v2f32 %c_i32_64\n";
 548
 549                 struct CompositeType
 550                 {
 551                         const char*     name;
 552                         const char*     base32;
 553                         const char*     base16;
 554                         const char*     stride;
 555                         unsigned        count;
 556                 };
 557
 558                 const CompositeType     cTypes[]        =
 559                 {
 560                         {"scalar",      "f32",          "f16",          "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",                         numElements},
 561                         {"vector",      "v2f32",        "v2f16",        "OpDecorate %v2f32arr ArrayStride 8\nOpDecorate %v2f16arr ArrayStride 4\n",                     numElements / 2},
 562                         {"matrix",      "v2f32",        "v2f16",        "OpDecorate %m4v2f32arr ArrayStride 32\nOpDecorate %m4v2f16arr ArrayStride 16\n",       numElements / 8},
 563                 };
 564
 565                 vector<deFloat16>       float16Data                     = getFloat16s(rnd, numElements);
 566                 vector<float>           float32Data;
 567
 568                 float32Data.reserve(numElements);
 569                 for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
 570                         float32Data.push_back(deFloat16To32(float16Data[numIdx]));
 571
 572                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
 573                         for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
 574                         {
 575                                 ComputeShaderSpec               spec;
 576                                 map<string, string>             specs;
 577                                 string                                  testName        = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name + "_float";
 578
 579                                 specs["capability"]             = CAPABILITIES[capIdx].cap;
 580                                 specs["storage"]                = CAPABILITIES[capIdx].decor;
 581                                 specs["stride"]                 = cTypes[tyIdx].stride;
 582                                 specs["base32"]                 = cTypes[tyIdx].base32;
 583                                 specs["base16"]                 = cTypes[tyIdx].base16;
 584                                 specs["types"]                  = floatTypes;
 585                                 specs["convert"]                = "OpFConvert";
 586
 587                                 if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
 588                                 {
 589                                         specs["index0"]                 = "%zero";
 590                                         specs["matrix_prefix"]  = "m4";
 591                                         specs["matrix_types"]   =
 592                                                 "%m4v2f16 = OpTypeMatrix %v2f16 4\n"
 593                                                 "%m4v2f32 = OpTypeMatrix %v2f32 4\n"
 594                                                 "%m4v2f16arr = OpTypeArray %m4v2f16 %c_i32_16\n"
 595                                                 "%m4v2f32arr = OpTypeArray %m4v2f32 %c_i32_16\n";
 596                                         specs["matrix_decor"]   =
 597                                                 "OpMemberDecorate %SSBO32 0 ColMajor\n"
 598                                                 "OpMemberDecorate %SSBO32 0 MatrixStride 8\n"
 599                                                 "OpMemberDecorate %SSBO16 0 ColMajor\n"
 600                                                 "OpMemberDecorate %SSBO16 0 MatrixStride 4\n";
 601                                         specs["matrix_store"]   =
 602                                                 "%inloc_1  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_1\n"
 603                                                 "%val16_1  = OpLoad %v2f16 %inloc_1\n"
 604                                                 "%val32_1  = OpFConvert %v2f32 %val16_1\n"
 605                                                 "%outloc_1 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_1\n"
 606                                                 "            OpStore %outloc_1 %val32_1\n"
 607
 608                                                 "%inloc_2  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_2\n"
 609                                                 "%val16_2  = OpLoad %v2f16 %inloc_2\n"
 610                                                 "%val32_2  = OpFConvert %v2f32 %val16_2\n"
 611                                                 "%outloc_2 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_2\n"
 612                                                 "            OpStore %outloc_2 %val32_2\n"
 613
 614                                                 "%inloc_3  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_3\n"
 615                                                 "%val16_3  = OpLoad %v2f16 %inloc_3\n"
 616                                                 "%val32_3  = OpFConvert %v2f32 %val16_3\n"
 617                                                 "%outloc_3 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_3\n"
 618                                                 "            OpStore %outloc_3 %val32_3\n";
 619                                 }
 620
 621                                 spec.assembly                   = shaderTemplate.specialize(specs);
 622                                 spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
 623                                 spec.verifyIO                   = check32BitFloats;
 624                                 spec.inputTypes[0]              = CAPABILITIES[capIdx].dtype;
 625
 626                                 spec.inputs.push_back(BufferSp(new Float16Buffer(float16Data)));
 627                                 spec.outputs.push_back(BufferSp(new Float32Buffer(float32Data)));
 628                                 spec.extensions.push_back("VK_KHR_16bit_storage");
 629                                 spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
 630
 631                                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
 632                         }
 633         }
 634
 635         {  // Integers
 636                 const char              sintTypes[]             =
 637                         "%i16       = OpTypeInt 16 1\n"
 638                         "%i16ptr    = OpTypePointer Uniform %i16\n"
 639                         "%i16arr    = OpTypeArray %i16 %c_i32_128\n"
 640                         "%v4i16     = OpTypeVector %i16 4\n"
 641                         "%v4i32     = OpTypeVector %i32 4\n"
 642                         "%v4i16ptr  = OpTypePointer Uniform %v4i16\n"
 643                         "%v4i32ptr  = OpTypePointer Uniform %v4i32\n"
 644                         "%v4i16arr  = OpTypeArray %v4i16 %c_i32_32\n"
 645                         "%v4i32arr  = OpTypeArray %v4i32 %c_i32_32\n";
 646
 647                 const char              uintTypes[]             =
 648                         "%u16       = OpTypeInt 16 0\n"
 649                         "%u16ptr    = OpTypePointer Uniform %u16\n"
 650                         "%u32ptr    = OpTypePointer Uniform %u32\n"
 651                         "%u16arr    = OpTypeArray %u16 %c_i32_128\n"
 652                         "%u32arr    = OpTypeArray %u32 %c_i32_128\n"
 653                         "%v4u16     = OpTypeVector %u16 4\n"
 654                         "%v4u32     = OpTypeVector %u32 4\n"
 655                         "%v4u16ptr  = OpTypePointer Uniform %v4u16\n"
 656                         "%v4u32ptr  = OpTypePointer Uniform %v4u32\n"
 657                         "%v4u16arr  = OpTypeArray %v4u16 %c_i32_32\n"
 658                         "%v4u32arr  = OpTypeArray %v4u32 %c_i32_32\n";
 659
 660                 struct CompositeType
 661                 {
 662                         const char*     name;
 663                         bool            isSigned;
 664                         const char* types;
 665                         const char*     base32;
 666                         const char*     base16;
 667                         const char* opcode;
 668                         const char*     stride;
 669                         unsigned        count;
 670                 };
 671
 672                 const CompositeType     cTypes[]        =
 673                 {
 674                         {"scalar_sint", true,   sintTypes,      "i32",          "i16",          "OpSConvert",   "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",                 numElements},
 675                         {"scalar_uint", false,  uintTypes,      "u32",          "u16",          "OpUConvert",   "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",                 numElements},
 676                         {"vector_sint", true,   sintTypes,      "v4i32",        "v4i16",        "OpSConvert",   "OpDecorate %v4i32arr ArrayStride 16\nOpDecorate %v4i16arr ArrayStride 8\n",    numElements / 4},
 677                         {"vector_uint", false,  uintTypes,      "v4u32",        "v4u16",        "OpUConvert",   "OpDecorate %v4u32arr ArrayStride 16\nOpDecorate %v4u16arr ArrayStride 8\n",    numElements / 4},
 678                 };
 679
 680                 vector<deInt16> inputs                  = getInt16s(rnd, numElements);
 681                 vector<deInt32> sOutputs;
 682                 vector<deInt32> uOutputs;
 683                 const deUint16  signBitMask             = 0x8000;
 684                 const deUint32  signExtendMask  = 0xffff0000;
 685
 686                 sOutputs.reserve(inputs.size());
 687                 uOutputs.reserve(inputs.size());
 688
 689                 for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
 690                 {
 691                         uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
 692                         if (inputs[numNdx] & signBitMask)
 693                                 sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
 694                         else
 695                                 sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
 696                 }
 697
 698                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
 699                         for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
 700                         {
 701                                 ComputeShaderSpec               spec;
 702                                 map<string, string>             specs;
 703                                 string                                  testName        = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name;
 704
 705                                 specs["capability"]             = CAPABILITIES[capIdx].cap;
 706                                 specs["storage"]                = CAPABILITIES[capIdx].decor;
 707                                 specs["stride"]                 = cTypes[tyIdx].stride;
 708                                 specs["base32"]                 = cTypes[tyIdx].base32;
 709                                 specs["base16"]                 = cTypes[tyIdx].base16;
 710                                 specs["types"]                  = cTypes[tyIdx].types;
 711                                 specs["convert"]                = cTypes[tyIdx].opcode;
 712
 713                                 spec.assembly                   = shaderTemplate.specialize(specs);
 714                                 spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
 715                                 spec.inputTypes[0]              = CAPABILITIES[capIdx].dtype;
 716
 717                                 spec.inputs.push_back(BufferSp(new Int16Buffer(inputs)));
 718                                 if (cTypes[tyIdx].isSigned)
 719                                         spec.outputs.push_back(BufferSp(new Int32Buffer(sOutputs)));
 720                                 else
 721                                         spec.outputs.push_back(BufferSp(new Int32Buffer(uOutputs)));
 722                                 spec.extensions.push_back("VK_KHR_16bit_storage");
 723                                 spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
 724
 725                                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
 726                         }
 727         }
 728 }
 729
 730 void addCompute16bitStoragePushConstant16To32Group (tcu::TestCaseGroup* group)
 731 {
             // Adds compute-shader test cases to `group`. Each shader loads a 16-bit value
             // from a push-constant block (%pc16), widens it to 32 bits with ${convert}
             // (OpFConvert, OpSConvert or OpUConvert) and stores the result into a
             // BufferBlock-decorated struct bound at descriptor set 0, binding 0 (%ssbo32).
             // Float cases cover scalar, 4-component vector and 2-column matrix layouts;
             // integer cases cover scalar and 2-component vector, signed and unsigned.
 732         tcu::TestContext&                               testCtx                 = group->getTestContext();
             // The random generator is seeded from a hash of the group name.
 733         de::Random                                              rnd                             (deStringHash(group->getName()));
             // Number of 16-bit input elements; with the 2-byte array stride used below this
             // is 128 bytes of push-constant data.
             // NOTE(review): presumably sized to fit the minimum push-constant limit -- confirm.
 734         const int                                               numElements             = 64;
 735
             // Shader skeleton shared by all cases. Local size is 1x1x1, so every work group
             // is a single invocation that converts the element indexed by
             // GlobalInvocationId.x. The ${...:opt} placeholders are only supplied by the
             // float matrix case below.
 736         const StringTemplate                    shaderTemplate  (
 737                 "OpCapability Shader\n"
 738                 "OpCapability StoragePushConstant16\n"
 739                 "OpExtension \"SPV_KHR_16bit_storage\"\n"
 740                 "OpMemoryModel Logical GLSL450\n"
 741                 "OpEntryPoint GLCompute %main \"main\" %id\n"
 742                 "OpExecutionMode %main LocalSize 1 1 1\n"
 743                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
 744
 745                 "${stride}"
 746
 747                 "OpDecorate %PC16 Block\n"
 748                 "OpMemberDecorate %PC16 0 Offset 0\n"
 749                 "OpMemberDecorate %SSBO32 0 Offset 0\n"
 750                 "OpDecorate %SSBO32 BufferBlock\n"
 751                 "OpDecorate %ssbo32 DescriptorSet 0\n"
 752                 "OpDecorate %ssbo32 Binding 0\n"
 753
 754                 "${matrix_decor:opt}\n"
 755
 756                 "%bool      = OpTypeBool\n"
 757                 "%void      = OpTypeVoid\n"
 758                 "%voidf     = OpTypeFunction %void\n"
 759                 "%u32       = OpTypeInt 32 0\n"
 760                 "%i32       = OpTypeInt 32 1\n"
 761                 "%f32       = OpTypeFloat 32\n"
 762                 "%uvec3     = OpTypeVector %u32 3\n"
 763                 "%fvec3     = OpTypeVector %f32 3\n"
 764                 "%uvec3ptr  = OpTypePointer Input %uvec3\n"
 765                 "%i32ptr    = OpTypePointer Uniform %i32\n"
 766                 "%f32ptr    = OpTypePointer Uniform %f32\n"
 767
 768                 "%zero      = OpConstant %i32 0\n"
 769                 "%c_i32_1   = OpConstant %i32 1\n"
 770                 "%c_i32_8   = OpConstant %i32 8\n"
 771                 "%c_i32_16  = OpConstant %i32 16\n"
 772                 "%c_i32_32  = OpConstant %i32 32\n"
 773                 "%c_i32_64  = OpConstant %i32 64\n"
 774
 775                 "%i32arr    = OpTypeArray %i32 %c_i32_64\n"
 776                 "%f32arr    = OpTypeArray %f32 %c_i32_64\n"
 777
 778                 "${types}\n"
 779                 "${matrix_types:opt}\n"
 780
 781                 "%PC16      = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
 782                 "%pp_PC16   = OpTypePointer PushConstant %PC16\n"
 783                 "%pc16      = OpVariable %pp_PC16 PushConstant\n"
 784                 "%SSBO32    = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
 785                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
 786                 "%ssbo32    = OpVariable %up_SSBO32 Uniform\n"
 787
 788                 "%id        = OpVariable %uvec3ptr Input\n"
 789
 790                 "%main      = OpFunction %void None %voidf\n"
 791                 "%label     = OpLabel\n"
 792                 "%idval     = OpLoad %uvec3 %id\n"
 793                 "%x         = OpCompositeExtract %u32 %idval 0\n"
 794                 "%inloc     = OpAccessChain %${base16}ptr %pc16 %zero %x ${index0:opt}\n"
 795                 "%val16     = OpLoad %${base16} %inloc\n"
 796                 "%val32     = ${convert} %${base32} %val16\n"
 797                 "%outloc    = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
 798                 "             OpStore %outloc %val32\n"
 799                 "${matrix_store:opt}\n"
 800                 "             OpReturn\n"
 801                 "             OpFunctionEnd\n");
 802
 803         {  // floats
                     // 16-bit types live in the PushConstant storage class, 32-bit types in Uniform.
 804                 const char                                                                              floatTypes[]    =
 805                         "%f16       = OpTypeFloat 16\n"
 806                         "%f16ptr    = OpTypePointer PushConstant %f16\n"
 807                         "%f16arr    = OpTypeArray %f16 %c_i32_64\n"
 808                         "%v4f16     = OpTypeVector %f16 4\n"
 809                         "%v4f32     = OpTypeVector %f32 4\n"
 810                         "%v4f16ptr  = OpTypePointer PushConstant %v4f16\n"
 811                         "%v4f32ptr  = OpTypePointer Uniform %v4f32\n"
 812                         "%v4f16arr  = OpTypeArray %v4f16 %c_i32_16\n"
 813                         "%v4f32arr  = OpTypeArray %v4f32 %c_i32_16\n";
 814
                     // Per-layout parameters: test-name prefix, 32-bit and 16-bit base type names
                     // (without the leading '%'), ArrayStride decorations, and the number of
                     // work groups to dispatch.
 815                 struct CompositeType
 816                 {
 817                         const char*     name;
 818                         const char*     base32;
 819                         const char*     base16;
 820                         const char*     stride;
 821                         unsigned        count;
 822                 };
 823
                     // The matrix entry reuses the v4 base types because the shader accesses the
                     // matrix one column (a v4) at a time; each matrix holds 2x4 = 8 elements,
                     // hence numElements / 8 work groups.
 824                 const CompositeType     cTypes[]        =
 825                 {
 826                         {"scalar",      "f32",          "f16",          "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",                         numElements},
 827                         {"vector",      "v4f32",        "v4f16",        "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n",            numElements / 4},
 828                         {"matrix",      "v4f32",        "v4f16",        "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n",       numElements / 8},
 829                 };
 830
                     // Float inputs are drawn from rnd before the integer inputs further below.
 831                 vector<deFloat16>       float16Data                     = getFloat16s(rnd, numElements);
 832                 vector<float>           float32Data;
 833
                     // Host-side reference: each 16-bit input converted with deFloat16To32.
 834                 float32Data.reserve(numElements);
 835                 for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
 836                         float32Data.push_back(deFloat16To32(float16Data[numIdx]));
 837
 838                 for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
 839                 {
 840                         ComputeShaderSpec               spec;
 841                         map<string, string>             specs;
                             // Case names are "scalar_float", "vector_float" and "matrix_float".
 842                         string                                  testName        = string(cTypes[tyIdx].name) + "_float";
 843
 844                         specs["stride"]                 = cTypes[tyIdx].stride;
 845                         specs["base32"]                 = cTypes[tyIdx].base32;
 846                         specs["base16"]                 = cTypes[tyIdx].base16;
 847                         specs["types"]                  = floatTypes;
 848                         specs["convert"]                = "OpFConvert";
 849
 850                         if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
 851                         {
                                     // index0 makes the template's access chains address column 0;
                                     // matrix_store appends the same load/convert/store for column 1.
 852                                 specs["index0"]                 = "%zero";
 853                                 specs["matrix_prefix"]  = "m2";
 854                                 specs["matrix_types"]   =
 855                                         "%m2v4f16 = OpTypeMatrix %v4f16 2\n"
 856                                         "%m2v4f32 = OpTypeMatrix %v4f32 2\n"
 857                                         "%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_8\n"
 858                                         "%m2v4f32arr = OpTypeArray %m2v4f32 %c_i32_8\n";
 859                                 specs["matrix_decor"]   =
 860                                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
 861                                         "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
 862                                         "OpMemberDecorate %PC16 0 ColMajor\n"
 863                                         "OpMemberDecorate %PC16 0 MatrixStride 8\n";
 864                                 specs["matrix_store"]   =
 865                                         "%inloc_1  = OpAccessChain %v4f16ptr %pc16 %zero %x %c_i32_1\n"
 866                                         "%val16_1  = OpLoad %v4f16 %inloc_1\n"
 867                                         "%val32_1  = OpFConvert %v4f32 %val16_1\n"
 868                                         "%outloc_1 = OpAccessChain %v4f32ptr %ssbo32 %zero %x %c_i32_1\n"
 869                                         "            OpStore %outloc_1 %val32_1\n";
 870                         }
 871
 872                         spec.assembly                   = shaderTemplate.specialize(specs);
                             // One work group (= one invocation) per scalar, vector or matrix.
 873                         spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
                             // Output verification is delegated to check32BitFloats.
 874                         spec.verifyIO                   = check32BitFloats;
 875                         spec.pushConstants              = BufferSp(new Float16Buffer(float16Data));
 876
 877                         spec.outputs.push_back(BufferSp(new Float32Buffer(float32Data)));
 878                         spec.extensions.push_back("VK_KHR_16bit_storage");
 879                         spec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
 880
 881                         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
 882                 }
 883         }
 884         {  // integers
                     // Signed type declarations; %i32ptr and %i32arr come from the shared template.
 885                 const char              sintTypes[]             =
 886                         "%i16       = OpTypeInt 16 1\n"
 887                         "%i16ptr    = OpTypePointer PushConstant %i16\n"
 888                         "%i16arr    = OpTypeArray %i16 %c_i32_64\n"
 889                         "%v2i16     = OpTypeVector %i16 2\n"
 890                         "%v2i32     = OpTypeVector %i32 2\n"
 891                         "%v2i16ptr  = OpTypePointer PushConstant %v2i16\n"
 892                         "%v2i32ptr  = OpTypePointer Uniform %v2i32\n"
 893                         "%v2i16arr  = OpTypeArray %v2i16 %c_i32_32\n"
 894                         "%v2i32arr  = OpTypeArray %v2i32 %c_i32_32\n";
 895
                     // Unsigned type declarations; these also declare %u32ptr and %u32arr, which
                     // the shared template does not provide.
 896                 const char              uintTypes[]             =
 897                         "%u16       = OpTypeInt 16 0\n"
 898                         "%u16ptr    = OpTypePointer PushConstant %u16\n"
 899                         "%u32ptr    = OpTypePointer Uniform %u32\n"
 900                         "%u16arr    = OpTypeArray %u16 %c_i32_64\n"
 901                         "%u32arr    = OpTypeArray %u32 %c_i32_64\n"
 902                         "%v2u16     = OpTypeVector %u16 2\n"
 903                         "%v2u32     = OpTypeVector %u32 2\n"
 904                         "%v2u16ptr  = OpTypePointer PushConstant %v2u16\n"
 905                         "%v2u32ptr  = OpTypePointer Uniform %v2u32\n"
 906                         "%v2u16arr  = OpTypeArray %v2u16 %c_i32_32\n"
 907                         "%v2u32arr  = OpTypeArray %v2u32 %c_i32_32\n";
 908
                     // Per-case parameters: test name, signedness (selects the expected-output
                     // vector), type declarations, 32-bit and 16-bit base type names, conversion
                     // opcode, ArrayStride decorations, and the number of work groups.
 909                 struct CompositeType
 910                 {
 911                         const char*     name;
 912                         bool            isSigned;
 913                         const char* types;
 914                         const char*     base32;
 915                         const char*     base16;
 916                         const char* opcode;
 917                         const char*     stride;
 918                         unsigned        count;
 919                 };
 920
 921                 const CompositeType     cTypes[]        =
 922                 {
 923                         {"scalar_sint", true,   sintTypes,      "i32",          "i16",          "OpSConvert",   "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",         numElements},
 924                         {"scalar_uint", false,  uintTypes,      "u32",          "u16",          "OpUConvert",   "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",         numElements},
 925                         {"vector_sint", true,   sintTypes,      "v2i32",        "v2i16",        "OpSConvert",   "OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n",     numElements / 2},
 926                         {"vector_uint", false,  uintTypes,      "v2u32",        "v2u16",        "OpUConvert",   "OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n",     numElements / 2},
 927                 };
 928
                     // The same 16-bit input bits are used for the signed and unsigned cases.
 929                 vector<deInt16> inputs                  = getInt16s(rnd, numElements);
 930                 vector<deInt32> sOutputs;
 931                 vector<deInt32> uOutputs;
 932                 const deUint16  signBitMask             = 0x8000;
 933                 const deUint32  signExtendMask  = 0xffff0000;
 934
 935                 sOutputs.reserve(inputs.size());
 936                 uOutputs.reserve(inputs.size());
 937
                     // Host-side references: uOutputs holds the zero-extended value (via the
                     // deUint16 cast); sOutputs holds the sign-extended value, with the upper
                     // 16 bits forced to ones whenever the input's sign bit is set.
 938                 for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
 939                 {
 940                         uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
 941                         if (inputs[numNdx] & signBitMask)
 942                                 sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
 943                         else
 944                                 sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
 945                 }
 946
 947                 for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
 948                 {
 949                         ComputeShaderSpec               spec;
 950                         map<string, string>             specs;
 951                         const char*                             testName        = cTypes[tyIdx].name;
 952
 953                         specs["stride"]                 = cTypes[tyIdx].stride;
 954                         specs["base32"]                 = cTypes[tyIdx].base32;
 955                         specs["base16"]                 = cTypes[tyIdx].base16;
 956                         specs["types"]                  = cTypes[tyIdx].types;
 957                         specs["convert"]                = cTypes[tyIdx].opcode;
 958
 959                         spec.assembly                   = shaderTemplate.specialize(specs);
 960                         spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
 961                         spec.pushConstants              = BufferSp(new Int16Buffer(inputs));
 962
                             // Expected output depends only on signedness, not on scalar vs. vector.
 963                         if (cTypes[tyIdx].isSigned)
 964                                 spec.outputs.push_back(BufferSp(new Int32Buffer(sOutputs)));
 965                         else
 966                                 spec.outputs.push_back(BufferSp(new Int32Buffer(uOutputs)));
 967                         spec.extensions.push_back("VK_KHR_16bit_storage");
 968                         spec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
 969
 970                         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName, testName, spec));
 971                 }
 972         }
 973 }
 974
 975 void addGraphics16BitStorageUniformInt32To16Group (tcu::TestCaseGroup* testGroup)
 976 {
             // Adds graphics test cases to `testGroup`. The shader's %test_code function loops
             // over a buffer of 32-bit integers (set 0, binding 0), narrows each value to
             // 16 bits with OpSConvert/OpUConvert and stores it into a BufferBlock at set 0,
             // binding 1. One case is generated per combination of category (scalar/vector),
             // entry in CAPABILITIES and signedness.
 977         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
 978         map<string, string>                                     fragments;
             // Total number of 32-bit input values: 256 scalars, or 64 four-component vectors.
 979         const deUint32                                          numDataPoints           = 256;
 980         RGBA                                                            defaultColors[4];
 981         GraphicsResources                                       resources;
 982         vector<string>                                          extensions;
 983         const StringTemplate                            capabilities            ("OpCapability ${cap}\n");
 984         // inputs and outputs are declared to be vectors of signed integers.
 985         // However, depending on the test, they may be interpreted as unsigned
 986         // integers. That won't be a problem as long as we passed the bits
 987         // in faithfully to the pipeline.
 988         vector<deInt32>                                         inputs                          = getInt32s(rnd, numDataPoints);
 989         vector<deInt16>                                         outputs;
 990
             // Host-side reference: the low 16 bits of every input value.
 991         outputs.reserve(inputs.size());
 992         for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
 993                 outputs.push_back(static_cast<deInt16>(0xffff & inputs[numNdx]));
 994
             // The input's descriptor type given here is a placeholder; it is overwritten
             // per capability inside the generation loop at the end of this function.
 995         resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(inputs))));
 996         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(outputs))));
 997
 998         extensions.push_back("VK_KHR_16bit_storage");
 999         fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"";
1000
1001         getDefaultColors(defaultColors);
1002
             // Signedness-specific substitutions: name suffix, 32-bit and 16-bit type ids,
             // conversion opcode, and the signedness operand for OpTypeInt.
1003         struct IntegerFacts
1004         {
1005                 const char*     name;
1006                 const char*     type32;
1007                 const char*     type16;
1008                 const char* opcode;
1009                 const char*     isSigned;
1010         };
1011
1012         const IntegerFacts      intFacts[]              =
1013         {
1014                 {"sint",        "%i32",         "%i16",         "OpSConvert",   "1"},
1015                 {"uint",        "%u32",         "%u16",         "OpUConvert",   "0"},
1016         };
1017
             // Scalar variant: type, constant and variable declarations for 256-element arrays.
             // NOTE(review): %i32, %u32 and similar ids are not declared here; presumably
             // they come from the shared graphics shader preamble -- confirm.
1018         const StringTemplate    scalarPreMain(
1019                         "${itype16} = OpTypeInt 16 ${signed}\n"
1020                         "%c_i32_256 = OpConstant %i32 256\n"
1021                         "   %up_i32 = OpTypePointer Uniform ${itype32}\n"
1022                         "   %up_i16 = OpTypePointer Uniform ${itype16}\n"
1023                         "   %ra_i32 = OpTypeArray ${itype32} %c_i32_256\n"
1024                         "   %ra_i16 = OpTypeArray ${itype16} %c_i32_256\n"
1025                         "   %SSBO32 = OpTypeStruct %ra_i32\n"
1026                         "   %SSBO16 = OpTypeStruct %ra_i16\n"
1027                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1028                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1029                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
1030                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
1031
             // The input struct takes the capability-specific decoration (${indecor}); the
             // output struct is always a BufferBlock.
1032         const StringTemplate    scalarDecoration(
1033                         "OpDecorate %ra_i32 ArrayStride 4\n"
1034                         "OpDecorate %ra_i16 ArrayStride 2\n"
1035                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1036                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1037                         "OpDecorate %SSBO32 ${indecor}\n"
1038                         "OpDecorate %SSBO16 BufferBlock\n"
1039                         "OpDecorate %ssbo32 DescriptorSet 0\n"
1040                         "OpDecorate %ssbo16 DescriptorSet 0\n"
1041                         "OpDecorate %ssbo32 Binding 0\n"
1042                         "OpDecorate %ssbo16 Binding 1\n");
1043
             // %test_code runs i = 0 .. 255, converting element i of %ssbo32 into element i of
             // %ssbo16, and then returns its parameter unchanged.
1044         const StringTemplate    scalarTestFunc(
1045                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
1046                         "    %param = OpFunctionParameter %v4f32\n"
1047
1048                         "%entry = OpLabel\n"
1049                         "    %i = OpVariable %fp_i32 Function\n"
1050                         "         OpStore %i %c_i32_0\n"
1051                         "         OpBranch %loop\n"
1052
1053                         " %loop = OpLabel\n"
1054                         "   %15 = OpLoad %i32 %i\n"
1055                         "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
1056                         "         OpLoopMerge %merge %inc None\n"
1057                         "         OpBranchConditional %lt %write %merge\n"
1058
1059                         "%write = OpLabel\n"
1060                         "   %30 = OpLoad %i32 %i\n"
1061                         "  %src = OpAccessChain %up_i32 %ssbo32 %c_i32_0 %30\n"
1062                         "%val32 = OpLoad ${itype32} %src\n"
1063                         "%val16 = ${convert} ${itype16} %val32\n"
1064                         "  %dst = OpAccessChain %up_i16 %ssbo16 %c_i32_0 %30\n"
1065                         "         OpStore %dst %val16\n"
1066                         "         OpBranch %inc\n"
1067
1068                         "  %inc = OpLabel\n"
1069                         "   %37 = OpLoad %i32 %i\n"
1070                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
1071                         "         OpStore %i %39\n"
1072                         "         OpBranch %loop\n"
1073
1074                         "%merge = OpLabel\n"
1075                         "         OpReturnValue %param\n"
1076
1077                         "OpFunctionEnd\n");
1078
             // Vector variant: the same data viewed as 64 four-component vectors.
1079         const StringTemplate    vecPreMain(
1080                         "${itype16} = OpTypeInt 16 ${signed}\n"
1081                         " %c_i32_64 = OpConstant %i32 64\n"
1082                         "%v4itype16 = OpTypeVector ${itype16} 4\n"
1083                         " %up_v4i32 = OpTypePointer Uniform ${v4itype32}\n"
1084                         " %up_v4i16 = OpTypePointer Uniform %v4itype16\n"
1085                         " %ra_v4i32 = OpTypeArray ${v4itype32} %c_i32_64\n"
1086                         " %ra_v4i16 = OpTypeArray %v4itype16 %c_i32_64\n"
1087                         "   %SSBO32 = OpTypeStruct %ra_v4i32\n"
1088                         "   %SSBO16 = OpTypeStruct %ra_v4i16\n"
1089                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1090                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1091                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
1092                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
1093
             // Same decorations as the scalar variant, with vector-sized array strides.
1094         const StringTemplate    vecDecoration(
1095                         "OpDecorate %ra_v4i32 ArrayStride 16\n"
1096                         "OpDecorate %ra_v4i16 ArrayStride 8\n"
1097                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1098                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1099                         "OpDecorate %SSBO32 ${indecor}\n"
1100                         "OpDecorate %SSBO16 BufferBlock\n"
1101                         "OpDecorate %ssbo32 DescriptorSet 0\n"
1102                         "OpDecorate %ssbo16 DescriptorSet 0\n"
1103                         "OpDecorate %ssbo32 Binding 0\n"
1104                         "OpDecorate %ssbo16 Binding 1\n");
1105
             // Same loop as the scalar variant, bounded by 64 and converting a whole vector
             // per iteration.
1106         const StringTemplate    vecTestFunc(
1107                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
1108                         "    %param = OpFunctionParameter %v4f32\n"
1109
1110                         "%entry = OpLabel\n"
1111                         "    %i = OpVariable %fp_i32 Function\n"
1112                         "         OpStore %i %c_i32_0\n"
1113                         "         OpBranch %loop\n"
1114
1115                         " %loop = OpLabel\n"
1116                         "   %15 = OpLoad %i32 %i\n"
1117                         "   %lt = OpSLessThan %bool %15 %c_i32_64\n"
1118                         "         OpLoopMerge %merge %inc None\n"
1119                         "         OpBranchConditional %lt %write %merge\n"
1120
1121                         "%write = OpLabel\n"
1122                         "   %30 = OpLoad %i32 %i\n"
1123                         "  %src = OpAccessChain %up_v4i32 %ssbo32 %c_i32_0 %30\n"
1124                         "%val32 = OpLoad ${v4itype32} %src\n"
1125                         "%val16 = ${convert} %v4itype16 %val32\n"
1126                         "  %dst = OpAccessChain %up_v4i16 %ssbo16 %c_i32_0 %30\n"
1127                         "         OpStore %dst %val16\n"
1128                         "         OpBranch %inc\n"
1129
1130                         "  %inc = OpLabel\n"
1131                         "   %37 = OpLoad %i32 %i\n"
1132                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
1133                         "         OpStore %i %39\n"
1134                         "         OpBranch %loop\n"
1135
1136                         "%merge = OpLabel\n"
1137                         "         OpReturnValue %param\n"
1138
1139                         "OpFunctionEnd\n");
1140
             // Bundles the three templates that make up one category; the members are
             // references to the templates declared above.
1141         struct Category
1142         {
1143                 const char*                             name;
1144                 const StringTemplate&   preMain;
1145                 const StringTemplate&   decoration;
1146                 const StringTemplate&   testFunction;
1147         };
1148
1149         const Category          categories[]    =
1150         {
1151                 {"scalar",      scalarPreMain,  scalarDecoration,       scalarTestFunc},
1152                 {"vector",      vecPreMain,             vecDecoration,          vecTestFunc},
1153         };
1154
             // Generate one test set per (category, capability, signedness) combination.
             // `fragments` and `resources` are reused across iterations; the entries set
             // below are overwritten each time.
1155         for (deUint32 catIdx = 0; catIdx < DE_LENGTH_OF_ARRAY(categories); ++catIdx)
1156                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1157                         for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
1158                         {
1159                                 map<string, string>     specs;
                                     // Name format: <capability>_<scalar|vector>_<sint|uint>.
1160                                 string                          name            = string(CAPABILITIES[capIdx].name) + "_" + categories[catIdx].name + "_" + intFacts[factIdx].name;
1161
1162                                 specs["cap"]                                    = CAPABILITIES[capIdx].cap;
1163                                 specs["indecor"]                                = CAPABILITIES[capIdx].decor;
1164                                 specs["itype32"]                                = intFacts[factIdx].type32;
                                     // Drops the leading '%' of type32 to build "%v4i32" or "%v4u32".
1165                                 specs["v4itype32"]                              = "%v4" + string(intFacts[factIdx].type32).substr(1);
1166                                 specs["itype16"]                                = intFacts[factIdx].type16;
1167                                 specs["signed"]                                 = intFacts[factIdx].isSigned;
1168                                 specs["convert"]                                = intFacts[factIdx].opcode;
1169
1170                                 fragments["pre_main"]                   = categories[catIdx].preMain.specialize(specs);
1171                                 fragments["testfun"]                    = categories[catIdx].testFunction.specialize(specs);
1172                                 fragments["capability"]                 = capabilities.specialize(specs);
1173                                 fragments["decoration"]                 = categories[catIdx].decoration.specialize(specs);
1174
                                     // Use the descriptor type associated with the current capability
                                     // for the 32-bit input buffer.
1175                                 resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
1176
1177                                 createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
1178                         }
1179 }
1180
// Registers compute-shader cases on `group` that read 32-bit data from one buffer, narrow each
// element to 16 bits inside the shader, and write the result to a second buffer (%ssbo16).
//  * Float cases: every CAPABILITIES entry x {scalar, vector, matrix} x rounding mode, each
//    checked through a custom verifyIO callback.
//  * Integer cases: every CAPABILITIES entry x {scalar, vector} x {sint, uint}, with
//    host-computed expected values supplied as the output buffer.
1181 void addCompute16bitStorageUniform32To16Group (tcu::TestCaseGroup* group)
1182 {
1183         tcu::TestContext&                               testCtx                 = group->getTestContext();
             // The RNG is seeded from a hash of the group name.
1184         de::Random                                              rnd                             (deStringHash(group->getName()));
             // Number of 32-bit scalars in the input buffer. The array lengths declared in the shader
             // (%c_i32_128 for scalars, smaller constants for vectors/matrices) are sized to match.
1185         const int                                               numElements             = 128;
1186
             // Shader text shared by the float and integer cases. The ${...} placeholders are filled in per
             // case below; the ones suffixed with ":opt" are not set by every case (integer cases set no
             // "rounding", and only the float matrix case sets "index0" and the "matrix_*" keys).
1187         const StringTemplate                    shaderTemplate  (
1188                 "OpCapability Shader\n"
1189                 "OpCapability ${capability}\n"
1190                 "OpExtension \"SPV_KHR_16bit_storage\"\n"
1191                 "OpMemoryModel Logical GLSL450\n"
1192                 "OpEntryPoint GLCompute %main \"main\" %id\n"
1193                 "OpExecutionMode %main LocalSize 1 1 1\n"
1194                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1195
1196                 "${stride}"
1197
                     // The 32-bit source struct takes its decoration from the capability under test; the 16-bit
                     // destination is always decorated BufferBlock.
1198                 "OpMemberDecorate %SSBO32 0 Offset 0\n"
1199                 "OpMemberDecorate %SSBO16 0 Offset 0\n"
1200                 "OpDecorate %SSBO32 ${storage}\n"
1201                 "OpDecorate %SSBO16 BufferBlock\n"
1202                 "OpDecorate %ssbo32 DescriptorSet 0\n"
1203                 "OpDecorate %ssbo16 DescriptorSet 0\n"
1204                 "OpDecorate %ssbo32 Binding 0\n"
1205                 "OpDecorate %ssbo16 Binding 1\n"
1206
1207                 "${matrix_decor:opt}\n"
1208
1209                 "${rounding:opt}\n"
1210
1211                 "%bool      = OpTypeBool\n"
1212                 "%void      = OpTypeVoid\n"
1213                 "%voidf     = OpTypeFunction %void\n"
1214                 "%u32       = OpTypeInt 32 0\n"
1215                 "%i32       = OpTypeInt 32 1\n"
1216                 "%f32       = OpTypeFloat 32\n"
1217                 "%uvec3     = OpTypeVector %u32 3\n"
1218                 "%fvec3     = OpTypeVector %f32 3\n"
1219                 "%uvec3ptr  = OpTypePointer Input %uvec3\n"
1220                 "%i32ptr    = OpTypePointer Uniform %i32\n"
1221                 "%f32ptr    = OpTypePointer Uniform %f32\n"
1222
1223                 "%zero      = OpConstant %i32 0\n"
1224                 "%c_i32_1   = OpConstant %i32 1\n"
1225                 "%c_i32_16  = OpConstant %i32 16\n"
1226                 "%c_i32_32  = OpConstant %i32 32\n"
1227                 "%c_i32_64  = OpConstant %i32 64\n"
1228                 "%c_i32_128 = OpConstant %i32 128\n"
1229
1230                 "%i32arr    = OpTypeArray %i32 %c_i32_128\n"
1231                 "%f32arr    = OpTypeArray %f32 %c_i32_128\n"
1232
1233                 "${types}\n"
1234                 "${matrix_types:opt}\n"
1235
1236                 "%SSBO32    = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
1237                 "%SSBO16    = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
1238                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1239                 "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1240                 "%ssbo32    = OpVariable %up_SSBO32 Uniform\n"
1241                 "%ssbo16    = OpVariable %up_SSBO16 Uniform\n"
1242
1243                 "%id        = OpVariable %uvec3ptr Input\n"
1244
                     // Each invocation converts the array element indexed by GlobalInvocationId.x. With
                     // LocalSize 1x1x1, numWorkGroups (set per case below) is the number of elements converted.
1245                 "%main      = OpFunction %void None %voidf\n"
1246                 "%label     = OpLabel\n"
1247                 "%idval     = OpLoad %uvec3 %id\n"
1248                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1249                 "%inloc     = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
1250                 "%val32     = OpLoad %${base32} %inloc\n"
1251                 "%val16     = ${convert} %${base16} %val32\n"
1252                 "%outloc    = OpAccessChain %${base16}ptr %ssbo16 %zero %x ${index0:opt}\n"
1253                 "             OpStore %outloc %val16\n"
1254                 "${matrix_store:opt}\n"
1255                 "             OpReturn\n"
1256                 "             OpFunctionEnd\n");
1257
1258         {  // Floats
                     // Extra type declarations substituted for ${types} in every float case.
1259                 const char                                              floatTypes[]    =
1260                         "%f16       = OpTypeFloat 16\n"
1261                         "%f16ptr    = OpTypePointer Uniform %f16\n"
1262                         "%f16arr    = OpTypeArray %f16 %c_i32_128\n"
1263                         "%v4f16     = OpTypeVector %f16 4\n"
1264                         "%v4f32     = OpTypeVector %f32 4\n"
1265                         "%v4f16ptr  = OpTypePointer Uniform %v4f16\n"
1266                         "%v4f32ptr  = OpTypePointer Uniform %v4f32\n"
1267                         "%v4f16arr  = OpTypeArray %v4f16 %c_i32_32\n"
1268                         "%v4f32arr  = OpTypeArray %v4f32 %c_i32_32\n";
1269
                     // One rounding-mode variant: test-name suffix, the decoration text substituted for
                     // ${rounding}, and the verifyIO callback installed for that variant.
1270                 struct RndMode
1271                 {
1272                         const char*                             name;
1273                         const char*                             decor;
1274                         ComputeVerifyIOFunc             func;
1275                 };
1276
                     // The last entry emits no FPRoundingMode decoration and its checker is instantiated with both
                     // the RTE and RTZ flags (presumably accepting either rounding -- confirm in
                     // computeCheck16BitFloats).
1277                 const RndMode           rndModes[]              =
1278                 {
1279                         {"rtz",                                         "OpDecorate %val16  FPRoundingMode RTZ",        computeCheck16BitFloats<ROUNDINGMODE_RTZ>},
1280                         {"rte",                                         "OpDecorate %val16  FPRoundingMode RTE",        computeCheck16BitFloats<ROUNDINGMODE_RTE>},
1281                         {"unspecified_rnd_mode",        "",                                                                                     computeCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
1282                 };
1283
                     // One data layout: test-name part, 32-bit and 16-bit element type names (without the leading
                     // '%'), the ArrayStride decorations substituted for ${stride}, and the workgroup count.
1284                 struct CompositeType
1285                 {
1286                         const char*     name;
1287                         const char*     base32;
1288                         const char*     base16;
1289                         const char*     stride;
1290                         unsigned        count;
1291                 };
1292
                     // The matrix entry reuses the v4 column types as base32/base16 because the shader converts a
                     // matrix one column at a time. Counts shrink with element size (128, 128/4, 128/8) so that
                     // every layout covers all numElements input floats.
1293                 const CompositeType     cTypes[]        =
1294                 {
1295                         {"scalar",      "f32",          "f16",          "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",                         numElements},
1296                         {"vector",      "v4f32",        "v4f16",        "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n",            numElements / 4},
1297                         {"matrix",      "v4f32",        "v4f16",        "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n",       numElements / 8},
1298                 };
1299
                     // The same random input data and zero-filled placeholder output are shared by all float cases.
1300                 vector<float>           float32Data                     = getFloat32s(rnd, numElements);
1301                 vector<deFloat16>       float16DummyData        (numElements, 0);
1302
                     // One test case per (capability, layout, rounding mode) combination.
1303                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1304                         for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
1305                                 for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
1306                                 {
1307                                         ComputeShaderSpec               spec;
1308                                         map<string, string>             specs;
1309                                         string                                  testName        = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name + "_float_" + rndModes[rndModeIdx].name;
1310
1311                                         specs["capability"]             = CAPABILITIES[capIdx].cap;
1312                                         specs["storage"]                = CAPABILITIES[capIdx].decor;
1313                                         specs["stride"]                 = cTypes[tyIdx].stride;
1314                                         specs["base32"]                 = cTypes[tyIdx].base32;
1315                                         specs["base16"]                 = cTypes[tyIdx].base16;
1316                                         specs["rounding"]               = rndModes[rndModeIdx].decor;
1317                                         specs["types"]                  = floatTypes;
1318                                         specs["convert"]                = "OpFConvert";
1319
                                             // Matrix cases need extra template pieces: the matrix types and layout
                                             // decorations, plus a second load/convert/store sequence for column 1.
1320                                         if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
1321                                         {
                                                     // The second conversion result (%val16_1) needs its own rounding decoration
                                                     // when a mode is requested; nothing is added for the unspecified mode.
1322                                                 if (strcmp(rndModes[rndModeIdx].name, "rtz") == 0)
1323                                                         specs["rounding"] += "\nOpDecorate %val16_1  FPRoundingMode RTZ\n";
1324                                                 else if (strcmp(rndModes[rndModeIdx].name, "rte") == 0)
1325                                                         specs["rounding"] += "\nOpDecorate %val16_1  FPRoundingMode RTE\n";
1326
                                                     // index0 appends %zero to the template's access chains so they address
                                                     // column 0; matrix_store below handles column 1 (%c_i32_1).
1327                                                 specs["index0"]                 = "%zero";
1328                                                 specs["matrix_prefix"]  = "m2";
1329                                                 specs["matrix_types"]   =
1330                                                         "%m2v4f16 = OpTypeMatrix %v4f16 2\n"
1331                                                         "%m2v4f32 = OpTypeMatrix %v4f32 2\n"
1332                                                         "%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_16\n"
1333                                                         "%m2v4f32arr = OpTypeArray %m2v4f32 %c_i32_16\n";
1334                                                 specs["matrix_decor"]   =
1335                                                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
1336                                                         "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
1337                                                         "OpMemberDecorate %SSBO16 0 ColMajor\n"
1338                                                         "OpMemberDecorate %SSBO16 0 MatrixStride 8\n";
1339                                                 specs["matrix_store"]   =
1340                                                         "%inloc_1  = OpAccessChain %v4f32ptr %ssbo32 %zero %x %c_i32_1\n"
1341                                                         "%val32_1  = OpLoad %v4f32 %inloc_1\n"
1342                                                         "%val16_1  = OpFConvert %v4f16 %val32_1\n"
1343                                                         "%outloc_1 = OpAccessChain %v4f16ptr %ssbo16 %zero %x %c_i32_1\n"
1344                                                         "            OpStore %outloc_1 %val16_1\n";
1345                                         }
1346
1347                                         spec.assembly                   = shaderTemplate.specialize(specs);
1348                                         spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
1349                                         spec.verifyIO                   = rndModes[rndModeIdx].func;
                                             // The descriptor type of the input buffer follows the capability under test.
1350                                         spec.inputTypes[0]              = CAPABILITIES[capIdx].dtype;
1351
1352                                         spec.inputs.push_back(BufferSp(new Float32Buffer(float32Data)));
1353                                         // We provided a custom verifyIO in the above in which inputs will be used for checking.
1354                                         // So put dummy data in the expected values.
1355                                         spec.outputs.push_back(BufferSp(new Float16Buffer(float16DummyData)));
1356                                         spec.extensions.push_back("VK_KHR_16bit_storage");
1357                                         spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
1358
1359                                         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
1360                                 }
1361         }
1362
1363         {  // Integers
                     // Type declarations for the signed cases. %i32ptr and %i32arr are already declared by the
                     // shared template, so only the 16-bit and two-component vector types are added here.
1364                 const char              sintTypes[]     =
1365                         "%i16       = OpTypeInt 16 1\n"
1366                         "%i16ptr    = OpTypePointer Uniform %i16\n"
1367                         "%i16arr    = OpTypeArray %i16 %c_i32_128\n"
1368                         "%v2i16     = OpTypeVector %i16 2\n"
1369                         "%v2i32     = OpTypeVector %i32 2\n"
1370                         "%v2i16ptr  = OpTypePointer Uniform %v2i16\n"
1371                         "%v2i32ptr  = OpTypePointer Uniform %v2i32\n"
1372                         "%v2i16arr  = OpTypeArray %v2i16 %c_i32_64\n"
1373                         "%v2i32arr  = OpTypeArray %v2i32 %c_i32_64\n";
1374
                     // Type declarations for the unsigned cases. The shared template declares no %u32 pointer or
                     // array type, so the 32-bit ones are declared here as well.
1375                 const char              uintTypes[]     =
1376                         "%u16       = OpTypeInt 16 0\n"
1377                         "%u16ptr    = OpTypePointer Uniform %u16\n"
1378                         "%u32ptr    = OpTypePointer Uniform %u32\n"
1379                         "%u16arr    = OpTypeArray %u16 %c_i32_128\n"
1380                         "%u32arr    = OpTypeArray %u32 %c_i32_128\n"
1381                         "%v2u16     = OpTypeVector %u16 2\n"
1382                         "%v2u32     = OpTypeVector %u32 2\n"
1383                         "%v2u16ptr  = OpTypePointer Uniform %v2u16\n"
1384                         "%v2u32ptr  = OpTypePointer Uniform %v2u32\n"
1385                         "%v2u16arr  = OpTypeArray %v2u16 %c_i32_64\n"
1386                         "%v2u32arr  = OpTypeArray %v2u32 %c_i32_64\n";
1387
                     // One integer layout: test-name part, type declarations for ${types}, 32-bit and 16-bit element
                     // type names (without the leading '%'), the conversion opcode for ${convert}, the ArrayStride
                     // decorations for ${stride}, and the workgroup count.
1388                 struct CompositeType
1389                 {
1390                         const char*     name;
1391                         const char* types;
1392                         const char*     base32;
1393                         const char*     base16;
1394                         const char* opcode;
1395                         const char*     stride;
1396                         unsigned        count;
1397                 };
1398
                     // Vector layouts use two components per element, so they dispatch numElements / 2 workgroups.
1399                 const CompositeType     cTypes[]        =
1400                 {
1401                         {"scalar_sint", sintTypes,      "i32",          "i16",          "OpSConvert",   "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",         numElements},
1402                         {"scalar_uint", uintTypes,      "u32",          "u16",          "OpUConvert",   "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",         numElements},
1403                         {"vector_sint", sintTypes,      "v2i32",        "v2i16",        "OpSConvert",   "OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n",     numElements / 2},
1404                         {"vector_uint", uintTypes,      "v2u32",        "v2u16",        "OpUConvert",   "OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n",     numElements / 2},
1405                 };
1406
1407                 vector<deInt32> inputs                  = getInt32s(rnd, numElements);
1408                 vector<deInt16> outputs;
1409
                     // Expected results: the low 16 bits of each 32-bit input. The same expected buffer is used for
                     // both the OpSConvert and the OpUConvert cases.
1410                 outputs.reserve(inputs.size());
1411                 for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
1412                         outputs.push_back(static_cast<deInt16>(0xffff & inputs[numNdx]));
1413
                     // One test case per (capability, layout) combination. No custom verifyIO is installed here; the
                     // expected values are passed through spec.outputs.
1414                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1415                         for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
1416                         {
1417                                 ComputeShaderSpec               spec;
1418                                 map<string, string>             specs;
1419                                 string                                  testName        = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name;
1420
1421                                 specs["capability"]             = CAPABILITIES[capIdx].cap;
1422                                 specs["storage"]                = CAPABILITIES[capIdx].decor;
1423                                 specs["stride"]                 = cTypes[tyIdx].stride;
1424                                 specs["base32"]                 = cTypes[tyIdx].base32;
1425                                 specs["base16"]                 = cTypes[tyIdx].base16;
1426                                 specs["types"]                  = cTypes[tyIdx].types;
1427                                 specs["convert"]                = cTypes[tyIdx].opcode;
1428
1429                                 spec.assembly                   = shaderTemplate.specialize(specs);
1430                                 spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
1431                                 spec.inputTypes[0]              = CAPABILITIES[capIdx].dtype;
1432
1433                                 spec.inputs.push_back(BufferSp(new Int32Buffer(inputs)));
1434                                 spec.outputs.push_back(BufferSp(new Int16Buffer(outputs)));
1435                                 spec.extensions.push_back("VK_KHR_16bit_storage");
1436                                 spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
1437
1438                                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
1439                         }
1440         }
1441 }
1442
1443 void addGraphics16BitStorageUniformFloat32To16Group (tcu::TestCaseGroup* testGroup)
1444 {
1445         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
1446         map<string, string>                                     fragments;
1447         GraphicsResources                                       resources;
1448         vector<string>                                          extensions;
1449         const deUint32                                          numDataPoints           = 256;
1450         RGBA                                                            defaultColors[4];
1451         vector<float>                                           float32Data                     = getFloat32s(rnd, numDataPoints);
1452         vector<deFloat16>                                       float16DummyData        (numDataPoints, 0);
1453         const StringTemplate                            capabilities            ("OpCapability ${cap}\n");
1454
1455         resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data))));
1456         // We use a custom verifyIO to check the result via computing directly from inputs; the contents in outputs do not matter.
1457         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16DummyData))));
1458
1459         extensions.push_back("VK_KHR_16bit_storage");
1460         fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"";
1461
1462         struct RndMode
1463         {
1464                 const char*                             name;
1465                 const char*                             decor;
1466                 GraphicsVerifyIOFunc    f;
1467         };
1468
1469         getDefaultColors(defaultColors);
1470
1471         {  // scalar cases
1472                 fragments["pre_main"]                           =
1473                         "      %f16 = OpTypeFloat 16\n"
1474                         "%c_i32_256 = OpConstant %i32 256\n"
1475                         "   %up_f32 = OpTypePointer Uniform %f32\n"
1476                         "   %up_f16 = OpTypePointer Uniform %f16\n"
1477                         "   %ra_f32 = OpTypeArray %f32 %c_i32_256\n"
1478                         "   %ra_f16 = OpTypeArray %f16 %c_i32_256\n"
1479                         "   %SSBO32 = OpTypeStruct %ra_f32\n"
1480                         "   %SSBO16 = OpTypeStruct %ra_f16\n"
1481                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1482                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1483                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
1484                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
1485
1486                 const StringTemplate decoration         (
1487                         "OpDecorate %ra_f32 ArrayStride 4\n"
1488                         "OpDecorate %ra_f16 ArrayStride 2\n"
1489                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1490                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1491                         "OpDecorate %SSBO32 ${indecor}\n"
1492                         "OpDecorate %SSBO16 BufferBlock\n"
1493                         "OpDecorate %ssbo32 DescriptorSet 0\n"
1494                         "OpDecorate %ssbo16 DescriptorSet 0\n"
1495                         "OpDecorate %ssbo32 Binding 0\n"
1496                         "OpDecorate %ssbo16 Binding 1\n"
1497                         "${rounddecor}\n");
1498
1499                 fragments["testfun"]                            =
1500                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
1501                         "    %param = OpFunctionParameter %v4f32\n"
1502
1503                         "%entry = OpLabel\n"
1504                         "    %i = OpVariable %fp_i32 Function\n"
1505                         "         OpStore %i %c_i32_0\n"
1506                         "         OpBranch %loop\n"
1507
1508                         " %loop = OpLabel\n"
1509                         "   %15 = OpLoad %i32 %i\n"
1510                         "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
1511                         "         OpLoopMerge %merge %inc None\n"
1512                         "         OpBranchConditional %lt %write %merge\n"
1513
1514                         "%write = OpLabel\n"
1515                         "   %30 = OpLoad %i32 %i\n"
1516                         "  %src = OpAccessChain %up_f32 %ssbo32 %c_i32_0 %30\n"
1517                         "%val32 = OpLoad %f32 %src\n"
1518                         "%val16 = OpFConvert %f16 %val32\n"
1519                         "  %dst = OpAccessChain %up_f16 %ssbo16 %c_i32_0 %30\n"
1520                         "         OpStore %dst %val16\n"
1521                         "         OpBranch %inc\n"
1522
1523                         "  %inc = OpLabel\n"
1524                         "   %37 = OpLoad %i32 %i\n"
1525                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
1526                         "         OpStore %i %39\n"
1527                         "         OpBranch %loop\n"
1528
1529                         "%merge = OpLabel\n"
1530                         "         OpReturnValue %param\n"
1531
1532                         "OpFunctionEnd\n";
1533
1534                 const RndMode   rndModes[] =
1535                 {
1536                         {"rtz",                                         "OpDecorate %val16  FPRoundingMode RTZ",        graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
1537                         {"rte",                                         "OpDecorate %val16  FPRoundingMode RTE",        graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
1538                         {"unspecified_rnd_mode",        "",                                                                                     graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
1539                 };
1540
1541                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1542                         for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
1543                         {
1544                                 map<string, string>     specs;
1545                                 string                          testName        = string(CAPABILITIES[capIdx].name) + "_scalar_float_" + rndModes[rndModeIdx].name;
1546
1547                                 specs["cap"]                                    = CAPABILITIES[capIdx].cap;
1548                                 specs["indecor"]                                = CAPABILITIES[capIdx].decor;
1549                                 specs["rounddecor"]                             = rndModes[rndModeIdx].decor;
1550
1551                                 fragments["capability"]                 = capabilities.specialize(specs);
1552                                 fragments["decoration"]                 = decoration.specialize(specs);
1553
1554                                 resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
1555                                 resources.verifyIO                              = rndModes[rndModeIdx].f;
1556
1557
1558                                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
1559                         }
1560         }
1561
1562         {  // vector cases
1563                 fragments["pre_main"]                           =
1564                         "      %f16 = OpTypeFloat 16\n"
1565                         " %c_i32_64 = OpConstant %i32 64\n"
1566                         "        %v4f16 = OpTypeVector %f16 4\n"
1567                         " %up_v4f32 = OpTypePointer Uniform %v4f32\n"
1568                         " %up_v4f16 = OpTypePointer Uniform %v4f16\n"
1569                         " %ra_v4f32 = OpTypeArray %v4f32 %c_i32_64\n"
1570                         " %ra_v4f16 = OpTypeArray %v4f16 %c_i32_64\n"
1571                         "   %SSBO32 = OpTypeStruct %ra_v4f32\n"
1572                         "   %SSBO16 = OpTypeStruct %ra_v4f16\n"
1573                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1574                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1575                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
1576                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
1577
1578                 const StringTemplate decoration         (
1579                         "OpDecorate %ra_v4f32 ArrayStride 16\n"
1580                         "OpDecorate %ra_v4f16 ArrayStride 8\n"
1581                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1582                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1583                         "OpDecorate %SSBO32 ${indecor}\n"
1584                         "OpDecorate %SSBO16 BufferBlock\n"
1585                         "OpDecorate %ssbo32 DescriptorSet 0\n"
1586                         "OpDecorate %ssbo16 DescriptorSet 0\n"
1587                         "OpDecorate %ssbo32 Binding 0\n"
1588                         "OpDecorate %ssbo16 Binding 1\n"
1589                         "${rounddecor}\n");
1590
1591                 // ssbo16[] <- convert ssbo32[] to 16bit float
1592                 fragments["testfun"]                            =
1593                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
1594                         "    %param = OpFunctionParameter %v4f32\n"
1595
1596                         "%entry = OpLabel\n"
1597                         "    %i = OpVariable %fp_i32 Function\n"
1598                         "         OpStore %i %c_i32_0\n"
1599                         "         OpBranch %loop\n"
1600
1601                         " %loop = OpLabel\n"
1602                         "   %15 = OpLoad %i32 %i\n"
1603                         "   %lt = OpSLessThan %bool %15 %c_i32_64\n"
1604                         "         OpLoopMerge %merge %inc None\n"
1605                         "         OpBranchConditional %lt %write %merge\n"
1606
1607                         "%write = OpLabel\n"
1608                         "   %30 = OpLoad %i32 %i\n"
1609                         "  %src = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30\n"
1610                         "%val32 = OpLoad %v4f32 %src\n"
1611                         "%val16 = OpFConvert %v4f16 %val32\n"
1612                         "  %dst = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30\n"
1613                         "         OpStore %dst %val16\n"
1614                         "         OpBranch %inc\n"
1615
1616                         "  %inc = OpLabel\n"
1617                         "   %37 = OpLoad %i32 %i\n"
1618                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
1619                         "         OpStore %i %39\n"
1620                         "         OpBranch %loop\n"
1621
1622                         "%merge = OpLabel\n"
1623                         "         OpReturnValue %param\n"
1624
1625                         "OpFunctionEnd\n";
1626
1627                 const RndMode   rndModes[] =
1628                 {
1629                         {"rtz",                                         "OpDecorate %val16  FPRoundingMode RTZ",        graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
1630                         {"rte",                                         "OpDecorate %val16  FPRoundingMode RTE",        graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
1631                         {"unspecified_rnd_mode",        "",                                                                                     graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
1632                 };
1633
1634                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1635                         for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
1636                         {
1637                                 map<string, string>     specs;
1638                                 string                          testName        = string(CAPABILITIES[capIdx].name) + "_vector_float_" + rndModes[rndModeIdx].name;
1639
1640                                 specs["cap"]                                    = CAPABILITIES[capIdx].cap;
1641                                 specs["indecor"]                                = CAPABILITIES[capIdx].decor;
1642                                 specs["rounddecor"]                             = rndModes[rndModeIdx].decor;
1643
1644                                 fragments["capability"]                 = capabilities.specialize(specs);
1645                                 fragments["decoration"]                 = decoration.specialize(specs);
1646
1647                                 resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
1648                                 resources.verifyIO                              = rndModes[rndModeIdx].f;
1649
1650
1651                                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
1652                         }
1653         }
1654
1655         {  // matrix cases
1656                 fragments["pre_main"]                           =
1657                         "       %f16 = OpTypeFloat 16\n"
1658                         "  %c_i32_16 = OpConstant %i32 16\n"
1659                         "     %v4f16 = OpTypeVector %f16 4\n"
1660                         "   %m4x4f32 = OpTypeMatrix %v4f32 4\n"
1661                         "   %m4x4f16 = OpTypeMatrix %v4f16 4\n"
1662                         "  %up_v4f32 = OpTypePointer Uniform %v4f32\n"
1663                         "  %up_v4f16 = OpTypePointer Uniform %v4f16\n"
1664                         "%a16m4x4f32 = OpTypeArray %m4x4f32 %c_i32_16\n"
1665                         "%a16m4x4f16 = OpTypeArray %m4x4f16 %c_i32_16\n"
1666                         "    %SSBO32 = OpTypeStruct %a16m4x4f32\n"
1667                         "    %SSBO16 = OpTypeStruct %a16m4x4f16\n"
1668                         " %up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1669                         " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1670                         "    %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
1671                         "    %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
1672
1673                 const StringTemplate decoration         (
1674                         "OpDecorate %a16m4x4f32 ArrayStride 64\n"
1675                         "OpDecorate %a16m4x4f16 ArrayStride 32\n"
1676                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1677                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
1678                         "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
1679                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1680                         "OpMemberDecorate %SSBO16 0 ColMajor\n"
1681                         "OpMemberDecorate %SSBO16 0 MatrixStride 8\n"
1682                         "OpDecorate %SSBO32 ${indecor}\n"
1683                         "OpDecorate %SSBO16 BufferBlock\n"
1684                         "OpDecorate %ssbo32 DescriptorSet 0\n"
1685                         "OpDecorate %ssbo16 DescriptorSet 0\n"
1686                         "OpDecorate %ssbo32 Binding 0\n"
1687                         "OpDecorate %ssbo16 Binding 1\n"
1688                         "${rounddecor}\n");
1689
1690                 fragments["testfun"]                            =
1691                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
1692                         "    %param = OpFunctionParameter %v4f32\n"
1693
1694                         "%entry = OpLabel\n"
1695                         "    %i = OpVariable %fp_i32 Function\n"
1696                         "         OpStore %i %c_i32_0\n"
1697                         "         OpBranch %loop\n"
1698
1699                         " %loop = OpLabel\n"
1700                         "   %15 = OpLoad %i32 %i\n"
1701                         "   %lt = OpSLessThan %bool %15 %c_i32_16\n"
1702                         "         OpLoopMerge %merge %inc None\n"
1703                         "         OpBranchConditional %lt %write %merge\n"
1704
1705                         "  %write = OpLabel\n"
1706                         "     %30 = OpLoad %i32 %i\n"
1707                         "  %src_0 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_0\n"
1708                         "  %src_1 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_1\n"
1709                         "  %src_2 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_2\n"
1710                         "  %src_3 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_3\n"
1711                         "%val32_0 = OpLoad %v4f32 %src_0\n"
1712                         "%val32_1 = OpLoad %v4f32 %src_1\n"
1713                         "%val32_2 = OpLoad %v4f32 %src_2\n"
1714                         "%val32_3 = OpLoad %v4f32 %src_3\n"
1715                         "%val16_0 = OpFConvert %v4f16 %val32_0\n"
1716                         "%val16_1 = OpFConvert %v4f16 %val32_1\n"
1717                         "%val16_2 = OpFConvert %v4f16 %val32_2\n"
1718                         "%val16_3 = OpFConvert %v4f16 %val32_3\n"
1719                         "  %dst_0 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_0\n"
1720                         "  %dst_1 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_1\n"
1721                         "  %dst_2 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_2\n"
1722                         "  %dst_3 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_3\n"
1723                         "           OpStore %dst_0 %val16_0\n"
1724                         "           OpStore %dst_1 %val16_1\n"
1725                         "           OpStore %dst_2 %val16_2\n"
1726                         "           OpStore %dst_3 %val16_3\n"
1727                         "           OpBranch %inc\n"
1728
1729                         "  %inc = OpLabel\n"
1730                         "   %37 = OpLoad %i32 %i\n"
1731                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
1732                         "         OpStore %i %39\n"
1733                         "         OpBranch %loop\n"
1734
1735                         "%merge = OpLabel\n"
1736                         "         OpReturnValue %param\n"
1737
1738                         "OpFunctionEnd\n";
1739
1740                 const RndMode   rndModes[] =
1741                 {
1742                         {"rte",                                         "OpDecorate %val16_0  FPRoundingMode RTE\nOpDecorate %val16_1  FPRoundingMode RTE\nOpDecorate %val16_2  FPRoundingMode RTE\nOpDecorate %val16_3  FPRoundingMode RTE",   graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
1743                         {"rtz",                                         "OpDecorate %val16_0  FPRoundingMode RTZ\nOpDecorate %val16_1  FPRoundingMode RTZ\nOpDecorate %val16_2  FPRoundingMode RTZ\nOpDecorate %val16_3  FPRoundingMode RTZ",   graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
1744                         {"unspecified_rnd_mode",        "",                                                                                                                                                                                                                                                                                                                                             graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
1745                 };
1746
1747                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1748                         for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
1749                         {
1750                                 map<string, string>     specs;
1751                                 string                          testName        = string(CAPABILITIES[capIdx].name) + "_matrix_float_" + rndModes[rndModeIdx].name;
1752
1753                                 specs["cap"]                                    = CAPABILITIES[capIdx].cap;
1754                                 specs["indecor"]                                = CAPABILITIES[capIdx].decor;
1755                                 specs["rounddecor"]                             = rndModes[rndModeIdx].decor;
1756
1757                                 fragments["capability"]                 = capabilities.specialize(specs);
1758                                 fragments["decoration"]                 = decoration.specialize(specs);
1759
1760                                 resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
1761                                 resources.verifyIO                              = rndModes[rndModeIdx].f;
1762
1763
1764                                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
1765                         }
1766         }
1767 }
1768
1769 void addGraphics16BitStorageInputOutputFloat32To16Group (tcu::TestCaseGroup* testGroup)
1770 {
1771         de::Random                      rnd                                     (deStringHash(testGroup->getName()));
1772         RGBA                            defaultColors[4];
1773         vector<string>          extensions;
1774         map<string, string>     fragments                       = passthruFragments();
1775         const deUint32          numDataPoints           = 64;
1776         vector<float>           float32Data                     = getFloat32s(rnd, numDataPoints);
1777
1778         extensions.push_back("VK_KHR_16bit_storage");
1779
1780         fragments["capability"]                         = "OpCapability StorageInputOutput16\n";
1781         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"\n";
1782
1783         getDefaultColors(defaultColors);
1784
1785         struct RndMode
1786         {
1787                 const char*                             name;
1788                 const char*                             decor;
1789                 RoundingModeFlags               flags;
1790         };
1791
1792         const RndMode           rndModes[]              =
1793         {
1794                 {"rtz",                                         "OpDecorate %ret  FPRoundingMode RTZ",  ROUNDINGMODE_RTZ},
1795                 {"rte",                                         "OpDecorate %ret  FPRoundingMode RTE",  ROUNDINGMODE_RTE},
1796                 {"unspecified_rnd_mode",        "",                                                                             RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)},
1797         };
1798
1799         struct Case
1800         {
1801                 const char*     name;
1802                 const char*     interfaceOpFunc;
1803                 const char*     preMain;
1804                 const char*     inputType;
1805                 const char*     outputType;
1806                 deUint32        numPerCase;
1807                 deUint32        numElements;
1808         };
1809
1810         const Case      cases[]         =
1811         {
1812                 { // Scalar cases
1813                         "scalar",
1814
1815                         "%interface_op_func = OpFunction %f16 None %f16_f32_function\n"
1816                         "        %io_param1 = OpFunctionParameter %f32\n"
1817                         "            %entry = OpLabel\n"
1818                         "                          %ret = OpFConvert %f16 %io_param1\n"
1819                         "                     OpReturnValue %ret\n"
1820                         "                     OpFunctionEnd\n",
1821
1822                         "             %f16 = OpTypeFloat 16\n"
1823                         "          %op_f16 = OpTypePointer Output %f16\n"
1824                         "           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
1825                         "        %op_a3f16 = OpTypePointer Output %a3f16\n"
1826                         "%f16_f32_function = OpTypeFunction %f16 %f32\n"
1827                         "           %a3f32 = OpTypeArray %f32 %c_i32_3\n"
1828                         "        %ip_a3f32 = OpTypePointer Input %a3f32\n",
1829
1830                         "f32",
1831                         "f16",
1832                         4,
1833                         1,
1834                 },
1835                 { // Vector cases
1836                         "vector",
1837
1838                         "%interface_op_func = OpFunction %v2f16 None %v2f16_v2f32_function\n"
1839                         "        %io_param1 = OpFunctionParameter %v2f32\n"
1840                         "            %entry = OpLabel\n"
1841                         "                          %ret = OpFConvert %v2f16 %io_param1\n"
1842                         "                     OpReturnValue %ret\n"
1843                         "                     OpFunctionEnd\n",
1844
1845                         "                 %f16 = OpTypeFloat 16\n"
1846                         "               %v2f16 = OpTypeVector %f16 2\n"
1847                         "            %op_v2f16 = OpTypePointer Output %v2f16\n"
1848                         "             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
1849                         "          %op_a3v2f16 = OpTypePointer Output %a3v2f16\n"
1850                         "%v2f16_v2f32_function = OpTypeFunction %v2f16 %v2f32\n"
1851                         "             %a3v2f32 = OpTypeArray %v2f32 %c_i32_3\n"
1852                         "          %ip_a3v2f32 = OpTypePointer Input %a3v2f32\n",
1853
1854                         "v2f32",
1855                         "v2f16",
1856                         2 * 4,
1857                         2,
1858                 }
1859         };
1860
1861         VulkanFeatures  requiredFeatures;
1862         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
1863
1864         for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
1865                 for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
1866                 {
1867                         fragments["interface_op_func"]  = cases[caseIdx].interfaceOpFunc;
1868                         fragments["pre_main"]                   = cases[caseIdx].preMain;
1869                         fragments["decoration"]                 = rndModes[rndModeIdx].decor;
1870
1871                         fragments["input_type"]                 = cases[caseIdx].inputType;
1872                         fragments["output_type"]                = cases[caseIdx].outputType;
1873
1874                         GraphicsInterfaces      interfaces;
1875                         const deUint32          numPerCase      = cases[caseIdx].numPerCase;
1876                         vector<float>           subInputs       (numPerCase);
1877                         vector<deFloat16>       subOutputs      (numPerCase);
1878
1879                         // The pipeline need this to call compare16BitFloat() when checking the result.
1880                         interfaces.setRoundingMode(rndModes[rndModeIdx].flags);
1881
1882                         for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
1883                         {
1884                                 string                  testName        = string(cases[caseIdx].name) + numberToString(caseNdx) + "_" + rndModes[rndModeIdx].name;
1885
1886                                 for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
1887                                 {
1888                                         subInputs[numNdx]       = float32Data[caseNdx * numPerCase + numNdx];
1889                                         // We derive the expected result from inputs directly in the graphics pipeline.
1890                                         subOutputs[numNdx]      = 0;
1891                                 }
1892                                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT32), BufferSp(new Float32Buffer(subInputs))),
1893                                                                                   std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16), BufferSp(new Float16Buffer(subOutputs))));
1894                                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
1895                         }
1896                 }
1897 }
1898
1899 void addGraphics16BitStorageInputOutputFloat16To32Group (tcu::TestCaseGroup* testGroup)
1900 {
1901         de::Random                              rnd                                     (deStringHash(testGroup->getName()));
1902         RGBA                                    defaultColors[4];
1903         vector<string>                  extensions;
1904         map<string, string>             fragments                       = passthruFragments();
1905         const deUint32                  numDataPoints           = 64;
1906         vector<deFloat16>               float16Data                     (getFloat16s(rnd, numDataPoints));
1907         vector<float>                   float32Data;
1908
1909         float32Data.reserve(numDataPoints);
1910         for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
1911                 float32Data.push_back(deFloat16To32(float16Data[numIdx]));
1912
1913         extensions.push_back("VK_KHR_16bit_storage");
1914
1915         fragments["capability"]                         = "OpCapability StorageInputOutput16\n";
1916         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"\n";
1917
1918         getDefaultColors(defaultColors);
1919
1920         struct Case
1921         {
1922                 const char*     name;
1923                 const char*     interfaceOpFunc;
1924                 const char*     preMain;
1925                 const char*     inputType;
1926                 const char*     outputType;
1927                 deUint32        numPerCase;
1928                 deUint32        numElements;
1929         };
1930
1931         Case    cases[]         =
1932         {
1933                 { // Scalar cases
1934                         "scalar",
1935
1936                         "%interface_op_func = OpFunction %f32 None %f32_f16_function\n"
1937                         "        %io_param1 = OpFunctionParameter %f16\n"
1938                         "            %entry = OpLabel\n"
1939                         "                          %ret = OpFConvert %f32 %io_param1\n"
1940                         "                     OpReturnValue %ret\n"
1941                         "                     OpFunctionEnd\n",
1942
1943                         "             %f16 = OpTypeFloat 16\n"
1944                         "          %ip_f16 = OpTypePointer Input %f16\n"
1945                         "           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
1946                         "        %ip_a3f16 = OpTypePointer Input %a3f16\n"
1947                         "%f32_f16_function = OpTypeFunction %f32 %f16\n"
1948                         "           %a3f32 = OpTypeArray %f32 %c_i32_3\n"
1949                         "        %op_a3f32 = OpTypePointer Output %a3f32\n",
1950
1951                         "f16",
1952                         "f32",
1953                         4,
1954                         1,
1955                 },
1956                 { // Vector cases
1957                         "vector",
1958
1959                         "%interface_op_func = OpFunction %v2f32 None %v2f32_v2f16_function\n"
1960                         "        %io_param1 = OpFunctionParameter %v2f16\n"
1961                         "            %entry = OpLabel\n"
1962                         "                          %ret = OpFConvert %v2f32 %io_param1\n"
1963                         "                     OpReturnValue %ret\n"
1964                         "                     OpFunctionEnd\n",
1965
1966                         "                 %f16 = OpTypeFloat 16\n"
1967                         "                       %v2f16 = OpTypeVector %f16 2\n"
1968                         "            %ip_v2f16 = OpTypePointer Input %v2f16\n"
1969                         "             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
1970                         "          %ip_a3v2f16 = OpTypePointer Input %a3v2f16\n"
1971                         "%v2f32_v2f16_function = OpTypeFunction %v2f32 %v2f16\n"
1972                         "             %a3v2f32 = OpTypeArray %v2f32 %c_i32_3\n"
1973                         "          %op_a3v2f32 = OpTypePointer Output %a3v2f32\n",
1974
1975                         "v2f16",
1976                         "v2f32",
1977                         2 * 4,
1978                         2,
1979                 }
1980         };
1981
1982         VulkanFeatures  requiredFeatures;
1983         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
1984
1985         for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
1986         {
1987                 fragments["interface_op_func"]  = cases[caseIdx].interfaceOpFunc;
1988                 fragments["pre_main"]                   = cases[caseIdx].preMain;
1989
1990                 fragments["input_type"]                 = cases[caseIdx].inputType;
1991                 fragments["output_type"]                = cases[caseIdx].outputType;
1992
1993                 GraphicsInterfaces      interfaces;
1994                 const deUint32          numPerCase      = cases[caseIdx].numPerCase;
1995                 vector<deFloat16>       subInputs       (numPerCase);
1996                 vector<float>           subOutputs      (numPerCase);
1997
1998                 for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
1999                 {
2000                         string                  testName        = string(cases[caseIdx].name) + numberToString(caseNdx);
2001
2002                         for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
2003                         {
2004                                 subInputs[numNdx]       = float16Data[caseNdx * numPerCase + numNdx];
2005                                 subOutputs[numNdx]      = float32Data[caseNdx * numPerCase + numNdx];
2006                         }
2007                         interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16), BufferSp(new Float16Buffer(subInputs))),
2008                                                                           std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT32), BufferSp(new Float32Buffer(subOutputs))));
2009                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
2010                 }
2011         }
2012 }
2013
2014 void addGraphics16BitStorageInputOutputInt32To16Group (tcu::TestCaseGroup* testGroup)
2015 {
2016         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2017         RGBA                                                            defaultColors[4];
2018         vector<string>                                          extensions;
2019         map<string, string>                                     fragments                       = passthruFragments();
2020         const deUint32                                          numDataPoints           = 64;
2021         // inputs and outputs are declared to be vectors of signed integers.
2022         // However, depending on the test, they may be interpreted as unsiged
2023         // integers. That won't be a problem as long as we passed the bits
2024         // in faithfully to the pipeline.
2025         vector<deInt32>                                         inputs                          = getInt32s(rnd, numDataPoints);
2026         vector<deInt16>                                         outputs;
2027
2028         outputs.reserve(inputs.size());
2029         for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
2030                 outputs.push_back(static_cast<deInt16>(0xffff & inputs[numNdx]));
2031
2032         extensions.push_back("VK_KHR_16bit_storage");
2033
2034         fragments["capability"]                         = "OpCapability StorageInputOutput16\n";
2035         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"\n";
2036
2037         getDefaultColors(defaultColors);
2038
2039         const StringTemplate    scalarInterfaceOpFunc(
2040                         "%interface_op_func = OpFunction %${type16} None %${type16}_${type32}_function\n"
2041                         "        %io_param1 = OpFunctionParameter %${type32}\n"
2042                         "            %entry = OpLabel\n"
2043                         "                          %ret = ${convert} %${type16} %io_param1\n"
2044                         "                     OpReturnValue %ret\n"
2045                         "                     OpFunctionEnd\n");
2046
2047         const StringTemplate    scalarPreMain(
2048                         "             %${type16} = OpTypeInt 16 ${signed}\n"
2049                         "          %op_${type16} = OpTypePointer Output %${type16}\n"
2050                         "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
2051                         "        %op_a3${type16} = OpTypePointer Output %a3${type16}\n"
2052                         "%${type16}_${type32}_function = OpTypeFunction %${type16} %${type32}\n"
2053                         "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
2054                         "        %ip_a3${type32} = OpTypePointer Input %a3${type32}\n");
2055
2056         const StringTemplate    vecInterfaceOpFunc(
2057                         "%interface_op_func = OpFunction %${type16} None %${type16}_${type32}_function\n"
2058                         "        %io_param1 = OpFunctionParameter %${type32}\n"
2059                         "            %entry = OpLabel\n"
2060                         "                          %ret = ${convert} %${type16} %io_param1\n"
2061                         "                     OpReturnValue %ret\n"
2062                         "                     OpFunctionEnd\n");
2063
2064         const StringTemplate    vecPreMain(
2065                         "                       %i16 = OpTypeInt 16 1\n"
2066                         "                       %u16 = OpTypeInt 16 0\n"
2067                         "                 %v4i16 = OpTypeVector %i16 4\n"
2068                         "                 %v4u16 = OpTypeVector %u16 4\n"
2069                         "          %op_${type16} = OpTypePointer Output %${type16}\n"
2070                         "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
2071                         "        %op_a3${type16} = OpTypePointer Output %a3${type16}\n"
2072                         "%${type16}_${type32}_function = OpTypeFunction %${type16} %${type32}\n"
2073                         "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
2074                         "        %ip_a3${type32} = OpTypePointer Input %a3${type32}\n");
2075
2076         struct Case
2077         {
2078                 const char*                             name;
2079                 const StringTemplate&   interfaceOpFunc;
2080                 const StringTemplate&   preMain;
2081                 const char*                             type32;
2082                 const char*                             type16;
2083                 const char*                             sign;
2084                 const char*                             opcode;
2085                 deUint32                                numPerCase;
2086                 deUint32                                numElements;
2087         };
2088
2089         Case    cases[]         =
2090         {
2091                 {"scalar_sint", scalarInterfaceOpFunc,  scalarPreMain,  "i32",          "i16",          "1",    "OpSConvert",   4,              1},
2092                 {"scalar_uint", scalarInterfaceOpFunc,  scalarPreMain,  "u32",          "u16",          "0",    "OpUConvert",   4,              1},
2093                 {"vector_sint", vecInterfaceOpFunc,             vecPreMain,             "v4i32",        "v4i16",        "1",    "OpSConvert",   4 * 4,  4},
2094                 {"vector_uint", vecInterfaceOpFunc,             vecPreMain,             "v4u32",        "v4u16",        "0",    "OpUConvert",   4 * 4,  4},
2095         };
2096
2097         VulkanFeatures  requiredFeatures;
2098         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
2099
2100         for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
2101         {
2102                 map<string, string>                             specs;
2103
2104                 specs["type32"]                                 = cases[caseIdx].type32;
2105                 specs["type16"]                                 = cases[caseIdx].type16;
2106                 specs["signed"]                                 = cases[caseIdx].sign;
2107                 specs["convert"]                                = cases[caseIdx].opcode;
2108
2109                 fragments["pre_main"]                   = cases[caseIdx].preMain.specialize(specs);
2110                 fragments["interface_op_func"]  = cases[caseIdx].interfaceOpFunc.specialize(specs);
2111                 fragments["input_type"]                 = cases[caseIdx].type32;
2112                 fragments["output_type"]                = cases[caseIdx].type16;
2113
2114                 GraphicsInterfaces                              interfaces;
2115                 const deUint32                                  numPerCase      = cases[caseIdx].numPerCase;
2116                 vector<deInt32>                                 subInputs       (numPerCase);
2117                 vector<deInt16>                                 subOutputs      (numPerCase);
2118
2119                 for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
2120                 {
2121                         string                  testName        = string(cases[caseIdx].name) + numberToString(caseNdx);
2122
2123                         for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
2124                         {
2125                                 subInputs[numNdx]       = inputs[caseNdx * numPerCase + numNdx];
2126                                 subOutputs[numNdx]      = outputs[caseNdx * numPerCase + numNdx];
2127                         }
2128                         if (strcmp(cases[caseIdx].sign, "1") == 0)
2129                         {
2130                                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT32), BufferSp(new Int32Buffer(subInputs))),
2131                                                                                   std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT16), BufferSp(new Int16Buffer(subOutputs))));
2132                         }
2133                         else
2134                         {
2135                                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT32), BufferSp(new Int32Buffer(subInputs))),
2136                                                                                   std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT16), BufferSp(new Int16Buffer(subOutputs))));
2137                         }
2138                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
2139                 }
2140         }
2141 }
2142
2143 void addGraphics16BitStorageInputOutputInt16To32Group (tcu::TestCaseGroup* testGroup)
2144 {
2145         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2146         RGBA                                                            defaultColors[4];
2147         vector<string>                                          extensions;
2148         map<string, string>                                     fragments                       = passthruFragments();
2149         const deUint32                                          numDataPoints           = 64;
2150         // inputs and outputs are declared to be vectors of signed integers.
2151         // However, depending on the test, they may be interpreted as unsiged
2152         // integers. That won't be a problem as long as we passed the bits
2153         // in faithfully to the pipeline.
2154         vector<deInt16>                                         inputs                          = getInt16s(rnd, numDataPoints);
2155         vector<deInt32>                                         sOutputs;
2156         vector<deInt32>                                         uOutputs;
2157         const deUint16                                          signBitMask                     = 0x8000;
2158         const deUint32                                          signExtendMask          = 0xffff0000;
2159
2160         sOutputs.reserve(inputs.size());
2161         uOutputs.reserve(inputs.size());
2162
2163         for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
2164         {
2165                 uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
2166                 if (inputs[numNdx] & signBitMask)
2167                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
2168                 else
2169                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
2170         }
2171
2172         extensions.push_back("VK_KHR_16bit_storage");
2173
2174         fragments["capability"]                         = "OpCapability StorageInputOutput16\n";
2175         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"\n";
2176
2177         getDefaultColors(defaultColors);
2178
2179         const StringTemplate scalarIfOpFunc     (
2180                         "%interface_op_func = OpFunction %${type32} None %${type32}_${type16}_function\n"
2181                         "        %io_param1 = OpFunctionParameter %${type16}\n"
2182                         "            %entry = OpLabel\n"
2183                         "                          %ret = ${convert} %${type32} %io_param1\n"
2184                         "                     OpReturnValue %ret\n"
2185                         "                     OpFunctionEnd\n");
2186
2187         const StringTemplate scalarPreMain      (
2188                         "             %${type16} = OpTypeInt 16 ${signed}\n"
2189                         "          %ip_${type16} = OpTypePointer Input %${type16}\n"
2190                         "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
2191                         "        %ip_a3${type16} = OpTypePointer Input %a3${type16}\n"
2192                         "%${type32}_${type16}_function = OpTypeFunction %${type32} %${type16}\n"
2193                         "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
2194                         "        %op_a3${type32} = OpTypePointer Output %a3${type32}\n");
2195
2196         const StringTemplate vecIfOpFunc        (
2197                         "%interface_op_func = OpFunction %${type32} None %${type32}_${type16}_function\n"
2198                         "        %io_param1 = OpFunctionParameter %${type16}\n"
2199                         "            %entry = OpLabel\n"
2200                         "                          %ret = ${convert} %${type32} %io_param1\n"
2201                         "                     OpReturnValue %ret\n"
2202                         "                     OpFunctionEnd\n");
2203
2204         const StringTemplate vecPreMain (
2205                         "                       %i16 = OpTypeInt 16 1\n"
2206                         "                       %u16 = OpTypeInt 16 0\n"
2207                         "                 %v4i16 = OpTypeVector %i16 4\n"
2208                         "                 %v4u16 = OpTypeVector %u16 4\n"
2209                         "          %ip_${type16} = OpTypePointer Input %${type16}\n"
2210                         "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
2211                         "        %ip_a3${type16} = OpTypePointer Input %a3${type16}\n"
2212                         "%${type32}_${type16}_function = OpTypeFunction %${type32} %${type16}\n"
2213                         "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
2214                         "        %op_a3${type32} = OpTypePointer Output %a3${type32}\n");
2215
2216         struct Case
2217         {
2218                 const char*                             name;
2219                 const StringTemplate&   interfaceOpFunc;
2220                 const StringTemplate&   preMain;
2221                 const char*                             type32;
2222                 const char*                             type16;
2223                 const char*                             sign;
2224                 const char*                             opcode;
2225                 deUint32                                numPerCase;
2226                 deUint32                                numElements;
2227         };
2228
2229         Case    cases[]         =
2230         {
2231                 {"scalar_sint", scalarIfOpFunc, scalarPreMain,  "i32",          "i16",          "1",    "OpSConvert",   4,              1},
2232                 {"scalar_uint", scalarIfOpFunc, scalarPreMain,  "u32",          "u16",          "0",    "OpUConvert",   4,              1},
2233                 {"vector_sint", vecIfOpFunc,    vecPreMain,             "v4i32",        "v4i16",        "1",    "OpSConvert",   4 * 4,  4},
2234                 {"vector_uint", vecIfOpFunc,    vecPreMain,             "v4u32",        "v4u16",        "0",    "OpUConvert",   4 * 4,  4},
2235         };
2236
2237         VulkanFeatures  requiredFeatures;
2238         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
2239
2240         for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
2241         {
2242                 map<string, string>                             specs;
2243
2244                 specs["type32"]                                 = cases[caseIdx].type32;
2245                 specs["type16"]                                 = cases[caseIdx].type16;
2246                 specs["signed"]                                 = cases[caseIdx].sign;
2247                 specs["convert"]                                = cases[caseIdx].opcode;
2248
2249                 fragments["pre_main"]                   = cases[caseIdx].preMain.specialize(specs);
2250                 fragments["interface_op_func"]  = cases[caseIdx].interfaceOpFunc.specialize(specs);
2251                 fragments["input_type"]                 = cases[caseIdx].type16;
2252                 fragments["output_type"]                = cases[caseIdx].type32;
2253
2254                 GraphicsInterfaces                              interfaces;
2255                 const deUint32                                  numPerCase      = cases[caseIdx].numPerCase;
2256                 vector<deInt16>                                 subInputs       (numPerCase);
2257                 vector<deInt32>                                 subOutputs      (numPerCase);
2258
2259                 for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
2260                 {
2261                         string                  testName        = string(cases[caseIdx].name) + numberToString(caseNdx);
2262
2263                         for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
2264                         {
2265                                 subInputs[numNdx]       = inputs[caseNdx * numPerCase + numNdx];
2266                                 if (cases[caseIdx].sign[0] == '1')
2267                                         subOutputs[numNdx]      = sOutputs[caseNdx * numPerCase + numNdx];
2268                                 else
2269                                         subOutputs[numNdx]      = uOutputs[caseNdx * numPerCase + numNdx];
2270                         }
2271                         if (strcmp(cases[caseIdx].sign, "1") == 0)
2272                         {
2273                                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT16), BufferSp(new Int16Buffer(subInputs))),
2274                                                                                   std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT32), BufferSp(new Int32Buffer(subOutputs))));
2275                         }
2276                         else
2277                         {
2278                                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT16), BufferSp(new Int16Buffer(subInputs))),
2279                                                                                   std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT32), BufferSp(new Int32Buffer(subOutputs))));
2280                         }
2281                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
2282                 }
2283         }
2284 }
2285
2286 void addGraphics16BitStoragePushConstantFloat16To32Group (tcu::TestCaseGroup* testGroup)
2287 {
2288         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2289         map<string, string>                                     fragments;
2290         RGBA                                                            defaultColors[4];
2291         vector<string>                                          extensions;
2292         GraphicsResources                                       resources;
2293         PushConstants                                           pcs;
2294         const deUint32                                          numDataPoints           = 64;
2295         vector<deFloat16>                                       float16Data                     (getFloat16s(rnd, numDataPoints));
2296         vector<float>                                           float32Data;
2297         VulkanFeatures                                          requiredFeatures;
2298
2299         float32Data.reserve(numDataPoints);
2300         for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
2301                 float32Data.push_back(deFloat16To32(float16Data[numIdx]));
2302
2303         extensions.push_back("VK_KHR_16bit_storage");
2304         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
2305
2306         fragments["capability"]                         = "OpCapability StoragePushConstant16\n";
2307         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"";
2308
2309         pcs.setPushConstant(BufferSp(new Float16Buffer(float16Data)));
2310         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data))));
2311         resources.verifyIO = check32BitFloats;
2312
2313         getDefaultColors(defaultColors);
2314
2315         const StringTemplate    testFun         (
2316                 "%test_code = OpFunction %v4f32 None %v4f32_function\n"
2317                 "    %param = OpFunctionParameter %v4f32\n"
2318
2319                 "%entry = OpLabel\n"
2320                 "    %i = OpVariable %fp_i32 Function\n"
2321                 "         OpStore %i %c_i32_0\n"
2322                 "         OpBranch %loop\n"
2323
2324                 " %loop = OpLabel\n"
2325                 "   %15 = OpLoad %i32 %i\n"
2326                 "   %lt = OpSLessThan %bool %15 ${count}\n"
2327                 "         OpLoopMerge %merge %inc None\n"
2328                 "         OpBranchConditional %lt %write %merge\n"
2329
2330                 "%write = OpLabel\n"
2331                 "   %30 = OpLoad %i32 %i\n"
2332                 "  %src = OpAccessChain ${pp_type16} %pc16 %c_i32_0 %30 ${index0:opt}\n"
2333                 "%val16 = OpLoad ${f_type16} %src\n"
2334                 "%val32 = OpFConvert ${f_type32} %val16\n"
2335                 "  %dst = OpAccessChain ${up_type32} %ssbo32 %c_i32_0 %30 ${index0:opt}\n"
2336                 "         OpStore %dst %val32\n"
2337
2338                 "${store:opt}\n"
2339
2340                 "         OpBranch %inc\n"
2341
2342                 "  %inc = OpLabel\n"
2343                 "   %37 = OpLoad %i32 %i\n"
2344                 "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2345                 "         OpStore %i %39\n"
2346                 "         OpBranch %loop\n"
2347
2348                 "%merge = OpLabel\n"
2349                 "         OpReturnValue %param\n"
2350
2351                 "OpFunctionEnd\n");
2352
2353         {  // Scalar cases
2354                 fragments["pre_main"]                           =
2355                         "      %f16 = OpTypeFloat 16\n"
2356                         " %c_i32_64 = OpConstant %i32 64\n"                                     // Should be the same as numDataPoints
2357                         "   %a64f16 = OpTypeArray %f16 %c_i32_64\n"
2358                         "   %a64f32 = OpTypeArray %f32 %c_i32_64\n"
2359                         "   %pp_f16 = OpTypePointer PushConstant %f16\n"
2360                         "   %up_f32 = OpTypePointer Uniform %f32\n"
2361                         "   %SSBO32 = OpTypeStruct %a64f32\n"
2362                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2363                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2364                         "     %PC16 = OpTypeStruct %a64f16\n"
2365                         "  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
2366                         "     %pc16 = OpVariable %pp_PC16 PushConstant\n";
2367
2368                 fragments["decoration"]                         =
2369                         "OpDecorate %a64f16 ArrayStride 2\n"
2370                         "OpDecorate %a64f32 ArrayStride 4\n"
2371                         "OpDecorate %SSBO32 BufferBlock\n"
2372                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2373                         "OpDecorate %PC16 Block\n"
2374                         "OpMemberDecorate %PC16 0 Offset 0\n"
2375                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2376                         "OpDecorate %ssbo32 Binding 0\n";
2377
2378                 map<string, string>             specs;
2379
2380                 specs["count"]                  = "%c_i32_64";
2381                 specs["pp_type16"]              = "%pp_f16";
2382                 specs["f_type16"]               = "%f16";
2383                 specs["f_type32"]               = "%f32";
2384                 specs["up_type32"]              = "%up_f32";
2385
2386                 fragments["testfun"]    = testFun.specialize(specs);
2387
2388                 createTestsForAllStages("scalar", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2389         }
2390
2391         {  // Vector cases
2392                 fragments["pre_main"]                           =
2393                         "      %f16 = OpTypeFloat 16\n"
2394                         "    %v4f16 = OpTypeVector %f16 4\n"
2395                         " %c_i32_16 = OpConstant %i32 16\n"
2396                         " %a16v4f16 = OpTypeArray %v4f16 %c_i32_16\n"
2397                         " %a16v4f32 = OpTypeArray %v4f32 %c_i32_16\n"
2398                         " %pp_v4f16 = OpTypePointer PushConstant %v4f16\n"
2399                         " %up_v4f32 = OpTypePointer Uniform %v4f32\n"
2400                         "   %SSBO32 = OpTypeStruct %a16v4f32\n"
2401                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2402                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2403                         "     %PC16 = OpTypeStruct %a16v4f16\n"
2404                         "  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
2405                         "     %pc16 = OpVariable %pp_PC16 PushConstant\n";
2406
2407                 fragments["decoration"]                         =
2408                         "OpDecorate %a16v4f16 ArrayStride 8\n"
2409                         "OpDecorate %a16v4f32 ArrayStride 16\n"
2410                         "OpDecorate %SSBO32 BufferBlock\n"
2411                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2412                         "OpDecorate %PC16 Block\n"
2413                         "OpMemberDecorate %PC16 0 Offset 0\n"
2414                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2415                         "OpDecorate %ssbo32 Binding 0\n";
2416
2417                 map<string, string>             specs;
2418
2419                 specs["count"]                  = "%c_i32_16";
2420                 specs["pp_type16"]              = "%pp_v4f16";
2421                 specs["f_type16"]               = "%v4f16";
2422                 specs["f_type32"]               = "%v4f32";
2423                 specs["up_type32"]              = "%up_v4f32";
2424
2425                 fragments["testfun"]    = testFun.specialize(specs);
2426
2427                 createTestsForAllStages("vector", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2428         }
2429
2430         {  // Matrix cases
2431                 fragments["pre_main"]                           =
2432                         "  %c_i32_8 = OpConstant %i32 8\n"
2433                         "      %f16 = OpTypeFloat 16\n"
2434                         "    %v4f16 = OpTypeVector %f16 4\n"
2435                         "  %m2v4f16 = OpTypeMatrix %v4f16 2\n"
2436                         "  %m2v4f32 = OpTypeMatrix %v4f32 2\n"
2437                         "%a8m2v4f16 = OpTypeArray %m2v4f16 %c_i32_8\n"
2438                         "%a8m2v4f32 = OpTypeArray %m2v4f32 %c_i32_8\n"
2439                         " %pp_v4f16 = OpTypePointer PushConstant %v4f16\n"
2440                         " %up_v4f32 = OpTypePointer Uniform %v4f32\n"
2441                         "   %SSBO32 = OpTypeStruct %a8m2v4f32\n"
2442                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2443                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2444                         "     %PC16 = OpTypeStruct %a8m2v4f16\n"
2445                         "  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
2446                         "     %pc16 = OpVariable %pp_PC16 PushConstant\n";
2447
2448                 fragments["decoration"]                         =
2449                         "OpDecorate %a8m2v4f16 ArrayStride 16\n"
2450                         "OpDecorate %a8m2v4f32 ArrayStride 32\n"
2451                         "OpDecorate %SSBO32 BufferBlock\n"
2452                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2453                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
2454                         "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
2455                         "OpDecorate %PC16 Block\n"
2456                         "OpMemberDecorate %PC16 0 Offset 0\n"
2457                         "OpMemberDecorate %PC16 0 ColMajor\n"
2458                         "OpMemberDecorate %PC16 0 MatrixStride 8\n"
2459                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2460                         "OpDecorate %ssbo32 Binding 0\n";
2461
2462                 map<string, string>             specs;
2463
2464                 specs["count"]                  = "%c_i32_8";
2465                 specs["pp_type16"]              = "%pp_v4f16";
2466                 specs["up_type32"]              = "%up_v4f32";
2467                 specs["f_type16"]               = "%v4f16";
2468                 specs["f_type32"]               = "%v4f32";
2469                 specs["index0"]                 = "%c_i32_0";
2470                 specs["store"]                  =
2471                         "  %src_1 = OpAccessChain %pp_v4f16 %pc16 %c_i32_0 %30 %c_i32_1\n"
2472                         "%val16_1 = OpLoad %v4f16 %src_1\n"
2473                         "%val32_1 = OpFConvert %v4f32 %val16_1\n"
2474                         "  %dst_1 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_1\n"
2475                         "           OpStore %dst_1 %val32_1\n";
2476
2477                 fragments["testfun"]    = testFun.specialize(specs);
2478
2479                 createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2480         }
2481 }
2482
2483 void addGraphics16BitStoragePushConstantInt16To32Group (tcu::TestCaseGroup* testGroup)
2484 {
2485         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2486         map<string, string>                                     fragments;
2487         RGBA                                                            defaultColors[4];
2488         const deUint32                                          numDataPoints           = 64;
2489         vector<deInt16>                                         inputs                          = getInt16s(rnd, numDataPoints);
2490         vector<deInt32>                                         sOutputs;
2491         vector<deInt32>                                         uOutputs;
2492         PushConstants                                           pcs;
2493         GraphicsResources                                       resources;
2494         vector<string>                                          extensions;
2495         const deUint16                                          signBitMask                     = 0x8000;
2496         const deUint32                                          signExtendMask          = 0xffff0000;
2497         VulkanFeatures                                          requiredFeatures;
2498
2499         sOutputs.reserve(inputs.size());
2500         uOutputs.reserve(inputs.size());
2501
2502         for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
2503         {
2504                 uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
2505                 if (inputs[numNdx] & signBitMask)
2506                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
2507                 else
2508                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
2509         }
2510
2511         extensions.push_back("VK_KHR_16bit_storage");
2512         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
2513
2514         fragments["capability"]                         = "OpCapability StoragePushConstant16\n";
2515         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"";
2516
2517         pcs.setPushConstant(BufferSp(new Int16Buffer(inputs)));
2518
2519         getDefaultColors(defaultColors);
2520
2521         const StringTemplate    testFun         (
2522                 "%test_code = OpFunction %v4f32 None %v4f32_function\n"
2523                 "    %param = OpFunctionParameter %v4f32\n"
2524
2525                 "%entry = OpLabel\n"
2526                 "    %i = OpVariable %fp_i32 Function\n"
2527                 "         OpStore %i %c_i32_0\n"
2528                 "         OpBranch %loop\n"
2529
2530                 " %loop = OpLabel\n"
2531                 "   %15 = OpLoad %i32 %i\n"
2532                 "   %lt = OpSLessThan %bool %15 %c_i32_${count}\n"
2533                 "         OpLoopMerge %merge %inc None\n"
2534                 "         OpBranchConditional %lt %write %merge\n"
2535
2536                 "%write = OpLabel\n"
2537                 "   %30 = OpLoad %i32 %i\n"
2538                 "  %src = OpAccessChain %pp_${type16} %pc16 %c_i32_0 %30\n"
2539                 "%val16 = OpLoad %${type16} %src\n"
2540                 "%val32 = ${convert} %${type32} %val16\n"
2541                 "  %dst = OpAccessChain %up_${type32} %ssbo32 %c_i32_0 %30\n"
2542                 "         OpStore %dst %val32\n"
2543                 "         OpBranch %inc\n"
2544
2545                 "  %inc = OpLabel\n"
2546                 "   %37 = OpLoad %i32 %i\n"
2547                 "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2548                 "         OpStore %i %39\n"
2549                 "         OpBranch %loop\n"
2550
2551                 "%merge = OpLabel\n"
2552                 "         OpReturnValue %param\n"
2553
2554                 "OpFunctionEnd\n");
2555
2556         {  // Scalar cases
2557                 const StringTemplate    preMain         (
2558                         "         %${type16} = OpTypeInt 16 ${signed}\n"
2559                         "    %c_i32_${count} = OpConstant %i32 ${count}\n"                                      // Should be the same as numDataPoints
2560                         "%a${count}${type16} = OpTypeArray %${type16} %c_i32_${count}\n"
2561                         "%a${count}${type32} = OpTypeArray %${type32} %c_i32_${count}\n"
2562                         "      %pp_${type16} = OpTypePointer PushConstant %${type16}\n"
2563                         "      %up_${type32} = OpTypePointer Uniform      %${type32}\n"
2564                         "            %SSBO32 = OpTypeStruct %a${count}${type32}\n"
2565                         "         %up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2566                         "            %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2567                         "              %PC16 = OpTypeStruct %a${count}${type16}\n"
2568                         "           %pp_PC16 = OpTypePointer PushConstant %PC16\n"
2569                         "              %pc16 = OpVariable %pp_PC16 PushConstant\n");
2570
2571                 const StringTemplate    decoration      (
2572                         "OpDecorate %a${count}${type16} ArrayStride 2\n"
2573                         "OpDecorate %a${count}${type32} ArrayStride 4\n"
2574                         "OpDecorate %SSBO32 BufferBlock\n"
2575                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2576                         "OpDecorate %PC16 Block\n"
2577                         "OpMemberDecorate %PC16 0 Offset 0\n"
2578                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2579                         "OpDecorate %ssbo32 Binding 0\n");
2580
2581                 {  // signed int
2582                         map<string, string>             specs;
2583
2584                         specs["type16"]                 = "i16";
2585                         specs["type32"]                 = "i32";
2586                         specs["signed"]                 = "1";
2587                         specs["count"]                  = "64";
2588                         specs["convert"]                = "OpSConvert";
2589
2590                         fragments["testfun"]    = testFun.specialize(specs);
2591                         fragments["pre_main"]   = preMain.specialize(specs);
2592                         fragments["decoration"] = decoration.specialize(specs);
2593
2594                         resources.outputs.clear();
2595                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(sOutputs))));
2596                         createTestsForAllStages("sint_scalar", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2597                 }
2598                 {  // signed int
2599                         map<string, string>             specs;
2600
2601                         specs["type16"]                 = "u16";
2602                         specs["type32"]                 = "u32";
2603                         specs["signed"]                 = "0";
2604                         specs["count"]                  = "64";
2605                         specs["convert"]                = "OpUConvert";
2606
2607                         fragments["testfun"]    = testFun.specialize(specs);
2608                         fragments["pre_main"]   = preMain.specialize(specs);
2609                         fragments["decoration"] = decoration.specialize(specs);
2610
2611                         resources.outputs.clear();
2612                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(uOutputs))));
2613                         createTestsForAllStages("uint_scalar", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2614                 }
2615         }
2616
2617         {  // Vector cases
2618                 const StringTemplate    preMain         (
2619                         "    %${base_type16} = OpTypeInt 16 ${signed}\n"
2620                         "         %${type16} = OpTypeVector %${base_type16} 2\n"
2621                         "    %c_i32_${count} = OpConstant %i32 ${count}\n"
2622                         "%a${count}${type16} = OpTypeArray %${type16} %c_i32_${count}\n"
2623                         "%a${count}${type32} = OpTypeArray %${type32} %c_i32_${count}\n"
2624                         "      %pp_${type16} = OpTypePointer PushConstant %${type16}\n"
2625                         "      %up_${type32} = OpTypePointer Uniform      %${type32}\n"
2626                         "            %SSBO32 = OpTypeStruct %a${count}${type32}\n"
2627                         "         %up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2628                         "            %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2629                         "              %PC16 = OpTypeStruct %a${count}${type16}\n"
2630                         "           %pp_PC16 = OpTypePointer PushConstant %PC16\n"
2631                         "              %pc16 = OpVariable %pp_PC16 PushConstant\n");
2632
2633                 const StringTemplate    decoration      (
2634                         "OpDecorate %a${count}${type16} ArrayStride 4\n"
2635                         "OpDecorate %a${count}${type32} ArrayStride 8\n"
2636                         "OpDecorate %SSBO32 BufferBlock\n"
2637                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2638                         "OpDecorate %PC16 Block\n"
2639                         "OpMemberDecorate %PC16 0 Offset 0\n"
2640                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2641                         "OpDecorate %ssbo32 Binding 0\n");
2642
2643                 {  // signed int
2644                         map<string, string>             specs;
2645
2646                         specs["base_type16"]    = "i16";
2647                         specs["type16"]                 = "v2i16";
2648                         specs["type32"]                 = "v2i32";
2649                         specs["signed"]                 = "1";
2650                         specs["count"]                  = "32";                         // 64 / 2
2651                         specs["convert"]                = "OpSConvert";
2652
2653                         fragments["testfun"]    = testFun.specialize(specs);
2654                         fragments["pre_main"]   = preMain.specialize(specs);
2655                         fragments["decoration"] = decoration.specialize(specs);
2656
2657                         resources.outputs.clear();
2658                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(sOutputs))));
2659                         createTestsForAllStages("sint_vector", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2660                 }
2661                 {  // signed int
2662                         map<string, string>             specs;
2663
2664                         specs["base_type16"]    = "u16";
2665                         specs["type16"]                 = "v2u16";
2666                         specs["type32"]                 = "v2u32";
2667                         specs["signed"]                 = "0";
2668                         specs["count"]                  = "32";
2669                         specs["convert"]                = "OpUConvert";
2670
2671                         fragments["testfun"]    = testFun.specialize(specs);
2672                         fragments["pre_main"]   = preMain.specialize(specs);
2673                         fragments["decoration"] = decoration.specialize(specs);
2674
2675                         resources.outputs.clear();
2676                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(uOutputs))));
2677                         createTestsForAllStages("uint_vector", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2678                 }
2679         }
2680 }
2681
// Adds graphics test cases that read 16-bit integers from a buffer (%ssbo16,
// binding 0), widen them to 32 bits with OpSConvert/OpUConvert and store the
// results into a BufferBlock-decorated buffer (%ssbo32, binding 1).
//
// One case is registered through createTestsForAllStages() for every combination
// of category (scalar / 2-component vector), CAPABILITIES entry and signedness;
// the case name is "<capability name>_<category>_<sint|uint>".
//
// \param testGroup  Group that receives the generated cases; its name is also
//                   hashed to initialize the random number generator.
2682 void addGraphics16BitStorageUniformInt16To32Group (tcu::TestCaseGroup* testGroup)
2683 {
             // The hash of the group name is passed to the RNG constructor (presumably as its seed).
2684         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2685         map<string, string>                                     fragments;
2686         const deUint32                                          numDataPoints           = 256;
2687         RGBA                                                            defaultColors[4];
             // 16-bit source values; getInt16s() is defined outside this function
             // (presumably returns numDataPoints values drawn from rnd - confirm).
2688         vector<deInt16>                                         inputs                          = getInt16s(rnd, numDataPoints);
             // Expected 32-bit results for the signed (sOutputs) and unsigned (uOutputs) variants.
2689         vector<deInt32>                                         sOutputs;
2690         vector<deInt32>                                         uOutputs;
2691         GraphicsResources                                       resources;
2692         vector<string>                                          extensions;
2693         const deUint16                                          signBitMask                     = 0x8000;
2694         const deUint32                                          signExtendMask          = 0xffff0000;
2695         const StringTemplate                            capabilities            ("OpCapability ${cap}\n");
2696
2697         sOutputs.reserve(inputs.size());
2698         uOutputs.reserve(inputs.size());
2699
             // Compute the reference outputs on the host:
             //  - uOutputs: the 16-bit pattern reinterpreted as unsigned, i.e. zero-extended.
             //  - sOutputs: the upper 16 bits are set when bit 15 of the input is set, i.e. sign-extended.
2700         for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
2701         {
2702                 uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
2703                 if (inputs[numNdx] & signBitMask)
2704                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
2705                 else
2706                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
2707         }
2708
             // Single input buffer holding the 16-bit data. Its descriptor type is only a
             // placeholder here; it is overwritten per capability in the loop at the end.
2709         resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(inputs))));
2710
2711         extensions.push_back("VK_KHR_16bit_storage");
2712         fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"";
2713
2714         getDefaultColors(defaultColors);
2715
             // Per-signedness parameters: test name part, SPIR-V ids of the 32-bit and
             // 16-bit integer types, the widening opcode, and the signedness flag.
2716         struct IntegerFacts
2717         {
2718                 const char*     name;
2719                 const char*     type32;
2720                 const char*     type16;
2721                 const char* opcode;
2722                 bool            isSigned;
2723         };
2724
2725         const IntegerFacts      intFacts[]      =
2726         {
2727                 {"sint",        "%i32",         "%i16",         "OpSConvert",   true},
2728                 {"uint",        "%u32",         "%u16",         "OpUConvert",   false},
2729         };
2730
             // Scalar case: type and variable declarations. Both buffers are structs wrapping
             // a 256-element array; ${itype16} is declared here, ${itype32} is only referenced.
2731         const StringTemplate scalarPreMain              (
2732                         "${itype16} = OpTypeInt 16 ${signed}\n"
2733                         " %c_i32_256 = OpConstant %i32 256\n"
2734                         "   %up_i32 = OpTypePointer Uniform ${itype32}\n"
2735                         "   %up_i16 = OpTypePointer Uniform ${itype16}\n"
2736                         "   %ra_i32 = OpTypeArray ${itype32} %c_i32_256\n"
2737                         "   %ra_i16 = OpTypeArray ${itype16} %c_i32_256\n"
2738                         "   %SSBO32 = OpTypeStruct %ra_i32\n"
2739                         "   %SSBO16 = OpTypeStruct %ra_i16\n"
2740                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2741                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
2742                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2743                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
2744
             // Scalar case: layout decorations. Array strides are 4 and 2 for the 32-bit and
             // 16-bit arrays; the input block decoration ${indecor} comes from CAPABILITIES[].decor.
2745         const StringTemplate scalarDecoration           (
2746                         "OpDecorate %ra_i32 ArrayStride 4\n"
2747                         "OpDecorate %ra_i16 ArrayStride 2\n"
2748                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2749                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
2750                         "OpDecorate %SSBO32 BufferBlock\n"
2751                         "OpDecorate %SSBO16 ${indecor}\n"
2752                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2753                         "OpDecorate %ssbo16 DescriptorSet 0\n"
2754                         "OpDecorate %ssbo32 Binding 1\n"
2755                         "OpDecorate %ssbo16 Binding 0\n");
2756
             // Scalar case: test function body. Loops i over [0, 256) and performs
             // ssbo32[i] = ${convert}(ssbo16[i]); the incoming %param is returned unchanged.
2757         const StringTemplate scalarTestFunc     (
2758                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
2759                         "    %param = OpFunctionParameter %v4f32\n"
2760
2761                         "%entry = OpLabel\n"
2762                         "    %i = OpVariable %fp_i32 Function\n"
2763                         "         OpStore %i %c_i32_0\n"
2764                         "         OpBranch %loop\n"
2765
2766                         " %loop = OpLabel\n"
2767                         "   %15 = OpLoad %i32 %i\n"
2768                         "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
2769                         "         OpLoopMerge %merge %inc None\n"
2770                         "         OpBranchConditional %lt %write %merge\n"
2771
2772                         "%write = OpLabel\n"
2773                         "   %30 = OpLoad %i32 %i\n"
2774                         "  %src = OpAccessChain %up_i16 %ssbo16 %c_i32_0 %30\n"
2775                         "%val16 = OpLoad ${itype16} %src\n"
2776                         "%val32 = ${convert} ${itype32} %val16\n"
2777                         "  %dst = OpAccessChain %up_i32 %ssbo32 %c_i32_0 %30\n"
2778                         "         OpStore %dst %val32\n"
2779                         "         OpBranch %inc\n"
2780
2781                         "  %inc = OpLabel\n"
2782                         "   %37 = OpLoad %i32 %i\n"
2783                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2784                         "         OpStore %i %39\n"
2785                         "         OpBranch %loop\n"
2786                         "%merge = OpLabel\n"
2787                         "         OpReturnValue %param\n"
2788
2789                         "OpFunctionEnd\n");
2790
             // Vector case: same structure as the scalar case, but the arrays hold 128
             // two-component vectors (128 * 2 = the same 256 data points).
2791         const StringTemplate vecPreMain         (
2792                         "${itype16} = OpTypeInt 16 ${signed}\n"
2793                         "%c_i32_128 = OpConstant %i32 128\n"
2794                         "%v2itype16 = OpTypeVector ${itype16} 2\n"
2795                         " %up_v2i32 = OpTypePointer Uniform ${v2itype32}\n"
2796                         " %up_v2i16 = OpTypePointer Uniform %v2itype16\n"
2797                         " %ra_v2i32 = OpTypeArray ${v2itype32} %c_i32_128\n"
2798                         " %ra_v2i16 = OpTypeArray %v2itype16 %c_i32_128\n"
2799                         "   %SSBO32 = OpTypeStruct %ra_v2i32\n"
2800                         "   %SSBO16 = OpTypeStruct %ra_v2i16\n"
2801                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2802                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
2803                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2804                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
2805
             // Vector case: layout decorations; strides are doubled relative to the scalar case (8 and 4).
2806         const StringTemplate vecDecoration              (
2807                         "OpDecorate %ra_v2i32 ArrayStride 8\n"
2808                         "OpDecorate %ra_v2i16 ArrayStride 4\n"
2809                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2810                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
2811                         "OpDecorate %SSBO32 BufferBlock\n"
2812                         "OpDecorate %SSBO16 ${indecor}\n"
2813                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2814                         "OpDecorate %ssbo16 DescriptorSet 0\n"
2815                         "OpDecorate %ssbo32 Binding 1\n"
2816                         "OpDecorate %ssbo16 Binding 0\n");
2817
             // Vector case: test function body. Loops i over [0, 128) and converts one
             // two-component vector per iteration: ssbo32[i] = ${convert}(ssbo16[i]).
2818         const StringTemplate vecTestFunc        (
2819                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
2820                         "    %param = OpFunctionParameter %v4f32\n"
2821
2822                         "%entry = OpLabel\n"
2823                         "    %i = OpVariable %fp_i32 Function\n"
2824                         "         OpStore %i %c_i32_0\n"
2825                         "         OpBranch %loop\n"
2826
2827                         " %loop = OpLabel\n"
2828                         "   %15 = OpLoad %i32 %i\n"
2829                         "   %lt = OpSLessThan %bool %15 %c_i32_128\n"
2830                         "         OpLoopMerge %merge %inc None\n"
2831                         "         OpBranchConditional %lt %write %merge\n"
2832
2833                         "%write = OpLabel\n"
2834                         "   %30 = OpLoad %i32 %i\n"
2835                         "  %src = OpAccessChain %up_v2i16 %ssbo16 %c_i32_0 %30\n"
2836                         "%val16 = OpLoad %v2itype16 %src\n"
2837                         "%val32 = ${convert} ${v2itype32} %val16\n"
2838                         "  %dst = OpAccessChain %up_v2i32 %ssbo32 %c_i32_0 %30\n"
2839                         "         OpStore %dst %val32\n"
2840                         "         OpBranch %inc\n"
2841
2842                         "  %inc = OpLabel\n"
2843                         "   %37 = OpLoad %i32 %i\n"
2844                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2845                         "         OpStore %i %39\n"
2846                         "         OpBranch %loop\n"
2847                         "%merge = OpLabel\n"
2848                         "         OpReturnValue %param\n"
2849
2850                         "OpFunctionEnd\n");
2851
             // Bundles the three templates of one data layout together with the name
             // used in the test case name. The templates are held by reference.
2852         struct Category
2853         {
2854                 const char*                             name;
2855                 const StringTemplate&   preMain;
2856                 const StringTemplate&   decoration;
2857                 const StringTemplate&   testFunction;
2858         };
2859
2860         const Category          categories[]    =
2861         {
2862                 {"scalar", scalarPreMain, scalarDecoration, scalarTestFunc},
2863                 {"vector", vecPreMain, vecDecoration, vecTestFunc},
2864         };
2865
             // Instantiate every (category, capability, signedness) combination.
             // CAPABILITIES is defined outside this function; its name, cap, decor and dtype fields are used here.
2866         for (deUint32 catIdx = 0; catIdx < DE_LENGTH_OF_ARRAY(categories); ++catIdx)
2867                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2868                         for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
2869                         {
2870                                 map<string, string>     specs;
2871                                 string                          name            = string(CAPABILITIES[capIdx].name) + "_" + categories[catIdx].name + "_" + intFacts[factIdx].name;
2872
2873                                 specs["cap"]                                    = CAPABILITIES[capIdx].cap;
2874                                 specs["indecor"]                                = CAPABILITIES[capIdx].decor;
2875                                 specs["itype32"]                                = intFacts[factIdx].type32;
                                     // Build the 32-bit vector type id from the scalar one by dropping the
                                     // leading '%' and prefixing "%v2", e.g. "%i32" -> "%v2i32".
2876                                 specs["v2itype32"]                              = "%v2" + string(intFacts[factIdx].type32).substr(1);
2877                                 specs["itype16"]                                = intFacts[factIdx].type16;
2878                                 if (intFacts[factIdx].isSigned)
2879                                         specs["signed"]                         = "1";
2880                                 else
2881                                         specs["signed"]                         = "0";
2882                                 specs["convert"]                                = intFacts[factIdx].opcode;
2883
2884                                 fragments["pre_main"]                   = categories[catIdx].preMain.specialize(specs);
2885                                 fragments["testfun"]                    = categories[catIdx].testFunction.specialize(specs);
2886                                 fragments["capability"]                 = capabilities.specialize(specs);
2887                                 fragments["decoration"]                 = categories[catIdx].decoration.specialize(specs);
2888
                                     // resources is shared by all iterations: retarget the input descriptor type
                                     // to this capability and replace the expected output with the matching one.
2889                                 resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
2890                                 resources.outputs.clear();
2891                                 if (intFacts[factIdx].isSigned)
2892                                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(sOutputs))));
2893                                 else
2894                                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(uOutputs))));
2895
                                     // The last argument is looked up from the capability name via get16BitStorageFeatures().
2896                                 createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
2897                         }
2898 }
2899
// Adds graphics test cases that read 16-bit floats from a buffer (%ssbo16,
// binding 0), widen them with OpFConvert and store the 32-bit results into a
// BufferBlock-decorated buffer (%ssbo32, binding 1).
//
// Three data layouts are covered in turn - scalar, 2-component vector and
// matrix of four 2-component columns - and each is registered once per
// CAPABILITIES entry as "<capability name>_{scalar,vector,matrix}_float".
// All layouts reuse the same 256 input values and the same expected output.
//
// \param testGroup  Group that receives the generated cases; its name is also
//                   hashed to initialize the random number generator.
2900 void addGraphics16BitStorageUniformFloat16To32Group (tcu::TestCaseGroup* testGroup)
2901 {
             // The hash of the group name is passed to the RNG constructor (presumably as its seed).
2902         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2903         map<string, string>                                     fragments;
2904         GraphicsResources                                       resources;
2905         vector<string>                                          extensions;
2906         const deUint32                                          numDataPoints           = 256;
2907         RGBA                                                            defaultColors[4];
2908         const StringTemplate                            capabilities            ("OpCapability ${cap}\n");
             // 16-bit source values; getFloat16s() is defined outside this function
             // (presumably returns numDataPoints values drawn from rnd - confirm).
2909         vector<deFloat16>                                       float16Data                     = getFloat16s(rnd, numDataPoints);
2910         vector<float>                                           float32Data;
2911
             // Expected outputs: every input converted to 32-bit float on the host.
2912         float32Data.reserve(numDataPoints);
2913         for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
2914                 float32Data.push_back(deFloat16To32(float16Data[numIdx]));
2915
             // One input and one output buffer, shared by all cases below. The input's descriptor
             // type is only a placeholder here; it is overwritten per capability in each loop.
2916         resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16Data))));
2917         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data))));
             // Install check32BitFloats (defined outside this function) as the I/O verification hook.
2918         resources.verifyIO = check32BitFloats;
2919
2920         extensions.push_back("VK_KHR_16bit_storage");
2921         fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"";
2922
2923         getDefaultColors(defaultColors);
2924
2925         { // scalar cases
                     // Declarations: both buffers are structs wrapping a 256-element float array.
2926                 fragments["pre_main"]                           =
2927                         "      %f16 = OpTypeFloat 16\n"
2928                         "%c_i32_256 = OpConstant %i32 256\n"
2929                         "   %up_f32 = OpTypePointer Uniform %f32\n"
2930                         "   %up_f16 = OpTypePointer Uniform %f16\n"
2931                         "   %ra_f32 = OpTypeArray %f32 %c_i32_256\n"
2932                         "   %ra_f16 = OpTypeArray %f16 %c_i32_256\n"
2933                         "   %SSBO32 = OpTypeStruct %ra_f32\n"
2934                         "   %SSBO16 = OpTypeStruct %ra_f16\n"
2935                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2936                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
2937                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2938                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
2939
                     // Layout decorations; ${indecor} is filled from CAPABILITIES[].decor.
2940                 const StringTemplate decoration         (
2941                         "OpDecorate %ra_f32 ArrayStride 4\n"
2942                         "OpDecorate %ra_f16 ArrayStride 2\n"
2943                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2944                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
2945                         "OpDecorate %SSBO32 BufferBlock\n"
2946                         "OpDecorate %SSBO16 ${indecor}\n"
2947                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2948                         "OpDecorate %ssbo16 DescriptorSet 0\n"
2949                         "OpDecorate %ssbo32 Binding 1\n"
2950                         "OpDecorate %ssbo16 Binding 0\n");
2951
2952                 // ssbo32[] <- convert ssbo16[] to 32bit float
                     // (loops i over [0, 256); the incoming %param is returned unchanged)
2953                 fragments["testfun"]                            =
2954                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
2955                         "    %param = OpFunctionParameter %v4f32\n"
2956
2957                         "%entry = OpLabel\n"
2958                         "    %i = OpVariable %fp_i32 Function\n"
2959                         "         OpStore %i %c_i32_0\n"
2960                         "         OpBranch %loop\n"
2961
2962                         " %loop = OpLabel\n"
2963                         "   %15 = OpLoad %i32 %i\n"
2964                         "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
2965                         "         OpLoopMerge %merge %inc None\n"
2966                         "         OpBranchConditional %lt %write %merge\n"
2967
2968                         "%write = OpLabel\n"
2969                         "   %30 = OpLoad %i32 %i\n"
2970                         "  %src = OpAccessChain %up_f16 %ssbo16 %c_i32_0 %30\n"
2971                         "%val16 = OpLoad %f16 %src\n"
2972                         "%val32 = OpFConvert %f32 %val16\n"
2973                         "  %dst = OpAccessChain %up_f32 %ssbo32 %c_i32_0 %30\n"
2974                         "         OpStore %dst %val32\n"
2975                         "         OpBranch %inc\n"
2976
2977                         "  %inc = OpLabel\n"
2978                         "   %37 = OpLoad %i32 %i\n"
2979                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2980                         "         OpStore %i %39\n"
2981                         "         OpBranch %loop\n"
2982
2983                         "%merge = OpLabel\n"
2984                         "         OpReturnValue %param\n"
2985
2986                         "OpFunctionEnd\n";
2987
                     // Register the scalar case once per capability; only the capability line,
                     // the input block decoration and the input descriptor type change.
2988                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2989                 {
2990                         map<string, string>     specs;
2991                         string                          testName        = string(CAPABILITIES[capIdx].name) + "_scalar_float";
2992
2993                         specs["cap"]                                    = CAPABILITIES[capIdx].cap;
2994                         specs["indecor"]                                = CAPABILITIES[capIdx].decor;
2995
2996                         fragments["capability"]                 = capabilities.specialize(specs);
2997                         fragments["decoration"]                 = decoration.specialize(specs);
2998
2999                         resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
3000
3001                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
3002                 }
3003         }
3004
3005         { // vector cases
                     // Declarations: arrays of 128 two-component vectors (128 * 2 = 256 data points).
                     // This overwrites the "pre_main" fragment left by the scalar section.
3006                 fragments["pre_main"]                           =
3007                         "      %f16 = OpTypeFloat 16\n"
3008                         "%c_i32_128 = OpConstant %i32 128\n"
3009                         "        %v2f16 = OpTypeVector %f16 2\n"
3010                         " %up_v2f32 = OpTypePointer Uniform %v2f32\n"
3011                         " %up_v2f16 = OpTypePointer Uniform %v2f16\n"
3012                         " %ra_v2f32 = OpTypeArray %v2f32 %c_i32_128\n"
3013                         " %ra_v2f16 = OpTypeArray %v2f16 %c_i32_128\n"
3014                         "   %SSBO32 = OpTypeStruct %ra_v2f32\n"
3015                         "   %SSBO16 = OpTypeStruct %ra_v2f16\n"
3016                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
3017                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
3018                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
3019                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
3020
                     // Layout decorations; array strides are 8 and 4 for the 32-bit and 16-bit vectors.
3021                 const StringTemplate decoration         (
3022                         "OpDecorate %ra_v2f32 ArrayStride 8\n"
3023                         "OpDecorate %ra_v2f16 ArrayStride 4\n"
3024                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
3025                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
3026                         "OpDecorate %SSBO32 BufferBlock\n"
3027                         "OpDecorate %SSBO16 ${indecor}\n"
3028                         "OpDecorate %ssbo32 DescriptorSet 0\n"
3029                         "OpDecorate %ssbo16 DescriptorSet 0\n"
3030                         "OpDecorate %ssbo32 Binding 1\n"
3031                         "OpDecorate %ssbo16 Binding 0\n");
3032
3033                 // ssbo32[] <- convert ssbo16[] to 32bit float
                     // (loops i over [0, 128), converting one two-component vector per iteration)
3034                 fragments["testfun"]                            =
3035                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
3036                         "    %param = OpFunctionParameter %v4f32\n"
3037
3038                         "%entry = OpLabel\n"
3039                         "    %i = OpVariable %fp_i32 Function\n"
3040                         "         OpStore %i %c_i32_0\n"
3041                         "         OpBranch %loop\n"
3042
3043                         " %loop = OpLabel\n"
3044                         "   %15 = OpLoad %i32 %i\n"
3045                         "   %lt = OpSLessThan %bool %15 %c_i32_128\n"
3046                         "         OpLoopMerge %merge %inc None\n"
3047                         "         OpBranchConditional %lt %write %merge\n"
3048
3049                         "%write = OpLabel\n"
3050                         "   %30 = OpLoad %i32 %i\n"
3051                         "  %src = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30\n"
3052                         "%val16 = OpLoad %v2f16 %src\n"
3053                         "%val32 = OpFConvert %v2f32 %val16\n"
3054                         "  %dst = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30\n"
3055                         "         OpStore %dst %val32\n"
3056                         "         OpBranch %inc\n"
3057
3058                         "  %inc = OpLabel\n"
3059                         "   %37 = OpLoad %i32 %i\n"
3060                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
3061                         "         OpStore %i %39\n"
3062                         "         OpBranch %loop\n"
3063
3064                         "%merge = OpLabel\n"
3065                         "         OpReturnValue %param\n"
3066
3067                         "OpFunctionEnd\n";
3068
                     // Register the vector case once per capability.
3069                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3070                 {
3071                         map<string, string>     specs;
3072                         string                          testName        = string(CAPABILITIES[capIdx].name) + "_vector_float";
3073
3074                         specs["cap"]                                    = CAPABILITIES[capIdx].cap;
3075                         specs["indecor"]                                = CAPABILITIES[capIdx].decor;
3076
3077                         fragments["capability"]                 = capabilities.specialize(specs);
3078                         fragments["decoration"]                 = decoration.specialize(specs);
3079
3080                         resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
3081
3082                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
3083                 }
3084         }
3085
3086         { // matrix cases
                     // Declarations: arrays of 32 matrices, each with four 2-component columns
                     // (32 * 4 * 2 = 256 data points).
                     // NOTE(review): the ids %a8m4x2f32 / %a8m4x2f16 suggest 8 elements, but the
                     // array length actually used is %c_i32_32.
3087                 fragments["pre_main"]                           =
3088                         " %c_i32_32 = OpConstant %i32 32\n"
3089                         "      %f16 = OpTypeFloat 16\n"
3090                         "    %v2f16 = OpTypeVector %f16 2\n"
3091                         "  %m4x2f32 = OpTypeMatrix %v2f32 4\n"
3092                         "  %m4x2f16 = OpTypeMatrix %v2f16 4\n"
3093                         " %up_v2f32 = OpTypePointer Uniform %v2f32\n"
3094                         " %up_v2f16 = OpTypePointer Uniform %v2f16\n"
3095                         "%a8m4x2f32 = OpTypeArray %m4x2f32 %c_i32_32\n"
3096                         "%a8m4x2f16 = OpTypeArray %m4x2f16 %c_i32_32\n"
3097                         "   %SSBO32 = OpTypeStruct %a8m4x2f32\n"
3098                         "   %SSBO16 = OpTypeStruct %a8m4x2f16\n"
3099                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
3100                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
3101                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
3102                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
3103
                     // Layout decorations: column-major matrices with a matrix stride of 8 (32-bit)
                     // and 4 (16-bit); four columns give array strides of 32 and 16.
3104                 const StringTemplate decoration         (
3105                         "OpDecorate %a8m4x2f32 ArrayStride 32\n"
3106                         "OpDecorate %a8m4x2f16 ArrayStride 16\n"
3107                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
3108                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
3109                         "OpMemberDecorate %SSBO32 0 MatrixStride 8\n"
3110                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
3111                         "OpMemberDecorate %SSBO16 0 ColMajor\n"
3112                         "OpMemberDecorate %SSBO16 0 MatrixStride 4\n"
3113                         "OpDecorate %SSBO32 BufferBlock\n"
3114                         "OpDecorate %SSBO16 ${indecor}\n"
3115                         "OpDecorate %ssbo32 DescriptorSet 0\n"
3116                         "OpDecorate %ssbo16 DescriptorSet 0\n"
3117                         "OpDecorate %ssbo32 Binding 1\n"
3118                         "OpDecorate %ssbo16 Binding 0\n");
3119
                     // Test function body: loops i over [0, 32) and, for matrix i, loads, converts and
                     // stores each of the four columns separately (column index is the last access-chain operand).
3120                 fragments["testfun"]                            =
3121                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
3122                         "    %param = OpFunctionParameter %v4f32\n"
3123
3124                         "%entry = OpLabel\n"
3125                         "    %i = OpVariable %fp_i32 Function\n"
3126                         "         OpStore %i %c_i32_0\n"
3127                         "         OpBranch %loop\n"
3128
3129                         " %loop = OpLabel\n"
3130                         "   %15 = OpLoad %i32 %i\n"
3131                         "   %lt = OpSLessThan %bool %15 %c_i32_32\n"
3132                         "         OpLoopMerge %merge %inc None\n"
3133                         "         OpBranchConditional %lt %write %merge\n"
3134
3135                         "  %write = OpLabel\n"
3136                         "     %30 = OpLoad %i32 %i\n"
3137                         "  %src_0 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_0\n"
3138                         "  %src_1 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_1\n"
3139                         "  %src_2 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_2\n"
3140                         "  %src_3 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_3\n"
3141                         "%val16_0 = OpLoad %v2f16 %src_0\n"
3142                         "%val16_1 = OpLoad %v2f16 %src_1\n"
3143                         "%val16_2 = OpLoad %v2f16 %src_2\n"
3144                         "%val16_3 = OpLoad %v2f16 %src_3\n"
3145                         "%val32_0 = OpFConvert %v2f32 %val16_0\n"
3146                         "%val32_1 = OpFConvert %v2f32 %val16_1\n"
3147                         "%val32_2 = OpFConvert %v2f32 %val16_2\n"
3148                         "%val32_3 = OpFConvert %v2f32 %val16_3\n"
3149                         "  %dst_0 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_0\n"
3150                         "  %dst_1 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_1\n"
3151                         "  %dst_2 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_2\n"
3152                         "  %dst_3 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_3\n"
3153                         "           OpStore %dst_0 %val32_0\n"
3154                         "           OpStore %dst_1 %val32_1\n"
3155                         "           OpStore %dst_2 %val32_2\n"
3156                         "           OpStore %dst_3 %val32_3\n"
3157                         "           OpBranch %inc\n"
3158
3159                         "  %inc = OpLabel\n"
3160                         "   %37 = OpLoad %i32 %i\n"
3161                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
3162                         "         OpStore %i %39\n"
3163                         "         OpBranch %loop\n"
3164
3165                         "%merge = OpLabel\n"
3166                         "         OpReturnValue %param\n"
3167
3168                         "OpFunctionEnd\n";
3169
                     // Register the matrix case once per capability.
3170                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3171                 {
3172                         map<string, string>     specs;
3173                         string                          testName        = string(CAPABILITIES[capIdx].name) + "_matrix_float";
3174
3175                         specs["cap"]                                    = CAPABILITIES[capIdx].cap;
3176                         specs["indecor"]                                = CAPABILITIES[capIdx].decor;
3177
3178                         fragments["capability"]                 = capabilities.specialize(specs);
3179                         fragments["decoration"]                 = decoration.specialize(specs);
3180
3181                         resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
3182
3183                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
3184                 }
3185         }
3186 }
3187
3188 } // anonymous
3189
3190 tcu::TestCaseGroup* create16BitStorageComputeGroup (tcu::TestContext& testCtx)
3191 {
3192         de::MovePtr<tcu::TestCaseGroup> group           (new tcu::TestCaseGroup(testCtx, "16bit_storage", "Compute tests for VK_KHR_16bit_storage extension"));
3193         addTestGroup(group.get(), "uniform_32_to_16", "32bit floats/ints to 16bit tests under capability StorageUniform{|BufferBlock}", addCompute16bitStorageUniform32To16Group);
3194         addTestGroup(group.get(), "uniform_16_to_32", "16bit floats/ints to 32bit tests under capability StorageUniform{|BufferBlock}", addCompute16bitStorageUniform16To32Group);
3195         addTestGroup(group.get(), "push_constant_16_to_32", "16bit floats/ints to 32bit tests under capability StoragePushConstant16", addCompute16bitStoragePushConstant16To32Group);
3196
3197         return group.release();
3198 }
3199
3200 tcu::TestCaseGroup* create16BitStorageGraphicsGroup (tcu::TestContext& testCtx)
3201 {
3202         de::MovePtr<tcu::TestCaseGroup> group           (new tcu::TestCaseGroup(testCtx, "16bit_storage", "Graphics tests for VK_KHR_16bit_storage extension"));
3203
3204         addTestGroup(group.get(), "uniform_float_32_to_16", "32-bit floats into 16-bit tests under capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformFloat32To16Group);
3205         addTestGroup(group.get(), "uniform_float_16_to_32", "16-bit floats into 32-bit testsunder capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformFloat16To32Group);
3206         addTestGroup(group.get(), "uniform_int_32_to_16", "32-bit int into 16-bit tests under capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformInt32To16Group);
3207         addTestGroup(group.get(), "uniform_int_16_to_32", "16-bit int into 32-bit tests under capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformInt16To32Group);
3208         addTestGroup(group.get(), "input_output_float_32_to_16", "32-bit floats into 16-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputFloat32To16Group);
3209         addTestGroup(group.get(), "input_output_float_16_to_32", "16-bit floats into 32-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputFloat16To32Group);
3210         addTestGroup(group.get(), "input_output_int_32_to_16", "32-bit int into 16-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputInt32To16Group);
3211         addTestGroup(group.get(), "input_output_int_16_to_32", "16-bit int into 32-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputInt16To32Group);
3212         addTestGroup(group.get(), "push_constant_float_16_to_32", "16-bit floats into 32-bit tests under capability StoragePushConstant16", addGraphics16BitStoragePushConstantFloat16To32Group);
3213         addTestGroup(group.get(), "push_constant_int_16_to_32", "16-bit int into 32-bit tests under capability StoragePushConstant16", addGraphics16BitStoragePushConstantInt16To32Group);
3214
3215         return group.release();
3216 }
3217
3218 } // SpirVAssembly
3219 } // vkt