external/vulkancts/modules/vulkan/spirv_assembly/vktSpvAsm16bitStorageTests.cpp

   1 /*-------------------------------------------------------------------------
   2  * Vulkan Conformance Tests
   3  * ------------------------
   4  *
   5  * Copyright (c) 2017 Google Inc.
   6  *
   7  * Licensed under the Apache License, Version 2.0 (the "License");
   8  * you may not use this file except in compliance with the License.
   9  * You may obtain a copy of the License at
  10  *
  11  *      http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  *
  19  *//*!
  20  * \file
  21  * \brief SPIR-V Assembly Tests for the VK_KHR_16bit_storage
  22  *//*--------------------------------------------------------------------*/
  23
  24 // VK_KHR_16bit_storage
  25 //
  26 // \todo [2017-02-08 antiagainst] Additional corner cases to check:
  27 //
  28 // * Test OpAccessChain with subword types
  29 //  * For newly enabled types T:
  30 //    * For composite types: vector, matrix, structures, array over T:
  31 //      1. Use OpAccessChain to form a pointer to a subword type.
  32 //      2. Load the subword value X16.
  33 //      3. Convert X16 to X32.
  34 //      4. Store X32 to BufferBlock.
  35 //      5. Host inspects X32.
  36 // * Test {StorageInputOutput16} 16-to-16:
  37 //   * For newly enabled types T:
  38 //     1. Host creates X16 stream values of type T.
  39 //     2. Shaders have corresponding capability.
  40 //     3. For each viable shader stage:
  41 //       3a. Load X16 Input variable.
  42 //       3b. Store X16 to Output variable.
  43 //     4. Host inspects resulting values.
  44 // * Test {StorageInputOutput16} 16-to-16 one value to two:
  45 //     Like the previous test, but write X16 to two different output variables.
  46 //     (Checks that the 16-bit intermediate value can be used twice.)
  47
  48 #include "vktSpvAsm16bitStorageTests.hpp"
  49
  50 #include "tcuFloat.hpp"
  51 #include "tcuRGBA.hpp"
  52 #include "tcuStringTemplate.hpp"
  53 #include "tcuTestLog.hpp"
  54 #include "tcuVectorUtil.hpp"
  55
  56 #include "vkDefs.hpp"
  57 #include "vkDeviceUtil.hpp"
  58 #include "vkMemUtil.hpp"
  59 #include "vkPlatform.hpp"
  60 #include "vkPrograms.hpp"
  61 #include "vkQueryUtil.hpp"
  62 #include "vkRef.hpp"
  63 #include "vkRefUtil.hpp"
  64 #include "vkStrUtil.hpp"
  65 #include "vkTypeUtil.hpp"
  66
  67 #include "deRandom.hpp"
  68 #include "deStringUtil.hpp"
  69 #include "deUniquePtr.hpp"
  70 #include "deMath.h"
  71
  72 #include "vktSpvAsmComputeShaderCase.hpp"
  73 #include "vktSpvAsmComputeShaderTestUtil.hpp"
  74 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
  75 #include "vktTestCaseUtil.hpp"
  76 #include "vktTestGroupUtil.hpp"
  77
  78 #include <limits>
  79 #include <map>
  80 #include <string>
  81 #include <sstream>
  82 #include <utility>
  83
  84 namespace vkt
  85 {
  86 namespace SpirVAssembly
  87 {
  88
  89 using namespace vk;
  90 using std::map;
  91 using std::string;
  92 using std::vector;
  93 using tcu::IVec3;
  94 using tcu::IVec4;
  95 using tcu::RGBA;
  96 using tcu::TestLog;
  97 using tcu::TestStatus;
  98 using tcu::Vec4;
  99 using de::UniquePtr;
 100 using tcu::StringTemplate;
 101 using tcu::Vec4;
 102
 103 namespace
 104 {
 105
// Describes one 16-bit storage variant that the test groups iterate over.
// Instances are created with positional initializers, so member order matters.
struct Capability
{
	const char*				name;	// Test-name prefix; also the key passed to get16BitStorageFeatures().
	const char*				cap;	// Substituted into "OpCapability ${capability}" in the shader template.
	const char*				decor;	// Substituted into "OpDecorate %SSBO16 ${storage}" in the shader template.
	vk::VkDescriptorType	dtype;	// Assigned to the descriptor type of the 16-bit input buffer.
};
 113
// Storage variants covered by the tests in this file.
// Columns follow the Capability members: name, SPIR-V capability, block decoration, descriptor type.
static const Capability	CAPABILITIES[]	=
{
	{"uniform_buffer_block",	"StorageUniformBufferBlock16",	"BufferBlock",	VK_DESCRIPTOR_TYPE_STORAGE_BUFFER},
	{"uniform",					"StorageUniform16",				"Block",		VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER},
};
 119
 120 VulkanFeatures  get16BitStorageFeatures (const char* cap)
 121 {
 122         VulkanFeatures features;
 123         if (string(cap) == "uniform_buffer_block")
 124                 features.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM_BUFFER_BLOCK;
 125         else if (string(cap) == "uniform")
 126                 features.ext16BitStorage = EXT16BITSTORAGEFEATURES_UNIFORM;
 127         else
 128                 DE_ASSERT(false && "not supported");
 129
 130         return features;
 131 }
 132
 133
 134 // Batch function to check arrays of 16-bit floats.
 135 //
 136 // For comparing 16-bit floats, we need to consider both RTZ and RTE. So we can only recalculate
 137 // the expected values here instead of get the expected values directly from the test case.
 138 // Thus we need original floats here but not expected outputs.
 139 template<RoundingModeFlags RoundingMode>
 140 bool graphicsCheck16BitFloats (const std::vector<Resource>&     originalFloats,
 141                                                            const vector<AllocationSp>&  outputAllocs,
 142                                                            const std::vector<Resource>& /* expectedOutputs */,
 143                                                            tcu::TestLog&                                log)
 144 {
 145         if (outputAllocs.size() != originalFloats.size())
 146                 return false;
 147
 148         for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
 149         {
 150                 const deUint16* returned        = static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
 151                 const float*    original        = static_cast<const float*>(originalFloats[outputNdx].second->data());
 152                 const deUint32  count           = static_cast<deUint32>(originalFloats[outputNdx].second->getNumBytes() / sizeof(float));
 153
 154                 for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
 155                         if (!compare16BitFloat(original[numNdx], returned[numNdx], RoundingMode, log))
 156                                 return false;
 157         }
 158
 159         return true;
 160 }
 161
 162 template<RoundingModeFlags RoundingMode>
 163 bool computeCheck16BitFloats (const std::vector<BufferSp>&      originalFloats,
 164                                                           const vector<AllocationSp>&   outputAllocs,
 165                                                           const std::vector<BufferSp>&  /* expectedOutputs */,
 166                                                           tcu::TestLog&                                 log)
 167 {
 168         if (outputAllocs.size() != originalFloats.size())
 169                 return false;
 170
 171         for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
 172         {
 173                 const deUint16* returned        = static_cast<const deUint16*>(outputAllocs[outputNdx]->getHostPtr());
 174                 const float*    original        = static_cast<const float*>(originalFloats[outputNdx]->data());
 175                 const deUint32  count           = static_cast<deUint32>(originalFloats[outputNdx]->getNumBytes() / sizeof(float));
 176
 177                 for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
 178                         if (!compare16BitFloat(original[numNdx], returned[numNdx], RoundingMode, log))
 179                                 return false;
 180         }
 181
 182         return true;
 183 }
 184
 185
 186 // Batch function to check arrays of 32-bit floats.
 187 //
 188 // For comparing 32-bit floats, we just need the expected value precomputed in the test case.
 189 // So we need expected outputs here but not original floats.
 190 bool check32BitFloats (const std::vector<Resource>&             /* originalFloats */,
 191                                            const std::vector<AllocationSp>& outputAllocs,
 192                                            const std::vector<Resource>&         expectedOutputs,
 193                                            tcu::TestLog&                                        log)
 194 {
 195         if (outputAllocs.size() != expectedOutputs.size())
 196                 return false;
 197
 198         for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
 199         {
 200                 const float*    returnedAsFloat = static_cast<const float*>(outputAllocs[outputNdx]->getHostPtr());
 201                 const float*    expectedAsFloat = static_cast<const float*>(expectedOutputs[outputNdx].second->data());
 202                 const deUint32  count                   = static_cast<deUint32>(expectedOutputs[outputNdx].second->getNumBytes() / sizeof(float));
 203
 204                 for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
 205                         if (!compare32BitFloat(expectedAsFloat[numNdx], returnedAsFloat[numNdx], log))
 206                                 return false;
 207         }
 208
 209         return true;
 210 }
 211
 212 // Overload for compute pipeline
 213 bool check32BitFloats (const std::vector<BufferSp>&             /* originalFloats */,
 214                                            const std::vector<AllocationSp>& outputAllocs,
 215                                            const std::vector<BufferSp>&         expectedOutputs,
 216                                            tcu::TestLog&                                        log)
 217 {
 218         if (outputAllocs.size() != expectedOutputs.size())
 219                 return false;
 220
 221         for (deUint32 outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
 222         {
 223                 const float*    returnedAsFloat = static_cast<const float*>(outputAllocs[outputNdx]->getHostPtr());
 224                 const float*    expectedAsFloat = static_cast<const float*>(expectedOutputs[outputNdx]->data());
 225                 const deUint32  count                   = static_cast<deUint32>(expectedOutputs[outputNdx]->getNumBytes() / sizeof(float));
 226
 227                 for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
 228                         if (!compare32BitFloat(expectedAsFloat[numNdx], returnedAsFloat[numNdx], log))
 229                                 return false;
 230         }
 231
 232         return true;
 233 }
 234
 235 // Generate and return 32-bit integers.
 236 //
 237 // Expected count to be at least 16.
 238 vector<deInt32> getInt32s (de::Random& rnd, const deUint32 count)
 239 {
 240         vector<deInt32>         data;
 241
 242         data.reserve(count);
 243
 244         // Make sure we have boundary numbers.
 245         data.push_back(deInt32(0x00000000));  // 0
 246         data.push_back(deInt32(0x00000001));  // 1
 247         data.push_back(deInt32(0x0000002a));  // 42
 248         data.push_back(deInt32(0x00007fff));  // 32767
 249         data.push_back(deInt32(0x00008000));  // 32768
 250         data.push_back(deInt32(0x0000ffff));  // 65535
 251         data.push_back(deInt32(0x00010000));  // 65536
 252         data.push_back(deInt32(0x7fffffff));  // 2147483647
 253         data.push_back(deInt32(0x80000000));  // -2147483648
 254         data.push_back(deInt32(0x80000001));  // -2147483647
 255         data.push_back(deInt32(0xffff0000));  // -65536
 256         data.push_back(deInt32(0xffff0001));  // -65535
 257         data.push_back(deInt32(0xffff8000));  // -32768
 258         data.push_back(deInt32(0xffff8001));  // -32767
 259         data.push_back(deInt32(0xffffffd6));  // -42
 260         data.push_back(deInt32(0xffffffff));  // -1
 261
 262         DE_ASSERT(count >= data.size());
 263
 264         for (deUint32 numNdx = static_cast<deUint32>(data.size()); numNdx < count; ++numNdx)
 265                 data.push_back(static_cast<deInt32>(rnd.getUint32()));
 266
 267         return data;
 268 }
 269
 270 // Generate and return 16-bit integers.
 271 //
 272 // Expected count to be at least 8.
 273 vector<deInt16> getInt16s (de::Random& rnd, const deUint32 count)
 274 {
 275         vector<deInt16>         data;
 276
 277         data.reserve(count);
 278
 279         // Make sure we have boundary numbers.
 280         data.push_back(deInt16(0x0000));  // 0
 281         data.push_back(deInt16(0x0001));  // 1
 282         data.push_back(deInt16(0x002a));  // 42
 283         data.push_back(deInt16(0x7fff));  // 32767
 284         data.push_back(deInt16(0x8000));  // -32868
 285         data.push_back(deInt16(0x8001));  // -32767
 286         data.push_back(deInt16(0xffd6));  // -42
 287         data.push_back(deInt16(0xffff));  // -1
 288
 289         DE_ASSERT(count >= data.size());
 290
 291         for (deUint32 numNdx = static_cast<deUint32>(data.size()); numNdx < count; ++numNdx)
 292                 data.push_back(static_cast<deInt16>(rnd.getUint16()));
 293
 294         return data;
 295 }
 296
 297 // IEEE-754 floating point numbers:
 298 // +--------+------+----------+-------------+
 299 // | binary | sign | exponent | significand |
 300 // +--------+------+----------+-------------+
 301 // | 16-bit |  1   |    5     |     10      |
 302 // +--------+------+----------+-------------+
 303 // | 32-bit |  1   |    8     |     23      |
 304 // +--------+------+----------+-------------+
 305 //
 306 // 16-bit floats:
 307 //
  308 // 0   000 00   00 0000 0001 (0x0001: 2^-24:         minimum positive denormalized)
  309 // 0   000 00   11 1111 1111 (0x03ff: 2^-14 - 2^-24: maximum positive denormalized)
  310 // 0   000 01   00 0000 0000 (0x0400: 2^-14:         minimum positive normalized)
 311 //
 312 // 32-bit floats:
 313 //
 314 // 0   011 1110 1   001 0000 0000 0000 0000 0000 (0x3e900000: 0.28125: with exact match in 16-bit normalized)
 315 // 0   011 1000 1   000 0000 0011 0000 0000 0000 (0x38803000: exact half way within two 16-bit normalized; round to zero: 0x0401)
  316 // 1   011 1000 1   000 0000 0011 0000 0000 0000 (0xb8803000: exact half way within two 16-bit normalized; round to zero: 0x8401)
  317 // 0   011 1000 1   000 0000 1111 1111 0000 0000 (0x3880ff00: not exact half way within two 16-bit normalized; round to zero: 0x0407)
  318 // 1   011 1000 1   000 0000 1111 1111 0000 0000 (0xb880ff00: not exact half way within two 16-bit normalized; round to zero: 0x8407)
 319
 320
 321 // Generate and return 32-bit floats
 322 //
 323 // The first 24 number pairs are manually picked, while the rest are randomly generated.
 324 // Expected count to be at least 24 (numPicks).
 325 vector<float> getFloat32s (de::Random& rnd, deUint32 count)
 326 {
 327         vector<float>           float32;
 328
 329         float32.reserve(count);
 330
 331         // Zero
 332         float32.push_back(0.f);
 333         float32.push_back(-0.f);
 334         // Infinity
 335         float32.push_back(std::numeric_limits<float>::infinity());
 336         float32.push_back(-std::numeric_limits<float>::infinity());
 337         // SNaN
 338         float32.push_back(std::numeric_limits<float>::signaling_NaN());
 339         float32.push_back(-std::numeric_limits<float>::signaling_NaN());
 340         // QNaN
 341         float32.push_back(std::numeric_limits<float>::quiet_NaN());
 342         float32.push_back(-std::numeric_limits<float>::quiet_NaN());
 343
 344         // Denormalized 32-bit float matching 0 in 16-bit
 345         float32.push_back(deFloatLdExp(1.f, -127));
 346         float32.push_back(-deFloatLdExp(1.f, -127));
 347
 348         // Normalized 32-bit float matching 0 in 16-bit
 349         float32.push_back(deFloatLdExp(1.f, -100));
 350         float32.push_back(-deFloatLdExp(1.f, -100));
 351         // Normalized 32-bit float with exact denormalized match in 16-bit
 352         float32.push_back(deFloatLdExp(1.f, -24));  // 2e-24: minimum 16-bit positive denormalized
 353         float32.push_back(-deFloatLdExp(1.f, -24)); // 2e-24: maximum 16-bit negative denormalized
 354         // Normalized 32-bit float with exact normalized match in 16-bit
 355         float32.push_back(deFloatLdExp(1.f, -14));  // 2e-14: minimum 16-bit positive normalized
 356         float32.push_back(-deFloatLdExp(1.f, -14)); // 2e-14: maximum 16-bit negative normalized
 357         // Normalized 32-bit float falling above half way within two 16-bit normalized
 358         float32.push_back(bitwiseCast<float>(deUint32(0x3880ff00)));
 359         float32.push_back(bitwiseCast<float>(deUint32(0xb880ff00)));
 360         // Normalized 32-bit float falling exact half way within two 16-bit normalized
 361         float32.push_back(bitwiseCast<float>(deUint32(0x38803000)));
 362         float32.push_back(bitwiseCast<float>(deUint32(0xb8803000)));
 363         // Some number
 364         float32.push_back(0.28125f);
 365         float32.push_back(-0.28125f);
 366         // Normalized 32-bit float matching infinity in 16-bit
 367         float32.push_back(deFloatLdExp(1.f, 100));
 368         float32.push_back(-deFloatLdExp(1.f, 100));
 369
 370         const deUint32          numPicks        = static_cast<deUint32>(float32.size());
 371
 372         DE_ASSERT(count >= numPicks);
 373         count -= numPicks;
 374
 375         for (deUint32 numNdx = 0; numNdx < count; ++numNdx)
 376                 float32.push_back(rnd.getFloat());
 377
 378         return float32;
 379 }
 380
 381 // IEEE-754 floating point numbers:
 382 // +--------+------+----------+-------------+
 383 // | binary | sign | exponent | significand |
 384 // +--------+------+----------+-------------+
 385 // | 16-bit |  1   |    5     |     10      |
 386 // +--------+------+----------+-------------+
 387 // | 32-bit |  1   |    8     |     23      |
 388 // +--------+------+----------+-------------+
 389 //
 390 // 16-bit floats:
 391 //
  392 // 0   000 00   00 0000 0001 (0x0001: 2^-24:         minimum positive denormalized)
  393 // 0   000 00   11 1111 1111 (0x03ff: 2^-14 - 2^-24: maximum positive denormalized)
  394 // 0   000 01   00 0000 0000 (0x0400: 2^-14:         minimum positive normalized)
 395 //
 396 // 0   000 00   00 0000 0000 (0x0000: +0)
 397 // 0   111 11   00 0000 0000 (0x7c00: +Inf)
 398 // 0   000 00   11 1111 0000 (0x03f0: +Denorm)
 399 // 0   000 01   00 0000 0001 (0x0401: +Norm)
 400 // 0   111 11   00 0000 1111 (0x7c0f: +SNaN)
  401 // 0   111 11   00 1111 0000 (0x7cf0: +QNaN)
 402
 403
 404 // Generate and return 16-bit floats and their corresponding 32-bit values.
 405 //
 406 // The first 14 number pairs are manually picked, while the rest are randomly generated.
 407 // Expected count to be at least 14 (numPicks).
 408 vector<deFloat16> getFloat16s (de::Random& rnd, deUint32 count)
 409 {
 410         vector<deFloat16>       float16;
 411
 412         float16.reserve(count);
 413
 414         // Zero
 415         float16.push_back(deUint16(0x0000));
 416         float16.push_back(deUint16(0x8000));
 417         // Infinity
 418         float16.push_back(deUint16(0x7c00));
 419         float16.push_back(deUint16(0xfc00));
 420         // SNaN
 421         float16.push_back(deUint16(0x7c0f));
 422         float16.push_back(deUint16(0xfc0f));
 423         // QNaN
 424         float16.push_back(deUint16(0x7cf0));
 425         float16.push_back(deUint16(0xfcf0));
 426
 427         // Denormalized
 428         float16.push_back(deUint16(0x03f0));
 429         float16.push_back(deUint16(0x83f0));
 430         // Normalized
 431         float16.push_back(deUint16(0x0401));
 432         float16.push_back(deUint16(0x8401));
 433         // Some normal number
 434         float16.push_back(deUint16(0x14cb));
 435         float16.push_back(deUint16(0x94cb));
 436
 437         const deUint32          numPicks        = static_cast<deUint32>(float16.size());
 438
 439         DE_ASSERT(count >= numPicks);
 440         count -= numPicks;
 441
 442         for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
 443                 float16.push_back(rnd.getUint16());
 444
 445         return float16;
 446 }
 447
 448 void addCompute16bitStorageUniform16To32Group (tcu::TestCaseGroup* group)
 449 {
 450         tcu::TestContext&                               testCtx                 = group->getTestContext();
 451         de::Random                                              rnd                             (deStringHash(group->getName()));
 452         const int                                               numElements             = 128;
 453
 454         const StringTemplate                    shaderTemplate  (
 455                 "OpCapability Shader\n"
 456                 "OpCapability ${capability}\n"
 457                 "OpExtension \"SPV_KHR_16bit_storage\"\n"
 458                 "OpMemoryModel Logical GLSL450\n"
 459                 "OpEntryPoint GLCompute %main \"main\" %id\n"
 460                 "OpExecutionMode %main LocalSize 1 1 1\n"
 461                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
 462
 463                 "${stride}"
 464
 465                 "OpMemberDecorate %SSBO32 0 Offset 0\n"
 466                 "OpMemberDecorate %SSBO16 0 Offset 0\n"
 467                 "OpDecorate %SSBO32 BufferBlock\n"
 468                 "OpDecorate %SSBO16 ${storage}\n"
 469                 "OpDecorate %ssbo32 DescriptorSet 0\n"
 470                 "OpDecorate %ssbo16 DescriptorSet 0\n"
 471                 "OpDecorate %ssbo32 Binding 1\n"
 472                 "OpDecorate %ssbo16 Binding 0\n"
 473
 474                 "${matrix_decor:opt}\n"
 475
 476                 "%bool      = OpTypeBool\n"
 477                 "%void      = OpTypeVoid\n"
 478                 "%voidf     = OpTypeFunction %void\n"
 479                 "%u32       = OpTypeInt 32 0\n"
 480                 "%i32       = OpTypeInt 32 1\n"
 481                 "%f32       = OpTypeFloat 32\n"
 482                 "%uvec3     = OpTypeVector %u32 3\n"
 483                 "%fvec3     = OpTypeVector %f32 3\n"
 484                 "%uvec3ptr  = OpTypePointer Input %uvec3\n"
 485                 "%i32ptr    = OpTypePointer Uniform %i32\n"
 486                 "%f32ptr    = OpTypePointer Uniform %f32\n"
 487
 488                 "%zero      = OpConstant %i32 0\n"
 489                 "%c_i32_1   = OpConstant %i32 1\n"
 490                 "%c_i32_2   = OpConstant %i32 2\n"
 491                 "%c_i32_3   = OpConstant %i32 3\n"
 492                 "%c_i32_16  = OpConstant %i32 16\n"
 493                 "%c_i32_32  = OpConstant %i32 32\n"
 494                 "%c_i32_64  = OpConstant %i32 64\n"
 495                 "%c_i32_128 = OpConstant %i32 128\n"
 496
 497                 "%i32arr    = OpTypeArray %i32 %c_i32_128\n"
 498                 "%f32arr    = OpTypeArray %f32 %c_i32_128\n"
 499
 500                 "${types}\n"
 501                 "${matrix_types:opt}\n"
 502
 503                 "%SSBO32    = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
 504                 "%SSBO16    = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
 505                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
 506                 "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
 507                 "%ssbo32    = OpVariable %up_SSBO32 Uniform\n"
 508                 "%ssbo16    = OpVariable %up_SSBO16 Uniform\n"
 509
 510                 "%id        = OpVariable %uvec3ptr Input\n"
 511
 512                 "%main      = OpFunction %void None %voidf\n"
 513                 "%label     = OpLabel\n"
 514                 "%idval     = OpLoad %uvec3 %id\n"
 515                 "%x         = OpCompositeExtract %u32 %idval 0\n"
 516                 "%inloc     = OpAccessChain %${base16}ptr %ssbo16 %zero %x ${index0:opt}\n"
 517                 "%val16     = OpLoad %${base16} %inloc\n"
 518                 "%val32     = ${convert} %${base32} %val16\n"
 519                 "%outloc    = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
 520                 "             OpStore %outloc %val32\n"
 521                 "${matrix_store:opt}\n"
 522                 "             OpReturn\n"
 523                 "             OpFunctionEnd\n");
 524
 525         {  // floats
 526                 const char                                                                              floatTypes[]    =
 527                         "%f16       = OpTypeFloat 16\n"
 528                         "%f16ptr    = OpTypePointer Uniform %f16\n"
 529                         "%f16arr    = OpTypeArray %f16 %c_i32_128\n"
 530                         "%v2f16     = OpTypeVector %f16 2\n"
 531                         "%v2f32     = OpTypeVector %f32 2\n"
 532                         "%v2f16ptr  = OpTypePointer Uniform %v2f16\n"
 533                         "%v2f32ptr  = OpTypePointer Uniform %v2f32\n"
 534                         "%v2f16arr  = OpTypeArray %v2f16 %c_i32_64\n"
 535                         "%v2f32arr  = OpTypeArray %v2f32 %c_i32_64\n";
 536
 537                 struct CompositeType
 538                 {
 539                         const char*     name;
 540                         const char*     base32;
 541                         const char*     base16;
 542                         const char*     stride;
 543                         unsigned        count;
 544                 };
 545
 546                 const CompositeType     cTypes[]        =
 547                 {
 548                         {"scalar",      "f32",          "f16",          "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",                         numElements},
 549                         {"vector",      "v2f32",        "v2f16",        "OpDecorate %v2f32arr ArrayStride 8\nOpDecorate %v2f16arr ArrayStride 4\n",                     numElements / 2},
 550                         {"matrix",      "v2f32",        "v2f16",        "OpDecorate %m4v2f32arr ArrayStride 32\nOpDecorate %m4v2f16arr ArrayStride 16\n",       numElements / 8},
 551                 };
 552
 553                 vector<deFloat16>       float16Data                     = getFloat16s(rnd, numElements);
 554                 vector<float>           float32Data;
 555
 556                 float32Data.reserve(numElements);
 557                 for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
 558                         float32Data.push_back(deFloat16To32(float16Data[numIdx]));
 559
 560                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
 561                         for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
 562                         {
 563                                 ComputeShaderSpec               spec;
 564                                 map<string, string>             specs;
 565                                 string                                  testName        = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name + "_float";
 566
 567                                 specs["capability"]             = CAPABILITIES[capIdx].cap;
 568                                 specs["storage"]                = CAPABILITIES[capIdx].decor;
 569                                 specs["stride"]                 = cTypes[tyIdx].stride;
 570                                 specs["base32"]                 = cTypes[tyIdx].base32;
 571                                 specs["base16"]                 = cTypes[tyIdx].base16;
 572                                 specs["types"]                  = floatTypes;
 573                                 specs["convert"]                = "OpFConvert";
 574
 575                                 if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
 576                                 {
 577                                         specs["index0"]                 = "%zero";
 578                                         specs["matrix_prefix"]  = "m4";
 579                                         specs["matrix_types"]   =
 580                                                 "%m4v2f16 = OpTypeMatrix %v2f16 4\n"
 581                                                 "%m4v2f32 = OpTypeMatrix %v2f32 4\n"
 582                                                 "%m4v2f16arr = OpTypeArray %m4v2f16 %c_i32_16\n"
 583                                                 "%m4v2f32arr = OpTypeArray %m4v2f32 %c_i32_16\n";
 584                                         specs["matrix_decor"]   =
 585                                                 "OpMemberDecorate %SSBO32 0 ColMajor\n"
 586                                                 "OpMemberDecorate %SSBO32 0 MatrixStride 8\n"
 587                                                 "OpMemberDecorate %SSBO16 0 ColMajor\n"
 588                                                 "OpMemberDecorate %SSBO16 0 MatrixStride 4\n";
 589                                         specs["matrix_store"]   =
 590                                                 "%inloc_1  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_1\n"
 591                                                 "%val16_1  = OpLoad %v2f16 %inloc_1\n"
 592                                                 "%val32_1  = OpFConvert %v2f32 %val16_1\n"
 593                                                 "%outloc_1 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_1\n"
 594                                                 "            OpStore %outloc_1 %val32_1\n"
 595
 596                                                 "%inloc_2  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_2\n"
 597                                                 "%val16_2  = OpLoad %v2f16 %inloc_2\n"
 598                                                 "%val32_2  = OpFConvert %v2f32 %val16_2\n"
 599                                                 "%outloc_2 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_2\n"
 600                                                 "            OpStore %outloc_2 %val32_2\n"
 601
 602                                                 "%inloc_3  = OpAccessChain %v2f16ptr %ssbo16 %zero %x %c_i32_3\n"
 603                                                 "%val16_3  = OpLoad %v2f16 %inloc_3\n"
 604                                                 "%val32_3  = OpFConvert %v2f32 %val16_3\n"
 605                                                 "%outloc_3 = OpAccessChain %v2f32ptr %ssbo32 %zero %x %c_i32_3\n"
 606                                                 "            OpStore %outloc_3 %val32_3\n";
 607                                 }
 608
 609                                 spec.assembly                   = shaderTemplate.specialize(specs);
 610                                 spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
 611                                 spec.verifyIO                   = check32BitFloats;
 612                                 spec.inputTypes[0]              = CAPABILITIES[capIdx].dtype;
 613
 614                                 spec.inputs.push_back(BufferSp(new Float16Buffer(float16Data)));
 615                                 spec.outputs.push_back(BufferSp(new Float32Buffer(float32Data)));
 616                                 spec.extensions.push_back("VK_KHR_16bit_storage");
 617                                 spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
 618
 619                                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
 620                         }
 621         }
 622
 623         {  // Integers
 624                 const char              sintTypes[]             =
 625                         "%i16       = OpTypeInt 16 1\n"
 626                         "%i16ptr    = OpTypePointer Uniform %i16\n"
 627                         "%i16arr    = OpTypeArray %i16 %c_i32_128\n"
 628                         "%v4i16     = OpTypeVector %i16 4\n"
 629                         "%v4i32     = OpTypeVector %i32 4\n"
 630                         "%v4i16ptr  = OpTypePointer Uniform %v4i16\n"
 631                         "%v4i32ptr  = OpTypePointer Uniform %v4i32\n"
 632                         "%v4i16arr  = OpTypeArray %v4i16 %c_i32_32\n"
 633                         "%v4i32arr  = OpTypeArray %v4i32 %c_i32_32\n";
 634
 635                 const char              uintTypes[]             =
 636                         "%u16       = OpTypeInt 16 0\n"
 637                         "%u16ptr    = OpTypePointer Uniform %u16\n"
 638                         "%u32ptr    = OpTypePointer Uniform %u32\n"
 639                         "%u16arr    = OpTypeArray %u16 %c_i32_128\n"
 640                         "%u32arr    = OpTypeArray %u32 %c_i32_128\n"
 641                         "%v4u16     = OpTypeVector %u16 4\n"
 642                         "%v4u32     = OpTypeVector %u32 4\n"
 643                         "%v4u16ptr  = OpTypePointer Uniform %v4u16\n"
 644                         "%v4u32ptr  = OpTypePointer Uniform %v4u32\n"
 645                         "%v4u16arr  = OpTypeArray %v4u16 %c_i32_32\n"
 646                         "%v4u32arr  = OpTypeArray %v4u32 %c_i32_32\n";
 647
 648                 struct CompositeType
 649                 {
 650                         const char*     name;
 651                         bool            isSigned;
 652                         const char* types;
 653                         const char*     base32;
 654                         const char*     base16;
 655                         const char* opcode;
 656                         const char*     stride;
 657                         unsigned        count;
 658                 };
 659
 660                 const CompositeType     cTypes[]        =
 661                 {
 662                         {"scalar_sint", true,   sintTypes,      "i32",          "i16",          "OpSConvert",   "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",                 numElements},
 663                         {"scalar_uint", false,  uintTypes,      "u32",          "u16",          "OpUConvert",   "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",                 numElements},
 664                         {"vector_sint", true,   sintTypes,      "v4i32",        "v4i16",        "OpSConvert",   "OpDecorate %v4i32arr ArrayStride 16\nOpDecorate %v4i16arr ArrayStride 8\n",    numElements / 4},
 665                         {"vector_uint", false,  uintTypes,      "v4u32",        "v4u16",        "OpUConvert",   "OpDecorate %v4u32arr ArrayStride 16\nOpDecorate %v4u16arr ArrayStride 8\n",    numElements / 4},
 666                 };
 667
 668                 vector<deInt16> inputs                  = getInt16s(rnd, numElements);
 669                 vector<deInt32> sOutputs;
 670                 vector<deInt32> uOutputs;
 671                 const deUint16  signBitMask             = 0x8000;
 672                 const deUint32  signExtendMask  = 0xffff0000;
 673
 674                 sOutputs.reserve(inputs.size());
 675                 uOutputs.reserve(inputs.size());
 676
 677                 for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
 678                 {
 679                         uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
 680                         if (inputs[numNdx] & signBitMask)
 681                                 sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
 682                         else
 683                                 sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
 684                 }
 685
 686                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
 687                         for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
 688                         {
 689                                 ComputeShaderSpec               spec;
 690                                 map<string, string>             specs;
 691                                 string                                  testName        = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name;
 692
 693                                 specs["capability"]             = CAPABILITIES[capIdx].cap;
 694                                 specs["storage"]                = CAPABILITIES[capIdx].decor;
 695                                 specs["stride"]                 = cTypes[tyIdx].stride;
 696                                 specs["base32"]                 = cTypes[tyIdx].base32;
 697                                 specs["base16"]                 = cTypes[tyIdx].base16;
 698                                 specs["types"]                  = cTypes[tyIdx].types;
 699                                 specs["convert"]                = cTypes[tyIdx].opcode;
 700
 701                                 spec.assembly                   = shaderTemplate.specialize(specs);
 702                                 spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
 703                                 spec.inputTypes[0]              = CAPABILITIES[capIdx].dtype;
 704
 705                                 spec.inputs.push_back(BufferSp(new Int16Buffer(inputs)));
 706                                 if (cTypes[tyIdx].isSigned)
 707                                         spec.outputs.push_back(BufferSp(new Int32Buffer(sOutputs)));
 708                                 else
 709                                         spec.outputs.push_back(BufferSp(new Int32Buffer(uOutputs)));
 710                                 spec.extensions.push_back("VK_KHR_16bit_storage");
 711                                 spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
 712
 713                                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
 714                         }
 715         }
 716 }
 717
 718 void addCompute16bitStoragePushConstant16To32Group (tcu::TestCaseGroup* group)
 719 {
 720         tcu::TestContext&                               testCtx                 = group->getTestContext();
 721         de::Random                                              rnd                             (deStringHash(group->getName()));
 722         const int                                               numElements             = 64;
 723
 724         const StringTemplate                    shaderTemplate  (
 725                 "OpCapability Shader\n"
 726                 "OpCapability StoragePushConstant16\n"
 727                 "OpExtension \"SPV_KHR_16bit_storage\"\n"
 728                 "OpMemoryModel Logical GLSL450\n"
 729                 "OpEntryPoint GLCompute %main \"main\" %id\n"
 730                 "OpExecutionMode %main LocalSize 1 1 1\n"
 731                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
 732
 733                 "${stride}"
 734
 735                 "OpDecorate %PC16 Block\n"
 736                 "OpMemberDecorate %PC16 0 Offset 0\n"
 737                 "OpMemberDecorate %SSBO32 0 Offset 0\n"
 738                 "OpDecorate %SSBO32 BufferBlock\n"
 739                 "OpDecorate %ssbo32 DescriptorSet 0\n"
 740                 "OpDecorate %ssbo32 Binding 0\n"
 741
 742                 "${matrix_decor:opt}\n"
 743
 744                 "%bool      = OpTypeBool\n"
 745                 "%void      = OpTypeVoid\n"
 746                 "%voidf     = OpTypeFunction %void\n"
 747                 "%u32       = OpTypeInt 32 0\n"
 748                 "%i32       = OpTypeInt 32 1\n"
 749                 "%f32       = OpTypeFloat 32\n"
 750                 "%uvec3     = OpTypeVector %u32 3\n"
 751                 "%fvec3     = OpTypeVector %f32 3\n"
 752                 "%uvec3ptr  = OpTypePointer Input %uvec3\n"
 753                 "%i32ptr    = OpTypePointer Uniform %i32\n"
 754                 "%f32ptr    = OpTypePointer Uniform %f32\n"
 755
 756                 "%zero      = OpConstant %i32 0\n"
 757                 "%c_i32_1   = OpConstant %i32 1\n"
 758                 "%c_i32_8   = OpConstant %i32 8\n"
 759                 "%c_i32_16  = OpConstant %i32 16\n"
 760                 "%c_i32_32  = OpConstant %i32 32\n"
 761                 "%c_i32_64  = OpConstant %i32 64\n"
 762
 763                 "%i32arr    = OpTypeArray %i32 %c_i32_64\n"
 764                 "%f32arr    = OpTypeArray %f32 %c_i32_64\n"
 765
 766                 "${types}\n"
 767                 "${matrix_types:opt}\n"
 768
 769                 "%PC16      = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
 770                 "%pp_PC16   = OpTypePointer PushConstant %PC16\n"
 771                 "%pc16      = OpVariable %pp_PC16 PushConstant\n"
 772                 "%SSBO32    = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
 773                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
 774                 "%ssbo32    = OpVariable %up_SSBO32 Uniform\n"
 775
 776                 "%id        = OpVariable %uvec3ptr Input\n"
 777
 778                 "%main      = OpFunction %void None %voidf\n"
 779                 "%label     = OpLabel\n"
 780                 "%idval     = OpLoad %uvec3 %id\n"
 781                 "%x         = OpCompositeExtract %u32 %idval 0\n"
 782                 "%inloc     = OpAccessChain %${base16}ptr %pc16 %zero %x ${index0:opt}\n"
 783                 "%val16     = OpLoad %${base16} %inloc\n"
 784                 "%val32     = ${convert} %${base32} %val16\n"
 785                 "%outloc    = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
 786                 "             OpStore %outloc %val32\n"
 787                 "${matrix_store:opt}\n"
 788                 "             OpReturn\n"
 789                 "             OpFunctionEnd\n");
 790
 791         {  // floats
 792                 const char                                                                              floatTypes[]    =
 793                         "%f16       = OpTypeFloat 16\n"
 794                         "%f16ptr    = OpTypePointer PushConstant %f16\n"
 795                         "%f16arr    = OpTypeArray %f16 %c_i32_64\n"
 796                         "%v4f16     = OpTypeVector %f16 4\n"
 797                         "%v4f32     = OpTypeVector %f32 4\n"
 798                         "%v4f16ptr  = OpTypePointer PushConstant %v4f16\n"
 799                         "%v4f32ptr  = OpTypePointer Uniform %v4f32\n"
 800                         "%v4f16arr  = OpTypeArray %v4f16 %c_i32_16\n"
 801                         "%v4f32arr  = OpTypeArray %v4f32 %c_i32_16\n";
 802
 803                 struct CompositeType
 804                 {
 805                         const char*     name;
 806                         const char*     base32;
 807                         const char*     base16;
 808                         const char*     stride;
 809                         unsigned        count;
 810                 };
 811
 812                 const CompositeType     cTypes[]        =
 813                 {
 814                         {"scalar",      "f32",          "f16",          "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",                         numElements},
 815                         {"vector",      "v4f32",        "v4f16",        "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n",            numElements / 4},
 816                         {"matrix",      "v4f32",        "v4f16",        "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n",       numElements / 8},
 817                 };
 818
 819                 vector<deFloat16>       float16Data                     = getFloat16s(rnd, numElements);
 820                 vector<float>           float32Data;
 821
 822                 float32Data.reserve(numElements);
 823                 for (deUint32 numIdx = 0; numIdx < numElements; ++numIdx)
 824                         float32Data.push_back(deFloat16To32(float16Data[numIdx]));
 825
 826                 for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
 827                 {
 828                         ComputeShaderSpec               spec;
 829                         map<string, string>             specs;
 830                         string                                  testName        = string(cTypes[tyIdx].name) + "_float";
 831
 832                         specs["stride"]                 = cTypes[tyIdx].stride;
 833                         specs["base32"]                 = cTypes[tyIdx].base32;
 834                         specs["base16"]                 = cTypes[tyIdx].base16;
 835                         specs["types"]                  = floatTypes;
 836                         specs["convert"]                = "OpFConvert";
 837
 838                         if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
 839                         {
 840                                 specs["index0"]                 = "%zero";
 841                                 specs["matrix_prefix"]  = "m2";
 842                                 specs["matrix_types"]   =
 843                                         "%m2v4f16 = OpTypeMatrix %v4f16 2\n"
 844                                         "%m2v4f32 = OpTypeMatrix %v4f32 2\n"
 845                                         "%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_8\n"
 846                                         "%m2v4f32arr = OpTypeArray %m2v4f32 %c_i32_8\n";
 847                                 specs["matrix_decor"]   =
 848                                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
 849                                         "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
 850                                         "OpMemberDecorate %PC16 0 ColMajor\n"
 851                                         "OpMemberDecorate %PC16 0 MatrixStride 8\n";
 852                                 specs["matrix_store"]   =
 853                                         "%inloc_1  = OpAccessChain %v4f16ptr %pc16 %zero %x %c_i32_1\n"
 854                                         "%val16_1  = OpLoad %v4f16 %inloc_1\n"
 855                                         "%val32_1  = OpFConvert %v4f32 %val16_1\n"
 856                                         "%outloc_1 = OpAccessChain %v4f32ptr %ssbo32 %zero %x %c_i32_1\n"
 857                                         "            OpStore %outloc_1 %val32_1\n";
 858                         }
 859
 860                         spec.assembly                   = shaderTemplate.specialize(specs);
 861                         spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
 862                         spec.verifyIO                   = check32BitFloats;
 863                         spec.pushConstants              = BufferSp(new Float16Buffer(float16Data));
 864
 865                         spec.outputs.push_back(BufferSp(new Float32Buffer(float32Data)));
 866                         spec.extensions.push_back("VK_KHR_16bit_storage");
 867                         spec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
 868
 869                         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
 870                 }
 871         }
 872         {  // integers
 873                 const char              sintTypes[]             =
 874                         "%i16       = OpTypeInt 16 1\n"
 875                         "%i16ptr    = OpTypePointer PushConstant %i16\n"
 876                         "%i16arr    = OpTypeArray %i16 %c_i32_64\n"
 877                         "%v2i16     = OpTypeVector %i16 2\n"
 878                         "%v2i32     = OpTypeVector %i32 2\n"
 879                         "%v2i16ptr  = OpTypePointer PushConstant %v2i16\n"
 880                         "%v2i32ptr  = OpTypePointer Uniform %v2i32\n"
 881                         "%v2i16arr  = OpTypeArray %v2i16 %c_i32_32\n"
 882                         "%v2i32arr  = OpTypeArray %v2i32 %c_i32_32\n";
 883
 884                 const char              uintTypes[]             =
 885                         "%u16       = OpTypeInt 16 0\n"
 886                         "%u16ptr    = OpTypePointer PushConstant %u16\n"
 887                         "%u32ptr    = OpTypePointer Uniform %u32\n"
 888                         "%u16arr    = OpTypeArray %u16 %c_i32_64\n"
 889                         "%u32arr    = OpTypeArray %u32 %c_i32_64\n"
 890                         "%v2u16     = OpTypeVector %u16 2\n"
 891                         "%v2u32     = OpTypeVector %u32 2\n"
 892                         "%v2u16ptr  = OpTypePointer PushConstant %v2u16\n"
 893                         "%v2u32ptr  = OpTypePointer Uniform %v2u32\n"
 894                         "%v2u16arr  = OpTypeArray %v2u16 %c_i32_32\n"
 895                         "%v2u32arr  = OpTypeArray %v2u32 %c_i32_32\n";
 896
 897                 struct CompositeType
 898                 {
 899                         const char*     name;
 900                         bool            isSigned;
 901                         const char* types;
 902                         const char*     base32;
 903                         const char*     base16;
 904                         const char* opcode;
 905                         const char*     stride;
 906                         unsigned        count;
 907                 };
 908
 909                 const CompositeType     cTypes[]        =
 910                 {
 911                         {"scalar_sint", true,   sintTypes,      "i32",          "i16",          "OpSConvert",   "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",         numElements},
 912                         {"scalar_uint", false,  uintTypes,      "u32",          "u16",          "OpUConvert",   "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",         numElements},
 913                         {"vector_sint", true,   sintTypes,      "v2i32",        "v2i16",        "OpSConvert",   "OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n",     numElements / 2},
 914                         {"vector_uint", false,  uintTypes,      "v2u32",        "v2u16",        "OpUConvert",   "OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n",     numElements / 2},
 915                 };
 916
 917                 vector<deInt16> inputs                  = getInt16s(rnd, numElements);
 918                 vector<deInt32> sOutputs;
 919                 vector<deInt32> uOutputs;
 920                 const deUint16  signBitMask             = 0x8000;
 921                 const deUint32  signExtendMask  = 0xffff0000;
 922
 923                 sOutputs.reserve(inputs.size());
 924                 uOutputs.reserve(inputs.size());
 925
 926                 for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
 927                 {
 928                         uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
 929                         if (inputs[numNdx] & signBitMask)
 930                                 sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
 931                         else
 932                                 sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
 933                 }
 934
 935                 for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
 936                 {
 937                         ComputeShaderSpec               spec;
 938                         map<string, string>             specs;
 939                         const char*                             testName        = cTypes[tyIdx].name;
 940
 941                         specs["stride"]                 = cTypes[tyIdx].stride;
 942                         specs["base32"]                 = cTypes[tyIdx].base32;
 943                         specs["base16"]                 = cTypes[tyIdx].base16;
 944                         specs["types"]                  = cTypes[tyIdx].types;
 945                         specs["convert"]                = cTypes[tyIdx].opcode;
 946
 947                         spec.assembly                   = shaderTemplate.specialize(specs);
 948                         spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
 949                         spec.pushConstants              = BufferSp(new Int16Buffer(inputs));
 950
 951                         if (cTypes[tyIdx].isSigned)
 952                                 spec.outputs.push_back(BufferSp(new Int32Buffer(sOutputs)));
 953                         else
 954                                 spec.outputs.push_back(BufferSp(new Int32Buffer(uOutputs)));
 955                         spec.extensions.push_back("VK_KHR_16bit_storage");
 956                         spec.requestedVulkanFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
 957
 958                         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName, testName, spec));
 959                 }
 960         }
 961 }
 962
 963 void addGraphics16BitStorageUniformInt32To16Group (tcu::TestCaseGroup* testGroup)
 964 {
 965         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
 966         map<string, string>                                     fragments;
 967         const deUint32                                          numDataPoints           = 256;
 968         RGBA                                                            defaultColors[4];
 969         GraphicsResources                                       resources;
 970         vector<string>                                          extensions;
 971         const StringTemplate                            capabilities            ("OpCapability ${cap}\n");
 972         // inputs and outputs are declared to be vectors of signed integers.
 973         // However, depending on the test, they may be interpreted as unsiged
 974         // integers. That won't be a problem as long as we passed the bits
 975         // in faithfully to the pipeline.
 976         vector<deInt32>                                         inputs                          = getInt32s(rnd, numDataPoints);
 977         vector<deInt16>                                         outputs;
 978
 979         outputs.reserve(inputs.size());
 980         for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
 981                 outputs.push_back(static_cast<deInt16>(0xffff & inputs[numNdx]));
 982
 983         resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(inputs))));
 984         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(outputs))));
 985
 986         extensions.push_back("VK_KHR_16bit_storage");
 987         fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"";
 988
 989         getDefaultColors(defaultColors);
 990
 991         struct IntegerFacts
 992         {
 993                 const char*     name;
 994                 const char*     type32;
 995                 const char*     type16;
 996                 const char* opcode;
 997                 const char*     isSigned;
 998         };
 999
1000         const IntegerFacts      intFacts[]              =
1001         {
1002                 {"sint",        "%i32",         "%i16",         "OpSConvert",   "1"},
1003                 {"uint",        "%u32",         "%u16",         "OpUConvert",   "0"},
1004         };
1005
1006         const StringTemplate    scalarPreMain(
1007                         "${itype16} = OpTypeInt 16 ${signed}\n"
1008                         "%c_i32_256 = OpConstant %i32 256\n"
1009                         "   %up_i32 = OpTypePointer Uniform ${itype32}\n"
1010                         "   %up_i16 = OpTypePointer Uniform ${itype16}\n"
1011                         "   %ra_i32 = OpTypeArray ${itype32} %c_i32_256\n"
1012                         "   %ra_i16 = OpTypeArray ${itype16} %c_i32_256\n"
1013                         "   %SSBO32 = OpTypeStruct %ra_i32\n"
1014                         "   %SSBO16 = OpTypeStruct %ra_i16\n"
1015                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1016                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1017                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
1018                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
1019
1020         const StringTemplate    scalarDecoration(
1021                         "OpDecorate %ra_i32 ArrayStride 4\n"
1022                         "OpDecorate %ra_i16 ArrayStride 2\n"
1023                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1024                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1025                         "OpDecorate %SSBO32 ${indecor}\n"
1026                         "OpDecorate %SSBO16 BufferBlock\n"
1027                         "OpDecorate %ssbo32 DescriptorSet 0\n"
1028                         "OpDecorate %ssbo16 DescriptorSet 0\n"
1029                         "OpDecorate %ssbo32 Binding 0\n"
1030                         "OpDecorate %ssbo16 Binding 1\n");
1031
1032         const StringTemplate    scalarTestFunc(
1033                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
1034                         "    %param = OpFunctionParameter %v4f32\n"
1035
1036                         "%entry = OpLabel\n"
1037                         "    %i = OpVariable %fp_i32 Function\n"
1038                         "         OpStore %i %c_i32_0\n"
1039                         "         OpBranch %loop\n"
1040
1041                         " %loop = OpLabel\n"
1042                         "   %15 = OpLoad %i32 %i\n"
1043                         "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
1044                         "         OpLoopMerge %merge %inc None\n"
1045                         "         OpBranchConditional %lt %write %merge\n"
1046
1047                         "%write = OpLabel\n"
1048                         "   %30 = OpLoad %i32 %i\n"
1049                         "  %src = OpAccessChain %up_i32 %ssbo32 %c_i32_0 %30\n"
1050                         "%val32 = OpLoad ${itype32} %src\n"
1051                         "%val16 = ${convert} ${itype16} %val32\n"
1052                         "  %dst = OpAccessChain %up_i16 %ssbo16 %c_i32_0 %30\n"
1053                         "         OpStore %dst %val16\n"
1054                         "         OpBranch %inc\n"
1055
1056                         "  %inc = OpLabel\n"
1057                         "   %37 = OpLoad %i32 %i\n"
1058                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
1059                         "         OpStore %i %39\n"
1060                         "         OpBranch %loop\n"
1061
1062                         "%merge = OpLabel\n"
1063                         "         OpReturnValue %param\n"
1064
1065                         "OpFunctionEnd\n");
1066
1067         const StringTemplate    vecPreMain(
1068                         "${itype16} = OpTypeInt 16 ${signed}\n"
1069                         " %c_i32_64 = OpConstant %i32 64\n"
1070                         "%v4itype32 = OpTypeVector ${itype32} 4\n"
1071                         "%v4itype16 = OpTypeVector ${itype16} 4\n"
1072                         " %up_v4i32 = OpTypePointer Uniform %v4itype32\n"
1073                         " %up_v4i16 = OpTypePointer Uniform %v4itype16\n"
1074                         " %ra_v4i32 = OpTypeArray %v4itype32 %c_i32_64\n"
1075                         " %ra_v4i16 = OpTypeArray %v4itype16 %c_i32_64\n"
1076                         "   %SSBO32 = OpTypeStruct %ra_v4i32\n"
1077                         "   %SSBO16 = OpTypeStruct %ra_v4i16\n"
1078                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1079                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1080                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
1081                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
1082
1083         const StringTemplate    vecDecoration(
1084                         "OpDecorate %ra_v4i32 ArrayStride 16\n"
1085                         "OpDecorate %ra_v4i16 ArrayStride 8\n"
1086                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1087                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1088                         "OpDecorate %SSBO32 ${indecor}\n"
1089                         "OpDecorate %SSBO16 BufferBlock\n"
1090                         "OpDecorate %ssbo32 DescriptorSet 0\n"
1091                         "OpDecorate %ssbo16 DescriptorSet 0\n"
1092                         "OpDecorate %ssbo32 Binding 0\n"
1093                         "OpDecorate %ssbo16 Binding 1\n");
1094
1095         const StringTemplate    vecTestFunc(
1096                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
1097                         "    %param = OpFunctionParameter %v4f32\n"
1098
1099                         "%entry = OpLabel\n"
1100                         "    %i = OpVariable %fp_i32 Function\n"
1101                         "         OpStore %i %c_i32_0\n"
1102                         "         OpBranch %loop\n"
1103
1104                         " %loop = OpLabel\n"
1105                         "   %15 = OpLoad %i32 %i\n"
1106                         "   %lt = OpSLessThan %bool %15 %c_i32_64\n"
1107                         "         OpLoopMerge %merge %inc None\n"
1108                         "         OpBranchConditional %lt %write %merge\n"
1109
1110                         "%write = OpLabel\n"
1111                         "   %30 = OpLoad %i32 %i\n"
1112                         "  %src = OpAccessChain %up_v4i32 %ssbo32 %c_i32_0 %30\n"
1113                         "%val32 = OpLoad %v4itype32 %src\n"
1114                         "%val16 = ${convert} %v4itype16 %val32\n"
1115                         "  %dst = OpAccessChain %up_v4i16 %ssbo16 %c_i32_0 %30\n"
1116                         "         OpStore %dst %val16\n"
1117                         "         OpBranch %inc\n"
1118
1119                         "  %inc = OpLabel\n"
1120                         "   %37 = OpLoad %i32 %i\n"
1121                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
1122                         "         OpStore %i %39\n"
1123                         "         OpBranch %loop\n"
1124
1125                         "%merge = OpLabel\n"
1126                         "         OpReturnValue %param\n"
1127
1128                         "OpFunctionEnd\n");
1129
1130         struct Category
1131         {
1132                 const char*                             name;
1133                 const StringTemplate&   preMain;
1134                 const StringTemplate&   decoration;
1135                 const StringTemplate&   testFunction;
1136         };
1137
1138         const Category          categories[]    =
1139         {
1140                 {"scalar",      scalarPreMain,  scalarDecoration,       scalarTestFunc},
1141                 {"vector",      vecPreMain,             vecDecoration,          vecTestFunc},
1142         };
1143
1144         for (deUint32 catIdx = 0; catIdx < DE_LENGTH_OF_ARRAY(categories); ++catIdx)
1145                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1146                         for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
1147                         {
1148                                 map<string, string>     specs;
1149                                 string                          name            = string(CAPABILITIES[capIdx].name) + "_" + categories[catIdx].name + "_" + intFacts[factIdx].name;
1150
1151                                 specs["cap"]                                    = CAPABILITIES[capIdx].cap;
1152                                 specs["indecor"]                                = CAPABILITIES[capIdx].decor;
1153                                 specs["itype32"]                                = intFacts[factIdx].type32;
1154                                 specs["itype16"]                                = intFacts[factIdx].type16;
1155                                 specs["signed"]                                 = intFacts[factIdx].isSigned;
1156                                 specs["convert"]                                = intFacts[factIdx].opcode;
1157
1158                                 fragments["pre_main"]                   = categories[catIdx].preMain.specialize(specs);
1159                                 fragments["testfun"]                    = categories[catIdx].testFunction.specialize(specs);
1160                                 fragments["capability"]                 = capabilities.specialize(specs);
1161                                 fragments["decoration"]                 = categories[catIdx].decoration.specialize(specs);
1162
1163                                 resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
1164
1165                                 createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
1166                         }
1167 }
1168
// Adds compute-shader test cases to `group` that load 32-bit values from one
// buffer, narrow them to 16 bits inside the shader and store them into a
// second buffer.
//
// Two families of cases are generated, each once per entry of CAPABILITIES
// (declared elsewhere in this file):
//   * floats:   scalar / vector / matrix shapes x three rounding-mode variants,
//               converted with OpFConvert and checked by a custom verifyIO.
//   * integers: scalar / vector shapes x signed / unsigned, converted with
//               OpSConvert / OpUConvert and compared with expected values
//               computed on the host (low 16 bits of each input).
//
// \param group  Test group that receives the new SpvAsmComputeShaderCase children.
1169 void addCompute16bitStorageUniform32To16Group (tcu::TestCaseGroup* group)
1170 {
1171         tcu::TestContext&                               testCtx                 = group->getTestContext();
             // Seeded from the group name. The same generator is passed first to getFloat32s()
             // and later to getInt32s() below.
             // NOTE(review): presumably both calls advance rnd, so their order matters - confirm.
1172         de::Random                                              rnd                             (deStringHash(group->getName()));
             // Number of 32-bit scalars in the input buffer; every array length and
             // work group count below is derived from this value.
1173         const int                                               numElements             = 128;
1174
             // Shared SPIR-V template. The local size is 1x1x1 and the x component of
             // GlobalInvocationId selects the array element: the shader loads a ${base32}
             // value from %ssbo32 (binding 0), applies ${convert} to get a ${base16} value
             // and stores it to %ssbo16 (binding 1). The input block is decorated with
             // ${storage}; the output block is always a BufferBlock. The ":opt" placeholders
             // are only filled in by the float cases (rounding) and the matrix cases.
1175         const StringTemplate                    shaderTemplate  (
1176                 "OpCapability Shader\n"
1177                 "OpCapability ${capability}\n"
1178                 "OpExtension \"SPV_KHR_16bit_storage\"\n"
1179                 "OpMemoryModel Logical GLSL450\n"
1180                 "OpEntryPoint GLCompute %main \"main\" %id\n"
1181                 "OpExecutionMode %main LocalSize 1 1 1\n"
1182                 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1183
1184                 "${stride}"
1185
1186                 "OpMemberDecorate %SSBO32 0 Offset 0\n"
1187                 "OpMemberDecorate %SSBO16 0 Offset 0\n"
1188                 "OpDecorate %SSBO32 ${storage}\n"
1189                 "OpDecorate %SSBO16 BufferBlock\n"
1190                 "OpDecorate %ssbo32 DescriptorSet 0\n"
1191                 "OpDecorate %ssbo16 DescriptorSet 0\n"
1192                 "OpDecorate %ssbo32 Binding 0\n"
1193                 "OpDecorate %ssbo16 Binding 1\n"
1194
1195                 "${matrix_decor:opt}\n"
1196
1197                 "${rounding:opt}\n"
1198
1199                 "%bool      = OpTypeBool\n"
1200                 "%void      = OpTypeVoid\n"
1201                 "%voidf     = OpTypeFunction %void\n"
1202                 "%u32       = OpTypeInt 32 0\n"
1203                 "%i32       = OpTypeInt 32 1\n"
1204                 "%f32       = OpTypeFloat 32\n"
1205                 "%uvec3     = OpTypeVector %u32 3\n"
1206                 "%fvec3     = OpTypeVector %f32 3\n"
1207                 "%uvec3ptr  = OpTypePointer Input %uvec3\n"
1208                 "%i32ptr    = OpTypePointer Uniform %i32\n"
1209                 "%f32ptr    = OpTypePointer Uniform %f32\n"
1210
1211                 "%zero      = OpConstant %i32 0\n"
1212                 "%c_i32_1   = OpConstant %i32 1\n"
1213                 "%c_i32_16  = OpConstant %i32 16\n"
1214                 "%c_i32_32  = OpConstant %i32 32\n"
1215                 "%c_i32_64  = OpConstant %i32 64\n"
1216                 "%c_i32_128 = OpConstant %i32 128\n"
1217
1218                 "%i32arr    = OpTypeArray %i32 %c_i32_128\n"
1219                 "%f32arr    = OpTypeArray %f32 %c_i32_128\n"
1220
1221                 "${types}\n"
1222                 "${matrix_types:opt}\n"
1223
1224                 "%SSBO32    = OpTypeStruct %${matrix_prefix:opt}${base32}arr\n"
1225                 "%SSBO16    = OpTypeStruct %${matrix_prefix:opt}${base16}arr\n"
1226                 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1227                 "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1228                 "%ssbo32    = OpVariable %up_SSBO32 Uniform\n"
1229                 "%ssbo16    = OpVariable %up_SSBO16 Uniform\n"
1230
1231                 "%id        = OpVariable %uvec3ptr Input\n"
1232
1233                 "%main      = OpFunction %void None %voidf\n"
1234                 "%label     = OpLabel\n"
1235                 "%idval     = OpLoad %uvec3 %id\n"
1236                 "%x         = OpCompositeExtract %u32 %idval 0\n"
1237                 "%inloc     = OpAccessChain %${base32}ptr %ssbo32 %zero %x ${index0:opt}\n"
1238                 "%val32     = OpLoad %${base32} %inloc\n"
1239                 "%val16     = ${convert} %${base16} %val32\n"
1240                 "%outloc    = OpAccessChain %${base16}ptr %ssbo16 %zero %x ${index0:opt}\n"
1241                 "             OpStore %outloc %val16\n"
1242                 "${matrix_store:opt}\n"
1243                 "             OpReturn\n"
1244                 "             OpFunctionEnd\n");
1245
1246         {  // Floats
                     // 16-bit float types plus the 4-component vector types, substituted for ${types}.
                     // The vector arrays hold 32 elements, i.e. numElements / 4.
1247                 const char                                              floatTypes[]    =
1248                         "%f16       = OpTypeFloat 16\n"
1249                         "%f16ptr    = OpTypePointer Uniform %f16\n"
1250                         "%f16arr    = OpTypeArray %f16 %c_i32_128\n"
1251                         "%v4f16     = OpTypeVector %f16 4\n"
1252                         "%v4f32     = OpTypeVector %f32 4\n"
1253                         "%v4f16ptr  = OpTypePointer Uniform %v4f16\n"
1254                         "%v4f32ptr  = OpTypePointer Uniform %v4f32\n"
1255                         "%v4f16arr  = OpTypeArray %v4f16 %c_i32_32\n"
1256                         "%v4f32arr  = OpTypeArray %v4f32 %c_i32_32\n";
1257
                     // One rounding-mode variant of the float test.
1258                 struct RndMode
1259                 {
1260                         const char*                             name;   // Suffix of the test case name.
1261                         const char*                             decor;  // Text substituted for ${rounding}; empty when no mode is requested.
1262                         ComputeVerifyIOFunc             func;   // Installed as spec.verifyIO for this variant.
1263                 };
1264
                     // NOTE(review): the "unspecified" entry passes RTE | RTZ to the checker, which
                     // presumably makes it accept a result rounded either way - confirm in computeCheck16BitFloats.
1265                 const RndMode           rndModes[]              =
1266                 {
1267                         {"rtz",                                         "OpDecorate %val16  FPRoundingMode RTZ",        computeCheck16BitFloats<ROUNDINGMODE_RTZ>},
1268                         {"rte",                                         "OpDecorate %val16  FPRoundingMode RTE",        computeCheck16BitFloats<ROUNDINGMODE_RTE>},
1269                         {"unspecified_rnd_mode",        "",                                                                                     computeCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
1270                 };
1271
                     // One data shape of the float test.
1272                 struct CompositeType
1273                 {
1274                         const char*     name;    // Part of the test case name; "matrix" also enables the matrix-only template pieces.
1275                         const char*     base32;  // Stem of the 32-bit type ids (%<base32>, %<base32>ptr, %<base32>arr).
1276                         const char*     base16;  // Stem of the corresponding 16-bit type ids.
1277                         const char*     stride;  // ArrayStride decorations substituted for ${stride}.
1278                         unsigned        count;   // Number of work groups in x, i.e. array elements processed.
1279                 };
1280
                     // The matrix row keeps the vector types as base32/base16 because the shader
                     // accesses the matrix one column vector at a time; the "m2" prefix set
                     // further down selects the matrix array type for the buffer structs.
1281                 const CompositeType     cTypes[]        =
1282                 {
1283                         {"scalar",      "f32",          "f16",          "OpDecorate %f32arr ArrayStride 4\nOpDecorate %f16arr ArrayStride 2\n",                         numElements},
1284                         {"vector",      "v4f32",        "v4f16",        "OpDecorate %v4f32arr ArrayStride 16\nOpDecorate %v4f16arr ArrayStride 8\n",            numElements / 4},
1285                         {"matrix",      "v4f32",        "v4f16",        "OpDecorate %m2v4f32arr ArrayStride 32\nOpDecorate %m2v4f16arr ArrayStride 16\n",       numElements / 8},
1286                 };
1287
                     // The same input data and the same zero-filled placeholder output are shared by all float cases.
1288                 vector<float>           float32Data                     = getFloat32s(rnd, numElements);
1289                 vector<deFloat16>       float16DummyData        (numElements, 0);
1290
                     // Generate one test per (capability, shape, rounding mode) combination.
1291                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1292                         for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
1293                                 for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
1294                                 {
1295                                         ComputeShaderSpec               spec;
1296                                         map<string, string>             specs;
1297                                         string                                  testName        = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name + "_float_" + rndModes[rndModeIdx].name;
1298
1299                                         specs["capability"]             = CAPABILITIES[capIdx].cap;
1300                                         specs["storage"]                = CAPABILITIES[capIdx].decor;
1301                                         specs["stride"]                 = cTypes[tyIdx].stride;
1302                                         specs["base32"]                 = cTypes[tyIdx].base32;
1303                                         specs["base16"]                 = cTypes[tyIdx].base16;
1304                                         specs["rounding"]               = rndModes[rndModeIdx].decor;
1305                                         specs["types"]                  = floatTypes;
1306                                         specs["convert"]                = "OpFConvert";
1307
                                             // Matrix cases use 2-column matrices of 4-component vectors. The template
                                             // converts column 0 (index0 = %zero); matrix_store adds the same
                                             // load/convert/store sequence for column 1.
1308                                         if (strcmp(cTypes[tyIdx].name, "matrix") == 0)
1309                                         {
                                                     // The second conversion result (%val16_1) needs its own rounding-mode
                                                     // decoration; nothing is added for the unspecified variant.
1310                                                 if (strcmp(rndModes[rndModeIdx].name, "rtz") == 0)
1311                                                         specs["rounding"] += "\nOpDecorate %val16_1  FPRoundingMode RTZ\n";
1312                                                 else if (strcmp(rndModes[rndModeIdx].name, "rte") == 0)
1313                                                         specs["rounding"] += "\nOpDecorate %val16_1  FPRoundingMode RTE\n";
1314
1315                                                 specs["index0"]                 = "%zero";
1316                                                 specs["matrix_prefix"]  = "m2";
1317                                                 specs["matrix_types"]   =
1318                                                         "%m2v4f16 = OpTypeMatrix %v4f16 2\n"
1319                                                         "%m2v4f32 = OpTypeMatrix %v4f32 2\n"
1320                                                         "%m2v4f16arr = OpTypeArray %m2v4f16 %c_i32_16\n"
1321                                                         "%m2v4f32arr = OpTypeArray %m2v4f32 %c_i32_16\n";
1322                                                 specs["matrix_decor"]   =
1323                                                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
1324                                                         "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
1325                                                         "OpMemberDecorate %SSBO16 0 ColMajor\n"
1326                                                         "OpMemberDecorate %SSBO16 0 MatrixStride 8\n";
1327                                                 specs["matrix_store"]   =
1328                                                         "%inloc_1  = OpAccessChain %v4f32ptr %ssbo32 %zero %x %c_i32_1\n"
1329                                                         "%val32_1  = OpLoad %v4f32 %inloc_1\n"
1330                                                         "%val16_1  = OpFConvert %v4f16 %val32_1\n"
1331                                                         "%outloc_1 = OpAccessChain %v4f16ptr %ssbo16 %zero %x %c_i32_1\n"
1332                                                         "            OpStore %outloc_1 %val16_1\n";
1333                                         }
1334
1335                                         spec.assembly                   = shaderTemplate.specialize(specs);
                                             // One work group (of a single invocation) per array element.
1336                                         spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
1337                                         spec.verifyIO                   = rndModes[rndModeIdx].func;
                                             // Descriptor type of the input buffer comes from the capability entry.
1338                                         spec.inputTypes[0]              = CAPABILITIES[capIdx].dtype;
1339
1340                                         spec.inputs.push_back(BufferSp(new Float32Buffer(float32Data)));
1341                                         // We provided a custom verifyIO in the above in which inputs will be used for checking.
1342                                         // So put dummy data in the expected values.
1343                                         spec.outputs.push_back(BufferSp(new Float16Buffer(float16DummyData)));
1344                                         spec.extensions.push_back("VK_KHR_16bit_storage");
1345                                         spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
1346
1347                                         group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
1348                                 }
1349         }
1350
1351         {  // Integers
                     // Signed 16-bit types and 2-component vector types. %i32ptr and %i32arr
                     // already come from the shared template.
1352                 const char              sintTypes[]     =
1353                         "%i16       = OpTypeInt 16 1\n"
1354                         "%i16ptr    = OpTypePointer Uniform %i16\n"
1355                         "%i16arr    = OpTypeArray %i16 %c_i32_128\n"
1356                         "%v2i16     = OpTypeVector %i16 2\n"
1357                         "%v2i32     = OpTypeVector %i32 2\n"
1358                         "%v2i16ptr  = OpTypePointer Uniform %v2i16\n"
1359                         "%v2i32ptr  = OpTypePointer Uniform %v2i32\n"
1360                         "%v2i16arr  = OpTypeArray %v2i16 %c_i32_64\n"
1361                         "%v2i32arr  = OpTypeArray %v2i32 %c_i32_64\n";
1362
                     // Unsigned counterparts. The template has no %u32ptr / %u32arr, so they are declared here.
1363                 const char              uintTypes[]     =
1364                         "%u16       = OpTypeInt 16 0\n"
1365                         "%u16ptr    = OpTypePointer Uniform %u16\n"
1366                         "%u32ptr    = OpTypePointer Uniform %u32\n"
1367                         "%u16arr    = OpTypeArray %u16 %c_i32_128\n"
1368                         "%u32arr    = OpTypeArray %u32 %c_i32_128\n"
1369                         "%v2u16     = OpTypeVector %u16 2\n"
1370                         "%v2u32     = OpTypeVector %u32 2\n"
1371                         "%v2u16ptr  = OpTypePointer Uniform %v2u16\n"
1372                         "%v2u32ptr  = OpTypePointer Uniform %v2u32\n"
1373                         "%v2u16arr  = OpTypeArray %v2u16 %c_i32_64\n"
1374                         "%v2u32arr  = OpTypeArray %v2u32 %c_i32_64\n";
1375
                     // One data shape / signedness combination of the integer test.
1376                 struct CompositeType
1377                 {
1378                         const char*     name;    // Part of the test case name.
1379                         const char* types;   // Type declarations substituted for ${types}.
1380                         const char*     base32;  // Stem of the 32-bit type ids.
1381                         const char*     base16;  // Stem of the 16-bit type ids.
1382                         const char* opcode;  // Conversion instruction substituted for ${convert}.
1383                         const char*     stride;  // ArrayStride decorations substituted for ${stride}.
1384                         unsigned        count;   // Number of work groups in x, i.e. array elements processed.
1385                 };
1386
1387                 const CompositeType     cTypes[]        =
1388                 {
1389                         {"scalar_sint", sintTypes,      "i32",          "i16",          "OpSConvert",   "OpDecorate %i32arr ArrayStride 4\nOpDecorate %i16arr ArrayStride 2\n",         numElements},
1390                         {"scalar_uint", uintTypes,      "u32",          "u16",          "OpUConvert",   "OpDecorate %u32arr ArrayStride 4\nOpDecorate %u16arr ArrayStride 2\n",         numElements},
1391                         {"vector_sint", sintTypes,      "v2i32",        "v2i16",        "OpSConvert",   "OpDecorate %v2i32arr ArrayStride 8\nOpDecorate %v2i16arr ArrayStride 4\n",     numElements / 2},
1392                         {"vector_uint", uintTypes,      "v2u32",        "v2u16",        "OpUConvert",   "OpDecorate %v2u32arr ArrayStride 8\nOpDecorate %v2u16arr ArrayStride 4\n",     numElements / 2},
1393                 };
1394
1395                 vector<deInt32> inputs                  = getInt32s(rnd, numElements);
1396                 vector<deInt16> outputs;
1397
                     // Expected result of each conversion: the low 16 bits of the 32-bit input.
                     // The same expected buffer is used for the signed and the unsigned cases.
1398                 outputs.reserve(inputs.size());
1399                 for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
1400                         outputs.push_back(static_cast<deInt16>(0xffff & inputs[numNdx]));
1401
                     // Generate one test per (capability, shape/signedness) combination.
1402                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1403                         for (deUint32 tyIdx = 0; tyIdx < DE_LENGTH_OF_ARRAY(cTypes); ++tyIdx)
1404                         {
1405                                 ComputeShaderSpec               spec;
1406                                 map<string, string>             specs;
1407                                 string                                  testName        = string(CAPABILITIES[capIdx].name) + "_" + cTypes[tyIdx].name;
1408
1409                                 specs["capability"]             = CAPABILITIES[capIdx].cap;
1410                                 specs["storage"]                = CAPABILITIES[capIdx].decor;
1411                                 specs["stride"]                 = cTypes[tyIdx].stride;
1412                                 specs["base32"]                 = cTypes[tyIdx].base32;
1413                                 specs["base16"]                 = cTypes[tyIdx].base16;
1414                                 specs["types"]                  = cTypes[tyIdx].types;
1415                                 specs["convert"]                = cTypes[tyIdx].opcode;
1416
1417                                 spec.assembly                   = shaderTemplate.specialize(specs);
1418                                 spec.numWorkGroups              = IVec3(cTypes[tyIdx].count, 1, 1);
1419                                 spec.inputTypes[0]              = CAPABILITIES[capIdx].dtype;
1420
                                     // No custom verifyIO is set here.
                                     // NOTE(review): presumably the framework then compares the output buffer with `outputs` - confirm.
1421                                 spec.inputs.push_back(BufferSp(new Int32Buffer(inputs)));
1422                                 spec.outputs.push_back(BufferSp(new Int16Buffer(outputs)));
1423                                 spec.extensions.push_back("VK_KHR_16bit_storage");
1424                                 spec.requestedVulkanFeatures = get16BitStorageFeatures(CAPABILITIES[capIdx].name);
1425
1426                                 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testName.c_str(), spec));
1427                         }
1428         }
1429 }
1430
1431 void addGraphics16BitStorageUniformFloat32To16Group (tcu::TestCaseGroup* testGroup)
1432 {
1433         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
1434         map<string, string>                                     fragments;
1435         GraphicsResources                                       resources;
1436         vector<string>                                          extensions;
1437         const deUint32                                          numDataPoints           = 256;
1438         RGBA                                                            defaultColors[4];
1439         vector<float>                                           float32Data                     = getFloat32s(rnd, numDataPoints);
1440         vector<deFloat16>                                       float16DummyData        (numDataPoints, 0);
1441         const StringTemplate                            capabilities            ("OpCapability ${cap}\n");
1442
1443         resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data))));
1444         // We use a custom verifyIO to check the result via computing directly from inputs; the contents in outputs do not matter.
1445         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16DummyData))));
1446
1447         extensions.push_back("VK_KHR_16bit_storage");
1448         fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"";
1449
1450         struct RndMode
1451         {
1452                 const char*                             name;
1453                 const char*                             decor;
1454                 GraphicsVerifyIOFunc    f;
1455         };
1456
1457         getDefaultColors(defaultColors);
1458
1459         {  // scalar cases
1460                 fragments["pre_main"]                           =
1461                         "      %f16 = OpTypeFloat 16\n"
1462                         "%c_i32_256 = OpConstant %i32 256\n"
1463                         "   %up_f32 = OpTypePointer Uniform %f32\n"
1464                         "   %up_f16 = OpTypePointer Uniform %f16\n"
1465                         "   %ra_f32 = OpTypeArray %f32 %c_i32_256\n"
1466                         "   %ra_f16 = OpTypeArray %f16 %c_i32_256\n"
1467                         "   %SSBO32 = OpTypeStruct %ra_f32\n"
1468                         "   %SSBO16 = OpTypeStruct %ra_f16\n"
1469                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1470                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1471                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
1472                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
1473
1474                 const StringTemplate decoration         (
1475                         "OpDecorate %ra_f32 ArrayStride 4\n"
1476                         "OpDecorate %ra_f16 ArrayStride 2\n"
1477                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1478                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1479                         "OpDecorate %SSBO32 ${indecor}\n"
1480                         "OpDecorate %SSBO16 BufferBlock\n"
1481                         "OpDecorate %ssbo32 DescriptorSet 0\n"
1482                         "OpDecorate %ssbo16 DescriptorSet 0\n"
1483                         "OpDecorate %ssbo32 Binding 0\n"
1484                         "OpDecorate %ssbo16 Binding 1\n"
1485                         "${rounddecor}\n");
1486
1487                 fragments["testfun"]                            =
1488                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
1489                         "    %param = OpFunctionParameter %v4f32\n"
1490
1491                         "%entry = OpLabel\n"
1492                         "    %i = OpVariable %fp_i32 Function\n"
1493                         "         OpStore %i %c_i32_0\n"
1494                         "         OpBranch %loop\n"
1495
1496                         " %loop = OpLabel\n"
1497                         "   %15 = OpLoad %i32 %i\n"
1498                         "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
1499                         "         OpLoopMerge %merge %inc None\n"
1500                         "         OpBranchConditional %lt %write %merge\n"
1501
1502                         "%write = OpLabel\n"
1503                         "   %30 = OpLoad %i32 %i\n"
1504                         "  %src = OpAccessChain %up_f32 %ssbo32 %c_i32_0 %30\n"
1505                         "%val32 = OpLoad %f32 %src\n"
1506                         "%val16 = OpFConvert %f16 %val32\n"
1507                         "  %dst = OpAccessChain %up_f16 %ssbo16 %c_i32_0 %30\n"
1508                         "         OpStore %dst %val16\n"
1509                         "         OpBranch %inc\n"
1510
1511                         "  %inc = OpLabel\n"
1512                         "   %37 = OpLoad %i32 %i\n"
1513                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
1514                         "         OpStore %i %39\n"
1515                         "         OpBranch %loop\n"
1516
1517                         "%merge = OpLabel\n"
1518                         "         OpReturnValue %param\n"
1519
1520                         "OpFunctionEnd\n";
1521
1522                 const RndMode   rndModes[] =
1523                 {
1524                         {"rtz",                                         "OpDecorate %val16  FPRoundingMode RTZ",        graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
1525                         {"rte",                                         "OpDecorate %val16  FPRoundingMode RTE",        graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
1526                         {"unspecified_rnd_mode",        "",                                                                                     graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
1527                 };
1528
1529                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1530                         for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
1531                         {
1532                                 map<string, string>     specs;
1533                                 string                          testName        = string(CAPABILITIES[capIdx].name) + "_scalar_float_" + rndModes[rndModeIdx].name;
1534
1535                                 specs["cap"]                                    = CAPABILITIES[capIdx].cap;
1536                                 specs["indecor"]                                = CAPABILITIES[capIdx].decor;
1537                                 specs["rounddecor"]                             = rndModes[rndModeIdx].decor;
1538
1539                                 fragments["capability"]                 = capabilities.specialize(specs);
1540                                 fragments["decoration"]                 = decoration.specialize(specs);
1541
1542                                 resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
1543                                 resources.verifyIO                              = rndModes[rndModeIdx].f;
1544
1545
1546                                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
1547                         }
1548         }
1549
1550         {  // vector cases
1551                 fragments["pre_main"]                           =
1552                         "      %f16 = OpTypeFloat 16\n"
1553                         " %c_i32_64 = OpConstant %i32 64\n"
1554                         "        %v4f16 = OpTypeVector %f16 4\n"
1555                         " %up_v4f32 = OpTypePointer Uniform %v4f32\n"
1556                         " %up_v4f16 = OpTypePointer Uniform %v4f16\n"
1557                         " %ra_v4f32 = OpTypeArray %v4f32 %c_i32_64\n"
1558                         " %ra_v4f16 = OpTypeArray %v4f16 %c_i32_64\n"
1559                         "   %SSBO32 = OpTypeStruct %ra_v4f32\n"
1560                         "   %SSBO16 = OpTypeStruct %ra_v4f16\n"
1561                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1562                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1563                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
1564                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
1565
1566                 const StringTemplate decoration         (
1567                         "OpDecorate %ra_v4f32 ArrayStride 16\n"
1568                         "OpDecorate %ra_v4f16 ArrayStride 8\n"
1569                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1570                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1571                         "OpDecorate %SSBO32 ${indecor}\n"
1572                         "OpDecorate %SSBO16 BufferBlock\n"
1573                         "OpDecorate %ssbo32 DescriptorSet 0\n"
1574                         "OpDecorate %ssbo16 DescriptorSet 0\n"
1575                         "OpDecorate %ssbo32 Binding 0\n"
1576                         "OpDecorate %ssbo16 Binding 1\n"
1577                         "${rounddecor}\n");
1578
1579                 // ssbo16[] <- convert ssbo32[] to 16bit float
1580                 fragments["testfun"]                            =
1581                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
1582                         "    %param = OpFunctionParameter %v4f32\n"
1583
1584                         "%entry = OpLabel\n"
1585                         "    %i = OpVariable %fp_i32 Function\n"
1586                         "         OpStore %i %c_i32_0\n"
1587                         "         OpBranch %loop\n"
1588
1589                         " %loop = OpLabel\n"
1590                         "   %15 = OpLoad %i32 %i\n"
1591                         "   %lt = OpSLessThan %bool %15 %c_i32_64\n"
1592                         "         OpLoopMerge %merge %inc None\n"
1593                         "         OpBranchConditional %lt %write %merge\n"
1594
1595                         "%write = OpLabel\n"
1596                         "   %30 = OpLoad %i32 %i\n"
1597                         "  %src = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30\n"
1598                         "%val32 = OpLoad %v4f32 %src\n"
1599                         "%val16 = OpFConvert %v4f16 %val32\n"
1600                         "  %dst = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30\n"
1601                         "         OpStore %dst %val16\n"
1602                         "         OpBranch %inc\n"
1603
1604                         "  %inc = OpLabel\n"
1605                         "   %37 = OpLoad %i32 %i\n"
1606                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
1607                         "         OpStore %i %39\n"
1608                         "         OpBranch %loop\n"
1609
1610                         "%merge = OpLabel\n"
1611                         "         OpReturnValue %param\n"
1612
1613                         "OpFunctionEnd\n";
1614
1615                 const RndMode   rndModes[] =
1616                 {
1617                         {"rtz",                                         "OpDecorate %val16  FPRoundingMode RTZ",        graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
1618                         {"rte",                                         "OpDecorate %val16  FPRoundingMode RTE",        graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
1619                         {"unspecified_rnd_mode",        "",                                                                                     graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
1620                 };
1621
1622                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1623                         for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
1624                         {
1625                                 map<string, string>     specs;
1626                                 string                          testName        = string(CAPABILITIES[capIdx].name) + "_vector_float_" + rndModes[rndModeIdx].name;
1627
1628                                 specs["cap"]                                    = CAPABILITIES[capIdx].cap;
1629                                 specs["indecor"]                                = CAPABILITIES[capIdx].decor;
1630                                 specs["rounddecor"]                             = rndModes[rndModeIdx].decor;
1631
1632                                 fragments["capability"]                 = capabilities.specialize(specs);
1633                                 fragments["decoration"]                 = decoration.specialize(specs);
1634
1635                                 resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
1636                                 resources.verifyIO                              = rndModes[rndModeIdx].f;
1637
1638
1639                                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
1640                         }
1641         }
1642
1643         {  // matrix cases
1644                 fragments["pre_main"]                           =
1645                         "       %f16 = OpTypeFloat 16\n"
1646                         "  %c_i32_16 = OpConstant %i32 16\n"
1647                         "     %v4f16 = OpTypeVector %f16 4\n"
1648                         "   %m4x4f32 = OpTypeMatrix %v4f32 4\n"
1649                         "   %m4x4f16 = OpTypeMatrix %v4f16 4\n"
1650                         "  %up_v4f32 = OpTypePointer Uniform %v4f32\n"
1651                         "  %up_v4f16 = OpTypePointer Uniform %v4f16\n"
1652                         "%a16m4x4f32 = OpTypeArray %m4x4f32 %c_i32_16\n"
1653                         "%a16m4x4f16 = OpTypeArray %m4x4f16 %c_i32_16\n"
1654                         "    %SSBO32 = OpTypeStruct %a16m4x4f32\n"
1655                         "    %SSBO16 = OpTypeStruct %a16m4x4f16\n"
1656                         " %up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
1657                         " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
1658                         "    %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
1659                         "    %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
1660
1661                 const StringTemplate decoration         (
1662                         "OpDecorate %a16m4x4f32 ArrayStride 64\n"
1663                         "OpDecorate %a16m4x4f16 ArrayStride 32\n"
1664                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
1665                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
1666                         "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
1667                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
1668                         "OpMemberDecorate %SSBO16 0 ColMajor\n"
1669                         "OpMemberDecorate %SSBO16 0 MatrixStride 8\n"
1670                         "OpDecorate %SSBO32 ${indecor}\n"
1671                         "OpDecorate %SSBO16 BufferBlock\n"
1672                         "OpDecorate %ssbo32 DescriptorSet 0\n"
1673                         "OpDecorate %ssbo16 DescriptorSet 0\n"
1674                         "OpDecorate %ssbo32 Binding 0\n"
1675                         "OpDecorate %ssbo16 Binding 1\n"
1676                         "${rounddecor}\n");
1677
1678                 fragments["testfun"]                            =
1679                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
1680                         "    %param = OpFunctionParameter %v4f32\n"
1681
1682                         "%entry = OpLabel\n"
1683                         "    %i = OpVariable %fp_i32 Function\n"
1684                         "         OpStore %i %c_i32_0\n"
1685                         "         OpBranch %loop\n"
1686
1687                         " %loop = OpLabel\n"
1688                         "   %15 = OpLoad %i32 %i\n"
1689                         "   %lt = OpSLessThan %bool %15 %c_i32_16\n"
1690                         "         OpLoopMerge %merge %inc None\n"
1691                         "         OpBranchConditional %lt %write %merge\n"
1692
1693                         "  %write = OpLabel\n"
1694                         "     %30 = OpLoad %i32 %i\n"
1695                         "  %src_0 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_0\n"
1696                         "  %src_1 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_1\n"
1697                         "  %src_2 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_2\n"
1698                         "  %src_3 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_3\n"
1699                         "%val32_0 = OpLoad %v4f32 %src_0\n"
1700                         "%val32_1 = OpLoad %v4f32 %src_1\n"
1701                         "%val32_2 = OpLoad %v4f32 %src_2\n"
1702                         "%val32_3 = OpLoad %v4f32 %src_3\n"
1703                         "%val16_0 = OpFConvert %v4f16 %val32_0\n"
1704                         "%val16_1 = OpFConvert %v4f16 %val32_1\n"
1705                         "%val16_2 = OpFConvert %v4f16 %val32_2\n"
1706                         "%val16_3 = OpFConvert %v4f16 %val32_3\n"
1707                         "  %dst_0 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_0\n"
1708                         "  %dst_1 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_1\n"
1709                         "  %dst_2 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_2\n"
1710                         "  %dst_3 = OpAccessChain %up_v4f16 %ssbo16 %c_i32_0 %30 %c_i32_3\n"
1711                         "           OpStore %dst_0 %val16_0\n"
1712                         "           OpStore %dst_1 %val16_1\n"
1713                         "           OpStore %dst_2 %val16_2\n"
1714                         "           OpStore %dst_3 %val16_3\n"
1715                         "           OpBranch %inc\n"
1716
1717                         "  %inc = OpLabel\n"
1718                         "   %37 = OpLoad %i32 %i\n"
1719                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
1720                         "         OpStore %i %39\n"
1721                         "         OpBranch %loop\n"
1722
1723                         "%merge = OpLabel\n"
1724                         "         OpReturnValue %param\n"
1725
1726                         "OpFunctionEnd\n";
1727
1728                 const RndMode   rndModes[] =
1729                 {
1730                         {"rte",                                         "OpDecorate %val16_0  FPRoundingMode RTE\nOpDecorate %val16_1  FPRoundingMode RTE\nOpDecorate %val16_2  FPRoundingMode RTE\nOpDecorate %val16_3  FPRoundingMode RTE",   graphicsCheck16BitFloats<ROUNDINGMODE_RTE>},
1731                         {"rtz",                                         "OpDecorate %val16_0  FPRoundingMode RTZ\nOpDecorate %val16_1  FPRoundingMode RTZ\nOpDecorate %val16_2  FPRoundingMode RTZ\nOpDecorate %val16_3  FPRoundingMode RTZ",   graphicsCheck16BitFloats<ROUNDINGMODE_RTZ>},
1732                         {"unspecified_rnd_mode",        "",                                                                                                                                                                                                                                                                                                                                             graphicsCheck16BitFloats<RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)>},
1733                 };
1734
1735                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
1736                         for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
1737                         {
1738                                 map<string, string>     specs;
1739                                 string                          testName        = string(CAPABILITIES[capIdx].name) + "_matrix_float_" + rndModes[rndModeIdx].name;
1740
1741                                 specs["cap"]                                    = CAPABILITIES[capIdx].cap;
1742                                 specs["indecor"]                                = CAPABILITIES[capIdx].decor;
1743                                 specs["rounddecor"]                             = rndModes[rndModeIdx].decor;
1744
1745                                 fragments["capability"]                 = capabilities.specialize(specs);
1746                                 fragments["decoration"]                 = decoration.specialize(specs);
1747
1748                                 resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
1749                                 resources.verifyIO                              = rndModes[rndModeIdx].f;
1750
1751
1752                                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
1753                         }
1754         }
1755 }
1756
1757 void addGraphics16BitStorageInputOutputFloat32To16Group (tcu::TestCaseGroup* testGroup)
1758 {
1759         de::Random                      rnd                                     (deStringHash(testGroup->getName()));
1760         RGBA                            defaultColors[4];
1761         vector<string>          extensions;
1762         map<string, string>     fragments                       = passthruFragments();
1763         const deUint32          numDataPoints           = 64;
1764         vector<float>           float32Data                     = getFloat32s(rnd, numDataPoints);
1765
1766         extensions.push_back("VK_KHR_16bit_storage");
1767
1768         fragments["capability"]                         = "OpCapability StorageInputOutput16\n";
1769         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"\n";
1770
1771         getDefaultColors(defaultColors);
1772
1773         struct RndMode
1774         {
1775                 const char*                             name;
1776                 const char*                             decor;
1777                 RoundingModeFlags               flags;
1778         };
1779
1780         const RndMode           rndModes[]              =
1781         {
1782                 {"rtz",                                         "OpDecorate %ret  FPRoundingMode RTZ",  ROUNDINGMODE_RTZ},
1783                 {"rte",                                         "OpDecorate %ret  FPRoundingMode RTE",  ROUNDINGMODE_RTE},
1784                 {"unspecified_rnd_mode",        "",                                                                             RoundingModeFlags(ROUNDINGMODE_RTE | ROUNDINGMODE_RTZ)},
1785         };
1786
1787         struct Case
1788         {
1789                 const char*     name;
1790                 const char*     interfaceOpFunc;
1791                 const char*     preMain;
1792                 const char*     inputType;
1793                 const char*     outputType;
1794                 deUint32        numPerCase;
1795                 deUint32        numElements;
1796         };
1797
1798         const Case      cases[]         =
1799         {
1800                 { // Scalar cases
1801                         "scalar",
1802
1803                         "%interface_op_func = OpFunction %f16 None %f16_f32_function\n"
1804                         "        %io_param1 = OpFunctionParameter %f32\n"
1805                         "            %entry = OpLabel\n"
1806                         "                          %ret = OpFConvert %f16 %io_param1\n"
1807                         "                     OpReturnValue %ret\n"
1808                         "                     OpFunctionEnd\n",
1809
1810                         "             %f16 = OpTypeFloat 16\n"
1811                         "          %op_f16 = OpTypePointer Output %f16\n"
1812                         "           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
1813                         "        %op_a3f16 = OpTypePointer Output %a3f16\n"
1814                         "%f16_f32_function = OpTypeFunction %f16 %f32\n"
1815                         "           %a3f32 = OpTypeArray %f32 %c_i32_3\n"
1816                         "        %ip_a3f32 = OpTypePointer Input %a3f32\n",
1817
1818                         "f32",
1819                         "f16",
1820                         4,
1821                         1,
1822                 },
1823                 { // Vector cases
1824                         "vector",
1825
1826                         "%interface_op_func = OpFunction %v2f16 None %v2f16_v2f32_function\n"
1827                         "        %io_param1 = OpFunctionParameter %v2f32\n"
1828                         "            %entry = OpLabel\n"
1829                         "                          %ret = OpFConvert %v2f16 %io_param1\n"
1830                         "                     OpReturnValue %ret\n"
1831                         "                     OpFunctionEnd\n",
1832
1833                         "                 %f16 = OpTypeFloat 16\n"
1834                         "               %v2f16 = OpTypeVector %f16 2\n"
1835                         "            %op_v2f16 = OpTypePointer Output %v2f16\n"
1836                         "             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
1837                         "          %op_a3v2f16 = OpTypePointer Output %a3v2f16\n"
1838                         "%v2f16_v2f32_function = OpTypeFunction %v2f16 %v2f32\n"
1839                         "             %a3v2f32 = OpTypeArray %v2f32 %c_i32_3\n"
1840                         "          %ip_a3v2f32 = OpTypePointer Input %a3v2f32\n",
1841
1842                         "v2f32",
1843                         "v2f16",
1844                         2 * 4,
1845                         2,
1846                 }
1847         };
1848
1849         VulkanFeatures  requiredFeatures;
1850         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
1851
1852         for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
1853                 for (deUint32 rndModeIdx = 0; rndModeIdx < DE_LENGTH_OF_ARRAY(rndModes); ++rndModeIdx)
1854                 {
1855                         fragments["interface_op_func"]  = cases[caseIdx].interfaceOpFunc;
1856                         fragments["pre_main"]                   = cases[caseIdx].preMain;
1857                         fragments["decoration"]                 = rndModes[rndModeIdx].decor;
1858
1859                         fragments["input_type"]                 = cases[caseIdx].inputType;
1860                         fragments["output_type"]                = cases[caseIdx].outputType;
1861
1862                         GraphicsInterfaces      interfaces;
1863                         const deUint32          numPerCase      = cases[caseIdx].numPerCase;
1864                         vector<float>           subInputs       (numPerCase);
1865                         vector<deFloat16>       subOutputs      (numPerCase);
1866
1867                         // The pipeline need this to call compare16BitFloat() when checking the result.
1868                         interfaces.setRoundingMode(rndModes[rndModeIdx].flags);
1869
1870                         for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
1871                         {
1872                                 string                  testName        = string(cases[caseIdx].name) + numberToString(caseNdx) + "_" + rndModes[rndModeIdx].name;
1873
1874                                 for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
1875                                 {
1876                                         subInputs[numNdx]       = float32Data[caseNdx * numPerCase + numNdx];
1877                                         // We derive the expected result from inputs directly in the graphics pipeline.
1878                                         subOutputs[numNdx]      = 0;
1879                                 }
1880                                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT32), BufferSp(new Float32Buffer(subInputs))),
1881                                                                                   std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16), BufferSp(new Float16Buffer(subOutputs))));
1882                                 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
1883                         }
1884                 }
1885 }
1886
1887 void addGraphics16BitStorageInputOutputFloat16To32Group (tcu::TestCaseGroup* testGroup)
1888 {
1889         de::Random                              rnd                                     (deStringHash(testGroup->getName()));
1890         RGBA                                    defaultColors[4];
1891         vector<string>                  extensions;
1892         map<string, string>             fragments                       = passthruFragments();
1893         const deUint32                  numDataPoints           = 64;
1894         vector<deFloat16>               float16Data                     (getFloat16s(rnd, numDataPoints));
1895         vector<float>                   float32Data;
1896
1897         float32Data.reserve(numDataPoints);
1898         for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
1899                 float32Data.push_back(deFloat16To32(float16Data[numIdx]));
1900
1901         extensions.push_back("VK_KHR_16bit_storage");
1902
1903         fragments["capability"]                         = "OpCapability StorageInputOutput16\n";
1904         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"\n";
1905
1906         getDefaultColors(defaultColors);
1907
1908         struct Case
1909         {
1910                 const char*     name;
1911                 const char*     interfaceOpFunc;
1912                 const char*     preMain;
1913                 const char*     inputType;
1914                 const char*     outputType;
1915                 deUint32        numPerCase;
1916                 deUint32        numElements;
1917         };
1918
1919         Case    cases[]         =
1920         {
1921                 { // Scalar cases
1922                         "scalar",
1923
1924                         "%interface_op_func = OpFunction %f32 None %f32_f16_function\n"
1925                         "        %io_param1 = OpFunctionParameter %f16\n"
1926                         "            %entry = OpLabel\n"
1927                         "                          %ret = OpFConvert %f32 %io_param1\n"
1928                         "                     OpReturnValue %ret\n"
1929                         "                     OpFunctionEnd\n",
1930
1931                         "             %f16 = OpTypeFloat 16\n"
1932                         "          %ip_f16 = OpTypePointer Input %f16\n"
1933                         "           %a3f16 = OpTypeArray %f16 %c_i32_3\n"
1934                         "        %ip_a3f16 = OpTypePointer Input %a3f16\n"
1935                         "%f32_f16_function = OpTypeFunction %f32 %f16\n"
1936                         "           %a3f32 = OpTypeArray %f32 %c_i32_3\n"
1937                         "        %op_a3f32 = OpTypePointer Output %a3f32\n",
1938
1939                         "f16",
1940                         "f32",
1941                         4,
1942                         1,
1943                 },
1944                 { // Vector cases
1945                         "vector",
1946
1947                         "%interface_op_func = OpFunction %v2f32 None %v2f32_v2f16_function\n"
1948                         "        %io_param1 = OpFunctionParameter %v2f16\n"
1949                         "            %entry = OpLabel\n"
1950                         "                          %ret = OpFConvert %v2f32 %io_param1\n"
1951                         "                     OpReturnValue %ret\n"
1952                         "                     OpFunctionEnd\n",
1953
1954                         "                 %f16 = OpTypeFloat 16\n"
1955                         "                       %v2f16 = OpTypeVector %f16 2\n"
1956                         "            %ip_v2f16 = OpTypePointer Input %v2f16\n"
1957                         "             %a3v2f16 = OpTypeArray %v2f16 %c_i32_3\n"
1958                         "          %ip_a3v2f16 = OpTypePointer Input %a3v2f16\n"
1959                         "%v2f32_v2f16_function = OpTypeFunction %v2f32 %v2f16\n"
1960                         "             %a3v2f32 = OpTypeArray %v2f32 %c_i32_3\n"
1961                         "          %op_a3v2f32 = OpTypePointer Output %a3v2f32\n",
1962
1963                         "v2f16",
1964                         "v2f32",
1965                         2 * 4,
1966                         2,
1967                 }
1968         };
1969
1970         VulkanFeatures  requiredFeatures;
1971         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
1972
1973         for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
1974         {
1975                 fragments["interface_op_func"]  = cases[caseIdx].interfaceOpFunc;
1976                 fragments["pre_main"]                   = cases[caseIdx].preMain;
1977
1978                 fragments["input_type"]                 = cases[caseIdx].inputType;
1979                 fragments["output_type"]                = cases[caseIdx].outputType;
1980
1981                 GraphicsInterfaces      interfaces;
1982                 const deUint32          numPerCase      = cases[caseIdx].numPerCase;
1983                 vector<deFloat16>       subInputs       (numPerCase);
1984                 vector<float>           subOutputs      (numPerCase);
1985
1986                 for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
1987                 {
1988                         string                  testName        = string(cases[caseIdx].name) + numberToString(caseNdx);
1989
1990                         for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
1991                         {
1992                                 subInputs[numNdx]       = float16Data[caseNdx * numPerCase + numNdx];
1993                                 subOutputs[numNdx]      = float32Data[caseNdx * numPerCase + numNdx];
1994                         }
1995                         interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT16), BufferSp(new Float16Buffer(subInputs))),
1996                                                                           std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_FLOAT32), BufferSp(new Float32Buffer(subOutputs))));
1997                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
1998                 }
1999         }
2000 }
2001
2002 void addGraphics16BitStorageInputOutputInt32To16Group (tcu::TestCaseGroup* testGroup)
2003 {
2004         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2005         RGBA                                                            defaultColors[4];
2006         vector<string>                                          extensions;
2007         map<string, string>                                     fragments                       = passthruFragments();
2008         const deUint32                                          numDataPoints           = 64;
2009         // inputs and outputs are declared to be vectors of signed integers.
2010         // However, depending on the test, they may be interpreted as unsiged
2011         // integers. That won't be a problem as long as we passed the bits
2012         // in faithfully to the pipeline.
2013         vector<deInt32>                                         inputs                          = getInt32s(rnd, numDataPoints);
2014         vector<deInt16>                                         outputs;
2015
2016         outputs.reserve(inputs.size());
2017         for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
2018                 outputs.push_back(static_cast<deInt16>(0xffff & inputs[numNdx]));
2019
2020         extensions.push_back("VK_KHR_16bit_storage");
2021
2022         fragments["capability"]                         = "OpCapability StorageInputOutput16\n";
2023         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"\n";
2024
2025         getDefaultColors(defaultColors);
2026
2027         const StringTemplate    scalarInterfaceOpFunc(
2028                         "%interface_op_func = OpFunction %${type16} None %${type16}_${type32}_function\n"
2029                         "        %io_param1 = OpFunctionParameter %${type32}\n"
2030                         "            %entry = OpLabel\n"
2031                         "                          %ret = ${convert} %${type16} %io_param1\n"
2032                         "                     OpReturnValue %ret\n"
2033                         "                     OpFunctionEnd\n");
2034
2035         const StringTemplate    scalarPreMain(
2036                         "             %${type16} = OpTypeInt 16 ${signed}\n"
2037                         "          %op_${type16} = OpTypePointer Output %${type16}\n"
2038                         "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
2039                         "        %op_a3${type16} = OpTypePointer Output %a3${type16}\n"
2040                         "%${type16}_${type32}_function = OpTypeFunction %${type16} %${type32}\n"
2041                         "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
2042                         "        %ip_a3${type32} = OpTypePointer Input %a3${type32}\n");
2043
2044         const StringTemplate    vecInterfaceOpFunc(
2045                         "%interface_op_func = OpFunction %${type16} None %${type16}_${type32}_function\n"
2046                         "        %io_param1 = OpFunctionParameter %${type32}\n"
2047                         "            %entry = OpLabel\n"
2048                         "                          %ret = ${convert} %${type16} %io_param1\n"
2049                         "                     OpReturnValue %ret\n"
2050                         "                     OpFunctionEnd\n");
2051
2052         const StringTemplate    vecPreMain(
2053                         "                       %i16 = OpTypeInt 16 1\n"
2054                         "                       %u16 = OpTypeInt 16 0\n"
2055                         "                 %v4i16 = OpTypeVector %i16 4\n"
2056                         "                 %v4u16 = OpTypeVector %u16 4\n"
2057                         "          %op_${type16} = OpTypePointer Output %${type16}\n"
2058                         "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
2059                         "        %op_a3${type16} = OpTypePointer Output %a3${type16}\n"
2060                         "%${type16}_${type32}_function = OpTypeFunction %${type16} %${type32}\n"
2061                         "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
2062                         "        %ip_a3${type32} = OpTypePointer Input %a3${type32}\n");
2063
2064         struct Case
2065         {
2066                 const char*                             name;
2067                 const StringTemplate&   interfaceOpFunc;
2068                 const StringTemplate&   preMain;
2069                 const char*                             type32;
2070                 const char*                             type16;
2071                 const char*                             sign;
2072                 const char*                             opcode;
2073                 deUint32                                numPerCase;
2074                 deUint32                                numElements;
2075         };
2076
2077         Case    cases[]         =
2078         {
2079                 {"scalar_sint", scalarInterfaceOpFunc,  scalarPreMain,  "i32",          "i16",          "1",    "OpSConvert",   4,              1},
2080                 {"scalar_uint", scalarInterfaceOpFunc,  scalarPreMain,  "u32",          "u16",          "0",    "OpUConvert",   4,              1},
2081                 {"vector_sint", vecInterfaceOpFunc,             vecPreMain,             "v4i32",        "v4i16",        "1",    "OpSConvert",   4 * 4,  4},
2082                 {"vector_uint", vecInterfaceOpFunc,             vecPreMain,             "v4u32",        "v4u16",        "0",    "OpUConvert",   4 * 4,  4},
2083         };
2084
2085         VulkanFeatures  requiredFeatures;
2086         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
2087
2088         for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
2089         {
2090                 map<string, string>                             specs;
2091
2092                 specs["type32"]                                 = cases[caseIdx].type32;
2093                 specs["type16"]                                 = cases[caseIdx].type16;
2094                 specs["signed"]                                 = cases[caseIdx].sign;
2095                 specs["convert"]                                = cases[caseIdx].opcode;
2096
2097                 fragments["pre_main"]                   = cases[caseIdx].preMain.specialize(specs);
2098                 fragments["interface_op_func"]  = cases[caseIdx].interfaceOpFunc.specialize(specs);
2099                 fragments["input_type"]                 = cases[caseIdx].type32;
2100                 fragments["output_type"]                = cases[caseIdx].type16;
2101
2102                 GraphicsInterfaces                              interfaces;
2103                 const deUint32                                  numPerCase      = cases[caseIdx].numPerCase;
2104                 vector<deInt32>                                 subInputs       (numPerCase);
2105                 vector<deInt16>                                 subOutputs      (numPerCase);
2106
2107                 for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
2108                 {
2109                         string                  testName        = string(cases[caseIdx].name) + numberToString(caseNdx);
2110
2111                         for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
2112                         {
2113                                 subInputs[numNdx]       = inputs[caseNdx * numPerCase + numNdx];
2114                                 subOutputs[numNdx]      = outputs[caseNdx * numPerCase + numNdx];
2115                         }
2116                         if (strcmp(cases[caseIdx].sign, "1") == 0)
2117                         {
2118                                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT32), BufferSp(new Int32Buffer(subInputs))),
2119                                                                                   std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT16), BufferSp(new Int16Buffer(subOutputs))));
2120                         }
2121                         else
2122                         {
2123                                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT32), BufferSp(new Int32Buffer(subInputs))),
2124                                                                                   std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT16), BufferSp(new Int16Buffer(subOutputs))));
2125                         }
2126                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
2127                 }
2128         }
2129 }
2130
2131 void addGraphics16BitStorageInputOutputInt16To32Group (tcu::TestCaseGroup* testGroup)
2132 {
2133         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2134         RGBA                                                            defaultColors[4];
2135         vector<string>                                          extensions;
2136         map<string, string>                                     fragments                       = passthruFragments();
2137         const deUint32                                          numDataPoints           = 64;
2138         // inputs and outputs are declared to be vectors of signed integers.
2139         // However, depending on the test, they may be interpreted as unsiged
2140         // integers. That won't be a problem as long as we passed the bits
2141         // in faithfully to the pipeline.
2142         vector<deInt16>                                         inputs                          = getInt16s(rnd, numDataPoints);
2143         vector<deInt32>                                         sOutputs;
2144         vector<deInt32>                                         uOutputs;
2145         const deUint16                                          signBitMask                     = 0x8000;
2146         const deUint32                                          signExtendMask          = 0xffff0000;
2147
2148         sOutputs.reserve(inputs.size());
2149         uOutputs.reserve(inputs.size());
2150
2151         for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
2152         {
2153                 uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
2154                 if (inputs[numNdx] & signBitMask)
2155                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
2156                 else
2157                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
2158         }
2159
2160         extensions.push_back("VK_KHR_16bit_storage");
2161
2162         fragments["capability"]                         = "OpCapability StorageInputOutput16\n";
2163         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"\n";
2164
2165         getDefaultColors(defaultColors);
2166
2167         const StringTemplate scalarIfOpFunc     (
2168                         "%interface_op_func = OpFunction %${type32} None %${type32}_${type16}_function\n"
2169                         "        %io_param1 = OpFunctionParameter %${type16}\n"
2170                         "            %entry = OpLabel\n"
2171                         "                          %ret = ${convert} %${type32} %io_param1\n"
2172                         "                     OpReturnValue %ret\n"
2173                         "                     OpFunctionEnd\n");
2174
2175         const StringTemplate scalarPreMain      (
2176                         "             %${type16} = OpTypeInt 16 ${signed}\n"
2177                         "          %ip_${type16} = OpTypePointer Input %${type16}\n"
2178                         "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
2179                         "        %ip_a3${type16} = OpTypePointer Input %a3${type16}\n"
2180                         "%${type32}_${type16}_function = OpTypeFunction %${type32} %${type16}\n"
2181                         "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
2182                         "        %op_a3${type32} = OpTypePointer Output %a3${type32}\n");
2183
2184         const StringTemplate vecIfOpFunc        (
2185                         "%interface_op_func = OpFunction %${type32} None %${type32}_${type16}_function\n"
2186                         "        %io_param1 = OpFunctionParameter %${type16}\n"
2187                         "            %entry = OpLabel\n"
2188                         "                          %ret = ${convert} %${type32} %io_param1\n"
2189                         "                     OpReturnValue %ret\n"
2190                         "                     OpFunctionEnd\n");
2191
2192         const StringTemplate vecPreMain (
2193                         "                       %i16 = OpTypeInt 16 1\n"
2194                         "                       %u16 = OpTypeInt 16 0\n"
2195                         "                 %v4i16 = OpTypeVector %i16 4\n"
2196                         "                 %v4u16 = OpTypeVector %u16 4\n"
2197                         "          %ip_${type16} = OpTypePointer Input %${type16}\n"
2198                         "           %a3${type16} = OpTypeArray %${type16} %c_i32_3\n"
2199                         "        %ip_a3${type16} = OpTypePointer Input %a3${type16}\n"
2200                         "%${type32}_${type16}_function = OpTypeFunction %${type32} %${type16}\n"
2201                         "           %a3${type32} = OpTypeArray %${type32} %c_i32_3\n"
2202                         "        %op_a3${type32} = OpTypePointer Output %a3${type32}\n");
2203
2204         struct Case
2205         {
2206                 const char*                             name;
2207                 const StringTemplate&   interfaceOpFunc;
2208                 const StringTemplate&   preMain;
2209                 const char*                             type32;
2210                 const char*                             type16;
2211                 const char*                             sign;
2212                 const char*                             opcode;
2213                 deUint32                                numPerCase;
2214                 deUint32                                numElements;
2215         };
2216
2217         Case    cases[]         =
2218         {
2219                 {"scalar_sint", scalarIfOpFunc, scalarPreMain,  "i32",          "i16",          "1",    "OpSConvert",   4,              1},
2220                 {"scalar_uint", scalarIfOpFunc, scalarPreMain,  "u32",          "u16",          "0",    "OpUConvert",   4,              1},
2221                 {"vector_sint", vecIfOpFunc,    vecPreMain,             "v4i32",        "v4i16",        "1",    "OpSConvert",   4 * 4,  4},
2222                 {"vector_uint", vecIfOpFunc,    vecPreMain,             "v4u32",        "v4u16",        "0",    "OpUConvert",   4 * 4,  4},
2223         };
2224
2225         VulkanFeatures  requiredFeatures;
2226         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_INPUT_OUTPUT;
2227
2228         for (deUint32 caseIdx = 0; caseIdx < DE_LENGTH_OF_ARRAY(cases); ++caseIdx)
2229         {
2230                 map<string, string>                             specs;
2231
2232                 specs["type32"]                                 = cases[caseIdx].type32;
2233                 specs["type16"]                                 = cases[caseIdx].type16;
2234                 specs["signed"]                                 = cases[caseIdx].sign;
2235                 specs["convert"]                                = cases[caseIdx].opcode;
2236
2237                 fragments["pre_main"]                   = cases[caseIdx].preMain.specialize(specs);
2238                 fragments["interface_op_func"]  = cases[caseIdx].interfaceOpFunc.specialize(specs);
2239                 fragments["input_type"]                 = cases[caseIdx].type16;
2240                 fragments["output_type"]                = cases[caseIdx].type32;
2241
2242                 GraphicsInterfaces                              interfaces;
2243                 const deUint32                                  numPerCase      = cases[caseIdx].numPerCase;
2244                 vector<deInt16>                                 subInputs       (numPerCase);
2245                 vector<deInt32>                                 subOutputs      (numPerCase);
2246
2247                 for (deUint32 caseNdx = 0; caseNdx < numDataPoints / numPerCase; ++caseNdx)
2248                 {
2249                         string                  testName        = string(cases[caseIdx].name) + numberToString(caseNdx);
2250
2251                         for (deUint32 numNdx = 0; numNdx < numPerCase; ++numNdx)
2252                         {
2253                                 subInputs[numNdx]       = inputs[caseNdx * numPerCase + numNdx];
2254                                 if (cases[caseIdx].sign[0] == '1')
2255                                         subOutputs[numNdx]      = sOutputs[caseNdx * numPerCase + numNdx];
2256                                 else
2257                                         subOutputs[numNdx]      = uOutputs[caseNdx * numPerCase + numNdx];
2258                         }
2259                         if (strcmp(cases[caseIdx].sign, "1") == 0)
2260                         {
2261                                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT16), BufferSp(new Int16Buffer(subInputs))),
2262                                                                                   std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_INT32), BufferSp(new Int32Buffer(subOutputs))));
2263                         }
2264                         else
2265                         {
2266                                 interfaces.setInputOutput(std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT16), BufferSp(new Int16Buffer(subInputs))),
2267                                                                                   std::make_pair(IFDataType(cases[caseIdx].numElements, NUMBERTYPE_UINT32), BufferSp(new Int32Buffer(subOutputs))));
2268                         }
2269                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, interfaces, extensions, testGroup, requiredFeatures);
2270                 }
2271         }
2272 }
2273
2274 void addGraphics16BitStoragePushConstantFloat16To32Group (tcu::TestCaseGroup* testGroup)
2275 {
2276         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2277         map<string, string>                                     fragments;
2278         RGBA                                                            defaultColors[4];
2279         vector<string>                                          extensions;
2280         GraphicsResources                                       resources;
2281         PushConstants                                           pcs;
2282         const deUint32                                          numDataPoints           = 64;
2283         vector<deFloat16>                                       float16Data                     (getFloat16s(rnd, numDataPoints));
2284         vector<float>                                           float32Data;
2285         VulkanFeatures                                          requiredFeatures;
2286
2287         float32Data.reserve(numDataPoints);
2288         for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
2289                 float32Data.push_back(deFloat16To32(float16Data[numIdx]));
2290
2291         extensions.push_back("VK_KHR_16bit_storage");
2292         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
2293
2294         fragments["capability"]                         = "OpCapability StoragePushConstant16\n";
2295         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"";
2296
2297         pcs.setPushConstant(BufferSp(new Float16Buffer(float16Data)));
2298         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data))));
2299         resources.verifyIO = check32BitFloats;
2300
2301         getDefaultColors(defaultColors);
2302
2303         const StringTemplate    testFun         (
2304                 "%test_code = OpFunction %v4f32 None %v4f32_function\n"
2305                 "    %param = OpFunctionParameter %v4f32\n"
2306
2307                 "%entry = OpLabel\n"
2308                 "    %i = OpVariable %fp_i32 Function\n"
2309                 "         OpStore %i %c_i32_0\n"
2310                 "         OpBranch %loop\n"
2311
2312                 " %loop = OpLabel\n"
2313                 "   %15 = OpLoad %i32 %i\n"
2314                 "   %lt = OpSLessThan %bool %15 ${count}\n"
2315                 "         OpLoopMerge %merge %inc None\n"
2316                 "         OpBranchConditional %lt %write %merge\n"
2317
2318                 "%write = OpLabel\n"
2319                 "   %30 = OpLoad %i32 %i\n"
2320                 "  %src = OpAccessChain ${pp_type16} %pc16 %c_i32_0 %30 ${index0:opt}\n"
2321                 "%val16 = OpLoad ${f_type16} %src\n"
2322                 "%val32 = OpFConvert ${f_type32} %val16\n"
2323                 "  %dst = OpAccessChain ${up_type32} %ssbo32 %c_i32_0 %30 ${index0:opt}\n"
2324                 "         OpStore %dst %val32\n"
2325
2326                 "${store:opt}\n"
2327
2328                 "         OpBranch %inc\n"
2329
2330                 "  %inc = OpLabel\n"
2331                 "   %37 = OpLoad %i32 %i\n"
2332                 "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2333                 "         OpStore %i %39\n"
2334                 "         OpBranch %loop\n"
2335
2336                 "%merge = OpLabel\n"
2337                 "         OpReturnValue %param\n"
2338
2339                 "OpFunctionEnd\n");
2340
2341         {  // Scalar cases
2342                 fragments["pre_main"]                           =
2343                         "      %f16 = OpTypeFloat 16\n"
2344                         " %c_i32_64 = OpConstant %i32 64\n"                                     // Should be the same as numDataPoints
2345                         "   %a64f16 = OpTypeArray %f16 %c_i32_64\n"
2346                         "   %a64f32 = OpTypeArray %f32 %c_i32_64\n"
2347                         "   %pp_f16 = OpTypePointer PushConstant %f16\n"
2348                         "   %up_f32 = OpTypePointer Uniform %f32\n"
2349                         "   %SSBO32 = OpTypeStruct %a64f32\n"
2350                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2351                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2352                         "     %PC16 = OpTypeStruct %a64f16\n"
2353                         "  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
2354                         "     %pc16 = OpVariable %pp_PC16 PushConstant\n";
2355
2356                 fragments["decoration"]                         =
2357                         "OpDecorate %a64f16 ArrayStride 2\n"
2358                         "OpDecorate %a64f32 ArrayStride 4\n"
2359                         "OpDecorate %SSBO32 BufferBlock\n"
2360                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2361                         "OpDecorate %PC16 Block\n"
2362                         "OpMemberDecorate %PC16 0 Offset 0\n"
2363                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2364                         "OpDecorate %ssbo32 Binding 0\n";
2365
2366                 map<string, string>             specs;
2367
2368                 specs["count"]                  = "%c_i32_64";
2369                 specs["pp_type16"]              = "%pp_f16";
2370                 specs["f_type16"]               = "%f16";
2371                 specs["f_type32"]               = "%f32";
2372                 specs["up_type32"]              = "%up_f32";
2373
2374                 fragments["testfun"]    = testFun.specialize(specs);
2375
2376                 createTestsForAllStages("scalar", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2377         }
2378
2379         {  // Vector cases
2380                 fragments["pre_main"]                           =
2381                         "      %f16 = OpTypeFloat 16\n"
2382                         "    %v4f16 = OpTypeVector %f16 4\n"
2383                         " %c_i32_16 = OpConstant %i32 16\n"
2384                         " %a16v4f16 = OpTypeArray %v4f16 %c_i32_16\n"
2385                         " %a16v4f32 = OpTypeArray %v4f32 %c_i32_16\n"
2386                         " %pp_v4f16 = OpTypePointer PushConstant %v4f16\n"
2387                         " %up_v4f32 = OpTypePointer Uniform %v4f32\n"
2388                         "   %SSBO32 = OpTypeStruct %a16v4f32\n"
2389                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2390                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2391                         "     %PC16 = OpTypeStruct %a16v4f16\n"
2392                         "  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
2393                         "     %pc16 = OpVariable %pp_PC16 PushConstant\n";
2394
2395                 fragments["decoration"]                         =
2396                         "OpDecorate %a16v4f16 ArrayStride 8\n"
2397                         "OpDecorate %a16v4f32 ArrayStride 16\n"
2398                         "OpDecorate %SSBO32 BufferBlock\n"
2399                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2400                         "OpDecorate %PC16 Block\n"
2401                         "OpMemberDecorate %PC16 0 Offset 0\n"
2402                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2403                         "OpDecorate %ssbo32 Binding 0\n";
2404
2405                 map<string, string>             specs;
2406
2407                 specs["count"]                  = "%c_i32_16";
2408                 specs["pp_type16"]              = "%pp_v4f16";
2409                 specs["f_type16"]               = "%v4f16";
2410                 specs["f_type32"]               = "%v4f32";
2411                 specs["up_type32"]              = "%up_v4f32";
2412
2413                 fragments["testfun"]    = testFun.specialize(specs);
2414
2415                 createTestsForAllStages("vector", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2416         }
2417
2418         {  // Matrix cases
2419                 fragments["pre_main"]                           =
2420                         "  %c_i32_8 = OpConstant %i32 8\n"
2421                         "      %f16 = OpTypeFloat 16\n"
2422                         "    %v4f16 = OpTypeVector %f16 4\n"
2423                         "  %m2v4f16 = OpTypeMatrix %v4f16 2\n"
2424                         "  %m2v4f32 = OpTypeMatrix %v4f32 2\n"
2425                         "%a8m2v4f16 = OpTypeArray %m2v4f16 %c_i32_8\n"
2426                         "%a8m2v4f32 = OpTypeArray %m2v4f32 %c_i32_8\n"
2427                         " %pp_v4f16 = OpTypePointer PushConstant %v4f16\n"
2428                         " %up_v4f32 = OpTypePointer Uniform %v4f32\n"
2429                         "   %SSBO32 = OpTypeStruct %a8m2v4f32\n"
2430                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2431                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2432                         "     %PC16 = OpTypeStruct %a8m2v4f16\n"
2433                         "  %pp_PC16 = OpTypePointer PushConstant %PC16\n"
2434                         "     %pc16 = OpVariable %pp_PC16 PushConstant\n";
2435
2436                 fragments["decoration"]                         =
2437                         "OpDecorate %a8m2v4f16 ArrayStride 16\n"
2438                         "OpDecorate %a8m2v4f32 ArrayStride 32\n"
2439                         "OpDecorate %SSBO32 BufferBlock\n"
2440                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2441                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
2442                         "OpMemberDecorate %SSBO32 0 MatrixStride 16\n"
2443                         "OpDecorate %PC16 Block\n"
2444                         "OpMemberDecorate %PC16 0 Offset 0\n"
2445                         "OpMemberDecorate %PC16 0 ColMajor\n"
2446                         "OpMemberDecorate %PC16 0 MatrixStride 8\n"
2447                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2448                         "OpDecorate %ssbo32 Binding 0\n";
2449
2450                 map<string, string>             specs;
2451
2452                 specs["count"]                  = "%c_i32_8";
2453                 specs["pp_type16"]              = "%pp_v4f16";
2454                 specs["up_type32"]              = "%up_v4f32";
2455                 specs["f_type16"]               = "%v4f16";
2456                 specs["f_type32"]               = "%v4f32";
2457                 specs["index0"]                 = "%c_i32_0";
2458                 specs["store"]                  =
2459                         "  %src_1 = OpAccessChain %pp_v4f16 %pc16 %c_i32_0 %30 %c_i32_1\n"
2460                         "%val16_1 = OpLoad %v4f16 %src_1\n"
2461                         "%val32_1 = OpFConvert %v4f32 %val16_1\n"
2462                         "  %dst_1 = OpAccessChain %up_v4f32 %ssbo32 %c_i32_0 %30 %c_i32_1\n"
2463                         "           OpStore %dst_1 %val32_1\n";
2464
2465                 fragments["testfun"]    = testFun.specialize(specs);
2466
2467                 createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2468         }
2469 }
2470
2471 void addGraphics16BitStoragePushConstantInt16To32Group (tcu::TestCaseGroup* testGroup)
2472 {
2473         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2474         map<string, string>                                     fragments;
2475         RGBA                                                            defaultColors[4];
2476         const deUint32                                          numDataPoints           = 64;
2477         vector<deInt16>                                         inputs                          = getInt16s(rnd, numDataPoints);
2478         vector<deInt32>                                         sOutputs;
2479         vector<deInt32>                                         uOutputs;
2480         PushConstants                                           pcs;
2481         GraphicsResources                                       resources;
2482         vector<string>                                          extensions;
2483         const deUint16                                          signBitMask                     = 0x8000;
2484         const deUint32                                          signExtendMask          = 0xffff0000;
2485         VulkanFeatures                                          requiredFeatures;
2486
2487         sOutputs.reserve(inputs.size());
2488         uOutputs.reserve(inputs.size());
2489
2490         for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
2491         {
2492                 uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
2493                 if (inputs[numNdx] & signBitMask)
2494                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
2495                 else
2496                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
2497         }
2498
2499         extensions.push_back("VK_KHR_16bit_storage");
2500         requiredFeatures.ext16BitStorage = EXT16BITSTORAGEFEATURES_PUSH_CONSTANT;
2501
2502         fragments["capability"]                         = "OpCapability StoragePushConstant16\n";
2503         fragments["extension"]                          = "OpExtension \"SPV_KHR_16bit_storage\"";
2504
2505         pcs.setPushConstant(BufferSp(new Int16Buffer(inputs)));
2506
2507         getDefaultColors(defaultColors);
2508
2509         const StringTemplate    testFun         (
2510                 "%test_code = OpFunction %v4f32 None %v4f32_function\n"
2511                 "    %param = OpFunctionParameter %v4f32\n"
2512
2513                 "%entry = OpLabel\n"
2514                 "    %i = OpVariable %fp_i32 Function\n"
2515                 "         OpStore %i %c_i32_0\n"
2516                 "         OpBranch %loop\n"
2517
2518                 " %loop = OpLabel\n"
2519                 "   %15 = OpLoad %i32 %i\n"
2520                 "   %lt = OpSLessThan %bool %15 %c_i32_${count}\n"
2521                 "         OpLoopMerge %merge %inc None\n"
2522                 "         OpBranchConditional %lt %write %merge\n"
2523
2524                 "%write = OpLabel\n"
2525                 "   %30 = OpLoad %i32 %i\n"
2526                 "  %src = OpAccessChain %pp_${type16} %pc16 %c_i32_0 %30\n"
2527                 "%val16 = OpLoad %${type16} %src\n"
2528                 "%val32 = ${convert} %${type32} %val16\n"
2529                 "  %dst = OpAccessChain %up_${type32} %ssbo32 %c_i32_0 %30\n"
2530                 "         OpStore %dst %val32\n"
2531                 "         OpBranch %inc\n"
2532
2533                 "  %inc = OpLabel\n"
2534                 "   %37 = OpLoad %i32 %i\n"
2535                 "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2536                 "         OpStore %i %39\n"
2537                 "         OpBranch %loop\n"
2538
2539                 "%merge = OpLabel\n"
2540                 "         OpReturnValue %param\n"
2541
2542                 "OpFunctionEnd\n");
2543
2544         {  // Scalar cases
2545                 const StringTemplate    preMain         (
2546                         "         %${type16} = OpTypeInt 16 ${signed}\n"
2547                         "    %c_i32_${count} = OpConstant %i32 ${count}\n"                                      // Should be the same as numDataPoints
2548                         "%a${count}${type16} = OpTypeArray %${type16} %c_i32_${count}\n"
2549                         "%a${count}${type32} = OpTypeArray %${type32} %c_i32_${count}\n"
2550                         "      %pp_${type16} = OpTypePointer PushConstant %${type16}\n"
2551                         "      %up_${type32} = OpTypePointer Uniform      %${type32}\n"
2552                         "            %SSBO32 = OpTypeStruct %a${count}${type32}\n"
2553                         "         %up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2554                         "            %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2555                         "              %PC16 = OpTypeStruct %a${count}${type16}\n"
2556                         "           %pp_PC16 = OpTypePointer PushConstant %PC16\n"
2557                         "              %pc16 = OpVariable %pp_PC16 PushConstant\n");
2558
2559                 const StringTemplate    decoration      (
2560                         "OpDecorate %a${count}${type16} ArrayStride 2\n"
2561                         "OpDecorate %a${count}${type32} ArrayStride 4\n"
2562                         "OpDecorate %SSBO32 BufferBlock\n"
2563                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2564                         "OpDecorate %PC16 Block\n"
2565                         "OpMemberDecorate %PC16 0 Offset 0\n"
2566                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2567                         "OpDecorate %ssbo32 Binding 0\n");
2568
2569                 {  // signed int
2570                         map<string, string>             specs;
2571
2572                         specs["type16"]                 = "i16";
2573                         specs["type32"]                 = "i32";
2574                         specs["signed"]                 = "1";
2575                         specs["count"]                  = "64";
2576                         specs["convert"]                = "OpSConvert";
2577
2578                         fragments["testfun"]    = testFun.specialize(specs);
2579                         fragments["pre_main"]   = preMain.specialize(specs);
2580                         fragments["decoration"] = decoration.specialize(specs);
2581
2582                         resources.outputs.clear();
2583                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(sOutputs))));
2584                         createTestsForAllStages("sint_scalar", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2585                 }
2586                 {  // unsigned int
2587                         map<string, string>             specs;
2588
2589                         specs["type16"]                 = "u16";
2590                         specs["type32"]                 = "u32";
2591                         specs["signed"]                 = "0";
2592                         specs["count"]                  = "64";
2593                         specs["convert"]                = "OpUConvert";
2594
2595                         fragments["testfun"]    = testFun.specialize(specs);
2596                         fragments["pre_main"]   = preMain.specialize(specs);
2597                         fragments["decoration"] = decoration.specialize(specs);
2598
2599                         resources.outputs.clear();
2600                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(uOutputs))));
2601                         createTestsForAllStages("uint_scalar", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2602                 }
2603         }
2604
2605         {  // Vector cases
2606                 const StringTemplate    preMain         (
2607                         "    %${base_type16} = OpTypeInt 16 ${signed}\n"
2608                         "         %${type16} = OpTypeVector %${base_type16} 2\n"
2609                         "    %c_i32_${count} = OpConstant %i32 ${count}\n"
2610                         "%a${count}${type16} = OpTypeArray %${type16} %c_i32_${count}\n"
2611                         "%a${count}${type32} = OpTypeArray %${type32} %c_i32_${count}\n"
2612                         "      %pp_${type16} = OpTypePointer PushConstant %${type16}\n"
2613                         "      %up_${type32} = OpTypePointer Uniform      %${type32}\n"
2614                         "            %SSBO32 = OpTypeStruct %a${count}${type32}\n"
2615                         "         %up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2616                         "            %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2617                         "              %PC16 = OpTypeStruct %a${count}${type16}\n"
2618                         "           %pp_PC16 = OpTypePointer PushConstant %PC16\n"
2619                         "              %pc16 = OpVariable %pp_PC16 PushConstant\n");
2620
2621                 const StringTemplate    decoration      (
2622                         "OpDecorate %a${count}${type16} ArrayStride 4\n"
2623                         "OpDecorate %a${count}${type32} ArrayStride 8\n"
2624                         "OpDecorate %SSBO32 BufferBlock\n"
2625                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2626                         "OpDecorate %PC16 Block\n"
2627                         "OpMemberDecorate %PC16 0 Offset 0\n"
2628                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2629                         "OpDecorate %ssbo32 Binding 0\n");
2630
2631                 {  // signed int
2632                         map<string, string>             specs;
2633
2634                         specs["base_type16"]    = "i16";
2635                         specs["type16"]                 = "v2i16";
2636                         specs["type32"]                 = "v2i32";
2637                         specs["signed"]                 = "1";
2638                         specs["count"]                  = "32";                         // 64 / 2
2639                         specs["convert"]                = "OpSConvert";
2640
2641                         fragments["testfun"]    = testFun.specialize(specs);
2642                         fragments["pre_main"]   = preMain.specialize(specs);
2643                         fragments["decoration"] = decoration.specialize(specs);
2644
2645                         resources.outputs.clear();
2646                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(sOutputs))));
2647                         createTestsForAllStages("sint_vector", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2648                 }
2649                 {  // signed int
2650                         map<string, string>             specs;
2651
2652                         specs["base_type16"]    = "u16";
2653                         specs["type16"]                 = "v2u16";
2654                         specs["type32"]                 = "v2u32";
2655                         specs["signed"]                 = "0";
2656                         specs["count"]                  = "32";
2657                         specs["convert"]                = "OpUConvert";
2658
2659                         fragments["testfun"]    = testFun.specialize(specs);
2660                         fragments["pre_main"]   = preMain.specialize(specs);
2661                         fragments["decoration"] = decoration.specialize(specs);
2662
2663                         resources.outputs.clear();
2664                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(uOutputs))));
2665                         createTestsForAllStages("uint_vector", defaultColors, defaultColors, fragments, pcs, resources, extensions, testGroup, requiredFeatures);
2666                 }
2667         }
2668 }
2669
2670 void addGraphics16BitStorageUniformInt16To32Group (tcu::TestCaseGroup* testGroup)
2671 {
2672         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2673         map<string, string>                                     fragments;
2674         const deUint32                                          numDataPoints           = 256;
2675         RGBA                                                            defaultColors[4];
2676         vector<deInt16>                                         inputs                          = getInt16s(rnd, numDataPoints);
2677         vector<deInt32>                                         sOutputs;
2678         vector<deInt32>                                         uOutputs;
2679         GraphicsResources                                       resources;
2680         vector<string>                                          extensions;
2681         const deUint16                                          signBitMask                     = 0x8000;
2682         const deUint32                                          signExtendMask          = 0xffff0000;
2683         const StringTemplate                            capabilities            ("OpCapability ${cap}\n");
2684
2685         sOutputs.reserve(inputs.size());
2686         uOutputs.reserve(inputs.size());
2687
2688         for (deUint32 numNdx = 0; numNdx < inputs.size(); ++numNdx)
2689         {
2690                 uOutputs.push_back(static_cast<deUint16>(inputs[numNdx]));
2691                 if (inputs[numNdx] & signBitMask)
2692                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx] | signExtendMask));
2693                 else
2694                         sOutputs.push_back(static_cast<deInt32>(inputs[numNdx]));
2695         }
2696
2697         resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int16Buffer(inputs))));
2698
2699         extensions.push_back("VK_KHR_16bit_storage");
2700         fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"";
2701
2702         getDefaultColors(defaultColors);
2703
2704         struct IntegerFacts
2705         {
2706                 const char*     name;
2707                 const char*     type32;
2708                 const char*     type16;
2709                 const char* opcode;
2710                 bool            isSigned;
2711         };
2712
2713         const IntegerFacts      intFacts[]      =
2714         {
2715                 {"sint",        "%i32",         "%i16",         "OpSConvert",   true},
2716                 {"uint",        "%u32",         "%u16",         "OpUConvert",   false},
2717         };
2718
2719         const StringTemplate scalarPreMain              (
2720                         "${itype16} = OpTypeInt 16 ${signed}\n"
2721                         " %c_i32_256 = OpConstant %i32 256\n"
2722                         "   %up_i32 = OpTypePointer Uniform ${itype32}\n"
2723                         "   %up_i16 = OpTypePointer Uniform ${itype16}\n"
2724                         "   %ra_i32 = OpTypeArray ${itype32} %c_i32_256\n"
2725                         "   %ra_i16 = OpTypeArray ${itype16} %c_i32_256\n"
2726                         "   %SSBO32 = OpTypeStruct %ra_i32\n"
2727                         "   %SSBO16 = OpTypeStruct %ra_i16\n"
2728                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2729                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
2730                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2731                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
2732
2733         const StringTemplate scalarDecoration           (
2734                         "OpDecorate %ra_i32 ArrayStride 4\n"
2735                         "OpDecorate %ra_i16 ArrayStride 2\n"
2736                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2737                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
2738                         "OpDecorate %SSBO32 BufferBlock\n"
2739                         "OpDecorate %SSBO16 ${indecor}\n"
2740                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2741                         "OpDecorate %ssbo16 DescriptorSet 0\n"
2742                         "OpDecorate %ssbo32 Binding 1\n"
2743                         "OpDecorate %ssbo16 Binding 0\n");
2744
2745         const StringTemplate scalarTestFunc     (
2746                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
2747                         "    %param = OpFunctionParameter %v4f32\n"
2748
2749                         "%entry = OpLabel\n"
2750                         "    %i = OpVariable %fp_i32 Function\n"
2751                         "         OpStore %i %c_i32_0\n"
2752                         "         OpBranch %loop\n"
2753
2754                         " %loop = OpLabel\n"
2755                         "   %15 = OpLoad %i32 %i\n"
2756                         "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
2757                         "         OpLoopMerge %merge %inc None\n"
2758                         "         OpBranchConditional %lt %write %merge\n"
2759
2760                         "%write = OpLabel\n"
2761                         "   %30 = OpLoad %i32 %i\n"
2762                         "  %src = OpAccessChain %up_i16 %ssbo16 %c_i32_0 %30\n"
2763                         "%val16 = OpLoad ${itype16} %src\n"
2764                         "%val32 = ${convert} ${itype32} %val16\n"
2765                         "  %dst = OpAccessChain %up_i32 %ssbo32 %c_i32_0 %30\n"
2766                         "         OpStore %dst %val32\n"
2767                         "         OpBranch %inc\n"
2768
2769                         "  %inc = OpLabel\n"
2770                         "   %37 = OpLoad %i32 %i\n"
2771                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2772                         "         OpStore %i %39\n"
2773                         "         OpBranch %loop\n"
2774                         "%merge = OpLabel\n"
2775                         "         OpReturnValue %param\n"
2776
2777                         "OpFunctionEnd\n");
2778
2779         const StringTemplate vecPreMain         (
2780                         "${itype16} = OpTypeInt 16 ${signed}\n"
2781                         "%c_i32_128 = OpConstant %i32 128\n"
2782                         "%v2itype16 = OpTypeVector ${itype16} 2\n"
2783                         "%v2itype32 = OpTypeVector ${itype32} 2\n"
2784                         " %up_v2i32 = OpTypePointer Uniform %v2itype32\n"
2785                         " %up_v2i16 = OpTypePointer Uniform %v2itype16\n"
2786                         " %ra_v2i32 = OpTypeArray %v2itype32 %c_i32_128\n"
2787                         " %ra_v2i16 = OpTypeArray %v2itype16 %c_i32_128\n"
2788                         "   %SSBO32 = OpTypeStruct %ra_v2i32\n"
2789                         "   %SSBO16 = OpTypeStruct %ra_v2i16\n"
2790                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2791                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
2792                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2793                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n");
2794
2795         const StringTemplate vecDecoration              (
2796                         "OpDecorate %ra_v2i32 ArrayStride 8\n"
2797                         "OpDecorate %ra_v2i16 ArrayStride 4\n"
2798                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2799                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
2800                         "OpDecorate %SSBO32 BufferBlock\n"
2801                         "OpDecorate %SSBO16 ${indecor}\n"
2802                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2803                         "OpDecorate %ssbo16 DescriptorSet 0\n"
2804                         "OpDecorate %ssbo32 Binding 1\n"
2805                         "OpDecorate %ssbo16 Binding 0\n");
2806
2807         const StringTemplate vecTestFunc        (
2808                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
2809                         "    %param = OpFunctionParameter %v4f32\n"
2810
2811                         "%entry = OpLabel\n"
2812                         "    %i = OpVariable %fp_i32 Function\n"
2813                         "         OpStore %i %c_i32_0\n"
2814                         "         OpBranch %loop\n"
2815
2816                         " %loop = OpLabel\n"
2817                         "   %15 = OpLoad %i32 %i\n"
2818                         "   %lt = OpSLessThan %bool %15 %c_i32_128\n"
2819                         "         OpLoopMerge %merge %inc None\n"
2820                         "         OpBranchConditional %lt %write %merge\n"
2821
2822                         "%write = OpLabel\n"
2823                         "   %30 = OpLoad %i32 %i\n"
2824                         "  %src = OpAccessChain %up_v2i16 %ssbo16 %c_i32_0 %30\n"
2825                         "%val16 = OpLoad %v2itype16 %src\n"
2826                         "%val32 = ${convert} %v2itype32 %val16\n"
2827                         "  %dst = OpAccessChain %up_v2i32 %ssbo32 %c_i32_0 %30\n"
2828                         "         OpStore %dst %val32\n"
2829                         "         OpBranch %inc\n"
2830
2831                         "  %inc = OpLabel\n"
2832                         "   %37 = OpLoad %i32 %i\n"
2833                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2834                         "         OpStore %i %39\n"
2835                         "         OpBranch %loop\n"
2836                         "%merge = OpLabel\n"
2837                         "         OpReturnValue %param\n"
2838
2839                         "OpFunctionEnd\n");
2840
2841         struct Category
2842         {
2843                 const char*                             name;
2844                 const StringTemplate&   preMain;
2845                 const StringTemplate&   decoration;
2846                 const StringTemplate&   testFunction;
2847         };
2848
2849         const Category          categories[]    =
2850         {
2851                 {"scalar", scalarPreMain, scalarDecoration, scalarTestFunc},
2852                 {"vector", vecPreMain, vecDecoration, vecTestFunc},
2853         };
2854
2855         for (deUint32 catIdx = 0; catIdx < DE_LENGTH_OF_ARRAY(categories); ++catIdx)
2856                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2857                         for (deUint32 factIdx = 0; factIdx < DE_LENGTH_OF_ARRAY(intFacts); ++factIdx)
2858                         {
2859                                 map<string, string>     specs;
2860                                 string                          name            = string(CAPABILITIES[capIdx].name) + "_" + categories[catIdx].name + "_" + intFacts[factIdx].name;
2861
2862                                 specs["cap"]                                    = CAPABILITIES[capIdx].cap;
2863                                 specs["indecor"]                                = CAPABILITIES[capIdx].decor;
2864                                 specs["itype32"]                                = intFacts[factIdx].type32;
2865                                 specs["itype16"]                                = intFacts[factIdx].type16;
2866                                 if (intFacts[factIdx].isSigned)
2867                                         specs["signed"]                         = "1";
2868                                 else
2869                                         specs["signed"]                         = "0";
2870                                 specs["convert"]                                = intFacts[factIdx].opcode;
2871
2872                                 fragments["pre_main"]                   = categories[catIdx].preMain.specialize(specs);
2873                                 fragments["testfun"]                    = categories[catIdx].testFunction.specialize(specs);
2874                                 fragments["capability"]                 = capabilities.specialize(specs);
2875                                 fragments["decoration"]                 = categories[catIdx].decoration.specialize(specs);
2876
2877                                 resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
2878                                 resources.outputs.clear();
2879                                 if (intFacts[factIdx].isSigned)
2880                                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(sOutputs))));
2881                                 else
2882                                         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Int32Buffer(uOutputs))));
2883
2884                                 createTestsForAllStages(name, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
2885                         }
2886 }
2887
2888 void addGraphics16BitStorageUniformFloat16To32Group (tcu::TestCaseGroup* testGroup)
2889 {
2890         de::Random                                                      rnd                                     (deStringHash(testGroup->getName()));
2891         map<string, string>                                     fragments;
2892         GraphicsResources                                       resources;
2893         vector<string>                                          extensions;
2894         const deUint32                                          numDataPoints           = 256;
2895         RGBA                                                            defaultColors[4];
2896         const StringTemplate                            capabilities            ("OpCapability ${cap}\n");
2897         vector<deFloat16>                                       float16Data                     = getFloat16s(rnd, numDataPoints);
2898         vector<float>                                           float32Data;
2899
2900         float32Data.reserve(numDataPoints);
2901         for (deUint32 numIdx = 0; numIdx < numDataPoints; ++numIdx)
2902                 float32Data.push_back(deFloat16To32(float16Data[numIdx]));
2903
2904         resources.inputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float16Buffer(float16Data))));
2905         resources.outputs.push_back(std::make_pair(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, BufferSp(new Float32Buffer(float32Data))));
2906         resources.verifyIO = check32BitFloats;
2907
2908         extensions.push_back("VK_KHR_16bit_storage");
2909         fragments["extension"]  = "OpExtension \"SPV_KHR_16bit_storage\"";
2910
2911         getDefaultColors(defaultColors);
2912
2913         { // scalar cases
2914                 fragments["pre_main"]                           =
2915                         "      %f16 = OpTypeFloat 16\n"
2916                         "%c_i32_256 = OpConstant %i32 256\n"
2917                         "   %up_f32 = OpTypePointer Uniform %f32\n"
2918                         "   %up_f16 = OpTypePointer Uniform %f16\n"
2919                         "   %ra_f32 = OpTypeArray %f32 %c_i32_256\n"
2920                         "   %ra_f16 = OpTypeArray %f16 %c_i32_256\n"
2921                         "   %SSBO32 = OpTypeStruct %ra_f32\n"
2922                         "   %SSBO16 = OpTypeStruct %ra_f16\n"
2923                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
2924                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
2925                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
2926                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
2927
2928                 const StringTemplate decoration         (
2929                         "OpDecorate %ra_f32 ArrayStride 4\n"
2930                         "OpDecorate %ra_f16 ArrayStride 2\n"
2931                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
2932                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
2933                         "OpDecorate %SSBO32 BufferBlock\n"
2934                         "OpDecorate %SSBO16 ${indecor}\n"
2935                         "OpDecorate %ssbo32 DescriptorSet 0\n"
2936                         "OpDecorate %ssbo16 DescriptorSet 0\n"
2937                         "OpDecorate %ssbo32 Binding 1\n"
2938                         "OpDecorate %ssbo16 Binding 0\n");
2939
2940                 // ssbo32[] <- convert ssbo16[] to 32bit float
2941                 fragments["testfun"]                            =
2942                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
2943                         "    %param = OpFunctionParameter %v4f32\n"
2944
2945                         "%entry = OpLabel\n"
2946                         "    %i = OpVariable %fp_i32 Function\n"
2947                         "         OpStore %i %c_i32_0\n"
2948                         "         OpBranch %loop\n"
2949
2950                         " %loop = OpLabel\n"
2951                         "   %15 = OpLoad %i32 %i\n"
2952                         "   %lt = OpSLessThan %bool %15 %c_i32_256\n"
2953                         "         OpLoopMerge %merge %inc None\n"
2954                         "         OpBranchConditional %lt %write %merge\n"
2955
2956                         "%write = OpLabel\n"
2957                         "   %30 = OpLoad %i32 %i\n"
2958                         "  %src = OpAccessChain %up_f16 %ssbo16 %c_i32_0 %30\n"
2959                         "%val16 = OpLoad %f16 %src\n"
2960                         "%val32 = OpFConvert %f32 %val16\n"
2961                         "  %dst = OpAccessChain %up_f32 %ssbo32 %c_i32_0 %30\n"
2962                         "         OpStore %dst %val32\n"
2963                         "         OpBranch %inc\n"
2964
2965                         "  %inc = OpLabel\n"
2966                         "   %37 = OpLoad %i32 %i\n"
2967                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
2968                         "         OpStore %i %39\n"
2969                         "         OpBranch %loop\n"
2970
2971                         "%merge = OpLabel\n"
2972                         "         OpReturnValue %param\n"
2973
2974                         "OpFunctionEnd\n";
2975
2976                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
2977                 {
2978                         map<string, string>     specs;
2979                         string                          testName        = string(CAPABILITIES[capIdx].name) + "_scalar_float";
2980
2981                         specs["cap"]                                    = CAPABILITIES[capIdx].cap;
2982                         specs["indecor"]                                = CAPABILITIES[capIdx].decor;
2983
2984                         fragments["capability"]                 = capabilities.specialize(specs);
2985                         fragments["decoration"]                 = decoration.specialize(specs);
2986
2987                         resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
2988
2989                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
2990                 }
2991         }
2992
2993         { // vector cases
2994                 fragments["pre_main"]                           =
2995                         "      %f16 = OpTypeFloat 16\n"
2996                         "%c_i32_128 = OpConstant %i32 128\n"
2997                         "        %v2f16 = OpTypeVector %f16 2\n"
2998                         " %up_v2f32 = OpTypePointer Uniform %v2f32\n"
2999                         " %up_v2f16 = OpTypePointer Uniform %v2f16\n"
3000                         " %ra_v2f32 = OpTypeArray %v2f32 %c_i32_128\n"
3001                         " %ra_v2f16 = OpTypeArray %v2f16 %c_i32_128\n"
3002                         "   %SSBO32 = OpTypeStruct %ra_v2f32\n"
3003                         "   %SSBO16 = OpTypeStruct %ra_v2f16\n"
3004                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
3005                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
3006                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
3007                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
3008
3009                 const StringTemplate decoration         (
3010                         "OpDecorate %ra_v2f32 ArrayStride 8\n"
3011                         "OpDecorate %ra_v2f16 ArrayStride 4\n"
3012                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
3013                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
3014                         "OpDecorate %SSBO32 BufferBlock\n"
3015                         "OpDecorate %SSBO16 ${indecor}\n"
3016                         "OpDecorate %ssbo32 DescriptorSet 0\n"
3017                         "OpDecorate %ssbo16 DescriptorSet 0\n"
3018                         "OpDecorate %ssbo32 Binding 1\n"
3019                         "OpDecorate %ssbo16 Binding 0\n");
3020
3021                 // ssbo32[] <- convert ssbo16[] to 32bit float
3022                 fragments["testfun"]                            =
3023                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
3024                         "    %param = OpFunctionParameter %v4f32\n"
3025
3026                         "%entry = OpLabel\n"
3027                         "    %i = OpVariable %fp_i32 Function\n"
3028                         "         OpStore %i %c_i32_0\n"
3029                         "         OpBranch %loop\n"
3030
3031                         " %loop = OpLabel\n"
3032                         "   %15 = OpLoad %i32 %i\n"
3033                         "   %lt = OpSLessThan %bool %15 %c_i32_128\n"
3034                         "         OpLoopMerge %merge %inc None\n"
3035                         "         OpBranchConditional %lt %write %merge\n"
3036
3037                         "%write = OpLabel\n"
3038                         "   %30 = OpLoad %i32 %i\n"
3039                         "  %src = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30\n"
3040                         "%val16 = OpLoad %v2f16 %src\n"
3041                         "%val32 = OpFConvert %v2f32 %val16\n"
3042                         "  %dst = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30\n"
3043                         "         OpStore %dst %val32\n"
3044                         "         OpBranch %inc\n"
3045
3046                         "  %inc = OpLabel\n"
3047                         "   %37 = OpLoad %i32 %i\n"
3048                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
3049                         "         OpStore %i %39\n"
3050                         "         OpBranch %loop\n"
3051
3052                         "%merge = OpLabel\n"
3053                         "         OpReturnValue %param\n"
3054
3055                         "OpFunctionEnd\n";
3056
3057                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3058                 {
3059                         map<string, string>     specs;
3060                         string                          testName        = string(CAPABILITIES[capIdx].name) + "_vector_float";
3061
3062                         specs["cap"]                                    = CAPABILITIES[capIdx].cap;
3063                         specs["indecor"]                                = CAPABILITIES[capIdx].decor;
3064
3065                         fragments["capability"]                 = capabilities.specialize(specs);
3066                         fragments["decoration"]                 = decoration.specialize(specs);
3067
3068                         resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
3069
3070                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
3071                 }
3072         }
3073
3074         { // matrix cases
3075                 fragments["pre_main"]                           =
3076                         " %c_i32_32 = OpConstant %i32 32\n"
3077                         "      %f16 = OpTypeFloat 16\n"
3078                         "    %v2f16 = OpTypeVector %f16 2\n"
3079                         "  %m4x2f32 = OpTypeMatrix %v2f32 4\n"
3080                         "  %m4x2f16 = OpTypeMatrix %v2f16 4\n"
3081                         " %up_v2f32 = OpTypePointer Uniform %v2f32\n"
3082                         " %up_v2f16 = OpTypePointer Uniform %v2f16\n"
3083                         "%a8m4x2f32 = OpTypeArray %m4x2f32 %c_i32_32\n"
3084                         "%a8m4x2f16 = OpTypeArray %m4x2f16 %c_i32_32\n"
3085                         "   %SSBO32 = OpTypeStruct %a8m4x2f32\n"
3086                         "   %SSBO16 = OpTypeStruct %a8m4x2f16\n"
3087                         "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
3088                         "%up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
3089                         "   %ssbo32 = OpVariable %up_SSBO32 Uniform\n"
3090                         "   %ssbo16 = OpVariable %up_SSBO16 Uniform\n";
3091
3092                 const StringTemplate decoration         (
3093                         "OpDecorate %a8m4x2f32 ArrayStride 32\n"
3094                         "OpDecorate %a8m4x2f16 ArrayStride 16\n"
3095                         "OpMemberDecorate %SSBO32 0 Offset 0\n"
3096                         "OpMemberDecorate %SSBO32 0 ColMajor\n"
3097                         "OpMemberDecorate %SSBO32 0 MatrixStride 8\n"
3098                         "OpMemberDecorate %SSBO16 0 Offset 0\n"
3099                         "OpMemberDecorate %SSBO16 0 ColMajor\n"
3100                         "OpMemberDecorate %SSBO16 0 MatrixStride 4\n"
3101                         "OpDecorate %SSBO32 BufferBlock\n"
3102                         "OpDecorate %SSBO16 ${indecor}\n"
3103                         "OpDecorate %ssbo32 DescriptorSet 0\n"
3104                         "OpDecorate %ssbo16 DescriptorSet 0\n"
3105                         "OpDecorate %ssbo32 Binding 1\n"
3106                         "OpDecorate %ssbo16 Binding 0\n");
3107
3108                 fragments["testfun"]                            =
3109                         "%test_code = OpFunction %v4f32 None %v4f32_function\n"
3110                         "    %param = OpFunctionParameter %v4f32\n"
3111
3112                         "%entry = OpLabel\n"
3113                         "    %i = OpVariable %fp_i32 Function\n"
3114                         "         OpStore %i %c_i32_0\n"
3115                         "         OpBranch %loop\n"
3116
3117                         " %loop = OpLabel\n"
3118                         "   %15 = OpLoad %i32 %i\n"
3119                         "   %lt = OpSLessThan %bool %15 %c_i32_32\n"
3120                         "         OpLoopMerge %merge %inc None\n"
3121                         "         OpBranchConditional %lt %write %merge\n"
3122
3123                         "  %write = OpLabel\n"
3124                         "     %30 = OpLoad %i32 %i\n"
3125                         "  %src_0 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_0\n"
3126                         "  %src_1 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_1\n"
3127                         "  %src_2 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_2\n"
3128                         "  %src_3 = OpAccessChain %up_v2f16 %ssbo16 %c_i32_0 %30 %c_i32_3\n"
3129                         "%val16_0 = OpLoad %v2f16 %src_0\n"
3130                         "%val16_1 = OpLoad %v2f16 %src_1\n"
3131                         "%val16_2 = OpLoad %v2f16 %src_2\n"
3132                         "%val16_3 = OpLoad %v2f16 %src_3\n"
3133                         "%val32_0 = OpFConvert %v2f32 %val16_0\n"
3134                         "%val32_1 = OpFConvert %v2f32 %val16_1\n"
3135                         "%val32_2 = OpFConvert %v2f32 %val16_2\n"
3136                         "%val32_3 = OpFConvert %v2f32 %val16_3\n"
3137                         "  %dst_0 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_0\n"
3138                         "  %dst_1 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_1\n"
3139                         "  %dst_2 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_2\n"
3140                         "  %dst_3 = OpAccessChain %up_v2f32 %ssbo32 %c_i32_0 %30 %c_i32_3\n"
3141                         "           OpStore %dst_0 %val32_0\n"
3142                         "           OpStore %dst_1 %val32_1\n"
3143                         "           OpStore %dst_2 %val32_2\n"
3144                         "           OpStore %dst_3 %val32_3\n"
3145                         "           OpBranch %inc\n"
3146
3147                         "  %inc = OpLabel\n"
3148                         "   %37 = OpLoad %i32 %i\n"
3149                         "   %39 = OpIAdd %i32 %37 %c_i32_1\n"
3150                         "         OpStore %i %39\n"
3151                         "         OpBranch %loop\n"
3152
3153                         "%merge = OpLabel\n"
3154                         "         OpReturnValue %param\n"
3155
3156                         "OpFunctionEnd\n";
3157
3158                 for (deUint32 capIdx = 0; capIdx < DE_LENGTH_OF_ARRAY(CAPABILITIES); ++capIdx)
3159                 {
3160                         map<string, string>     specs;
3161                         string                          testName        = string(CAPABILITIES[capIdx].name) + "_matrix_float";
3162
3163                         specs["cap"]                                    = CAPABILITIES[capIdx].cap;
3164                         specs["indecor"]                                = CAPABILITIES[capIdx].decor;
3165
3166                         fragments["capability"]                 = capabilities.specialize(specs);
3167                         fragments["decoration"]                 = decoration.specialize(specs);
3168
3169                         resources.inputs.back().first   = CAPABILITIES[capIdx].dtype;
3170
3171                         createTestsForAllStages(testName, defaultColors, defaultColors, fragments, resources, extensions, testGroup, get16BitStorageFeatures(CAPABILITIES[capIdx].name));
3172                 }
3173         }
3174 }
3175
3176 } // anonymous
3177
3178 tcu::TestCaseGroup* create16BitStorageComputeGroup (tcu::TestContext& testCtx)
3179 {
3180         de::MovePtr<tcu::TestCaseGroup> group           (new tcu::TestCaseGroup(testCtx, "16bit_storage", "Compute tests for VK_KHR_16bit_storage extension"));
3181         addTestGroup(group.get(), "uniform_32_to_16", "32bit floats/ints to 16bit tests under capability StorageUniform{|BufferBlock}", addCompute16bitStorageUniform32To16Group);
3182         addTestGroup(group.get(), "uniform_16_to_32", "16bit floats/ints to 32bit tests under capability StorageUniform{|BufferBlock}", addCompute16bitStorageUniform16To32Group);
3183         addTestGroup(group.get(), "push_constant_16_to_32", "16bit floats/ints to 32bit tests under capability StoragePushConstant16", addCompute16bitStoragePushConstant16To32Group);
3184
3185         return group.release();
3186 }
3187
3188 tcu::TestCaseGroup* create16BitStorageGraphicsGroup (tcu::TestContext& testCtx)
3189 {
3190         de::MovePtr<tcu::TestCaseGroup> group           (new tcu::TestCaseGroup(testCtx, "16bit_storage", "Graphics tests for VK_KHR_16bit_storage extension"));
3191
3192         addTestGroup(group.get(), "uniform_float_32_to_16", "32-bit floats into 16-bit tests under capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformFloat32To16Group);
3193         addTestGroup(group.get(), "uniform_float_16_to_32", "16-bit floats into 32-bit testsunder capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformFloat16To32Group);
3194         addTestGroup(group.get(), "uniform_int_32_to_16", "32-bit int into 16-bit tests under capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformInt32To16Group);
3195         addTestGroup(group.get(), "uniform_int_16_to_32", "16-bit int into 32-bit tests under capability StorageUniform{|BufferBlock}16", addGraphics16BitStorageUniformInt16To32Group);
3196         addTestGroup(group.get(), "input_output_float_32_to_16", "32-bit floats into 16-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputFloat32To16Group);
3197         addTestGroup(group.get(), "input_output_float_16_to_32", "16-bit floats into 32-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputFloat16To32Group);
3198         addTestGroup(group.get(), "input_output_int_32_to_16", "32-bit int into 16-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputInt32To16Group);
3199         addTestGroup(group.get(), "input_output_int_16_to_32", "16-bit int into 32-bit tests under capability StorageInputOutput16", addGraphics16BitStorageInputOutputInt16To32Group);
3200         addTestGroup(group.get(), "push_constant_float_16_to_32", "16-bit floats into 32-bit tests under capability StoragePushConstant16", addGraphics16BitStoragePushConstantFloat16To32Group);
3201         addTestGroup(group.get(), "push_constant_int_16_to_32", "16-bit int into 32-bit tests under capability StoragePushConstant16", addGraphics16BitStoragePushConstantInt16To32Group);
3202
3203         return group.release();
3204 }
3205
3206 } // SpirVAssembly
3207 } // vkt