1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
5 * Copyright (c) 2017 The Khronos Group Inc.
6 * Copyright (c) 2017 Codeplay Software Ltd.
7 * Copyright (c) 2018 NVIDIA Corporation
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
23 * \brief Subgroups Tests
24 */ /*--------------------------------------------------------------------*/
26 #include "vktSubgroupsPartitionedTests.hpp"
27 #include "vktSubgroupsTestsUtils.hpp"
// Result checker for vertex-pipeline (framebuffer) test variants: scans a buffer
// of `width` 32-bit values, one per invocation/pixel.
// NOTE(review): the loop body and the pass/fail criterion are elided from this
// listing — presumably each `val` is compared against the expected tempResult
// bitmask written by the shader; confirm against the full source.
65 static bool checkVertexPipelineStages(std::vector<const void*> datas,
66 deUint32 width, deUint32)
68 const deUint32* data =
69 reinterpret_cast<const deUint32*>(datas[0]);
70 for (deUint32 x = 0; x < width; ++x)
72 deUint32 val = data[x];
// Result checker for the compute-shader test variant: walks every global
// invocation (workgroup count x local size in all three dimensions), computes
// the same linear `offset` the shader used to index its output SSBO, and
// verifies the stored result.
// NOTE(review): lines computing `offset` from the global invocation ID and the
// failure-reporting path are elided; the visible check expects the value
// 0xFFFFFF, i.e. all tempResult bits (0x1|0x2|0x4|0x8..0x20000<<N ranges) set.
83 static bool checkCompute(std::vector<const void*> datas,
84 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
87 const deUint32* data =
88 reinterpret_cast<const deUint32*>(datas[0]);
90 for (deUint32 nX = 0; nX < numWorkgroups[0]; ++nX)
92 for (deUint32 nY = 0; nY < numWorkgroups[1]; ++nY)
94 for (deUint32 nZ = 0; nZ < numWorkgroups[2]; ++nZ)
96 for (deUint32 lX = 0; lX < localSize[0]; ++lX)
98 for (deUint32 lY = 0; lY < localSize[1]; ++lY)
100 for (deUint32 lZ = 0; lZ < localSize[2];
103 const deUint32 globalInvocationX =
104 nX * localSize[0] + lX;
105 const deUint32 globalInvocationY =
106 nY * localSize[1] + lY;
107 const deUint32 globalInvocationZ =
108 nZ * localSize[2] + lZ;
110 const deUint32 globalSizeX =
111 numWorkgroups[0] * localSize[0];
112 const deUint32 globalSizeY =
113 numWorkgroups[1] * localSize[1];
115 const deUint32 offset =
// Shader writes 0xFFFFFF on full success; any other value is a failure.
122 if (0xFFFFFF != data[offset])
// Maps an OPTYPE_* enum value to the corresponding GLSL
// KHR_shader_subgroup_arithmetic builtin name (reduce, inclusive-scan and
// exclusive-scan variants). Used to build the reference (non-partitioned)
// computation in the generated shader. Unknown values hit DE_FATAL.
// NOTE(review): the switch keyword, default label, and several reduce-op case
// labels (ADD..XOR) are elided from this listing.
136 std::string getOpTypeName(int opType)
141 DE_FATAL("Unsupported op type");
144 return "subgroupAdd";
146 return "subgroupMul";
148 return "subgroupMin";
150 return "subgroupMax";
152 return "subgroupAnd";
156 return "subgroupXor";
157 case OPTYPE_INCLUSIVE_ADD:
158 return "subgroupInclusiveAdd";
159 case OPTYPE_INCLUSIVE_MUL:
160 return "subgroupInclusiveMul";
161 case OPTYPE_INCLUSIVE_MIN:
162 return "subgroupInclusiveMin";
163 case OPTYPE_INCLUSIVE_MAX:
164 return "subgroupInclusiveMax";
165 case OPTYPE_INCLUSIVE_AND:
166 return "subgroupInclusiveAnd";
167 case OPTYPE_INCLUSIVE_OR:
168 return "subgroupInclusiveOr";
169 case OPTYPE_INCLUSIVE_XOR:
170 return "subgroupInclusiveXor";
171 case OPTYPE_EXCLUSIVE_ADD:
172 return "subgroupExclusiveAdd";
173 case OPTYPE_EXCLUSIVE_MUL:
174 return "subgroupExclusiveMul";
175 case OPTYPE_EXCLUSIVE_MIN:
176 return "subgroupExclusiveMin";
177 case OPTYPE_EXCLUSIVE_MAX:
178 return "subgroupExclusiveMax";
179 case OPTYPE_EXCLUSIVE_AND:
180 return "subgroupExclusiveAnd";
181 case OPTYPE_EXCLUSIVE_OR:
182 return "subgroupExclusiveOr";
183 case OPTYPE_EXCLUSIVE_XOR:
184 return "subgroupExclusiveXor";
// Maps an OPTYPE_* enum value to the corresponding GLSL
// NV_shader_subgroup_partitioned builtin name (the ...NV variants that take an
// explicit uvec4 partition ballot). Mirrors getOpTypeName() one-to-one so the
// generated shader can compare partitioned vs. reference results.
// NOTE(review): the switch keyword, default label, and reduce-op case labels
// are elided from this listing.
188 std::string getOpTypeNamePartitioned(int opType)
193 DE_FATAL("Unsupported op type");
196 return "subgroupPartitionedAddNV";
198 return "subgroupPartitionedMulNV";
200 return "subgroupPartitionedMinNV";
202 return "subgroupPartitionedMaxNV";
204 return "subgroupPartitionedAndNV";
206 return "subgroupPartitionedOrNV";
208 return "subgroupPartitionedXorNV";
209 case OPTYPE_INCLUSIVE_ADD:
210 return "subgroupPartitionedInclusiveAddNV";
211 case OPTYPE_INCLUSIVE_MUL:
212 return "subgroupPartitionedInclusiveMulNV";
213 case OPTYPE_INCLUSIVE_MIN:
214 return "subgroupPartitionedInclusiveMinNV";
215 case OPTYPE_INCLUSIVE_MAX:
216 return "subgroupPartitionedInclusiveMaxNV";
217 case OPTYPE_INCLUSIVE_AND:
218 return "subgroupPartitionedInclusiveAndNV";
219 case OPTYPE_INCLUSIVE_OR:
220 return "subgroupPartitionedInclusiveOrNV";
221 case OPTYPE_INCLUSIVE_XOR:
222 return "subgroupPartitionedInclusiveXorNV";
223 case OPTYPE_EXCLUSIVE_ADD:
224 return "subgroupPartitionedExclusiveAddNV";
225 case OPTYPE_EXCLUSIVE_MUL:
226 return "subgroupPartitionedExclusiveMulNV";
227 case OPTYPE_EXCLUSIVE_MIN:
228 return "subgroupPartitionedExclusiveMinNV";
229 case OPTYPE_EXCLUSIVE_MAX:
230 return "subgroupPartitionedExclusiveMaxNV";
231 case OPTYPE_EXCLUSIVE_AND:
232 return "subgroupPartitionedExclusiveAndNV";
233 case OPTYPE_EXCLUSIVE_OR:
234 return "subgroupPartitionedExclusiveOrNV";
235 case OPTYPE_EXCLUSIVE_XOR:
236 return "subgroupPartitionedExclusiveXorNV";
// Returns a GLSL constructor expression for the identity element of the given
// op/format pair (e.g. 0 for add, 1 for mul, +inf / INT_MAX / UINT_MAX for min,
// ~0 for and, 0 for or/xor). Used as the expected result of exclusive scans
// over single-invocation partitions.
// NOTE(review): the two format-classifying switch statements and the flag
// assignments (isFloat/isUnsigned/isBool = true) are partially elided from
// this listing; the min/max branches below presumably dispatch on those flags.
240 std::string getIdentity(int opType, vk::VkFormat format)
242 bool isFloat = false;
244 bool isUnsigned = false;
249 DE_FATAL("Unhandled format!");
251 case VK_FORMAT_R32_SINT:
252 case VK_FORMAT_R32G32_SINT:
253 case VK_FORMAT_R32G32B32_SINT:
254 case VK_FORMAT_R32G32B32A32_SINT:
257 case VK_FORMAT_R32_UINT:
258 case VK_FORMAT_R32G32_UINT:
259 case VK_FORMAT_R32G32B32_UINT:
260 case VK_FORMAT_R32G32B32A32_UINT:
263 case VK_FORMAT_R32_SFLOAT:
264 case VK_FORMAT_R32G32_SFLOAT:
265 case VK_FORMAT_R32G32B32_SFLOAT:
266 case VK_FORMAT_R32G32B32A32_SFLOAT:
267 case VK_FORMAT_R64_SFLOAT:
268 case VK_FORMAT_R64G64_SFLOAT:
269 case VK_FORMAT_R64G64B64_SFLOAT:
270 case VK_FORMAT_R64G64B64A64_SFLOAT:
273 case VK_FORMAT_R8_USCALED:
274 case VK_FORMAT_R8G8_USCALED:
275 case VK_FORMAT_R8G8B8_USCALED:
276 case VK_FORMAT_R8G8B8A8_USCALED:
277 break; // bool types are not anything
283 DE_FATAL("Unsupported op type");
286 case OPTYPE_INCLUSIVE_ADD:
287 case OPTYPE_EXCLUSIVE_ADD:
288 return subgroups::getFormatNameForGLSL(format) + "(0)";
290 case OPTYPE_INCLUSIVE_MUL:
291 case OPTYPE_EXCLUSIVE_MUL:
292 return subgroups::getFormatNameForGLSL(format) + "(1)";
294 case OPTYPE_INCLUSIVE_MIN:
295 case OPTYPE_EXCLUSIVE_MIN:
// Min identity: +infinity for floats, INT_MAX for signed, UINT_MAX for unsigned.
298 return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0x7f800000))";
302 return subgroups::getFormatNameForGLSL(format) + "(0x7fffffff)";
306 return subgroups::getFormatNameForGLSL(format) + "(0xffffffffu)";
310 DE_FATAL("Unhandled case");
314 case OPTYPE_INCLUSIVE_MAX:
315 case OPTYPE_EXCLUSIVE_MAX:
// Max identity: -infinity for floats, INT_MIN for signed, 0 for unsigned.
318 return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0xff800000))";
322 return subgroups::getFormatNameForGLSL(format) + "(0x80000000)";
326 return subgroups::getFormatNameForGLSL(format) + "(0)";
330 DE_FATAL("Unhandled case");
334 case OPTYPE_INCLUSIVE_AND:
335 case OPTYPE_EXCLUSIVE_AND:
336 return subgroups::getFormatNameForGLSL(format) + "(~0)";
338 case OPTYPE_INCLUSIVE_OR:
339 case OPTYPE_EXCLUSIVE_OR:
340 return subgroups::getFormatNameForGLSL(format) + "(0)";
342 case OPTYPE_INCLUSIVE_XOR:
343 case OPTYPE_EXCLUSIVE_XOR:
344 return subgroups::getFormatNameForGLSL(format) + "(0)";
// Builds a GLSL boolean expression comparing two values of the given format.
// Integer/bool formats use exact (==) comparison; float formats use an
// epsilon (0.00001) tolerance for accumulating ops, but exact equality for
// min/max ops (which merely select one of the inputs and introduce no
// rounding error). Vector formats wrap the comparison in all(...).
// NOTE(review): the format switch keyword, several integer-vector case labels,
// and the inner op-type switch headers are elided from this listing.
348 std::string getCompare(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
350 std::string formatName = subgroups::getFormatNameForGLSL(format);
354 return "all(equal(" + lhs + ", " + rhs + "))";
355 case VK_FORMAT_R8_USCALED:
356 case VK_FORMAT_R32_UINT:
357 case VK_FORMAT_R32_SINT:
358 return "(" + lhs + " == " + rhs + ")";
359 case VK_FORMAT_R32_SFLOAT:
360 case VK_FORMAT_R64_SFLOAT:
// Scalar float: epsilon compare by default...
364 return "(abs(" + lhs + " - " + rhs + ") < 0.00001)";
366 case OPTYPE_INCLUSIVE_MIN:
367 case OPTYPE_EXCLUSIVE_MIN:
369 case OPTYPE_INCLUSIVE_MAX:
370 case OPTYPE_EXCLUSIVE_MAX:
// ...but min/max results are exact copies of an input, so compare exactly.
371 return "(" + lhs + " == " + rhs + ")";
373 case VK_FORMAT_R32G32_SFLOAT:
374 case VK_FORMAT_R32G32B32_SFLOAT:
375 case VK_FORMAT_R32G32B32A32_SFLOAT:
376 case VK_FORMAT_R64G64_SFLOAT:
377 case VK_FORMAT_R64G64B64_SFLOAT:
378 case VK_FORMAT_R64G64B64A64_SFLOAT:
382 return "all(lessThan(abs(" + lhs + " - " + rhs + "), " + formatName + "(0.00001)))";
384 case OPTYPE_INCLUSIVE_MIN:
385 case OPTYPE_EXCLUSIVE_MIN:
387 case OPTYPE_INCLUSIVE_MAX:
388 case OPTYPE_EXCLUSIVE_MAX:
389 return "all(equal(" + lhs + ", " + rhs + "))";
// Parameters identifying one generated test case.
// NOTE(review): only shaderStage is visible in this listing; the opType and
// format members (referenced throughout as caseDef.opType / caseDef.format,
// and by the three-element brace initializers in createSubgroupsPartitionedTests)
// are elided here.
394 struct CaseDefinition
397 VkShaderStageFlags shaderStage;
// Generates the GLSL body shared by all shader stages. It accumulates pass
// bits into `tempResult` across several sub-tests:
//   0x1        — a single all-invocations partition matches the plain
//                (non-partitioned) subgroup op,
//   0x2        — bits for inactive invocations in the ballot are ignored,
//   0x4        — per-invocation singleton partitions yield the identity
//                (exclusive ops) or the original value (reduce/inclusive),
//   0x8..      — "random" hash-based partitions, uniform control flow,
//   0x40000..  — same hash-based partitions inside divergent control flow,
//                with 0xFC0000 granted to the branch not taken.
// A fully passing invocation ends with tempResult == 0xFFFFFF (checked by
// checkCompute / checkVertexPipelineStages).
// NOTE(review): several string-concatenation lines (closing braces, the
// else-branches, and the final return) are elided from this listing.
401 string getTestString(const CaseDefinition &caseDef)
403 // NOTE: tempResult can't have anything in bits 31:24 to avoid int->float
404 // conversion overflow in framebuffer tests.
405 string fmt = subgroups::getFormatNameForGLSL(caseDef.format);
407 " uint tempResult = 0;\n"
408 " uint id = gl_SubgroupInvocationID;\n";
410 // Test the case where the partition has a single subset with all invocations in it.
411 // This should generate the same result as the non-partitioned function.
413 " uvec4 allBallot = mask;\n"
414 " " + fmt + " allResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], allBallot);\n"
415 " " + fmt + " refResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
416 " if (" + getCompare(caseDef.opType, caseDef.format, "allResult", "refResult") + ") {\n"
417 " tempResult |= 0x1;\n"
420 // The definition of a partition doesn't forbid bits corresponding to inactive
421 // invocations being in the subset with active invocations. In other words, test that
422 // bits corresponding to inactive invocations are ignored.
424 " if (0 == (gl_SubgroupInvocationID % 2)) {\n"
425 " " + fmt + " allResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], allBallot);\n"
426 " " + fmt + " refResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
427 " if (" + getCompare(caseDef.opType, caseDef.format, "allResult", "refResult") + ") {\n"
428 " tempResult |= 0x2;\n"
// Odd invocations (the inactive branch) get bit 0x2 unconditionally.
431 " tempResult |= 0x2;\n"
434 // Test the case where the partition has each invocation in a unique subset. For
435 // exclusive ops, the result is identity. For reduce/inclusive, it's the original value.
436 string expectedSelfResult = "data[gl_SubgroupInvocationID]";
437 if (caseDef.opType >= OPTYPE_EXCLUSIVE_ADD &&
438 caseDef.opType <= OPTYPE_EXCLUSIVE_XOR) {
439 expectedSelfResult = getIdentity(caseDef.opType, caseDef.format);
443 " uvec4 selfBallot = subgroupPartitionNV(gl_SubgroupInvocationID);\n"
444 " " + fmt + " selfResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], selfBallot);\n"
445 " if (" + getCompare(caseDef.opType, caseDef.format, "selfResult", expectedSelfResult) + ") {\n"
446 " tempResult |= 0x4;\n"
449 // Test "random" partitions based on a hash of the invocation id.
450 // This "hash" function produces interesting/randomish partitions.
451 static const char *idhash = "((id%N)+(id%(N+1))-(id%2)+(id/2))%((N+1)/2)";
454 " for (uint N = 1; N < 16; ++N) {\n"
455 " " + fmt + " idhashFmt = " + fmt + "(" + idhash + ");\n"
456 " uvec4 partitionBallot = subgroupPartitionNV(idhashFmt) & mask;\n"
457 " " + fmt + " partitionedResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], partitionBallot);\n"
458 " for (uint i = 0; i < N; ++i) {\n"
459 " " + fmt + " iFmt = " + fmt + "(i);\n"
460 " if (" + getCompare(caseDef.opType, caseDef.format, "idhashFmt", "iFmt") + ") {\n"
461 " " + fmt + " subsetResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
462 " tempResult |= " + getCompare(caseDef.opType, caseDef.format, "partitionedResult", "subsetResult") + " ? (0x4 << N) : 0;\n"
466 // tests in flow control:
467 " if (1 == (gl_SubgroupInvocationID % 2)) {\n"
468 " for (uint N = 1; N < 7; ++N) {\n"
469 " " + fmt + " idhashFmt = " + fmt + "(" + idhash + ");\n"
470 " uvec4 partitionBallot = subgroupPartitionNV(idhashFmt) & mask;\n"
471 " " + fmt + " partitionedResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], partitionBallot);\n"
472 " for (uint i = 0; i < N; ++i) {\n"
473 " " + fmt + " iFmt = " + fmt + "(i);\n"
474 " if (" + getCompare(caseDef.opType, caseDef.format, "idhashFmt", "iFmt") + ") {\n"
475 " " + fmt + " subsetResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
476 " tempResult |= " + getCompare(caseDef.opType, caseDef.format, "partitionedResult", "subsetResult") + " ? (0x20000 << N) : 0;\n"
// Even invocations (the branch not taken) are granted those bits wholesale.
481 " tempResult |= 0xFC0000;\n"
// Builds the GLSL programs for the framebuffer (no-SSBO) test variants. The
// stage under test (vertex, geometry, tess control, or tess eval) embeds the
// generated test body and writes float(tempResult) to out_color; the other
// pipeline stages come from the shared pass-through helpers. All shaders are
// built for SPIR-V 1.3 (required for subgroup ops).
// NOTE(review): braces, closing stream statements, and the insertion of `bdy`
// into each shader are elided from this listing.
488 void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
490 const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
491 std::ostringstream bdy;
493 subgroups::setFragmentShaderFrameBuffer(programCollection);
// Any stage other than vertex needs the default pass-through vertex shader.
495 if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
496 subgroups::setVertexShaderFrameBuffer(programCollection);
498 bdy << getTestString(caseDef);
500 if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
502 std::ostringstream vertexSrc;
503 vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
504 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
505 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
506 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
507 << "layout(location = 0) in highp vec4 in_position;\n"
508 << "layout(location = 0) out float out_color;\n"
509 << "layout(set = 0, binding = 0) uniform Buffer1\n"
511 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
514 << "void main (void)\n"
516 << " uvec4 mask = subgroupBallot(true);\n"
518 << " out_color = float(tempResult);\n"
519 << " gl_Position = in_position;\n"
520 << " gl_PointSize = 1.0f;\n"
522 programCollection.glslSources.add("vert")
523 << glu::VertexSource(vertexSrc.str()) << buildOptions;
525 else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
527 std::ostringstream geometry;
529 geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
530 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
531 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
532 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
533 << "layout(points) in;\n"
534 << "layout(points, max_vertices = 1) out;\n"
535 << "layout(location = 0) out float out_color;\n"
536 << "layout(set = 0, binding = 0) uniform Buffer\n"
538 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
541 << "void main (void)\n"
543 << " uvec4 mask = subgroupBallot(true);\n"
545 << " out_color = float(tempResult);\n"
546 << " gl_Position = gl_in[0].gl_Position;\n"
547 << " EmitVertex();\n"
548 << " EndPrimitive();\n"
551 programCollection.glslSources.add("geometry")
552 << glu::GeometrySource(geometry.str()) << buildOptions;
554 else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
556 std::ostringstream controlSource;
557 controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
558 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
559 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
560 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
561 << "layout(vertices = 2) out;\n"
562 << "layout(location = 0) out float out_color[];\n"
563 << "layout(set = 0, binding = 0) uniform Buffer1\n"
565 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
568 << "void main (void)\n"
570 << " if (gl_InvocationID == 0)\n"
572 << " gl_TessLevelOuter[0] = 1.0f;\n"
573 << " gl_TessLevelOuter[1] = 1.0f;\n"
575 << " uvec4 mask = subgroupBallot(true);\n"
577 << " out_color[gl_InvocationID] = float(tempResult);"
578 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
582 programCollection.glslSources.add("tesc")
583 << glu::TessellationControlSource(controlSource.str()) << buildOptions;
// Tess control under test needs a pass-through tess eval stage (and vice versa).
584 subgroups::setTesEvalShaderFrameBuffer(programCollection);
586 else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
589 std::ostringstream evaluationSource;
590 evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
591 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
592 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
593 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
594 << "layout(isolines, equal_spacing, ccw ) in;\n"
595 << "layout(location = 0) out float out_color;\n"
596 << "layout(set = 0, binding = 0) uniform Buffer1\n"
598 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
601 << "void main (void)\n"
603 << " uvec4 mask = subgroupBallot(true);\n"
605 << " out_color = float(tempResult);\n"
606 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
609 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
610 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
614 DE_FATAL("Unsupported shader stage");
// Builds the GLSL programs for the SSBO test variants. Compute gets a single
// shader writing one result per global invocation; otherwise every graphics
// stage (vert/tesc/tese/geom/frag) is generated, each writing tempResult to a
// stage-specific result SSBO (bindings 0-3 for outputs, binding 4 for the
// shared read-only input data). All shaders target SPIR-V 1.3.
// NOTE(review): braces, the "#version 450" lines of the graphics shaders,
// `result[]` declarations, and insertion of `bdy` are elided from this listing.
618 void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
620 const string bdy = getTestString(caseDef);
622 if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
624 std::ostringstream src;
626 src << "#version 450\n"
627 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
628 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
629 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
// Local size comes from specialization constants so the test can vary it.
630 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
631 "local_size_z_id = 2) in;\n"
632 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
634 << " uint result[];\n"
636 << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
638 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[];\n"
641 << "void main (void)\n"
643 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
// Linearized global-invocation index; checkCompute recomputes the same offset.
644 << " highp uint offset = globalSize.x * ((globalSize.y * "
645 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
646 "gl_GlobalInvocationID.x;\n"
647 << " uvec4 mask = subgroupBallot(true);\n"
649 << " result[offset] = tempResult;\n"
652 programCollection.glslSources.add("comp")
653 << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
658 const std::string vertex =
660 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
661 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
662 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
663 "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
667 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
669 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
674 " uvec4 mask = subgroupBallot(true);\n"
676 " result[gl_VertexIndex] = tempResult;\n"
// Place each vertex at a distinct pixel center of the 1024-wide render target.
677 " float pixelSize = 2.0f/1024.0f;\n"
678 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
679 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
680 " gl_PointSize = 1.0f;\n"
682 programCollection.glslSources.add("vert")
683 << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
687 const std::string tesc =
689 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
690 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
691 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
692 "layout(vertices=1) out;\n"
693 "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
697 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
699 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
704 " uvec4 mask = subgroupBallot(true);\n"
706 " result[gl_PrimitiveID] = tempResult;\n"
707 " if (gl_InvocationID == 0)\n"
709 " gl_TessLevelOuter[0] = 1.0f;\n"
710 " gl_TessLevelOuter[1] = 1.0f;\n"
712 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
714 programCollection.glslSources.add("tesc")
715 << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
719 const std::string tese =
721 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
722 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
723 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
724 "layout(isolines) in;\n"
725 "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
729 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
731 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
736 " uvec4 mask = subgroupBallot(true);\n"
// Two tess-eval invocations per primitive (gl_TessCoord.x is 0 or 1 here).
738 " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
739 " float pixelSize = 2.0f/1024.0f;\n"
740 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
742 programCollection.glslSources.add("tese")
743 << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
747 const std::string geometry =
749 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
750 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
751 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
// ${TOPOLOGY} is substituted by addGeometryShadersFromTemplate below.
752 "layout(${TOPOLOGY}) in;\n"
753 "layout(points, max_vertices = 1) out;\n"
754 "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
758 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
760 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
765 " uvec4 mask = subgroupBallot(true);\n"
767 " result[gl_PrimitiveIDIn] = tempResult;\n"
768 " gl_Position = gl_in[0].gl_Position;\n"
772 subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
773 programCollection.glslSources);
777 const std::string fragment =
779 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
780 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
781 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
782 "layout(location = 0) out uint result;\n"
783 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
785 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
789 " uvec4 mask = subgroupBallot(true);\n"
791 " result = tempResult;\n"
793 programCollection.glslSources.add("fragment")
794 << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
// Fallback shaders for devices without subgroup support in some stages.
796 subgroups::addNoSubgroupShader(programCollection);
// Capability gate run before each test case: requires basic subgroup support,
// the VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV feature, and (for double-typed
// formats) shader-double support. Throws NotSupportedError otherwise.
800 void supportedCheck (Context& context, CaseDefinition caseDef)
802 if (!subgroups::isSubgroupSupported(context))
803 TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
805 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV))
807 TCU_THROW(NotSupportedError, "Device does not support subgroup partitioned operations");
810 if (subgroups::isDoubleFormat(caseDef.format) &&
811 !subgroups::isDoubleSupportedForDevice(context))
813 TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
// Test entry point for the framebuffer (no-SSBO) variants. Verifies the stage
// supports subgroup ops (fail if the spec requires it, NotSupported otherwise),
// then dispatches to the per-stage framebuffer test helper with non-zero-
// initialized input data and the shared checkVertexPipelineStages verifier.
817 tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
819 if (!subgroups::areSubgroupOperationsSupportedForStage(
820 context, caseDef.shaderStage))
// Mandatory stage lacking support is a conformance failure, not a skip.
822 if (subgroups::areSubgroupOperationsRequiredForStage(
823 caseDef.shaderStage))
825 return tcu::TestStatus::fail(
827 subgroups::getShaderStageName(caseDef.shaderStage) +
828 " is required to support subgroup operations!");
832 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
836 subgroups::SSBOData inputData;
837 inputData.format = caseDef.format;
838 inputData.numElements = subgroups::maxSupportedSubgroupSize();
839 inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
841 if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
842 return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
843 else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
844 return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
845 else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
846 return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
847 else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
848 return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
850 TCU_THROW(InternalError, "Unhandled shader stage");
// Helper used by test(): returns false when a stage that is *required* to
// support subgroup operations does not (caller then reports a failure);
// throws NotSupportedError for optional stages without support.
// NOTE(review): the `return false;`/`return true;` lines are elided from this
// listing — presumably false on the required-but-unsupported path, true otherwise.
853 bool checkShaderStages (Context& context, const CaseDefinition& caseDef)
855 if (!subgroups::areSubgroupOperationsSupportedForStage(
856 context, caseDef.shaderStage))
858 if (subgroups::areSubgroupOperationsRequiredForStage(
859 caseDef.shaderStage))
865 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
// Test entry point for the SSBO variants. Compute cases run makeComputeTest
// directly; graphics cases query the device's supported subgroup stages,
// restrict the requested stage mask accordingly (falling back to fragment-only
// when vertex-stage SSBO writes are unavailable), and run allStages.
871 tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
873 if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
875 if(!checkShaderStages(context,caseDef))
877 return tcu::TestStatus::fail(
879 subgroups::getShaderStageName(caseDef.shaderStage) +
880 " is required to support subgroup operations!");
882 subgroups::SSBOData inputData;
883 inputData.format = caseDef.format;
884 inputData.numElements = subgroups::maxSupportedSubgroupSize();
885 inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
887 return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkCompute);
// Graphics path: query which stages actually support subgroup operations.
891 VkPhysicalDeviceSubgroupProperties subgroupProperties;
892 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
893 subgroupProperties.pNext = DE_NULL;
895 VkPhysicalDeviceProperties2 properties;
896 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
897 properties.pNext = &subgroupProperties;
899 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
901 VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProperties.supportedStages);
// Without vertex-stage SSBO writes we can only test the fragment stage.
903 if ( VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
905 if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
906 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
908 stages = VK_SHADER_STAGE_FRAGMENT_BIT;
911 if ((VkShaderStageFlagBits)0u == stages)
912 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
914 subgroups::SSBOData inputData;
915 inputData.format = caseDef.format;
916 inputData.numElements = subgroups::maxSupportedSubgroupSize();
917 inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
// binding 4 matches the readonly Buffer2 declared in every generated shader.
918 inputData.binding = 4u;
919 inputData.stages = stages;
921 return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData,
922 1, checkVertexPipelineStages, stages);
// Builds the "partitioned" test group: for every format x op-type combination
// (skipping float+bitwise and bool+non-bitwise pairs) it registers a compute
// case, an all-graphics case, and one framebuffer case per single stage.
// NOTE(review): the switch headers and flag assignments (isFloat/isBool/
// isBitwiseOp = true), several case labels, and closing braces are elided
// from this listing.
931 tcu::TestCaseGroup* createSubgroupsPartitionedTests(tcu::TestContext& testCtx)
933 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
934 testCtx, "partitioned", "NV_shader_subgroup_partitioned category tests"));
// Single stages exercised by the framebuffer variants (compute and
// all-graphics are added separately below).
936 const VkShaderStageFlags stages[] =
938 VK_SHADER_STAGE_VERTEX_BIT,
939 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
940 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
941 VK_SHADER_STAGE_GEOMETRY_BIT,
944 const VkFormat formats[] =
946 VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
947 VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
948 VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
949 VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
950 VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
951 VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
952 VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
// USCALED 8-bit formats stand in for booleans in these tests.
953 VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED,
954 VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
957 for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
959 const VkFormat format = formats[formatIndex];
961 for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
964 bool isFloat = false;
970 case VK_FORMAT_R32_SFLOAT:
971 case VK_FORMAT_R32G32_SFLOAT:
972 case VK_FORMAT_R32G32B32_SFLOAT:
973 case VK_FORMAT_R32G32B32A32_SFLOAT:
974 case VK_FORMAT_R64_SFLOAT:
975 case VK_FORMAT_R64G64_SFLOAT:
976 case VK_FORMAT_R64G64B64_SFLOAT:
977 case VK_FORMAT_R64G64B64A64_SFLOAT:
980 case VK_FORMAT_R8_USCALED:
981 case VK_FORMAT_R8G8_USCALED:
982 case VK_FORMAT_R8G8B8_USCALED:
983 case VK_FORMAT_R8G8B8A8_USCALED:
988 bool isBitwiseOp = false;
995 case OPTYPE_INCLUSIVE_AND:
996 case OPTYPE_EXCLUSIVE_AND:
998 case OPTYPE_INCLUSIVE_OR:
999 case OPTYPE_EXCLUSIVE_OR:
1001 case OPTYPE_INCLUSIVE_XOR:
1002 case OPTYPE_EXCLUSIVE_XOR:
1007 if (isFloat && isBitwiseOp)
1009 // Skip float with bitwise category.
1013 if (isBool && !isBitwiseOp)
1015 // Skip bool when its not the bitwise category.
1018 std::string op = getOpTypeName(opTypeIndex);
1021 const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format};
1022 addFunctionCaseWithPrograms(group.get(),
1023 de::toLower(op) + "_" +
1024 subgroups::getFormatNameForGLSL(format) +
1025 "_" + getShaderStageName(caseDef.shaderStage),
1026 "", supportedCheck, initPrograms, test, caseDef);
1030 const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format};
1031 addFunctionCaseWithPrograms(group.get(),
1032 de::toLower(op) + "_" +
1033 subgroups::getFormatNameForGLSL(format) +
1035 "", supportedCheck, initPrograms, test, caseDef);
1038 for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
1040 const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
1041 addFunctionCaseWithPrograms(group.get(), de::toLower(op) + "_" + subgroups::getFormatNameForGLSL(format) +
1042 "_" + getShaderStageName(caseDef.shaderStage) + "_framebuffer", "",
1043 supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
1048 return group.release();