GL/ES: add subgroup build and files
author Daniel Koch <dkoch@nvidia.com>
Thu, 28 Mar 2019 14:59:00 +0000 (10:59 -0400)
committer Daniel Koch <dkoch@nvidia.com>
Fri, 29 Mar 2019 11:40:49 +0000 (07:40 -0400)
cp external/vulkancts/modules/vulkan/subgroups/*.cpp external/openglcts/modules/common/subgroups/
cp external/vulkancts/modules/vulkan/subgroups/*.hpp external/openglcts/modules/common/subgroups/
cd external/openglcts/modules/common/subgroups/
mv vkt* -> glc*
(last refreshed from master@b557785133ac0acb1854e0af945c4ca0dd76d5fc)

Component: OpenGL
VK-GL-CTS Issue: 1698

Change-Id: Ib5ffd58038c7a523ea640b05c88fd4adfb1292d4

32 files changed:
external/openglcts/modules/common/CMakeLists.txt
external/openglcts/modules/common/subgroups/CMakeLists.txt [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsArithmeticTests.cpp [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsArithmeticTests.hpp [new file with mode: 0644]
external/openglcts/modules/common/subgroups/glcSubgroupsBallotBroadcastTests.cpp [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsBallotBroadcastTests.hpp [new file with mode: 0644]
external/openglcts/modules/common/subgroups/glcSubgroupsBallotOtherTests.cpp [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsBallotOtherTests.hpp [new file with mode: 0644]
external/openglcts/modules/common/subgroups/glcSubgroupsBallotTests.cpp [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsBallotTests.hpp [new file with mode: 0644]
external/openglcts/modules/common/subgroups/glcSubgroupsBasicTests.cpp [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsBasicTests.hpp [new file with mode: 0644]
external/openglcts/modules/common/subgroups/glcSubgroupsBuiltinMaskVarTests.cpp [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsBuiltinMaskVarTests.hpp [new file with mode: 0644]
external/openglcts/modules/common/subgroups/glcSubgroupsBuiltinVarTests.cpp [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsBuiltinVarTests.hpp [new file with mode: 0644]
external/openglcts/modules/common/subgroups/glcSubgroupsClusteredTests.cpp [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsClusteredTests.hpp [new file with mode: 0644]
external/openglcts/modules/common/subgroups/glcSubgroupsPartitionedTests.cpp [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsPartitionedTests.hpp [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsQuadTests.cpp [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsQuadTests.hpp [new file with mode: 0644]
external/openglcts/modules/common/subgroups/glcSubgroupsShapeTests.cpp [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsShapeTests.hpp [new file with mode: 0644]
external/openglcts/modules/common/subgroups/glcSubgroupsShuffleTests.cpp [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsShuffleTests.hpp [new file with mode: 0644]
external/openglcts/modules/common/subgroups/glcSubgroupsTests.cpp [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsTests.hpp [new file with mode: 0644]
external/openglcts/modules/common/subgroups/glcSubgroupsTestsUtils.cpp [new file with mode: 0644]
external/openglcts/modules/common/subgroups/glcSubgroupsTestsUtils.hpp [new file with mode: 0644]
external/openglcts/modules/common/subgroups/glcSubgroupsVoteTests.cpp [new file with mode: 0755]
external/openglcts/modules/common/subgroups/glcSubgroupsVoteTests.hpp [new file with mode: 0644]

index 5c426c7..322fe7f 100644 (file)
@@ -5,6 +5,8 @@ if (DEQP_SUPPORT_WGL)
        add_definitions(-DGLCTS_SUPPORT_WGL=1)
 endif ()
 
+add_subdirectory(subgroups)
+
 set(GLCTS_COMMON_SRCS
        glcAggressiveShaderOptimizationsTests.cpp
        glcAggressiveShaderOptimizationsTests.hpp
@@ -115,6 +117,7 @@ set(GLCTS_COMMON_LIBS
        glutil
        tcutil
        eglutil
+       deqp-gl-subgroups
        )
 
 # Add glslang
diff --git a/external/openglcts/modules/common/subgroups/CMakeLists.txt b/external/openglcts/modules/common/subgroups/CMakeLists.txt
new file mode 100755 (executable)
index 0000000..6a2663a
--- /dev/null
@@ -0,0 +1,45 @@
+include_directories(..) # put the parent directory (modules/common) on the include path
+
+set(DEQP_GL_SUBGROUPS_SRCS # source list for the deqp-gl-subgroups library built below
+#  glcSubgroupsTests.cpp
+  glcSubgroupsTests.hpp # NOTE(review): the only entry that is not commented out; every other file in this list is disabled
+#  glcSubgroupsBuiltinVarTests.cpp
+#  glcSubgroupsBuiltinVarTests.hpp
+#  glcSubgroupsBuiltinMaskVarTests.cpp
+#  glcSubgroupsBuiltinMaskVarTests.hpp
+#  glcSubgroupsBasicTests.cpp
+#  glcSubgroupsBasicTests.hpp
+#  glcSubgroupsVoteTests.cpp
+#  glcSubgroupsVoteTests.hpp
+#  glcSubgroupsBallotTests.cpp
+#  glcSubgroupsBallotTests.hpp
+#  glcSubgroupsBallotBroadcastTests.cpp
+#  glcSubgroupsBallotBroadcastTests.hpp
+#  glcSubgroupsBallotOtherTests.cpp
+#  glcSubgroupsBallotOtherTests.hpp
+#  glcSubgroupsArithmeticTests.cpp
+#  glcSubgroupsArithmeticTests.hpp
+#  glcSubgroupsClusteredTests.cpp
+#  glcSubgroupsClusteredTests.hpp
+#  glcSubgroupsPartitionedTests.cpp
+#  glcSubgroupsPartitionedTests.hpp
+#  glcSubgroupsShuffleTests.cpp
+#  glcSubgroupsShuffleTests.hpp
+#  glcSubgroupsQuadTests.cpp
+#  glcSubgroupsQuadTests.hpp
+#  glcSubgroupsShapeTests.cpp
+#  glcSubgroupsShapeTests.hpp
+#  glcSubgroupsTestsUtils.cpp
+#  glcSubgroupsTestsUtils.hpp
+  )
+
+set(DEQP_GL_SUBGROUPS_LIBS # libraries linked into deqp-gl-subgroups
+  glutil
+  tcutil
+  vkutil # NOTE(review): presumably the Vulkan utility library; confirm a GL module should link it
+  )
+
+PCH(DEQP_GL_SUBGROUPS_SRCS ../../pch.cpp) # PCH is defined elsewhere; presumably wires up precompiled headers for the list - TODO confirm
+
+add_library(deqp-gl-subgroups STATIC ${DEQP_GL_SUBGROUPS_SRCS}) # static library; the parent CMakeLists.txt lists it in GLCTS_COMMON_LIBS
+target_link_libraries(deqp-gl-subgroups ${DEQP_GL_SUBGROUPS_LIBS})
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsArithmeticTests.cpp b/external/openglcts/modules/common/subgroups/glcSubgroupsArithmeticTests.cpp
new file mode 100755 (executable)
index 0000000..6f2f6ba
--- /dev/null
@@ -0,0 +1,1066 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsArithmeticTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+enum OpType // Subgroup operations exercised by this file; getOpTypeName() maps each value to its GLSL built-in name.
+{
+       OPTYPE_ADD = 0, // OPTYPE_ADD..OPTYPE_XOR map to subgroupAdd..subgroupXor
+       OPTYPE_MUL,
+       OPTYPE_MIN,
+       OPTYPE_MAX,
+       OPTYPE_AND,
+       OPTYPE_OR,
+       OPTYPE_XOR,
+       OPTYPE_INCLUSIVE_ADD, // OPTYPE_INCLUSIVE_* map to the subgroupInclusive* built-ins
+       OPTYPE_INCLUSIVE_MUL,
+       OPTYPE_INCLUSIVE_MIN,
+       OPTYPE_INCLUSIVE_MAX,
+       OPTYPE_INCLUSIVE_AND,
+       OPTYPE_INCLUSIVE_OR,
+       OPTYPE_INCLUSIVE_XOR,
+       OPTYPE_EXCLUSIVE_ADD, // OPTYPE_EXCLUSIVE_* map to the subgroupExclusive* built-ins
+       OPTYPE_EXCLUSIVE_MUL,
+       OPTYPE_EXCLUSIVE_MIN,
+       OPTYPE_EXCLUSIVE_MAX,
+       OPTYPE_EXCLUSIVE_AND,
+       OPTYPE_EXCLUSIVE_OR,
+       OPTYPE_EXCLUSIVE_XOR,
+       OPTYPE_LAST // one past the last valid op type
+};
+
+static bool checkVertexPipelineStages(std::vector<const void*> datas, // Result checker: forwards to subgroups::check().
+                                                                         deUint32 width, deUint32) // the unnamed third parameter is ignored
+{
+       return vkt::subgroups::check(datas, width, 0x3); // 0x3 == 0x1 | 0x2, the two flag bits the generated shaders set in tempResult; presumably the expected value - confirm in subgroups::check
+}
+
+static bool checkCompute(std::vector<const void*> datas, // Result checker: forwards to subgroups::checkCompute().
+                                                const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                                                deUint32) // the unnamed last parameter is ignored
+{
+       return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 0x3); // 0x3 == 0x1 | 0x2; presumably the expected per-invocation value - confirm in subgroups::checkCompute
+}
+
+std::string getOpTypeName(int opType) // Returns the GLSL subgroup built-in function name for an OpType value.
+{
+       switch (opType)
+       {
+               default: // any value without a case below, including OPTYPE_LAST
+                       DE_FATAL("Unsupported op type"); // DE_FATAL is defined elsewhere; presumably reports a fatal error - confirm
+                       return ""; // empty name if DE_FATAL returns
+               case OPTYPE_ADD:
+                       return "subgroupAdd";
+               case OPTYPE_MUL:
+                       return "subgroupMul";
+               case OPTYPE_MIN:
+                       return "subgroupMin";
+               case OPTYPE_MAX:
+                       return "subgroupMax";
+               case OPTYPE_AND:
+                       return "subgroupAnd";
+               case OPTYPE_OR:
+                       return "subgroupOr";
+               case OPTYPE_XOR:
+                       return "subgroupXor";
+               case OPTYPE_INCLUSIVE_ADD:
+                       return "subgroupInclusiveAdd";
+               case OPTYPE_INCLUSIVE_MUL:
+                       return "subgroupInclusiveMul";
+               case OPTYPE_INCLUSIVE_MIN:
+                       return "subgroupInclusiveMin";
+               case OPTYPE_INCLUSIVE_MAX:
+                       return "subgroupInclusiveMax";
+               case OPTYPE_INCLUSIVE_AND:
+                       return "subgroupInclusiveAnd";
+               case OPTYPE_INCLUSIVE_OR:
+                       return "subgroupInclusiveOr";
+               case OPTYPE_INCLUSIVE_XOR:
+                       return "subgroupInclusiveXor";
+               case OPTYPE_EXCLUSIVE_ADD:
+                       return "subgroupExclusiveAdd";
+               case OPTYPE_EXCLUSIVE_MUL:
+                       return "subgroupExclusiveMul";
+               case OPTYPE_EXCLUSIVE_MIN:
+                       return "subgroupExclusiveMin";
+               case OPTYPE_EXCLUSIVE_MAX:
+                       return "subgroupExclusiveMax";
+               case OPTYPE_EXCLUSIVE_AND:
+                       return "subgroupExclusiveAnd";
+               case OPTYPE_EXCLUSIVE_OR:
+                       return "subgroupExclusiveOr";
+               case OPTYPE_EXCLUSIVE_XOR:
+                       return "subgroupExclusiveXor";
+       }
+}
+
+std::string getOpTypeOperation(int opType, vk::VkFormat format, std::string lhs, std::string rhs) // Builds the GLSL expression that combines lhs and rhs for opType; the plain, inclusive and exclusive variants of an op share one expression.
+{
+       switch (opType)
+       {
+               default:
+                       DE_FATAL("Unsupported op type");
+                       return ""; // empty expression if DE_FATAL returns
+               case OPTYPE_ADD:
+               case OPTYPE_INCLUSIVE_ADD:
+               case OPTYPE_EXCLUSIVE_ADD:
+                       return lhs + " + " + rhs;
+               case OPTYPE_MUL:
+               case OPTYPE_INCLUSIVE_MUL:
+               case OPTYPE_EXCLUSIVE_MUL:
+                       return lhs + " * " + rhs;
+               case OPTYPE_MIN:
+               case OPTYPE_INCLUSIVE_MIN:
+               case OPTYPE_EXCLUSIVE_MIN:
+                       switch (format)
+                       {
+                               default: // every format not listed below
+                                       return "min(" + lhs + ", " + rhs + ")";
+                               case VK_FORMAT_R32_SFLOAT:
+                               case VK_FORMAT_R64_SFLOAT:
+                                       return "(isnan(" + lhs + ") ? " + rhs + " : (isnan(" + rhs + ") ? " + lhs + " : min(" + lhs + ", " + rhs + ")))"; // scalar float: if one operand is NaN the other operand is the result
+                               case VK_FORMAT_R32G32_SFLOAT:
+                               case VK_FORMAT_R32G32B32_SFLOAT:
+                               case VK_FORMAT_R32G32B32A32_SFLOAT:
+                               case VK_FORMAT_R64G64_SFLOAT:
+                               case VK_FORMAT_R64G64B64_SFLOAT:
+                               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                                       return "mix(mix(min(" + lhs + ", " + rhs + "), " + lhs + ", isnan(" + rhs + ")), " + rhs + ", isnan(" + lhs + "))"; // float vectors: same NaN handling, selected through mix() with the isnan() results
+                       }
+               case OPTYPE_MAX:
+               case OPTYPE_INCLUSIVE_MAX:
+               case OPTYPE_EXCLUSIVE_MAX:
+                       switch (format)
+                       {
+                               default: // every format not listed below
+                                       return "max(" + lhs + ", " + rhs + ")";
+                               case VK_FORMAT_R32_SFLOAT:
+                               case VK_FORMAT_R64_SFLOAT:
+                                       return "(isnan(" + lhs + ") ? " + rhs + " : (isnan(" + rhs + ") ? " + lhs + " : max(" + lhs + ", " + rhs + ")))"; // scalar float: if one operand is NaN the other operand is the result
+                               case VK_FORMAT_R32G32_SFLOAT:
+                               case VK_FORMAT_R32G32B32_SFLOAT:
+                               case VK_FORMAT_R32G32B32A32_SFLOAT:
+                               case VK_FORMAT_R64G64_SFLOAT:
+                               case VK_FORMAT_R64G64B64_SFLOAT:
+                               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                                       return "mix(mix(max(" + lhs + ", " + rhs + "), " + lhs + ", isnan(" + rhs + ")), " + rhs + ", isnan(" + lhs + "))"; // float vectors: same NaN handling, selected through mix() with the isnan() results
+                       }
+               case OPTYPE_AND:
+               case OPTYPE_INCLUSIVE_AND:
+               case OPTYPE_EXCLUSIVE_AND:
+                       switch (format)
+                       {
+                               default:
+                                       return lhs + " & " + rhs; // bitwise form for every format not listed below
+                               case VK_FORMAT_R8_USCALED: // the R8*_USCALED formats get logical operators and bvecN constructors; presumably they stand for bool/bvecN here - confirm in the test utils
+                                       return lhs + " && " + rhs;
+                               case VK_FORMAT_R8G8_USCALED:
+                                       return "bvec2(" + lhs + ".x && " + rhs + ".x, " + lhs + ".y && " + rhs + ".y)";
+                               case VK_FORMAT_R8G8B8_USCALED:
+                                       return "bvec3(" + lhs + ".x && " + rhs + ".x, " + lhs + ".y && " + rhs + ".y, " + lhs + ".z && " + rhs + ".z)";
+                               case VK_FORMAT_R8G8B8A8_USCALED:
+                                       return "bvec4(" + lhs + ".x && " + rhs + ".x, " + lhs + ".y && " + rhs + ".y, " + lhs + ".z && " + rhs + ".z, " + lhs + ".w && " + rhs + ".w)";
+                       }
+               case OPTYPE_OR:
+               case OPTYPE_INCLUSIVE_OR:
+               case OPTYPE_EXCLUSIVE_OR:
+                       switch (format)
+                       {
+                               default:
+                                       return lhs + " | " + rhs; // bitwise form for every format not listed below
+                               case VK_FORMAT_R8_USCALED: // logical form, built per component for the vector formats
+                                       return lhs + " || " + rhs;
+                               case VK_FORMAT_R8G8_USCALED:
+                                       return "bvec2(" + lhs + ".x || " + rhs + ".x, " + lhs + ".y || " + rhs + ".y)";
+                               case VK_FORMAT_R8G8B8_USCALED:
+                                       return "bvec3(" + lhs + ".x || " + rhs + ".x, " + lhs + ".y || " + rhs + ".y, " + lhs + ".z || " + rhs + ".z)";
+                               case VK_FORMAT_R8G8B8A8_USCALED:
+                                       return "bvec4(" + lhs + ".x || " + rhs + ".x, " + lhs + ".y || " + rhs + ".y, " + lhs + ".z || " + rhs + ".z, " + lhs + ".w || " + rhs + ".w)";
+                       }
+               case OPTYPE_XOR:
+               case OPTYPE_INCLUSIVE_XOR:
+               case OPTYPE_EXCLUSIVE_XOR:
+                       switch (format)
+                       {
+                               default:
+                                       return lhs + " ^ " + rhs; // bitwise form for every format not listed below
+                               case VK_FORMAT_R8_USCALED: // logical form (^^), built per component for the vector formats
+                                       return lhs + " ^^ " + rhs;
+                               case VK_FORMAT_R8G8_USCALED:
+                                       return "bvec2(" + lhs + ".x ^^ " + rhs + ".x, " + lhs + ".y ^^ " + rhs + ".y)";
+                               case VK_FORMAT_R8G8B8_USCALED:
+                                       return "bvec3(" + lhs + ".x ^^ " + rhs + ".x, " + lhs + ".y ^^ " + rhs + ".y, " + lhs + ".z ^^ " + rhs + ".z)";
+                               case VK_FORMAT_R8G8B8A8_USCALED:
+                                       return "bvec4(" + lhs + ".x ^^ " + rhs + ".x, " + lhs + ".y ^^ " + rhs + ".y, " + lhs + ".z ^^ " + rhs + ".z, " + lhs + ".w ^^ " + rhs + ".w)";
+                       }
+       }
+}
+
+std::string getIdentity(int opType, vk::VkFormat format) // Returns a GLSL constructor expression for the start value of the reference accumulation of opType in the given format.
+{
+       bool isFloat = false; // at most one of these three flags is set by the switch below
+       bool isInt = false;
+       bool isUnsigned = false;
+
+       switch (format)
+       {
+               default:
+                       DE_FATAL("Unhandled format!");
+                       break;
+               case VK_FORMAT_R32_SINT:
+               case VK_FORMAT_R32G32_SINT:
+               case VK_FORMAT_R32G32B32_SINT:
+               case VK_FORMAT_R32G32B32A32_SINT:
+                       isInt = true;
+                       break;
+               case VK_FORMAT_R32_UINT:
+               case VK_FORMAT_R32G32_UINT:
+               case VK_FORMAT_R32G32B32_UINT:
+               case VK_FORMAT_R32G32B32A32_UINT:
+                       isUnsigned = true;
+                       break;
+               case VK_FORMAT_R32_SFLOAT:
+               case VK_FORMAT_R32G32_SFLOAT:
+               case VK_FORMAT_R32G32B32_SFLOAT:
+               case VK_FORMAT_R32G32B32A32_SFLOAT:
+               case VK_FORMAT_R64_SFLOAT:
+               case VK_FORMAT_R64G64_SFLOAT:
+               case VK_FORMAT_R64G64B64_SFLOAT:
+               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                       isFloat = true;
+                       break;
+               case VK_FORMAT_R8_USCALED:
+               case VK_FORMAT_R8G8_USCALED:
+               case VK_FORMAT_R8G8B8_USCALED:
+               case VK_FORMAT_R8G8B8A8_USCALED:
+                       break; // bool types are not anything
+       }
+
+       switch (opType)
+       {
+               default:
+                       DE_FATAL("Unsupported op type");
+                       return "";
+               case OPTYPE_ADD:
+               case OPTYPE_INCLUSIVE_ADD:
+               case OPTYPE_EXCLUSIVE_ADD:
+                       return subgroups::getFormatNameForGLSL(format) + "(0)"; // additive identity
+               case OPTYPE_MUL:
+               case OPTYPE_INCLUSIVE_MUL:
+               case OPTYPE_EXCLUSIVE_MUL:
+                       return subgroups::getFormatNameForGLSL(format) + "(1)"; // multiplicative identity
+               case OPTYPE_MIN:
+               case OPTYPE_INCLUSIVE_MIN:
+               case OPTYPE_EXCLUSIVE_MIN:
+                       if (isFloat)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0x7f800000))"; // 0x7f800000 is the IEEE-754 single-precision bit pattern of +infinity
+                       }
+                       else if (isInt)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(0x7fffffff)"; // largest signed 32-bit value
+                       }
+                       else if (isUnsigned)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(0xffffffffu)"; // largest unsigned 32-bit value
+                       }
+                       else
+                       {
+                               DE_FATAL("Unhandled case"); // reached for the R8*_USCALED formats, which set none of the flags
+                               return "";
+                       }
+               case OPTYPE_MAX:
+               case OPTYPE_INCLUSIVE_MAX:
+               case OPTYPE_EXCLUSIVE_MAX:
+                       if (isFloat)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0xff800000))"; // 0xff800000 is the IEEE-754 single-precision bit pattern of -infinity
+                       }
+                       else if (isInt)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(0x80000000)"; // bit pattern of the smallest signed 32-bit value
+                       }
+                       else if (isUnsigned)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(0)"; // smallest unsigned value
+                       }
+                       else
+                       {
+                               DE_FATAL("Unhandled case"); // reached for the R8*_USCALED formats, which set none of the flags
+                               return "";
+                       }
+               case OPTYPE_AND:
+               case OPTYPE_INCLUSIVE_AND:
+               case OPTYPE_EXCLUSIVE_AND:
+                       return subgroups::getFormatNameForGLSL(format) + "(~0)"; // all bits set
+               case OPTYPE_OR:
+               case OPTYPE_INCLUSIVE_OR:
+               case OPTYPE_EXCLUSIVE_OR:
+                       return subgroups::getFormatNameForGLSL(format) + "(0)"; // no bits set
+               case OPTYPE_XOR:
+               case OPTYPE_INCLUSIVE_XOR:
+               case OPTYPE_EXCLUSIVE_XOR:
+                       return subgroups::getFormatNameForGLSL(format) + "(0)"; // no bits set
+       }
+}
+
+std::string getCompare(int opType, vk::VkFormat format, std::string lhs, std::string rhs) // Builds a GLSL boolean expression testing whether lhs and rhs match; opType only affects the float formats.
+{
+       std::string formatName = subgroups::getFormatNameForGLSL(format); // only used for the float-vector tolerance constructor below
+       switch (format)
+       {
+               default: // every format not listed below: all components must be equal
+                       return "all(equal(" + lhs + ", " + rhs + "))";
+               case VK_FORMAT_R8_USCALED:
+               case VK_FORMAT_R32_UINT:
+               case VK_FORMAT_R32_SINT:
+                       return "(" + lhs + " == " + rhs + ")"; // single-component non-float formats: exact equality
+               case VK_FORMAT_R32_SFLOAT:
+               case VK_FORMAT_R64_SFLOAT:
+                       switch (opType)
+                       {
+                               default:
+                                       return "(abs(" + lhs + " - " + rhs + ") < 0.00001)"; // absolute tolerance for all ops other than min/max
+                               case OPTYPE_MIN:
+                               case OPTYPE_INCLUSIVE_MIN:
+                               case OPTYPE_EXCLUSIVE_MIN:
+                               case OPTYPE_MAX:
+                               case OPTYPE_INCLUSIVE_MAX:
+                               case OPTYPE_EXCLUSIVE_MAX:
+                                       return "(" + lhs + " == " + rhs + ")"; // min/max results are compared exactly
+                       }
+               case VK_FORMAT_R32G32_SFLOAT:
+               case VK_FORMAT_R32G32B32_SFLOAT:
+               case VK_FORMAT_R32G32B32A32_SFLOAT:
+               case VK_FORMAT_R64G64_SFLOAT:
+               case VK_FORMAT_R64G64B64_SFLOAT:
+               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                       switch (opType)
+                       {
+                               default:
+                                       return "all(lessThan(abs(" + lhs + " - " + rhs + "), " + formatName + "(0.00001)))"; // per-component absolute tolerance
+                               case OPTYPE_MIN:
+                               case OPTYPE_INCLUSIVE_MIN:
+                               case OPTYPE_EXCLUSIVE_MIN:
+                               case OPTYPE_MAX:
+                               case OPTYPE_INCLUSIVE_MAX:
+                               case OPTYPE_EXCLUSIVE_MAX:
+                                       return "all(equal(" + lhs + ", " + rhs + "))"; // min/max results are compared exactly
+                       }
+       }
+}
+
+struct CaseDefinition // Parameters of one test case, consumed by the program-generation functions in this file.
+{
+       int                                     opType; // an OpType value; switched on by getOpTypeName() and friends
+       VkShaderStageFlags      shaderStage; // compared against single VK_SHADER_STAGE_*_BIT values
+       VkFormat                        format; // data type under test; turned into a GLSL type name via subgroups::getFormatNameForGLSL()
+};
+
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef) // Generates the GLSL source for caseDef.shaderStage (vertex, geometry, tess control or tess evaluation) and adds it to programCollection.glslSources.
+{
+       const vk::ShaderBuildOptions    buildOptions    (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       std::string                                             indexVars; // GLSL declaring the [start, end) invocation range of the reference loop
+       std::ostringstream                              bdy; // GLSL test body shared by all stages
+
+       subgroups::setFragmentShaderFrameBuffer(programCollection); // helper defined elsewhere; called for every stage
+
+       if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+               subgroups::setVertexShaderFrameBuffer(programCollection); // helper-provided vertex shader when the vertex stage is not the one under test
+
+       switch (caseDef.opType)
+       {
+               default: // non-scan ops: reference covers the whole subgroup
+                       indexVars = "  uint start = 0, end = gl_SubgroupSize;\n";
+                       break;
+               case OPTYPE_INCLUSIVE_ADD:
+               case OPTYPE_INCLUSIVE_MUL:
+               case OPTYPE_INCLUSIVE_MIN:
+               case OPTYPE_INCLUSIVE_MAX:
+               case OPTYPE_INCLUSIVE_AND:
+               case OPTYPE_INCLUSIVE_OR:
+               case OPTYPE_INCLUSIVE_XOR:
+                       indexVars = "  uint start = 0, end = gl_SubgroupInvocationID + 1;\n"; // inclusive: up to and including this invocation
+                       break;
+               case OPTYPE_EXCLUSIVE_ADD:
+               case OPTYPE_EXCLUSIVE_MUL:
+               case OPTYPE_EXCLUSIVE_MIN:
+               case OPTYPE_EXCLUSIVE_MAX:
+               case OPTYPE_EXCLUSIVE_AND:
+               case OPTYPE_EXCLUSIVE_OR:
+               case OPTYPE_EXCLUSIVE_XOR:
+                       indexVars = "  uint start = 0, end = gl_SubgroupInvocationID;\n"; // exclusive: invocations before this one only
+                       break;
+       }
+
+       bdy << indexVars // body relies on a 'mask' variable that each stage's main() declares before inserting bdy.str()
+               << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " ref = "
+               << getIdentity(caseDef.opType, caseDef.format) << ";\n"
+               << "  uint tempResult = 0;\n"
+               << "  for (uint index = start; index < end; index++)\n" // reference: fold data[] over the invocations flagged in mask
+               << "  {\n"
+               << "    if (subgroupBallotBitExtract(mask, index))\n"
+               << "    {\n"
+               << "      ref = " << getOpTypeOperation(caseDef.opType, caseDef.format, "ref", "data[index]") << ";\n"
+               << "    }\n"
+               << "  }\n"
+               << "  tempResult = " << getCompare(caseDef.opType, caseDef.format, "ref", // bit 0x1: built-in result matches the reference
+                                                                                       getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID])") << " ? 0x1 : 0;\n"
+               << "  if (1 == (gl_SubgroupInvocationID % 2))\n" // repeat the check inside a branch taken by odd invocation IDs only
+               << "  {\n"
+               << "    mask = subgroupBallot(true);\n" // mask is recomputed inside the branch
+               << "    ref = " << getIdentity(caseDef.opType, caseDef.format) << ";\n"
+               << "    for (uint index = start; index < end; index++)\n"
+               << "    {\n"
+               << "      if (subgroupBallotBitExtract(mask, index))\n"
+               << "      {\n"
+               << "        ref = " << getOpTypeOperation(caseDef.opType, caseDef.format, "ref", "data[index]") << ";\n"
+               << "      }\n"
+               << "    }\n"
+               << "    tempResult |= " << getCompare(caseDef.opType, caseDef.format, "ref", // bit 0x2: second check passed
+                               getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID])") << " ? 0x2 : 0;\n"
+               << "  }\n"
+               << "  else\n"
+               << "  {\n"
+               << "    tempResult |= 0x2;\n" // even invocation IDs skip the second check and set bit 0x2 directly
+               << "  }\n";
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream vertexSrc;
+               vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(location = 0) in highp vec4 in_position;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n" // input array sized by maxSupportedSubgroupSize()
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << bdy.str()
+                       << "  out_color = float(tempResult);\n" // result flags leave the shader as a float output
+                       << "  gl_Position = in_position;\n"
+                       << "  gl_PointSize = 1.0f;\n"
+                       << "}\n";
+               programCollection.glslSources.add("vert")
+                       << glu::VertexSource(vertexSrc.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream geometry;
+
+               geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(points) in;\n"
+                       << "layout(points, max_vertices = 1) out;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << bdy.str()
+                       << "  out_color = float(tempResult);\n"
+                       << "  gl_Position = gl_in[0].gl_Position;\n"
+                       << "  EmitVertex();\n"
+                       << "  EndPrimitive();\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("geometry")
+                               << glu::GeometrySource(geometry.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream controlSource;
+               controlSource  << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(vertices = 2) out;\n"
+                       << "layout(location = 0) out float out_color[];\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  if (gl_InvocationID == 0)\n" // only invocation 0 writes the tessellation levels
+                       <<"  {\n"
+                       << "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       << "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       << "  }\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << bdy.str()
+                       << "  out_color[gl_InvocationID] = float(tempResult);" // NOTE(review): no trailing "\n", so the next statement lands on the same line of the emitted GLSL
+                       << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       << "}\n";
+
+
+               programCollection.glslSources.add("tesc")
+                       << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+               subgroups::setTesEvalShaderFrameBuffer(programCollection); // helper-provided tessellation evaluation shader to pair with the tested control shader
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+       {
+
+               std::ostringstream evaluationSource;
+               evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(isolines, equal_spacing, ccw ) in;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << bdy.str()
+                       << "  out_color = float(tempResult);\n"
+                       << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                       << "}\n";
+
+               subgroups::setTesCtrlShaderFrameBuffer(programCollection); // helper-provided tessellation control shader to pair with the tested evaluation shader
+               programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+       }
+       else
+       {
+               DE_FATAL("Unsupported shader stage"); // any other stage value is rejected here
+       }
+}
+
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       std::string indexVars;
+       switch (caseDef.opType)
+       {
+               default:
+                       indexVars = "  uint start = 0, end = gl_SubgroupSize;\n";
+                       break;
+               case OPTYPE_INCLUSIVE_ADD:
+               case OPTYPE_INCLUSIVE_MUL:
+               case OPTYPE_INCLUSIVE_MIN:
+               case OPTYPE_INCLUSIVE_MAX:
+               case OPTYPE_INCLUSIVE_AND:
+               case OPTYPE_INCLUSIVE_OR:
+               case OPTYPE_INCLUSIVE_XOR:
+                       indexVars = "  uint start = 0, end = gl_SubgroupInvocationID + 1;\n";
+                       break;
+               case OPTYPE_EXCLUSIVE_ADD:
+               case OPTYPE_EXCLUSIVE_MUL:
+               case OPTYPE_EXCLUSIVE_MIN:
+               case OPTYPE_EXCLUSIVE_MAX:
+               case OPTYPE_EXCLUSIVE_AND:
+               case OPTYPE_EXCLUSIVE_OR:
+               case OPTYPE_EXCLUSIVE_XOR:
+                       indexVars = "  uint start = 0, end = gl_SubgroupInvocationID;\n";
+                       break;
+       }
+
+       const string bdy =
+               indexVars +
+               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " ref = "
+               + getIdentity(caseDef.opType, caseDef.format) + ";\n"
+               "  uint tempResult = 0;\n"
+               "  for (uint index = start; index < end; index++)\n"
+               "  {\n"
+               "    if (subgroupBallotBitExtract(mask, index))\n"
+               "    {\n"
+               "      ref = " + getOpTypeOperation(caseDef.opType, caseDef.format, "ref", "data[index]") + ";\n"
+               "    }\n"
+               "  }\n"
+               "  tempResult = " + getCompare(caseDef.opType, caseDef.format, "ref", getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID])") + " ? 0x1 : 0;\n"
+               "  if (1 == (gl_SubgroupInvocationID % 2))\n"
+               "  {\n"
+               "    mask = subgroupBallot(true);\n"
+               "    ref = " + getIdentity(caseDef.opType, caseDef.format) + ";\n"
+               "    for (uint index = start; index < end; index++)\n"
+               "    {\n"
+               "      if (subgroupBallotBitExtract(mask, index))\n"
+               "      {\n"
+               "        ref = " + getOpTypeOperation(caseDef.opType, caseDef.format, "ref", "data[index]") + ";\n"
+               "      }\n"
+               "    }\n"
+               "    tempResult |= " + getCompare(caseDef.opType, caseDef.format, "ref", getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID])") + " ? 0x2 : 0;\n"
+               "  }\n"
+               "  else\n"
+               "  {\n"
+               "    tempResult |= 0x2;\n"
+               "  }\n";
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream src;
+
+               src << "#version 450\n"
+                       << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+                       "local_size_z_id = 2) in;\n"
+                       << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                       << "{\n"
+                       << "  uint result[];\n"
+                       << "};\n"
+                       << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+                       << "  highp uint offset = globalSize.x * ((globalSize.y * "
+                       "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+                       "gl_GlobalInvocationID.x;\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << bdy
+                       << "  result[offset] = tempResult;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("comp")
+                               << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+       else
+       {
+               {
+                       const std::string vertex =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + bdy+
+                               "  result[gl_VertexIndex] = tempResult;\n"
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+                               "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+                               "  gl_PointSize = 1.0f;\n"
+                               "}\n";
+                       programCollection.glslSources.add("vert")
+                                       << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const std::string tesc =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(vertices=1) out;\n"
+                               "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + bdy +
+                               "  result[gl_PrimitiveID] = tempResult;\n"
+                               "  if (gl_InvocationID == 0)\n"
+                               "  {\n"
+                               "    gl_TessLevelOuter[0] = 1.0f;\n"
+                               "    gl_TessLevelOuter[1] = 1.0f;\n"
+                               "  }\n"
+                               "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                               "}\n";
+                       programCollection.glslSources.add("tesc")
+                               << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const std::string tese =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(isolines) in;\n"
+                               "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + bdy +
+                               "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+                               "}\n";
+                       programCollection.glslSources.add("tese")
+                               << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const std::string geometry =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(${TOPOLOGY}) in;\n"
+                               "layout(points, max_vertices = 1) out;\n"
+                               "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                                + bdy +
+                               "  result[gl_PrimitiveIDIn] = tempResult;\n"
+                               "  gl_Position = gl_in[0].gl_Position;\n"
+                               "  EmitVertex();\n"
+                               "  EndPrimitive();\n"
+                               "}\n";
+                       subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
+                                                                                                         programCollection.glslSources);
+               }
+
+               {
+                       const std::string fragment =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(location = 0) out uint result;\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + bdy +
+                               "  result = tempResult;\n"
+                               "}\n";
+                       programCollection.glslSources.add("fragment")
+                               << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+               subgroups::addNoSubgroupShader(programCollection);
+       }
+}
+
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+       if (!subgroups::isSubgroupSupported(context))
+               TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+
+       if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_ARITHMETIC_BIT))
+       {
+               TCU_THROW(NotSupportedError, "Device does not support subgroup arithmetic operations");
+       }
+
+       if (subgroups::isDoubleFormat(caseDef.format) &&
+                       !subgroups::isDoubleSupportedForDevice(context))
+       {
+               TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+       }
+}
+
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+       if (!subgroups::areSubgroupOperationsSupportedForStage(
+                               context, caseDef.shaderStage))
+       {
+               if (subgroups::areSubgroupOperationsRequiredForStage(
+                                       caseDef.shaderStage))
+               {
+                       return tcu::TestStatus::fail(
+                                          "Shader stage " +
+                                          subgroups::getShaderStageName(caseDef.shaderStage) +
+                                          " is required to support subgroup operations!");
+               }
+               else
+               {
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+               }
+       }
+
+       subgroups::SSBOData inputData;
+       inputData.format = caseDef.format;
+       inputData.numElements = subgroups::maxSupportedSubgroupSize();
+       inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context,  VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+       else
+               TCU_THROW(InternalError, "Unhandled shader stage");
+}
+
+bool checkShaderStages (Context& context, const CaseDefinition& caseDef)
+{
+       if (!subgroups::areSubgroupOperationsSupportedForStage(
+                               context, caseDef.shaderStage))
+       {
+               if (subgroups::areSubgroupOperationsRequiredForStage(
+                                       caseDef.shaderStage))
+               {
+                       return false;
+               }
+               else
+               {
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+               }
+       }
+       return true;
+}
+
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               if(!checkShaderStages(context,caseDef))
+               {
+                       return tcu::TestStatus::fail(
+                                                       "Shader stage " +
+                                                       subgroups::getShaderStageName(caseDef.shaderStage) +
+                                                       " is required to support subgroup operations!");
+               }
+               subgroups::SSBOData inputData;
+               inputData.format = caseDef.format;
+               inputData.numElements = subgroups::maxSupportedSubgroupSize();
+               inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
+
+               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkCompute);
+       }
+       else
+       {
+               VkPhysicalDeviceSubgroupProperties subgroupProperties;
+               subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+               subgroupProperties.pNext = DE_NULL;
+
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage  & subgroupProperties.supportedStages);
+
+               if ( VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
+               {
+                       if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
+                               TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+                       else
+                               stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+               }
+
+               if ((VkShaderStageFlagBits)0u == stages)
+                       TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+               subgroups::SSBOData inputData;
+               inputData.format                        = caseDef.format;
+               inputData.numElements           = subgroups::maxSupportedSubgroupSize();
+               inputData.initializeType        = subgroups::SSBOData::InitializeNonZero;
+               inputData.binding                       = 4u;
+               inputData.stages                        = stages;
+
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData,
+                                                                                1, checkVertexPipelineStages, stages);
+       }
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+tcu::TestCaseGroup* createSubgroupsArithmeticTests(tcu::TestContext& testCtx)
+{
+       de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+               testCtx, "graphics", "Subgroup arithmetic category tests: graphics"));
+       de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+               testCtx, "compute", "Subgroup arithmetic category tests: compute"));
+       de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+               testCtx, "framebuffer", "Subgroup arithmetic category tests: framebuffer"));
+
+       const VkShaderStageFlags stages[] =
+       {
+               VK_SHADER_STAGE_VERTEX_BIT,
+               VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+               VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+               VK_SHADER_STAGE_GEOMETRY_BIT,
+       };
+
+       const VkFormat formats[] =
+       {
+               VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
+               VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
+               VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
+               VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
+               VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
+               VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
+               VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
+               VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED,
+               VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
+       };
+
+       for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
+       {
+               const VkFormat format = formats[formatIndex];
+
+               for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
+               {
+                       bool isBool = false;
+                       bool isFloat = false;
+
+                       switch (format)
+                       {
+                               default:
+                                       break;
+                               case VK_FORMAT_R32_SFLOAT:
+                               case VK_FORMAT_R32G32_SFLOAT:
+                               case VK_FORMAT_R32G32B32_SFLOAT:
+                               case VK_FORMAT_R32G32B32A32_SFLOAT:
+                               case VK_FORMAT_R64_SFLOAT:
+                               case VK_FORMAT_R64G64_SFLOAT:
+                               case VK_FORMAT_R64G64B64_SFLOAT:
+                               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                                       isFloat = true;
+                                       break;
+                               case VK_FORMAT_R8_USCALED:
+                               case VK_FORMAT_R8G8_USCALED:
+                               case VK_FORMAT_R8G8B8_USCALED:
+                               case VK_FORMAT_R8G8B8A8_USCALED:
+                                       isBool = true;
+                                       break;
+                       }
+
+                       bool isBitwiseOp = false;
+
+                       switch (opTypeIndex)
+                       {
+                               default:
+                                       break;
+                               case OPTYPE_AND:
+                               case OPTYPE_INCLUSIVE_AND:
+                               case OPTYPE_EXCLUSIVE_AND:
+                               case OPTYPE_OR:
+                               case OPTYPE_INCLUSIVE_OR:
+                               case OPTYPE_EXCLUSIVE_OR:
+                               case OPTYPE_XOR:
+                               case OPTYPE_INCLUSIVE_XOR:
+                               case OPTYPE_EXCLUSIVE_XOR:
+                                       isBitwiseOp = true;
+                                       break;
+                       }
+
+                       if (isFloat && isBitwiseOp)
+                       {
+                               // Skip float with bitwise category.
+                               continue;
+                       }
+
+                       if (isBool && !isBitwiseOp)
+                       {
+                               // Skip bool when its not the bitwise category.
+                               continue;
+                       }
+                       std::string op = getOpTypeName(opTypeIndex);
+
+                       {
+                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format};
+                               addFunctionCaseWithPrograms(computeGroup.get(),
+                                                                                       de::toLower(op) + "_" +
+                                                                                       subgroups::getFormatNameForGLSL(format),
+                                                                                       "", supportedCheck, initPrograms, test, caseDef);
+                       }
+
+                       {
+                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format};
+                               addFunctionCaseWithPrograms(graphicGroup.get(),
+                                                                                       de::toLower(op) + "_" +
+                                                                                       subgroups::getFormatNameForGLSL(format),
+                                                                                       "", supportedCheck, initPrograms, test, caseDef);
+                       }
+
+                       for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
+                       {
+                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
+                               addFunctionCaseWithPrograms(framebufferGroup.get(), de::toLower(op) + "_" + subgroups::getFormatNameForGLSL(format) +
+                                                                                       "_" + getShaderStageName(caseDef.shaderStage), "",
+                                                                                       supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+                       }
+               }
+       }
+
+       de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+               testCtx, "arithmetic", "Subgroup arithmetic category tests"));
+
+       group->addChild(graphicGroup.release());
+       group->addChild(computeGroup.release());
+       group->addChild(framebufferGroup.release());
+
+       return group.release();
+}
+
+} // subgroups
+} // vkt
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsArithmeticTests.hpp b/external/openglcts/modules/common/subgroups/glcSubgroupsArithmeticTests.hpp
new file mode 100644 (file)
index 0000000..6ae287e
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef _VKTSUBGROUPSARITHMETICTESTS_HPP
+#define _VKTSUBGROUPSARITHMETICTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+/*--------------------------------------------------------------------*//*!
+ * \brief Create the "arithmetic" subgroup test group.
+ *
+ * The definition builds a group named "arithmetic" with three child
+ * groups ("graphics", "compute" and "framebuffer").
+ *
+ * \param testCtx Test context handed to every tcu::TestCaseGroup created.
+ * \return Pointer to the new group, released from its de::MovePtr.
+ *//*--------------------------------------------------------------------*/
+tcu::TestCaseGroup* createSubgroupsArithmeticTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSARITHMETICTESTS_HPP
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsBallotBroadcastTests.cpp b/external/openglcts/modules/common/subgroups/glcSubgroupsBallotBroadcastTests.cpp
new file mode 100755 (executable)
index 0000000..b6503a8
--- /dev/null
@@ -0,0 +1,599 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsBallotBroadcastTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Broadcast operations exercised by this file. The values are used as
+// CaseDefinition::opType and as the loop index when test cases are created.
+enum OpType
+{
+       // Emitted in GLSL as subgroupBroadcast().
+       OPTYPE_BROADCAST = 0,
+       // Emitted in GLSL as subgroupBroadcastFirst().
+       OPTYPE_BROADCAST_FIRST,
+       // Number of real operations; not an operation itself.
+       OPTYPE_LAST
+};
+
+// Result callback for the graphics and framebuffer variants. The shaders in
+// this file leave 0x1 | 0x2 in tempResult on success, so 3 is forwarded to
+// the shared checker (presumably as the per-element reference value --
+// confirm against subgroups::check). The trailing unnamed argument is unused.
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+                                      deUint32 width, deUint32)
+{
+       const deUint32 expectedResult = 3;
+
+       return vkt::subgroups::check(datas, width, expectedResult);
+}
+
+static bool checkCompute(std::vector<const void*> datas,
+                                                const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                                                deUint32)
+{
+       return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 3);
+}
+
+std::string getOpTypeName(int opType)
+{
+       switch (opType)
+       {
+               default:
+                       DE_FATAL("Unsupported op type");
+                       return "";
+               case OPTYPE_BROADCAST:
+                       return "subgroupBroadcast";
+               case OPTYPE_BROADCAST_FIRST:
+                       return "subgroupBroadcastFirst";
+       }
+}
+
+// Parameters of one generated test case.
+struct CaseDefinition
+{
+       // One of the OpType values (OPTYPE_BROADCAST or OPTYPE_BROADCAST_FIRST).
+       int                                     opType;
+       // Stage (or stage mask, e.g. VK_SHADER_STAGE_ALL_GRAPHICS) under test.
+       VkShaderStageFlags      shaderStage;
+       // Format of the data1[] elements that are broadcast.
+       VkFormat                        format;
+};
+
+std::string getBodySource(CaseDefinition caseDef)
+{
+       std::ostringstream bdy;
+
+       bdy << "  uvec4 mask = subgroupBallot(true);\n";
+       bdy << "  uint tempResult = 0;\n";
+
+       if (OPTYPE_BROADCAST == caseDef.opType)
+       {
+               bdy     << "  tempResult = 0x3;\n";
+               for (int i = 0; i < (int)subgroups::maxSupportedSubgroupSize(); i++)
+               {
+                       bdy << "  {\n"
+                       << "    const uint id = "<< i << ";\n"
+                       << "    " << subgroups::getFormatNameForGLSL(caseDef.format)
+                       << " op = subgroupBroadcast(data1[gl_SubgroupInvocationID], id);\n"
+                       << "    if ((id < gl_SubgroupSize) && subgroupBallotBitExtract(mask, id))\n"
+                       << "    {\n"
+                       << "      if (op != data1[id])\n"
+                       << "      {\n"
+                       << "        tempResult = 0;\n"
+                       << "      }\n"
+                       << "    }\n"
+                       << "  }\n";
+               }
+       }
+       else
+       {
+               bdy     << "  uint firstActive = 0;\n"
+                       << "  for (uint i = 0; i < gl_SubgroupSize; i++)\n"
+                       << "  {\n"
+                       << "    if (subgroupBallotBitExtract(mask, i))\n"
+                       << "    {\n"
+                       << "      firstActive = i;\n"
+                       << "      break;\n"
+                       << "    }\n"
+                       << "  }\n"
+                       << "  tempResult |= (subgroupBroadcastFirst(data1[gl_SubgroupInvocationID]) == data1[firstActive]) ? 0x1 : 0;\n"
+                       << "  // make the firstActive invocation inactive now\n"
+                       << "  if (firstActive == gl_SubgroupInvocationID)\n"
+                       << "  {\n"
+                       << "    for (uint i = 0; i < gl_SubgroupSize; i++)\n"
+                       << "    {\n"
+                       << "      if (subgroupBallotBitExtract(mask, i))\n"
+                       << "      {\n"
+                       << "        firstActive = i;\n"
+                       << "        break;\n"
+                       << "      }\n"
+                       << "    }\n"
+                       << "    tempResult |= (subgroupBroadcastFirst(data1[gl_SubgroupInvocationID]) == data1[firstActive]) ? 0x2 : 0;\n"
+                       << "  }\n"
+                       << "  else\n"
+                       << "  {\n"
+                       << "    // the firstActive invocation didn't partake in the second result so set it to true\n"
+                       << "    tempResult |= 0x2;\n"
+                       << "  }\n";
+       }
+   return bdy.str();
+}
+
+// Registers the shader sources for the "framebuffer" variant, in which the
+// stage under test reads data1[] from a uniform block and passes tempResult
+// on through the out_color output instead of writing to a storage buffer.
+//
+// Only the stage named by caseDef.shaderStage gets a generated shader that
+// contains the subgroup body; the remaining stages are added through the
+// subgroups::set*ShaderFrameBuffer helpers. Any stage other than vertex,
+// geometry, tessellation control or tessellation evaluation hits DE_FATAL.
+void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       const vk::ShaderBuildOptions    buildOptions    (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+
+       subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+       // When the vertex stage is the one under test its shader is generated below.
+       if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+               subgroups::setVertexShaderFrameBuffer(programCollection);
+
+       std::string bdyStr = getBodySource(caseDef);
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream                              vertex;
+               vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(location = 0) in highp vec4 in_position;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform  Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << bdyStr
+                       << "  out_color = float(tempResult);\n"
+                       << "  gl_Position = in_position;\n"
+                       << "  gl_PointSize = 1.0f;\n"
+                       << "}\n";
+               programCollection.glslSources.add("vert")
+                       << glu::VertexSource(vertex.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream geometry;
+
+               geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(points) in;\n"
+                       << "layout(points, max_vertices = 1) out;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[" <<subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << bdyStr
+                       << "  out_color = float(tempResult);\n"
+                       << "  gl_Position = gl_in[0].gl_Position;\n"
+                       << "  EmitVertex();\n"
+                       << "  EndPrimitive();\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("geometry")
+                       << glu::GeometrySource(geometry.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream controlSource;
+
+               controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(vertices = 2) out;\n"
+                       << "layout(location = 0) out float out_color[];\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer2\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[" <<subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  if (gl_InvocationID == 0)\n"
+                       << "  {\n"
+                       << "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       << "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       << "  }\n"
+                       << bdyStr
+                       << "  out_color[gl_InvocationID ] = float(tempResult);\n"
+                       << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("tesc")
+                       << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+               // The evaluation stage comes from the shared helper in this case.
+               subgroups::setTesEvalShaderFrameBuffer(programCollection);
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream evaluationSource;
+               evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(isolines, equal_spacing, ccw ) in;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[" <<subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << bdyStr
+                       << "  out_color  = float(tempResult);\n"
+                       << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                       << "}\n";
+
+               // The control stage comes from the shared helper in this case.
+               subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+               programCollection.glslSources.add("tese")
+                       << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+       }
+       else
+       {
+               DE_FATAL("Unsupported shader stage");
+       }
+}
+
+// Registers the shader sources for the "compute" and "graphics" variants,
+// which write tempResult to storage buffers.
+//
+// Compute: a single "comp" shader writes result[] (binding 0) indexed by the
+// flattened global invocation id and reads data1[] from binding 1.
+//
+// Otherwise: vertex, tessellation control, tessellation evaluation and
+// geometry shaders each write their own result[] buffer (bindings 0..3) and
+// all stages, including fragment, read data1[] from binding 4. The fragment
+// shader returns tempResult through its colour output.
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       std::string bdyStr = getBodySource(caseDef);
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream src;
+
+               // The local size is supplied through specialization constants 0..2.
+               src << "#version 450\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+                       "local_size_z_id = 2) in;\n"
+                       << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                       << "{\n"
+                       << "  uint result[];\n"
+                       << "};\n"
+                       << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+                       << "  highp uint offset = globalSize.x * ((globalSize.y * "
+                       "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+                       "gl_GlobalInvocationID.x;\n"
+                       << bdyStr
+                       << "  result[offset] = tempResult;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("comp")
+                               << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+       else
+       {
+               // Vertex stage: one result element per vertex, points laid out
+               // along x using a 1024-pixel-wide step.
+               const string vertex =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                       "{\n"
+                       "  uint result[];\n"
+                       "};\n"
+                       "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                       "{\n"
+                       "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data1[];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       + bdyStr +
+                       "  result[gl_VertexIndex] = tempResult;\n"
+                       "  float pixelSize = 2.0f/1024.0f;\n"
+                       "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+                       "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+                       "  gl_PointSize = 1.0f;\n"
+                       "}\n";
+
+               // Tessellation control stage: one result element per primitive.
+               const string tesc =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(vertices=1) out;\n"
+                       "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+                       "{\n"
+                       "  uint result[];\n"
+                       "};\n"
+                       "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                       "{\n"
+                       "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data1[];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       + bdyStr +
+                       "  result[gl_PrimitiveID] = tempResult;\n"
+                       "  if (gl_InvocationID == 0)\n"
+                       "  {\n"
+                       "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       "  }\n"
+                       "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       "}\n";
+
+               // Tessellation evaluation stage: two result elements per
+               // primitive, selected by rounding gl_TessCoord.x.
+               const string tese =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(isolines) in;\n"
+                       "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+                       "{\n"
+                       "  uint result[];\n"
+                       "};\n"
+                       "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                       "{\n"
+                       "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data1[];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       + bdyStr +
+                       "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
+                       "  float pixelSize = 2.0f/1024.0f;\n"
+                       "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+                       "}\n";
+
+               // Geometry stage: ${TOPOLOGY} is a template placeholder that
+               // subgroups::addGeometryShadersFromTemplate() fills in below.
+               const string geometry =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(${TOPOLOGY}) in;\n"
+                       "layout(points, max_vertices = 1) out;\n"
+                       "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+                       "{\n"
+                       "  uint result[];\n"
+                       "};\n"
+                       "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                       "{\n"
+                       "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data1[];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       + bdyStr +
+                       "  result[gl_PrimitiveIDIn] = tempResult;\n"
+                       "  gl_Position = gl_in[0].gl_Position;\n"
+                       "  EmitVertex();\n"
+                       "  EndPrimitive();\n"
+                       "}\n";
+
+               // Fragment stage: has no result buffer, the value is returned
+               // through the colour attachment output.
+               const string fragment =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(location = 0) out uint result;\n"
+                       "layout(set = 0, binding = 4, std430) readonly buffer Buffer1\n"
+                       "{\n"
+                       "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data1[];\n"
+                       "};\n"
+                       "void main (void)\n"
+                       "{\n"
+                       + bdyStr +
+                       "  result = tempResult;\n"
+                       "}\n";
+
+               subgroups::addNoSubgroupShader(programCollection);
+
+               programCollection.glslSources.add("vert")
+                               << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               programCollection.glslSources.add("tesc")
+                               << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               programCollection.glslSources.add("tese")
+                               << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
+                                                                                                 programCollection.glslSources);
+               programCollection.glslSources.add("fragment")
+                               << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+}
+
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+       if (!subgroups::isSubgroupSupported(context))
+               TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+
+       if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
+       {
+               TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
+       }
+
+       if (subgroups::isDoubleFormat(caseDef.format) &&
+               !subgroups::isDoubleSupportedForDevice(context))
+       {
+               TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+       }
+}
+
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+       if (!subgroups::areSubgroupOperationsSupportedForStage(
+                       context, caseDef.shaderStage))
+       {
+               if (subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+               {
+                       return tcu::TestStatus::fail(
+                                          "Shader stage " +
+                                          subgroups::getShaderStageName(caseDef.shaderStage) +
+                                          " is required to support subgroup operations!");
+               }
+               else
+               {
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+               }
+       }
+
+       subgroups::SSBOData inputData[1];
+       inputData[0].format = caseDef.format;
+       inputData[0].numElements = subgroups::maxSupportedSubgroupSize();
+       inputData[0].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+       else
+               TCU_THROW(InternalError, "Unhandled shader stage");
+}
+
+
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+               {
+                       if (subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+                       {
+                               return tcu::TestStatus::fail(
+                                                  "Shader stage " +
+                                                  subgroups::getShaderStageName(caseDef.shaderStage) +
+                                                  " is required to support subgroup operations!");
+                       }
+                       else
+                       {
+                               TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+                       }
+               }
+               subgroups::SSBOData inputData[1];
+               inputData[0].format = caseDef.format;
+               inputData[0].numElements = subgroups::maxSupportedSubgroupSize();
+               inputData[0].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputData, 1, checkCompute);
+       }
+       else
+       {
+               VkPhysicalDeviceSubgroupProperties subgroupProperties;
+               subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+               subgroupProperties.pNext = DE_NULL;
+
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage  & subgroupProperties.supportedStages);
+
+               if ( VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
+               {
+                       if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
+                               TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+                       else
+                               stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+               }
+
+               if ((VkShaderStageFlagBits)0u == stages)
+                       TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+               subgroups::SSBOData inputData;
+               inputData.format                        = caseDef.format;
+               inputData.numElements           = subgroups::maxSupportedSubgroupSize();
+               inputData.initializeType        = subgroups::SSBOData::InitializeNonZero;
+               inputData.binding                       = 4u;
+               inputData.stages                        = stages;
+
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, stages);
+       }
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+tcu::TestCaseGroup* createSubgroupsBallotBroadcastTests(tcu::TestContext& testCtx)
+{
+       de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+               testCtx, "graphics", "Subgroup ballot broadcast category tests: graphics"));
+       de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+               testCtx, "compute", "Subgroup ballot broadcast category tests: compute"));
+       de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+               testCtx, "framebuffer", "Subgroup ballot broadcast category tests: framebuffer"));
+
+       const VkShaderStageFlags stages[] =
+       {
+               VK_SHADER_STAGE_VERTEX_BIT,
+               VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+               VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+               VK_SHADER_STAGE_GEOMETRY_BIT,
+       };
+
+       const VkFormat formats[] =
+       {
+               VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
+               VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
+               VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
+               VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
+               VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
+               VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
+               VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
+               VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED,
+               VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
+       };
+
+       for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
+       {
+               const VkFormat format = formats[formatIndex];
+
+               for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
+               {
+                       const std::string op = de::toLower(getOpTypeName(opTypeIndex));
+                       const std::string name = op + "_" + subgroups::getFormatNameForGLSL(format);
+
+                       {
+                               CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format};
+                               addFunctionCaseWithPrograms(computeGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
+                       }
+
+                       {
+                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format};
+                               addFunctionCaseWithPrograms(graphicGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
+                       }
+
+                       for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
+                       {
+                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
+                               addFunctionCaseWithPrograms(framebufferGroup.get(), name + getShaderStageName(caseDef.shaderStage), "",
+                                                       supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+                       }
+               }
+       }
+
+       de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+               testCtx, "ballot_broadcast", "Subgroup ballot broadcast category tests"));
+
+       group->addChild(graphicGroup.release());
+       group->addChild(computeGroup.release());
+       group->addChild(framebufferGroup.release());
+       return group.release();
+}
+
+} // subgroups
+} // vkt
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsBallotBroadcastTests.hpp b/external/openglcts/modules/common/subgroups/glcSubgroupsBallotBroadcastTests.hpp
new file mode 100644 (file)
index 0000000..a2352f9
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef _VKTSUBGROUPSBALLOTBROADCASTTESTS_HPP
+#define _VKTSUBGROUPSBALLOTBROADCASTTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Creates the "ballot_broadcast" test group (subgroupBroadcast and
+// subgroupBroadcastFirst cases for graphics, compute and framebuffer).
+tcu::TestCaseGroup* createSubgroupsBallotBroadcastTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSBALLOTBROADCASTTESTS_HPP
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsBallotOtherTests.cpp b/external/openglcts/modules/common/subgroups/glcSubgroupsBallotOtherTests.cpp
new file mode 100755 (executable)
index 0000000..c16cbe0
--- /dev/null
@@ -0,0 +1,604 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsBallotOtherTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+//! Ballot helper operations covered by this file. The numeric values are used
+//! directly as a loop index when test cases are registered, so they start at 0
+//! and must stay contiguous.
+enum OpType
+{
+       OPTYPE_INVERSE_BALLOT = 0,
+       OPTYPE_BALLOT_BIT_EXTRACT,
+       OPTYPE_BALLOT_BIT_COUNT,
+       OPTYPE_BALLOT_INCLUSIVE_BIT_COUNT,
+       OPTYPE_BALLOT_EXCLUSIVE_BIT_COUNT,
+       OPTYPE_BALLOT_FIND_LSB,
+       OPTYPE_BALLOT_FIND_MSB,
+       // Number of operations; exclusive loop bound, never a test case itself.
+       OPTYPE_LAST
+};
+
+//! Result checker used for the graphics/framebuffer variants.
+//! Forwards to subgroups::check with the reference value 0xf, i.e. the four
+//! bits (0x1|0x2|0x4|0x8) that the generated shader body sets in tempResult.
+//! The third (unnamed) argument is ignored.
+static bool checkVertexPipelineStages(std::vector<const void*> datas, deUint32 width, deUint32)
+{
+       const int allChecksPassed = 0xf;
+
+       return vkt::subgroups::check(datas, width, allChecksPassed);
+}
+
+//! Result checker used for the compute variant.
+//! Forwards to subgroups::checkCompute with the reference value 0xf, i.e. the
+//! four bits (0x1|0x2|0x4|0x8) that the generated shader body sets in tempResult.
+//! The last (unnamed) argument is ignored.
+static bool checkCompute(std::vector<const void*> datas,
+                         const deUint32 numWorkgroups[3],
+                         const deUint32 localSize[3],
+                         deUint32)
+{
+       const int allChecksPassed = 0xf;
+
+       return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, allChecksPassed);
+}
+
+/*!
+ * \brief Maps an OpType value to the GLSL builtin exercised by that case.
+ *
+ * \param opType One of the OPTYPE_* values (OPTYPE_LAST excluded).
+ * \return The builtin's name; for any other value DE_FATAL is raised and an
+ *         empty string is returned.
+ */
+std::string getOpTypeName(int opType)
+{
+       const char* builtinName = "";
+
+       switch (opType)
+       {
+               case OPTYPE_INVERSE_BALLOT:                             builtinName = "subgroupInverseBallot";                          break;
+               case OPTYPE_BALLOT_BIT_EXTRACT:                 builtinName = "subgroupBallotBitExtract";                       break;
+               case OPTYPE_BALLOT_BIT_COUNT:                   builtinName = "subgroupBallotBitCount";                         break;
+               case OPTYPE_BALLOT_INCLUSIVE_BIT_COUNT: builtinName = "subgroupBallotInclusiveBitCount";        break;
+               case OPTYPE_BALLOT_EXCLUSIVE_BIT_COUNT: builtinName = "subgroupBallotExclusiveBitCount";        break;
+               case OPTYPE_BALLOT_FIND_LSB:                    builtinName = "subgroupBallotFindLSB";                          break;
+               case OPTYPE_BALLOT_FIND_MSB:                    builtinName = "subgroupBallotFindMSB";                          break;
+
+               default:
+                       // Unknown value: report it and fall through to the empty name.
+                       DE_FATAL("Unsupported op type");
+                       break;
+       }
+
+       return builtinName;
+}
+
+//! Parameters of one generated ballot-other test case.
+struct CaseDefinition
+{
+       // One of the OPTYPE_* values; kept as int because cases are built from a loop index.
+       int                                     opType;
+       // Stage, or stage mask such as VK_SHADER_STAGE_ALL_GRAPHICS, that runs the subgroup code.
+       VkShaderStageFlags      shaderStage;
+};
+
+/*!
+ * \brief Generates the GLSL statements shared by every shader stage for one op type.
+ *
+ * The emitted text declares allOnes/allZeros, a uint tempResult and two helper
+ * macros, then sets (or clears) bits 0x1, 0x2, 0x4 and 0x8 of tempResult, one
+ * bit per sub-check of the builtin selected by caseDef.opType. A fully passing
+ * invocation therefore ends with tempResult == 0xf.
+ *
+ * \param caseDef Only caseDef.opType is read here.
+ * \return GLSL source fragment meant to be pasted inside a shader's main().
+ */
+std::string getBodySource(CaseDefinition caseDef)
+{
+       std::ostringstream bdy;
+
+       // Common preamble.
+       // MAKE_HIGH_BALLOT_RESULT(i): uvec4 ballot with every bit at position >= i set.
+       // MAKE_SINGLE_BIT_BALLOT_RESULT(i): uvec4 ballot with only bit i set.
+       // NOTE(review): for i == 128 the last component of MAKE_HIGH_BALLOT_RESULT
+       // shifts a 32-bit value by 32, which GLSL leaves undefined; the inclusive and
+       // exclusive bit-count loops below do reach i == 128 - confirm this is intended.
+       bdy << "  uvec4 allOnes = uvec4(0xFFFFFFFF);\n"
+               << "  uvec4 allZeros = uvec4(0);\n"
+               << "  uint tempResult = 0;\n"
+               << "#define MAKE_HIGH_BALLOT_RESULT(i) uvec4("
+               << "i >= 32 ? 0 : (0xFFFFFFFF << i), "
+               << "i >= 64 ? 0 : (0xFFFFFFFF << ((i < 32) ? 0 : (i - 32))), "
+               << "i >= 96 ? 0 : (0xFFFFFFFF << ((i < 64) ? 0 : (i - 64))), "
+               << " 0xFFFFFFFF << ((i < 96) ? 0 : (i - 96)))\n"
+               << "#define MAKE_SINGLE_BIT_BALLOT_RESULT(i) uvec4("
+               << "i >= 32 ? 0 : 0x1 << i, "
+               << "i < 32 || i >= 64 ? 0 : 0x1 << (i - 32), "
+               << "i < 64 || i >= 96 ? 0 : 0x1 << (i - 64), "
+               << "i < 96 ? 0 : 0x1 << (i - 96))\n";
+
+       switch (caseDef.opType)
+       {
+               default:
+                       DE_FATAL("Unknown op type!");
+                       break;
+               // 0x1: all-ones ballot reads true, 0x2: all-zeros reads false,
+               // 0x4: subgroupBallot(true) reads true, 0x8: set unconditionally.
+               case OPTYPE_INVERSE_BALLOT:
+                       bdy << "  tempResult |= subgroupInverseBallot(allOnes) ? 0x1 : 0;\n"
+                               << "  tempResult |= subgroupInverseBallot(allZeros) ? 0 : 0x2;\n"
+                               << "  tempResult |= subgroupInverseBallot(subgroupBallot(true)) ? 0x4 : 0;\n"
+                               << "  tempResult |= 0x8;\n";
+                       break;
+               // Same three checks as above, extracting this invocation's own bit;
+               // 0x8 is set and then cleared if any loop iteration fails.
+               // NOTE(review): the loop body indexes with gl_SubgroupInvocationID and
+               // never uses the counter i, so every iteration repeats the same check.
+               // Possibly "i" was intended - confirm before changing the shader.
+               case OPTYPE_BALLOT_BIT_EXTRACT:
+                       bdy << "  tempResult |= subgroupBallotBitExtract(allOnes, gl_SubgroupInvocationID) ? 0x1 : 0;\n"
+                               << "  tempResult |= subgroupBallotBitExtract(allZeros, gl_SubgroupInvocationID) ? 0 : 0x2;\n"
+                               << "  tempResult |= subgroupBallotBitExtract(subgroupBallot(true), gl_SubgroupInvocationID) ? 0x4 : 0;\n"
+                               << "  tempResult |= 0x8;\n"
+                               << "  for (uint i = 0; i < gl_SubgroupSize; i++)\n"
+                               << "  {\n"
+                               << "    if (!subgroupBallotBitExtract(allOnes, gl_SubgroupInvocationID))\n"
+                               << "    {\n"
+                               << "      tempResult &= ~0x8;\n"
+                               << "    }\n"
+                               << "  }\n";
+                       break;
+               // 0x1: count(allOnes) equals gl_SubgroupSize, 0x2: count(allZeros) is 0,
+               // 0x4: count(subgroupBallot(true)) is non-zero,
+               // 0x8: bits at positions >= gl_SubgroupSize are not counted.
+               case OPTYPE_BALLOT_BIT_COUNT:
+                       bdy << "  tempResult |= gl_SubgroupSize == subgroupBallotBitCount(allOnes) ? 0x1 : 0;\n"
+                               << "  tempResult |= 0 == subgroupBallotBitCount(allZeros) ? 0x2 : 0;\n"
+                               << "  tempResult |= 0 < subgroupBallotBitCount(subgroupBallot(true)) ? 0x4 : 0;\n"
+                               << "  tempResult |= 0 == subgroupBallotBitCount(MAKE_HIGH_BALLOT_RESULT(gl_SubgroupSize)) ? 0x8 : 0;\n";
+                       break;
+               // 0x1/0x2/0x4: basic checks against invocation ID + 1, zero and non-zero.
+               // 0x8: sweep i over 0..128; masks whose lowest set bit lies at or above
+               // inclusiveOffset must count 0, earlier masks must count non-zero.
+               case OPTYPE_BALLOT_INCLUSIVE_BIT_COUNT:
+                       bdy << "  uint inclusiveOffset = gl_SubgroupInvocationID + 1;\n"
+                               << "  tempResult |= inclusiveOffset == subgroupBallotInclusiveBitCount(allOnes) ? 0x1 : 0;\n"
+                               << "  tempResult |= 0 == subgroupBallotInclusiveBitCount(allZeros) ? 0x2 : 0;\n"
+                               << "  tempResult |= 0 < subgroupBallotInclusiveBitCount(subgroupBallot(true)) ? 0x4 : 0;\n"
+                               << "  tempResult |= 0x8;\n"
+                               << "  uvec4 inclusiveUndef = MAKE_HIGH_BALLOT_RESULT(inclusiveOffset);\n"
+                               << "  bool undefTerritory = false;\n"
+                               << "  for (uint i = 0; i <= 128; i++)\n"
+                               << "  {\n"
+                               << "    uvec4 iUndef = MAKE_HIGH_BALLOT_RESULT(i);\n"
+                               << "    if (iUndef == inclusiveUndef)"
+                               << "    {\n"
+                               << "      undefTerritory = true;\n"
+                               << "    }\n"
+                               << "    uint inclusiveBitCount = subgroupBallotInclusiveBitCount(iUndef);\n"
+                               << "    if (undefTerritory && (0 != inclusiveBitCount))\n"
+                               << "    {\n"
+                               << "      tempResult &= ~0x8;\n"
+                               << "    }\n"
+                               << "    else if (!undefTerritory && (0 == inclusiveBitCount))\n"
+                               << "    {\n"
+                               << "      tempResult &= ~0x8;\n"
+                               << "    }\n"
+                               << "  }\n";
+                       break;
+               // 0x1/0x2: basic checks against the invocation ID and zero.
+               // 0x4 and 0x8 start set; the sweep clears 0x4 when a mask at or above
+               // exclusiveOffset counts non-zero and 0x8 when an earlier mask counts 0.
+               case OPTYPE_BALLOT_EXCLUSIVE_BIT_COUNT:
+                       bdy << "  uint exclusiveOffset = gl_SubgroupInvocationID;\n"
+                               << "  tempResult |= exclusiveOffset == subgroupBallotExclusiveBitCount(allOnes) ? 0x1 : 0;\n"
+                               << "  tempResult |= 0 == subgroupBallotExclusiveBitCount(allZeros) ? 0x2 : 0;\n"
+                               << "  tempResult |= 0x4;\n"
+                               << "  tempResult |= 0x8;\n"
+                               << "  uvec4 exclusiveUndef = MAKE_HIGH_BALLOT_RESULT(exclusiveOffset);\n"
+                               << "  bool undefTerritory = false;\n"
+                               << "  for (uint i = 0; i <= 128; i++)\n"
+                               << "  {\n"
+                               << "    uvec4 iUndef = MAKE_HIGH_BALLOT_RESULT(i);\n"
+                               << "    if (iUndef == exclusiveUndef)"
+                               << "    {\n"
+                               << "      undefTerritory = true;\n"
+                               << "    }\n"
+                               << "    uint exclusiveBitCount = subgroupBallotExclusiveBitCount(iUndef);\n"
+                               << "    if (undefTerritory && (0 != exclusiveBitCount))\n"
+                               << "    {\n"
+                               << "      tempResult &= ~0x4;\n"
+                               << "    }\n"
+                               << "    else if (!undefTerritory && (0 == exclusiveBitCount))\n"
+                               << "    {\n"
+                               << "      tempResult &= ~0x8;\n"
+                               << "    }\n"
+                               << "  }\n";
+                       break;
+               // 0x1: LSB(allOnes) is 0. 0x2: the elected invocation passes outright,
+               // the others ballot inside the else branch and expect an LSB above 0.
+               // 0x4: LSB of the full ballot is below gl_SubgroupSize.
+               // 0x8: LSB of "bits >= i" equals i for every i below gl_SubgroupSize.
+               case OPTYPE_BALLOT_FIND_LSB:
+                       bdy << "  tempResult |= 0 == subgroupBallotFindLSB(allOnes) ? 0x1 : 0;\n"
+                               << "  if (subgroupElect())\n"
+                               << "  {\n"
+                               << "    tempResult |= 0x2;\n"
+                               << "  }\n"
+                               << "  else\n"
+                               << "  {\n"
+                               << "    tempResult |= 0 < subgroupBallotFindLSB(subgroupBallot(true)) ? 0x2 : 0;\n"
+                               << "  }\n"
+                               << "  tempResult |= gl_SubgroupSize > subgroupBallotFindLSB(subgroupBallot(true)) ? 0x4 : 0;\n"
+                               << "  tempResult |= 0x8;\n"
+                               << "  for (uint i = 0; i < gl_SubgroupSize; i++)\n"
+                               << "  {\n"
+                               << "    if (i != subgroupBallotFindLSB(MAKE_HIGH_BALLOT_RESULT(i)))\n"
+                               << "    {\n"
+                               << "      tempResult &= ~0x8;\n"
+                               << "    }\n"
+                               << "  }\n";
+                       break;
+               // 0x1: MSB(allOnes) is gl_SubgroupSize - 1. 0x2/0x4: same structure as
+               // the LSB case. 0x8: MSB of a single-bit ballot equals that bit's index.
+               case OPTYPE_BALLOT_FIND_MSB:
+                       bdy << "  tempResult |= (gl_SubgroupSize - 1) == subgroupBallotFindMSB(allOnes) ? 0x1 : 0;\n"
+                               << "  if (subgroupElect())\n"
+                               << "  {\n"
+                               << "    tempResult |= 0x2;\n"
+                               << "  }\n"
+                               << "  else\n"
+                               << "  {\n"
+                               << "    tempResult |= 0 < subgroupBallotFindMSB(subgroupBallot(true)) ? 0x2 : 0;\n"
+                               << "  }\n"
+                               << "  tempResult |= gl_SubgroupSize > subgroupBallotFindMSB(subgroupBallot(true)) ? 0x4 : 0;\n"
+                               << "  tempResult |= 0x8;\n"
+                               << "  for (uint i = 0; i < gl_SubgroupSize; i++)\n"
+                               << "  {\n"
+                               << "    if (i != subgroupBallotFindMSB(MAKE_SINGLE_BIT_BALLOT_RESULT(i)))\n"
+                               << "    {\n"
+                               << "      tempResult &= ~0x8;\n"
+                               << "    }\n"
+                               << "  }\n";
+                       break;
+       }
+   return bdy.str();
+}
+
+/*!
+ * \brief Registers the shaders for the framebuffer (no-SSBO) variant of a case.
+ *
+ * Exactly one stage - caseDef.shaderStage - embeds the generated test body and
+ * writes tempResult, converted to float, to the out_color varying. The other
+ * stages come from subgroups::set*FrameBuffer helpers.
+ * NOTE(review): those helpers are not visible here; presumably they add
+ * pass-through shaders - confirm in the subgroups test utilities.
+ *
+ * \param programCollection Destination for the GLSL sources.
+ * \param caseDef           Selects the op type and the stage under test.
+ */
+void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       const vk::ShaderBuildOptions    buildOptions    (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+
+       subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+       // A helper-provided vertex shader is only needed when vertex is not the stage under test.
+       if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+               subgroups::setVertexShaderFrameBuffer(programCollection);
+
+       std::string bdyStr = getBodySource(caseDef);
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream                              vertex;
+               vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(location = 0) in highp vec4 in_position;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << bdyStr
+                       << "  out_color = float(tempResult);\n"
+                       << "  gl_Position = in_position;\n"
+                       << "  gl_PointSize = 1.0f;\n"
+                       << "}\n";
+               programCollection.glslSources.add("vert")
+                       << glu::VertexSource(vertex.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream geometry;
+
+               // Points in, one point out per input primitive.
+               geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(points) in;\n"
+                       << "layout(points, max_vertices = 1) out;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << bdyStr
+                       << "  out_color = float(tempResult);\n"
+                       << "  gl_Position = gl_in[0].gl_Position;\n"
+                       << "  EmitVertex();\n"
+                       << "  EndPrimitive();\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("geometry")
+                       << glu::GeometrySource(geometry.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream controlSource;
+
+               // Two-vertex patches; only invocation 0 writes the outer tessellation levels.
+               controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(vertices = 2) out;\n"
+                       << "layout(location = 0) out float out_color[];\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  if (gl_InvocationID == 0)\n"
+                       << "  {\n"
+                       << "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       << "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       << "  }\n"
+                       << bdyStr
+                       << "  out_color[gl_InvocationID ] = float(tempResult);\n"
+                       << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("tesc")
+                       << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+               // The evaluation stage for this pipeline comes from the helper.
+               subgroups::setTesEvalShaderFrameBuffer(programCollection);
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream evaluationSource;
+               evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(isolines, equal_spacing, ccw ) in;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << bdyStr
+                       << "  out_color  = float(tempResult);\n"
+                       << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                       << "}\n";
+
+               // The control stage for this pipeline comes from the helper.
+               subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+               programCollection.glslSources.add("tese")
+                       << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+       }
+       else
+       {
+               DE_FATAL("Unsupported shader stage");
+       }
+}
+
+/*!
+ * \brief Registers the shaders for the SSBO-based compute and all-graphics variants.
+ *
+ * For VK_SHADER_STAGE_COMPUTE_BIT a single "comp" shader is added that stores
+ * tempResult in result[] at the linearised global invocation index. For any
+ * other stage value a full set of vertex, tessellation, geometry and fragment
+ * shaders is added. Each embeds the generated test body; the vertex,
+ * tessellation and geometry shaders write tempResult to their own buffer
+ * binding (0..3) and the fragment shader writes it to its colour output.
+ *
+ * \param programCollection Destination for the GLSL sources.
+ * \param caseDef           Selects the op type and compute versus graphics.
+ */
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       std::string bdyStr = getBodySource(caseDef);
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream src;
+
+               // The local size comes from specialization constants 0..2.
+               src << "#version 450\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+                       "local_size_z_id = 2) in;\n"
+                       << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                       << "{\n"
+                       << "  uint result[];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+                       << "  highp uint offset = globalSize.x * ((globalSize.y * "
+                       "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+                       "gl_GlobalInvocationID.x;\n"
+                       << bdyStr
+                       << "  result[offset] = tempResult;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("comp")
+                               << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+       else
+       {
+               // Vertex: one result per gl_VertexIndex, binding 0; points are laid
+               // out along x in steps of 2/1024 in clip space.
+               const string vertex =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                       "{\n"
+                       "  uint result[];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       + bdyStr +
+                       "  result[gl_VertexIndex] = tempResult;\n"
+                       "  float pixelSize = 2.0f/1024.0f;\n"
+                       "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+                       "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+                       "  gl_PointSize = 1.0f;\n"
+                       "}\n";
+
+               // Tessellation control: one result per gl_PrimitiveID, binding 1.
+               const string tesc =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(vertices=1) out;\n"
+                       "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+                       "{\n"
+                       "  uint result[];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       + bdyStr +
+                       "  result[gl_PrimitiveID] = tempResult;\n"
+                       "  if (gl_InvocationID == 0)\n"
+                       "  {\n"
+                       "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       "  }\n"
+                       "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       "}\n";
+
+               // Tessellation evaluation: two results per primitive (one per isoline
+               // end point, selected by rounding gl_TessCoord.x), binding 2.
+               const string tese =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(isolines) in;\n"
+                       "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+                       "{\n"
+                       "  uint result[];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       + bdyStr +
+                       "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
+                       "  float pixelSize = 2.0f/1024.0f;\n"
+                       "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+                       "}\n";
+
+               // Geometry: one result per gl_PrimitiveIDIn, binding 3.
+               // NOTE(review): ${TOPOLOGY} is presumably substituted by
+               // addGeometryShadersFromTemplate - confirm in the test utilities.
+               const string geometry =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(${TOPOLOGY}) in;\n"
+                       "layout(points, max_vertices = 1) out;\n"
+                       "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+                       "{\n"
+                       "  uint result[];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       + bdyStr +
+                       "  result[gl_PrimitiveIDIn] = tempResult;\n"
+                       "  gl_Position = gl_in[0].gl_Position;\n"
+                       "  EmitVertex();\n"
+                       "  EndPrimitive();\n"
+                       "}\n";
+
+               // Fragment: the result goes to the colour attachment, not to a buffer.
+               const string fragment =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(location = 0) out uint result;\n"
+                       "void main (void)\n"
+                       "{\n"
+                       + bdyStr +
+                       "  result = tempResult;\n"
+                       "}\n";
+
+               subgroups::addNoSubgroupShader(programCollection);
+
+               programCollection.glslSources.add("vert")
+                               << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               programCollection.glslSources.add("tesc")
+                               << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               programCollection.glslSources.add("tese")
+                               << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
+                                                                                                 programCollection.glslSources);
+               programCollection.glslSources.add("fragment")
+                               << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+}
+
+/*!
+ * \brief Pre-test gate shared by every case in this file.
+ *
+ * Throws NotSupportedError when the device lacks subgroup support altogether
+ * or lacks the ballot feature bit. caseDef is accepted only to match the
+ * callback signature and is not read.
+ */
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+       DE_UNREF(caseDef);
+
+       // General subgroup support is tested first, then the ballot feature itself.
+       if (!subgroups::isSubgroupSupported(context))
+       {
+               TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+       }
+       else if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
+       {
+               TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
+       }
+}
+
+/*!
+ * \brief Runs one framebuffer (no-SSBO) case for a single graphics stage.
+ *
+ * If the stage cannot run subgroup operations the case fails when support is
+ * mandatory for that stage and is reported as NotSupported otherwise. Vertex
+ * and geometry stages use their dedicated framebuffer test; both tessellation
+ * stages share the tessellation-evaluation framebuffer test.
+ *
+ * \return Status produced by the selected subgroups::make*FrameBufferTest helper.
+ */
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+       const VkShaderStageFlags stage = caseDef.shaderStage;
+
+       if (!subgroups::areSubgroupOperationsSupportedForStage(context, stage))
+       {
+               if (!subgroups::areSubgroupOperationsRequiredForStage(stage))
+               {
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+               }
+
+               return tcu::TestStatus::fail("Shader stage " + subgroups::getShaderStageName(stage) + " is required to support subgroup operations!");
+       }
+
+       const VkShaderStageFlags tessellationStages = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
+
+       if (stage == VK_SHADER_STAGE_VERTEX_BIT)
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+
+       if (stage == VK_SHADER_STAGE_GEOMETRY_BIT)
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+
+       if (stage & tessellationStages)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+
+       TCU_THROW(InternalError, "Unhandled shader stage");
+}
+
+/*!
+ * \brief Runs one SSBO-based case: either compute or the all-graphics variant.
+ *
+ * Compute: lack of subgroup support for the stage is a failure, never
+ * NotSupported. Graphics: the requested stage mask is intersected with the
+ * stages the device reports for subgroup operations. When vertex-stage SSBO
+ * writes are unavailable the run is reduced to the fragment stage, or is
+ * reported as NotSupported if fragment is not in the mask.
+ *
+ * \return Status produced by subgroups::makeComputeTest or subgroups::allStages.
+ */
+tcu::TestStatus test (Context& context, const CaseDefinition caseDef)
+{
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               if (subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkCompute);
+
+               return tcu::TestStatus::fail("Shader stage " + subgroups::getShaderStageName(caseDef.shaderStage) + " is required to support subgroup operations!");
+       }
+
+       // Graphics path: query which stages the device supports for subgroup operations.
+       VkPhysicalDeviceSubgroupProperties subgroupProps;
+       subgroupProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+       subgroupProps.pNext = DE_NULL;
+
+       VkPhysicalDeviceProperties2 deviceProps;
+       deviceProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+       deviceProps.pNext = &subgroupProps;
+
+       context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &deviceProps);
+
+       VkShaderStageFlagBits usableStages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProps.supportedStages);
+
+       // Anything other than a fragment-only run needs SSBO writes from the vertex pipeline.
+       const bool fragmentOnly = (VK_SHADER_STAGE_FRAGMENT_BIT == usableStages);
+       if (!fragmentOnly && !subgroups::isVertexSSBOSupportedForDevice(context))
+       {
+               if ((usableStages & VK_SHADER_STAGE_FRAGMENT_BIT) != 0)
+               {
+                       usableStages = VK_SHADER_STAGE_FRAGMENT_BIT;
+               }
+               else
+               {
+                       TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+               }
+       }
+
+       if ((VkShaderStageFlagBits)0u == usableStages)
+       {
+               TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+       }
+
+       return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, usableStages);
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+/*!
+ * \brief Builds the "ballot_other" test group.
+ *
+ * For every op type one compute case, one all-graphics case and one
+ * framebuffer case per listed stage (vertex, tess eval, tess control,
+ * geometry) are registered. The three sub-groups are attached to the parent
+ * in the order graphics, compute, framebuffer.
+ *
+ * \param testCtx Test context used to construct every group.
+ * \return The parent group, released from its de::MovePtr to the caller.
+ */
+tcu::TestCaseGroup* createSubgroupsBallotOtherTests(tcu::TestContext& testCtx)
+{
+       de::MovePtr<tcu::TestCaseGroup> graphicsCases(
+               new tcu::TestCaseGroup(testCtx, "graphics", "Subgroup ballot other category tests: graphics"));
+       de::MovePtr<tcu::TestCaseGroup> computeCases(
+               new tcu::TestCaseGroup(testCtx, "compute", "Subgroup ballot other category tests: compute"));
+       de::MovePtr<tcu::TestCaseGroup> framebufferCases(
+               new tcu::TestCaseGroup(testCtx, "framebuffer", "Subgroup ballot other category tests: framebuffer"));
+
+       // Stages that get their own framebuffer (no-SSBO) case.
+       const VkShaderStageFlags framebufferStages[] =
+       {
+               VK_SHADER_STAGE_VERTEX_BIT,
+               VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+               VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+               VK_SHADER_STAGE_GEOMETRY_BIT,
+       };
+
+       for (int opIdx = 0; opIdx < OPTYPE_LAST; ++opIdx)
+       {
+               const string                    caseName                = de::toLower(getOpTypeName(opIdx));
+               const CaseDefinition    computeCase             = {opIdx, VK_SHADER_STAGE_COMPUTE_BIT};
+               const CaseDefinition    graphicsCase    = {opIdx, VK_SHADER_STAGE_ALL_GRAPHICS};
+
+               addFunctionCaseWithPrograms(computeCases.get(), caseName, "", supportedCheck, initPrograms, test, computeCase);
+               addFunctionCaseWithPrograms(graphicsCases.get(), caseName, "", supportedCheck, initPrograms, test, graphicsCase);
+
+               for (int stageIdx = 0; stageIdx < DE_LENGTH_OF_ARRAY(framebufferStages); ++stageIdx)
+               {
+                       const CaseDefinition framebufferCase = {opIdx, framebufferStages[stageIdx]};
+
+                       addFunctionCaseWithPrograms(framebufferCases.get(),
+                                                                               caseName + "_" + getShaderStageName(framebufferCase.shaderStage),
+                                                                               "", supportedCheck, initFrameBufferPrograms, noSSBOtest, framebufferCase);
+               }
+       }
+
+       de::MovePtr<tcu::TestCaseGroup> ballotOtherGroup(
+               new tcu::TestCaseGroup(testCtx, "ballot_other", "Subgroup ballot other category tests"));
+
+       ballotOtherGroup->addChild(graphicsCases.release());
+       ballotOtherGroup->addChild(computeCases.release());
+       ballotOtherGroup->addChild(framebufferCases.release());
+
+       return ballotOtherGroup.release();
+}
+
+} // subgroups
+} // vkt
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsBallotOtherTests.hpp b/external/openglcts/modules/common/subgroups/glcSubgroupsBallotOtherTests.hpp
new file mode 100644 (file)
index 0000000..628e82b
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef _VKTSUBGROUPSBALLOTOTHERTESTS_HPP
+#define _VKTSUBGROUPSBALLOTOTHERTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+/*!
+ * \brief Factory for the "ballot_other" subgroup test group.
+ *
+ * \param testCtx Test context used to construct the group and its children.
+ * \return Raw pointer to the created group; the definition releases it from a
+ *         de::MovePtr, so the caller is expected to take ownership.
+ */
+tcu::TestCaseGroup* createSubgroupsBallotOtherTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSBALLOTOTHERTESTS_HPP
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsBallotTests.cpp b/external/openglcts/modules/common/subgroups/glcSubgroupsBallotTests.cpp
new file mode 100755 (executable)
index 0000000..bf46772
--- /dev/null
@@ -0,0 +1,1032 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsBallotTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+static bool checkVertexPipelineStages(std::vector<const void*> datas, deUint32 width, deUint32 /* unused */)
+{
+       const deUint32 allChecksPassed = 0x7; // 0x1 | 0x2 | 0x4: the bits the framebuffer shaders OR into tempResult
+       return vkt::subgroups::check(datas, width, allChecksPassed);
+}
+
+static bool checkCompute(std::vector<const void*> datas,
+                                                const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                                                deUint32)
+{
+       return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 0x7);
+}
+
+struct CaseDefinition
+{
+       VkShaderStageFlags      shaderStage; //!< Compared against single VK_SHADER_STAGE_*_BIT values to choose which shader source is generated.
+};
+
+void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       const vk::SpirVAsmBuildOptions  buildOptionsSpr (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3);
+       std::ostringstream                              subgroupSizeStr;
+       subgroupSizeStr << subgroups::maxSupportedSubgroupSize();
+
+       subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+       if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+               subgroups::setVertexShaderFrameBuffer(programCollection);
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+       {
+               /*
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(location = 0) in highp vec4 in_position;\n"
+                       "layout(location = 0) out float out_color;\n"
+                       "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       "{\n"
+                       "  uint data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  uint tempResult = 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(true)) ? 0x1 : 0;\n"
+                       "  bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(bData)) ? 0x2 : 0;\n"
+                       "  tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n"
+                       "  out_color = float(tempResult);\n"
+                       "  gl_Position = in_position;\n"
+                       "  gl_PointSize = 1.0f;\n"
+                       "}\n";
+               */
+               const string vertex =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 76\n"
+                       "; Schema: 0\n"
+                       "OpCapability Shader\n"
+                       "OpCapability GroupNonUniform\n"
+                       "OpCapability GroupNonUniformBallot\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint Vertex %4 \"main\" %35 %62 %70 %72\n"
+                       "OpDecorate %30 ArrayStride 16\n"
+                       "OpMemberDecorate %31 0 Offset 0\n"
+                       "OpDecorate %31 Block\n"
+                       "OpDecorate %33 DescriptorSet 0\n"
+                       "OpDecorate %33 Binding 0\n"
+                       "OpDecorate %35 RelaxedPrecision\n"
+                       "OpDecorate %35 BuiltIn SubgroupLocalInvocationId\n"
+                       "OpDecorate %36 RelaxedPrecision\n"
+                       "OpDecorate %62 Location 0\n"
+                       "OpMemberDecorate %68 0 BuiltIn Position\n"
+                       "OpMemberDecorate %68 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %68 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %68 3 BuiltIn CullDistance\n"
+                       "OpDecorate %68 Block\n"
+                       "OpDecorate %72 Location 0\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeInt 32 0\n"
+                       "%7 = OpTypePointer Function %6\n"
+                       "%9 = OpConstant %6 0\n"
+                       "%10 = OpTypeVector %6 4\n"
+                       "%11 = OpConstantComposite %10 %9 %9 %9 %9\n"
+                       "%12 = OpTypeBool\n"
+                       "%13 = OpConstantTrue %12\n"
+                       "%14 = OpConstant %6 3\n"
+                       "%16 = OpTypeVector %12 4\n"
+                       "%20 = OpTypeInt 32 1\n"
+                       "%21 = OpConstant %20 1\n"
+                       "%22 = OpConstant %20 0\n"
+                       "%27 = OpTypePointer Function %12\n"
+                       "%29 = OpConstant %6 " + subgroupSizeStr.str() + "\n"
+                       "%30 = OpTypeArray %6 %29\n"
+                       "%31 = OpTypeStruct %30\n"
+                       "%32 = OpTypePointer Uniform %31\n"
+                       "%33 = OpVariable %32 Uniform\n"
+                       "%34 = OpTypePointer Input %6\n"
+                       "%35 = OpVariable %34 Input\n"
+                       "%37 = OpTypePointer Uniform %6\n"
+                       "%46 = OpConstant %20 2\n"
+                       "%51 = OpConstantFalse %12\n"
+                       "%55 = OpConstant %20 4\n"
+                       "%60 = OpTypeFloat 32\n"
+                       "%61 = OpTypePointer Output %60\n"
+                       "%62 = OpVariable %61 Output\n"
+                       "%65 = OpTypeVector %60 4\n"
+                       "%66 = OpConstant %6 1\n"
+                       "%67 = OpTypeArray %60 %66\n"
+                       "%68 = OpTypeStruct %65 %60 %67 %67\n"
+                       "%69 = OpTypePointer Output %68\n"
+                       "%70 = OpVariable %69 Output\n"
+                       "%71 = OpTypePointer Input %65\n"
+                       "%72 = OpVariable %71 Input\n"
+                       "%74 = OpTypePointer Output %65\n"
+                       "%76 = OpConstant %60 1\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%8 = OpVariable %7 Function\n"
+                       "%28 = OpVariable %27 Function\n"
+                       "OpStore %8 %9\n"
+                       "%15 = OpGroupNonUniformBallot %10 %14 %13\n"
+                       "%17 = OpIEqual %16 %11 %15\n"
+                       "%18 = OpAll %12 %17\n"
+                       "%19 = OpLogicalNot %12 %18\n"
+                       "%23 = OpSelect %20 %19 %21 %22\n"
+                       "%24 = OpBitcast %6 %23\n"
+                       "%25 = OpLoad %6 %8\n"
+                       "%26 = OpBitwiseOr %6 %25 %24\n"
+                       "OpStore %8 %26\n"
+                       "%36 = OpLoad %6 %35\n"
+                       "%38 = OpAccessChain %37 %33 %22 %36\n"
+                       "%39 = OpLoad %6 %38\n"
+                       "%40 = OpINotEqual %12 %39 %9\n"
+                       "OpStore %28 %40\n"
+                       "%41 = OpLoad %12 %28\n"
+                       "%42 = OpGroupNonUniformBallot %10 %14 %41\n"
+                       "%43 = OpIEqual %16 %11 %42\n"
+                       "%44 = OpAll %12 %43\n"
+                       "%45 = OpLogicalNot %12 %44\n"
+                       "%47 = OpSelect %20 %45 %46 %22\n"
+                       "%48 = OpBitcast %6 %47\n"
+                       "%49 = OpLoad %6 %8\n"
+                       "%50 = OpBitwiseOr %6 %49 %48\n"
+                       "OpStore %8 %50\n"
+                       "%52 = OpGroupNonUniformBallot %10 %14 %51\n"
+                       "%53 = OpIEqual %16 %11 %52\n"
+                       "%54 = OpAll %12 %53\n"
+                       "%56 = OpSelect %20 %54 %55 %22\n"
+                       "%57 = OpBitcast %6 %56\n"
+                       "%58 = OpLoad %6 %8\n"
+                       "%59 = OpBitwiseOr %6 %58 %57\n"
+                       "OpStore %8 %59\n"
+                       "%63 = OpLoad %6 %8\n"
+                       "%64 = OpConvertUToF %60 %63\n"
+                       "OpStore %62 %64\n"
+                       "%73 = OpLoad %65 %72\n"
+                       "%75 = OpAccessChain %74 %70 %22\n"
+                       "OpStore %75 %73\n"
+                       "%77 = OpAccessChain %61 %70 %21\n"
+                       "OpStore %77 %76\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("vert") << vertex << buildOptionsSpr;
+       }
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+       {
+               /*
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(points) in;\n"
+                       "layout(points, max_vertices = 1) out;\n"
+                       "layout(location = 0) out float out_color;\n"
+                       "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       "{\n"
+                       "  uint data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  uint tempResult = 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(true)) ? 0x1 : 0;\n"
+                       "  bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(bData)) ? 0x2 : 0;\n"
+                       "  tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n"
+                       "  out_color = float(tempResult);\n"
+                       "  gl_Position = gl_in[0].gl_Position;\n"
+                       "  EmitVertex();\n"
+                       "  EndPrimitive();\n"
+                       "}\n";
+               */
+               const string geometry =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 80\n"
+                       "; Schema: 0\n"
+                       "OpCapability Geometry\n"
+                       "OpCapability GroupNonUniform\n"
+                       "OpCapability GroupNonUniformBallot\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint Geometry %4 \"main\" %35 %62 %70 %74\n"
+                       "OpExecutionMode %4 InputPoints\n"
+                       "OpExecutionMode %4 Invocations 1\n"
+                       "OpExecutionMode %4 OutputPoints\n"
+                       "OpExecutionMode %4 OutputVertices 1\n"
+                       "OpDecorate %30 ArrayStride 16\n"
+                       "OpMemberDecorate %31 0 Offset 0\n"
+                       "OpDecorate %31 Block\n"
+                       "OpDecorate %33 DescriptorSet 0\n"
+                       "OpDecorate %33 Binding 0\n"
+                       "OpDecorate %35 RelaxedPrecision\n"
+                       "OpDecorate %35 BuiltIn SubgroupLocalInvocationId\n"
+                       "OpDecorate %36 RelaxedPrecision\n"
+                       "OpDecorate %62 Location 0\n"
+                       "OpMemberDecorate %68 0 BuiltIn Position\n"
+                       "OpMemberDecorate %68 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %68 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %68 3 BuiltIn CullDistance\n"
+                       "OpDecorate %68 Block\n"
+                       "OpMemberDecorate %71 0 BuiltIn Position\n"
+                       "OpMemberDecorate %71 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %71 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %71 3 BuiltIn CullDistance\n"
+                       "OpDecorate %71 Block\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeInt 32 0\n"
+                       "%7 = OpTypePointer Function %6\n"
+                       "%9 = OpConstant %6 0\n"
+                       "%10 = OpTypeVector %6 4\n"
+                       "%11 = OpConstantComposite %10 %9 %9 %9 %9\n"
+                       "%12 = OpTypeBool\n"
+                       "%13 = OpConstantTrue %12\n"
+                       "%14 = OpConstant %6 3\n"
+                       "%16 = OpTypeVector %12 4\n"
+                       "%20 = OpTypeInt 32 1\n"
+                       "%21 = OpConstant %20 1\n"
+                       "%22 = OpConstant %20 0\n"
+                       "%27 = OpTypePointer Function %12\n"
+                       "%29 = OpConstant %6 " + subgroupSizeStr.str() + "\n"
+                       "%30 = OpTypeArray %6 %29\n"
+                       "%31 = OpTypeStruct %30\n"
+                       "%32 = OpTypePointer Uniform %31\n"
+                       "%33 = OpVariable %32 Uniform\n"
+                       "%34 = OpTypePointer Input %6\n"
+                       "%35 = OpVariable %34 Input\n"
+                       "%37 = OpTypePointer Uniform %6\n"
+                       "%46 = OpConstant %20 2\n"
+                       "%51 = OpConstantFalse %12\n"
+                       "%55 = OpConstant %20 4\n"
+                       "%60 = OpTypeFloat 32\n"
+                       "%61 = OpTypePointer Output %60\n"
+                       "%62 = OpVariable %61 Output\n"
+                       "%65 = OpTypeVector %60 4\n"
+                       "%66 = OpConstant %6 1\n"
+                       "%67 = OpTypeArray %60 %66\n"
+                       "%68 = OpTypeStruct %65 %60 %67 %67\n"
+                       "%69 = OpTypePointer Output %68\n"
+                       "%70 = OpVariable %69 Output\n"
+                       "%71 = OpTypeStruct %65 %60 %67 %67\n"
+                       "%72 = OpTypeArray %71 %66\n"
+                       "%73 = OpTypePointer Input %72\n"
+                       "%74 = OpVariable %73 Input\n"
+                       "%75 = OpTypePointer Input %65\n"
+                       "%78 = OpTypePointer Output %65\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%8 = OpVariable %7 Function\n"
+                       "%28 = OpVariable %27 Function\n"
+                       "OpStore %8 %9\n"
+                       "%15 = OpGroupNonUniformBallot %10 %14 %13\n"
+                       "%17 = OpIEqual %16 %11 %15\n"
+                       "%18 = OpAll %12 %17\n"
+                       "%19 = OpLogicalNot %12 %18\n"
+                       "%23 = OpSelect %20 %19 %21 %22\n"
+                       "%24 = OpBitcast %6 %23\n"
+                       "%25 = OpLoad %6 %8\n"
+                       "%26 = OpBitwiseOr %6 %25 %24\n"
+                       "OpStore %8 %26\n"
+                       "%36 = OpLoad %6 %35\n"
+                       "%38 = OpAccessChain %37 %33 %22 %36\n"
+                       "%39 = OpLoad %6 %38\n"
+                       "%40 = OpINotEqual %12 %39 %9\n"
+                       "OpStore %28 %40\n"
+                       "%41 = OpLoad %12 %28\n"
+                       "%42 = OpGroupNonUniformBallot %10 %14 %41\n"
+                       "%43 = OpIEqual %16 %11 %42\n"
+                       "%44 = OpAll %12 %43\n"
+                       "%45 = OpLogicalNot %12 %44\n"
+                       "%47 = OpSelect %20 %45 %46 %22\n"
+                       "%48 = OpBitcast %6 %47\n"
+                       "%49 = OpLoad %6 %8\n"
+                       "%50 = OpBitwiseOr %6 %49 %48\n"
+                       "OpStore %8 %50\n"
+                       "%52 = OpGroupNonUniformBallot %10 %14 %51\n"
+                       "%53 = OpIEqual %16 %11 %52\n"
+                       "%54 = OpAll %12 %53\n"
+                       "%56 = OpSelect %20 %54 %55 %22\n"
+                       "%57 = OpBitcast %6 %56\n"
+                       "%58 = OpLoad %6 %8\n"
+                       "%59 = OpBitwiseOr %6 %58 %57\n"
+                       "OpStore %8 %59\n"
+                       "%63 = OpLoad %6 %8\n"
+                       "%64 = OpConvertUToF %60 %63\n"
+                       "OpStore %62 %64\n"
+                       "%76 = OpAccessChain %75 %74 %22 %22\n"
+                       "%77 = OpLoad %65 %76\n"
+                       "%79 = OpAccessChain %78 %70 %22\n"
+                       "OpStore %79 %77\n"
+                       "OpEmitVertex\n"
+                       "OpEndPrimitive\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("geometry") << geometry << buildOptionsSpr;
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+       {
+               /*
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(vertices = 2) out;\n"
+                       "layout(location = 0) out float out_color[];\n"
+                       "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       "{\n"
+                       "  uint data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  if (gl_InvocationID == 0)\n"
+                       "  {\n"
+                       "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       "  }\n"
+                       "  uint tempResult = 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(true)) ? 0x1 : 0;\n"
+                       "  bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(bData)) ? 0x2 : 0;\n"
+                       "  tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n"
+                       "  out_color[gl_InvocationID] = float(tempResult);\n"
+                       "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       "}\n";
+               */
+               const string controlSource =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 102\n"
+                       "; Schema: 0\n"
+                       "OpCapability Tessellation\n"
+                       "OpCapability GroupNonUniform\n"
+                       "OpCapability GroupNonUniformBallot\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %50 %78 %89 %95\n"
+                       "OpExecutionMode %4 OutputVertices 2\n"
+                       "OpDecorate %8 BuiltIn InvocationId\n"
+                       "OpDecorate %20 Patch\n"
+                       "OpDecorate %20 BuiltIn TessLevelOuter\n"
+                       "OpDecorate %45 ArrayStride 16\n"
+                       "OpMemberDecorate %46 0 Offset 0\n"
+                       "OpDecorate %46 Block\n"
+                       "OpDecorate %48 DescriptorSet 0\n"
+                       "OpDecorate %48 Binding 0\n"
+                       "OpDecorate %50 RelaxedPrecision\n"
+                       "OpDecorate %50 BuiltIn SubgroupLocalInvocationId\n"
+                       "OpDecorate %51 RelaxedPrecision\n"
+                       "OpDecorate %78 Location 0\n"
+                       "OpMemberDecorate %86 0 BuiltIn Position\n"
+                       "OpMemberDecorate %86 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %86 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %86 3 BuiltIn CullDistance\n"
+                       "OpDecorate %86 Block\n"
+                       "OpMemberDecorate %91 0 BuiltIn Position\n"
+                       "OpMemberDecorate %91 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %91 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %91 3 BuiltIn CullDistance\n"
+                       "OpDecorate %91 Block\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeInt 32 1\n"
+                       "%7 = OpTypePointer Input %6\n"
+                       "%8 = OpVariable %7 Input\n"
+                       "%10 = OpConstant %6 0\n"
+                       "%11 = OpTypeBool\n"
+                       "%15 = OpTypeFloat 32\n"
+                       "%16 = OpTypeInt 32 0\n"
+                       "%17 = OpConstant %16 4\n"
+                       "%18 = OpTypeArray %15 %17\n"
+                       "%19 = OpTypePointer Output %18\n"
+                       "%20 = OpVariable %19 Output\n"
+                       "%21 = OpConstant %15 1\n"
+                       "%22 = OpTypePointer Output %15\n"
+                       "%24 = OpConstant %6 1\n"
+                       "%26 = OpTypePointer Function %16\n"
+                       "%28 = OpConstant %16 0\n"
+                       "%29 = OpTypeVector %16 4\n"
+                       "%30 = OpConstantComposite %29 %28 %28 %28 %28\n"
+                       "%31 = OpConstantTrue %11\n"
+                       "%32 = OpConstant %16 3\n"
+                       "%34 = OpTypeVector %11 4\n"
+                       "%42 = OpTypePointer Function %11\n"
+                       "%44 = OpConstant %16 " + subgroupSizeStr.str() + "\n"
+                       "%45 = OpTypeArray %16 %44\n"
+                       "%46 = OpTypeStruct %45\n"
+                       "%47 = OpTypePointer Uniform %46\n"
+                       "%48 = OpVariable %47 Uniform\n"
+                       "%49 = OpTypePointer Input %16\n"
+                       "%50 = OpVariable %49 Input\n"
+                       "%52 = OpTypePointer Uniform %16\n"
+                       "%61 = OpConstant %6 2\n"
+                       "%66 = OpConstantFalse %11\n"
+                       "%70 = OpConstant %6 4\n"
+                       "%75 = OpConstant %16 2\n"
+                       "%76 = OpTypeArray %15 %75\n"
+                       "%77 = OpTypePointer Output %76\n"
+                       "%78 = OpVariable %77 Output\n"
+                       "%83 = OpTypeVector %15 4\n"
+                       "%84 = OpConstant %16 1\n"
+                       "%85 = OpTypeArray %15 %84\n"
+                       "%86 = OpTypeStruct %83 %15 %85 %85\n"
+                       "%87 = OpTypeArray %86 %75\n"
+                       "%88 = OpTypePointer Output %87\n"
+                       "%89 = OpVariable %88 Output\n"
+                       "%91 = OpTypeStruct %83 %15 %85 %85\n"
+                       "%92 = OpConstant %16 32\n"
+                       "%93 = OpTypeArray %91 %92\n"
+                       "%94 = OpTypePointer Input %93\n"
+                       "%95 = OpVariable %94 Input\n"
+                       "%97 = OpTypePointer Input %83\n"
+                       "%100 = OpTypePointer Output %83\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%27 = OpVariable %26 Function\n"
+                       "%43 = OpVariable %42 Function\n"
+                       "%9 = OpLoad %6 %8\n"
+                       "%12 = OpIEqual %11 %9 %10\n"
+                       "OpSelectionMerge %14 None\n"
+                       "OpBranchConditional %12 %13 %14\n"
+                       "%13 = OpLabel\n"
+                       "%23 = OpAccessChain %22 %20 %10\n"
+                       "OpStore %23 %21\n"
+                       "%25 = OpAccessChain %22 %20 %24\n"
+                       "OpStore %25 %21\n"
+                       "OpBranch %14\n"
+                       "%14 = OpLabel\n"
+                       "OpStore %27 %28\n"
+                       "%33 = OpGroupNonUniformBallot %29 %32 %31\n"
+                       "%35 = OpIEqual %34 %30 %33\n"
+                       "%36 = OpAll %11 %35\n"
+                       "%37 = OpLogicalNot %11 %36\n"
+                       "%38 = OpSelect %6 %37 %24 %10\n"
+                       "%39 = OpBitcast %16 %38\n"
+                       "%40 = OpLoad %16 %27\n"
+                       "%41 = OpBitwiseOr %16 %40 %39\n"
+                       "OpStore %27 %41\n"
+                       "%51 = OpLoad %16 %50\n"
+                       "%53 = OpAccessChain %52 %48 %10 %51\n"
+                       "%54 = OpLoad %16 %53\n"
+                       "%55 = OpINotEqual %11 %54 %28\n"
+                       "OpStore %43 %55\n"
+                       "%56 = OpLoad %11 %43\n"
+                       "%57 = OpGroupNonUniformBallot %29 %32 %56\n"
+                       "%58 = OpIEqual %34 %30 %57\n"
+                       "%59 = OpAll %11 %58\n"
+                       "%60 = OpLogicalNot %11 %59\n"
+                       "%62 = OpSelect %6 %60 %61 %10\n"
+                       "%63 = OpBitcast %16 %62\n"
+                       "%64 = OpLoad %16 %27\n"
+                       "%65 = OpBitwiseOr %16 %64 %63\n"
+                       "OpStore %27 %65\n"
+                       "%67 = OpGroupNonUniformBallot %29 %32 %66\n"
+                       "%68 = OpIEqual %34 %30 %67\n"
+                       "%69 = OpAll %11 %68\n"
+                       "%71 = OpSelect %6 %69 %70 %10\n"
+                       "%72 = OpBitcast %16 %71\n"
+                       "%73 = OpLoad %16 %27\n"
+                       "%74 = OpBitwiseOr %16 %73 %72\n"
+                       "OpStore %27 %74\n"
+                       "%79 = OpLoad %6 %8\n"
+                       "%80 = OpLoad %16 %27\n"
+                       "%81 = OpConvertUToF %15 %80\n"
+                       "%82 = OpAccessChain %22 %78 %79\n"
+                       "OpStore %82 %81\n"
+                       "%90 = OpLoad %6 %8\n"
+                       "%96 = OpLoad %6 %8\n"
+                       "%98 = OpAccessChain %97 %95 %96 %10\n"
+                       "%99 = OpLoad %83 %98\n"
+                       "%101 = OpAccessChain %100 %89 %90 %10\n"
+                       "OpStore %101 %99\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+
+               programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
+               subgroups::setTesEvalShaderFrameBuffer(programCollection);
+
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+       {
+               /*
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(isolines, equal_spacing, ccw ) in;\n"
+                       "layout(location = 0) out float out_color;\n"
+                       "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       "{\n"
+                       "  uint data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  uint tempResult = 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(true)) ? 0x1 : 0;\n"
+                       "  bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(bData)) ? 0x2 : 0;\n"
+                       "  tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n"
+                       "  out_color = float(tempResult);\n"
+                       "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                       "}\n";
+               */
+               const string evaluationSource =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 91\n"
+                       "; Schema: 0\n"
+                       "OpCapability Tessellation\n"
+                       "OpCapability GroupNonUniform\n"
+                       "OpCapability GroupNonUniformBallot\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint TessellationEvaluation %4 \"main\" %35 %62 %70 %75 %83\n"
+                       "OpExecutionMode %4 Isolines\n"
+                       "OpExecutionMode %4 SpacingEqual\n"
+                       "OpExecutionMode %4 VertexOrderCcw\n"
+                       "OpDecorate %30 ArrayStride 16\n"
+                       "OpMemberDecorate %31 0 Offset 0\n"
+                       "OpDecorate %31 Block\n"
+                       "OpDecorate %33 DescriptorSet 0\n"
+                       "OpDecorate %33 Binding 0\n"
+                       "OpDecorate %35 RelaxedPrecision\n"
+                       "OpDecorate %35 BuiltIn SubgroupLocalInvocationId\n"
+                       "OpDecorate %36 RelaxedPrecision\n"
+                       "OpDecorate %62 Location 0\n"
+                       "OpMemberDecorate %68 0 BuiltIn Position\n"
+                       "OpMemberDecorate %68 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %68 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %68 3 BuiltIn CullDistance\n"
+                       "OpDecorate %68 Block\n"
+                       "OpMemberDecorate %71 0 BuiltIn Position\n"
+                       "OpMemberDecorate %71 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %71 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %71 3 BuiltIn CullDistance\n"
+                       "OpDecorate %71 Block\n"
+                       "OpDecorate %83 BuiltIn TessCoord\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeInt 32 0\n"
+                       "%7 = OpTypePointer Function %6\n"
+                       "%9 = OpConstant %6 0\n"
+                       "%10 = OpTypeVector %6 4\n"
+                       "%11 = OpConstantComposite %10 %9 %9 %9 %9\n"
+                       "%12 = OpTypeBool\n"
+                       "%13 = OpConstantTrue %12\n"
+                       "%14 = OpConstant %6 3\n"
+                       "%16 = OpTypeVector %12 4\n"
+                       "%20 = OpTypeInt 32 1\n"
+                       "%21 = OpConstant %20 1\n"
+                       "%22 = OpConstant %20 0\n"
+                       "%27 = OpTypePointer Function %12\n"
+                       "%29 = OpConstant %6 " + subgroupSizeStr.str() + "\n"
+                       "%30 = OpTypeArray %6 %29\n"
+                       "%31 = OpTypeStruct %30\n"
+                       "%32 = OpTypePointer Uniform %31\n"
+                       "%33 = OpVariable %32 Uniform\n"
+                       "%34 = OpTypePointer Input %6\n"
+                       "%35 = OpVariable %34 Input\n"
+                       "%37 = OpTypePointer Uniform %6\n"
+                       "%46 = OpConstant %20 2\n"
+                       "%51 = OpConstantFalse %12\n"
+                       "%55 = OpConstant %20 4\n"
+                       "%60 = OpTypeFloat 32\n"
+                       "%61 = OpTypePointer Output %60\n"
+                       "%62 = OpVariable %61 Output\n"
+                       "%65 = OpTypeVector %60 4\n"
+                       "%66 = OpConstant %6 1\n"
+                       "%67 = OpTypeArray %60 %66\n"
+                       "%68 = OpTypeStruct %65 %60 %67 %67\n"
+                       "%69 = OpTypePointer Output %68\n"
+                       "%70 = OpVariable %69 Output\n"
+                       "%71 = OpTypeStruct %65 %60 %67 %67\n"
+                       "%72 = OpConstant %6 32\n"
+                       "%73 = OpTypeArray %71 %72\n"
+                       "%74 = OpTypePointer Input %73\n"
+                       "%75 = OpVariable %74 Input\n"
+                       "%76 = OpTypePointer Input %65\n"
+                       "%81 = OpTypeVector %60 3\n"
+                       "%82 = OpTypePointer Input %81\n"
+                       "%83 = OpVariable %82 Input\n"
+                       "%84 = OpTypePointer Input %60\n"
+                       "%89 = OpTypePointer Output %65\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%8 = OpVariable %7 Function\n"
+                       "%28 = OpVariable %27 Function\n"
+                       "OpStore %8 %9\n"
+                       "%15 = OpGroupNonUniformBallot %10 %14 %13\n"
+                       "%17 = OpIEqual %16 %11 %15\n"
+                       "%18 = OpAll %12 %17\n"
+                       "%19 = OpLogicalNot %12 %18\n"
+                       "%23 = OpSelect %20 %19 %21 %22\n"
+                       "%24 = OpBitcast %6 %23\n"
+                       "%25 = OpLoad %6 %8\n"
+                       "%26 = OpBitwiseOr %6 %25 %24\n"
+                       "OpStore %8 %26\n"
+                       "%36 = OpLoad %6 %35\n"
+                       "%38 = OpAccessChain %37 %33 %22 %36\n"
+                       "%39 = OpLoad %6 %38\n"
+                       "%40 = OpINotEqual %12 %39 %9\n"
+                       "OpStore %28 %40\n"
+                       "%41 = OpLoad %12 %28\n"
+                       "%42 = OpGroupNonUniformBallot %10 %14 %41\n"
+                       "%43 = OpIEqual %16 %11 %42\n"
+                       "%44 = OpAll %12 %43\n"
+                       "%45 = OpLogicalNot %12 %44\n"
+                       "%47 = OpSelect %20 %45 %46 %22\n"
+                       "%48 = OpBitcast %6 %47\n"
+                       "%49 = OpLoad %6 %8\n"
+                       "%50 = OpBitwiseOr %6 %49 %48\n"
+                       "OpStore %8 %50\n"
+                       "%52 = OpGroupNonUniformBallot %10 %14 %51\n"
+                       "%53 = OpIEqual %16 %11 %52\n"
+                       "%54 = OpAll %12 %53\n"
+                       "%56 = OpSelect %20 %54 %55 %22\n"
+                       "%57 = OpBitcast %6 %56\n"
+                       "%58 = OpLoad %6 %8\n"
+                       "%59 = OpBitwiseOr %6 %58 %57\n"
+                       "OpStore %8 %59\n"
+                       "%63 = OpLoad %6 %8\n"
+                       "%64 = OpConvertUToF %60 %63\n"
+                       "OpStore %62 %64\n"
+                       "%77 = OpAccessChain %76 %75 %22 %22\n"
+                       "%78 = OpLoad %65 %77\n"
+                       "%79 = OpAccessChain %76 %75 %21 %22\n"
+                       "%80 = OpLoad %65 %79\n"
+                       "%85 = OpAccessChain %84 %83 %9\n"
+                       "%86 = OpLoad %60 %85\n"
+                       "%87 = OpCompositeConstruct %65 %86 %86 %86 %86\n"
+                       "%88 = OpExtInst %65 %1 FMix %78 %80 %87\n"
+                       "%90 = OpAccessChain %89 %70 %22\n"
+                       "OpStore %90 %88\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+               programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
+       }
+       else
+       {
+               DE_FATAL("Unsupported shader stage");
+       }
+}
+
+
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream src;
+
+               src << "#version 450\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+                       "local_size_z_id = 2) in;\n"
+                       << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                       << "{\n"
+                       << "  uint result[];\n"
+                       << "};\n"
+                       << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
+                       << "{\n"
+                       << "  uint data[];\n"
+                       << "};\n"
+                       << "\n"
+                       << subgroups::getSharedMemoryBallotHelper()
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+                       << "  highp uint offset = globalSize.x * ((globalSize.y * "
+                       "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+                       "gl_GlobalInvocationID.x;\n"
+                       << "  uint tempResult = 0;\n"
+                       << "  tempResult |= sharedMemoryBallot(true) == subgroupBallot(true) ? 0x1 : 0;\n"
+                       << "  bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+                       << "  tempResult |= sharedMemoryBallot(bData) == subgroupBallot(bData) ? 0x2 : 0;\n"
+                       << "  tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n"
+                       << "  result[offset] = tempResult;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("comp")
+                               << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+       else
+       {
+               const string vertex =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                       "{\n"
+                       "  uint result[];\n"
+                       "};\n"
+                       "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                       "{\n"
+                       "  uint data[];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  uint tempResult = 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(true)) ? 0x1 : 0;\n"
+                       "  bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(bData)) ? 0x2 : 0;\n"
+                       "  tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n"
+                       "  result[gl_VertexIndex] = tempResult;\n"
+                       "  float pixelSize = 2.0f/1024.0f;\n"
+                       "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+                       "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+                       "  gl_PointSize = 1.0f;\n"
+                       "}\n";
+
+               const string tesc =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(vertices=1) out;\n"
+                       "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+                       "{\n"
+                       "  uint result[];\n"
+                       "};\n"
+                       "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                       "{\n"
+                       "  uint data[];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  uint tempResult = 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(true)) ? 0x1 : 0;\n"
+                       "  bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(bData)) ? 0x2 : 0;\n"
+                       "  tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n"
+                       "  result[gl_PrimitiveID] = tempResult;\n"
+                       "  if (gl_InvocationID == 0)\n"
+                       "  {\n"
+                       "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       "  }\n"
+                       "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       "}\n";
+
+               const string tese =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(isolines) in;\n"
+                       "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+                       "{\n"
+                       "  uint result[];\n"
+                       "};\n"
+                       "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                       "{\n"
+                       "  uint data[];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  uint tempResult = 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(true)) ? 0x1 : 0;\n"
+                       "  bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(bData)) ? 0x2 : 0;\n"
+                       "  tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n"
+                       "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
+                       "  float pixelSize = 2.0f/1024.0f;\n"
+                       "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+                       "}\n";
+
+               const string geometry =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(${TOPOLOGY}) in;\n"
+                       "layout(points, max_vertices = 1) out;\n"
+                       "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+                       "{\n"
+                       "  uint result[];\n"
+                       "};\n"
+                       "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                       "{\n"
+                       "  uint data[];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  uint tempResult = 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(true)) ? 0x1 : 0;\n"
+                       "  bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(bData)) ? 0x2 : 0;\n"
+                       "  tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n"
+                       "  result[gl_PrimitiveIDIn] = tempResult;\n"
+                       "  gl_Position = gl_in[0].gl_Position;\n"
+                       "  EmitVertex();\n"
+                       "  EndPrimitive();\n"
+                       "}\n";
+
+               const string fragment =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(location = 0) out uint result;\n"
+                       "layout(set = 0, binding = 4, std430) readonly buffer Buffer1\n"
+                       "{\n"
+                       "  uint data[];\n"
+                       "};\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  uint tempResult = 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(true)) ? 0x1 : 0;\n"
+                       "  bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+                       "  tempResult |= !bool(uvec4(0) == subgroupBallot(bData)) ? 0x2 : 0;\n"
+                       "  tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n"
+                       "  result = tempResult;\n"
+                       "}\n";
+
+               subgroups::addNoSubgroupShader(programCollection);
+
+               programCollection.glslSources.add("vert")
+                               << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               programCollection.glslSources.add("tesc")
+                               << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               programCollection.glslSources.add("tese")
+                               << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
+                                                                                                 programCollection.glslSources);
+               programCollection.glslSources.add("fragment")
+                               << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+}
+
+// Rejects the test case with NotSupportedError unless the device supports
+// subgroup operations in general and the ballot feature in particular.
+// caseDef is accepted for signature compatibility only and is not consulted.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+       DE_UNREF(caseDef);
+
+       const bool hasSubgroups = subgroups::isSubgroupSupported(context);
+       if (!hasSubgroups)
+       {
+               TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+       }
+
+       // Only queried once basic subgroup support has been established.
+       const bool hasBallot = subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT);
+       if (!hasBallot)
+       {
+               TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
+       }
+}
+
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+       if (!subgroups::areSubgroupOperationsSupportedForStage(
+                       context, caseDef.shaderStage))
+       {
+               if (subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+               {
+                       return tcu::TestStatus::fail(
+                                          "Shader stage " +
+                                          subgroups::getShaderStageName(caseDef.shaderStage) +
+                                          " is required to support subgroup operations!");
+               }
+               else
+               {
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+               }
+       }
+
+       subgroups::SSBOData inputData[1];
+       inputData[0].format = VK_FORMAT_R32_UINT;
+       inputData[0].numElements = subgroups::maxSupportedSubgroupSize();
+       inputData[0].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+       else
+               TCU_THROW(InternalError, "Unhandled shader stage");
+}
+
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+               {
+                               return tcu::TestStatus::fail(
+                                                  "Shader stage " +
+                                                  subgroups::getShaderStageName(caseDef.shaderStage) +
+                                                  " is required to support subgroup operations!");
+               }
+               subgroups::SSBOData inputData[1];
+               inputData[0].format = VK_FORMAT_R32_UINT;
+               inputData[0].numElements = subgroups::maxSupportedSubgroupSize();
+               inputData[0].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputData, 1, checkCompute);
+       }
+       else
+       {
+               VkPhysicalDeviceSubgroupProperties subgroupProperties;
+               subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+               subgroupProperties.pNext = DE_NULL;
+
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage  & subgroupProperties.supportedStages);
+
+               if ( VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
+               {
+                       if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
+                               TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+                       else
+                               stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+               }
+
+               if ((VkShaderStageFlagBits)0u == stages)
+                       TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+               subgroups::SSBOData inputData;
+               inputData.format                        = VK_FORMAT_R32_UINT;
+               inputData.numElements           = subgroups::maxSupportedSubgroupSize();
+               inputData.initializeType        = subgroups::SSBOData::InitializeNonZero;
+               inputData.binding                       = 4u;
+               inputData.stages                        = stages;
+
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, stages);
+       }
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+tcu::TestCaseGroup* createSubgroupsBallotTests(tcu::TestContext& testCtx)
+{
+       de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+               testCtx, "graphics", "Subgroup ballot category tests: graphics"));
+       de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+               testCtx, "compute", "Subgroup ballot category tests: compute"));
+       de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+               testCtx, "framebuffer", "Subgroup ballot category tests: framebuffer"));
+
+       const VkShaderStageFlags stages[] =
+       {
+               VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+               VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+               VK_SHADER_STAGE_GEOMETRY_BIT,
+               VK_SHADER_STAGE_VERTEX_BIT
+       };
+
+       {
+               const CaseDefinition caseDef = {VK_SHADER_STAGE_COMPUTE_BIT};
+               addFunctionCaseWithPrograms(computeGroup.get(), getShaderStageName(caseDef.shaderStage), "", supportedCheck, initPrograms, test, caseDef);
+       }
+
+       {
+                       const CaseDefinition caseDef = {VK_SHADER_STAGE_ALL_GRAPHICS};
+                       addFunctionCaseWithPrograms(graphicGroup.get(), "graphic", "", supportedCheck, initPrograms, test, caseDef);
+       }
+
+       for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
+       {
+               const CaseDefinition caseDef = {stages[stageIndex]};
+               addFunctionCaseWithPrograms(framebufferGroup.get(), getShaderStageName(caseDef.shaderStage), "",
+                                       supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+       }
+
+       de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+               testCtx, "ballot", "Subgroup ballot category tests"));
+
+       group->addChild(graphicGroup.release());
+       group->addChild(computeGroup.release());
+       group->addChild(framebufferGroup.release());
+
+       return group.release();
+}
+
+} // subgroups
+} // vkt
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsBallotTests.hpp b/external/openglcts/modules/common/subgroups/glcSubgroupsBallotTests.hpp
new file mode 100644 (file)
index 0000000..9fa927d
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef _GLCSUBGROUPSBALLOTTESTS_HPP
+#define _GLCSUBGROUPSBALLOTTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+// NOTE: the include guard is derived from this file's own name
+// (glcSubgroupsBallotTests.hpp) so that it cannot collide with the guard of
+// the Vulkan header this file was copied from.
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+/*!
+ * \brief Create the "ballot" subgroup test group.
+ *
+ * The group contains the "graphics", "compute" and "framebuffer" sub-groups.
+ *
+ * \param testCtx Test context the new groups are created in.
+ * \return Newly allocated group; ownership passes to the caller.
+ */
+tcu::TestCaseGroup* createSubgroupsBallotTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _GLCSUBGROUPSBALLOTTESTS_HPP
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsBasicTests.cpp b/external/openglcts/modules/common/subgroups/glcSubgroupsBasicTests.cpp
new file mode 100755 (executable)
index 0000000..d39a19e
--- /dev/null
@@ -0,0 +1,2128 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsBasicTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+static const deUint32                  ELECTED_VALUE           = 42u; // result value the subgroupElect checkers count as an elected invocation
+static const deUint32                  UNELECTED_VALUE         = 13u; // result value the subgroupElect checkers accept without counting; any other value fails the check
+static const vk::VkDeviceSize  SHADER_BUFFER_SIZE      = 4096ull; // min(maxUniformBufferRange, maxImageDimension1D)
+
+static bool checkFragmentSubgroupBarriersNoSSBO(std::vector<const void*> datas,
+               deUint32 width, deUint32 height, deUint32)
+{
+       const float* const      resultData      = reinterpret_cast<const float*>(datas[0]);
+
+       for (deUint32 x = 0u; x < width; ++x)
+       {
+               for (deUint32 y = 0u; y < height; ++y)
+               {
+                       const deUint32 ndx = (x * height + y) * 4u;
+                       if (1.0f == resultData[ndx +2])
+                       {
+                               if(resultData[ndx] != resultData[ndx +1])
+                               {
+                                       return false;
+                               }
+                       }
+                       else if (resultData[ndx] != resultData[ndx +3])
+                       {
+                               return false;
+                       }
+               }
+       }
+
+       return true;
+}
+
+static bool checkVertexPipelineStagesSubgroupElectNoSSBO(std::vector<const void*> datas,
+               deUint32 width, deUint32)
+{
+       const float* const      resultData                      = reinterpret_cast<const float*>(datas[0]);
+       float                           poisonValuesFound       = 0.0f;
+       float                           numSubgroupsUsed        = 0.0f;
+
+       for (deUint32 x = 0; x < width; ++x)
+       {
+               deUint32 val = static_cast<deUint32>(resultData[x * 2]);
+               numSubgroupsUsed += resultData[x * 2 + 1];
+
+               switch (val)
+               {
+                       default:
+                               // some garbage value was found!
+                               return false;
+                       case UNELECTED_VALUE:
+                               break;
+                       case ELECTED_VALUE:
+                               poisonValuesFound += 1.0f;
+                               break;
+               }
+       }
+       return numSubgroupsUsed == poisonValuesFound;
+}
+
+static bool checkVertexPipelineStagesSubgroupElect(std::vector<const void*> datas,
+               deUint32 width, deUint32)
+{
+       const deUint32* const resultData =
+               reinterpret_cast<const deUint32*>(datas[0]);
+       deUint32 poisonValuesFound = 0;
+
+       for (deUint32 x = 0; x < width; ++x)
+       {
+               deUint32 val = resultData[x];
+
+               switch (val)
+               {
+                       default:
+                               // some garbage value was found!
+                               return false;
+                       case UNELECTED_VALUE:
+                               break;
+                       case ELECTED_VALUE:
+                               poisonValuesFound++;
+                               break;
+               }
+       }
+
+       // we used an atomicly incremented counter to note how many subgroups we used for the vertex shader
+       const deUint32 numSubgroupsUsed =
+               *reinterpret_cast<const deUint32*>(datas[1]);
+
+       return numSubgroupsUsed == poisonValuesFound;
+}
+
+static bool checkVertexPipelineStagesSubgroupBarriers(std::vector<const void*> datas,
+               deUint32 width, deUint32)
+{
+       const deUint32* const resultData = reinterpret_cast<const deUint32*>(datas[0]);
+
+       // We used this SSBO to generate our unique value!
+       const deUint32 ref = *reinterpret_cast<const deUint32*>(datas[3]);
+
+       for (deUint32 x = 0; x < width; ++x)
+       {
+               deUint32 val = resultData[x];
+
+               if (val != ref)
+                       return false;
+       }
+
+       return true;
+}
+
+static bool checkVertexPipelineStagesSubgroupBarriersNoSSBO(std::vector<const void*> datas,
+               deUint32 width, deUint32)
+{
+       const float* const      resultData      = reinterpret_cast<const float*>(datas[0]);
+
+       for (deUint32 x = 0u; x < width; ++x)
+       {
+               const deUint32 ndx = x*4u;
+               if (1.0f == resultData[ndx +2])
+               {
+                       if(resultData[ndx] != resultData[ndx +1])
+                               return false;
+               }
+               else if (resultData[ndx] != resultData[ndx +3])
+               {
+                       return false;
+               }
+       }
+       return true;
+}
+
+static bool checkTessellationEvaluationSubgroupBarriersNoSSBO(std::vector<const void*> datas,
+               deUint32 width, deUint32)
+{
+       const float* const      resultData      = reinterpret_cast<const float*>(datas[0]);
+
+       for (deUint32 x = 0u; x < width; ++x)
+       {
+               const deUint32 ndx = x*4u;
+               if (0.0f == resultData[ndx +2] && resultData[ndx] != resultData[ndx +3])
+               {
+                       return false;
+               }
+       }
+       return true;
+}
+
+static bool checkComputeSubgroupElect(std::vector<const void*> datas,
+                                                                         const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                                                                         deUint32)
+{
+       return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
+}
+
+// Result checker for the compute subgroup-barrier tests: forwards to
+// vkt::subgroups::checkCompute() using the first uint of datas[2] as the
+// reference value. The trailing unnamed parameter is ignored.
+static bool checkComputeSubgroupBarriers(std::vector<const void*> datas,
+               const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+               deUint32)
+{
+       // We used this SSBO to generate our unique value!
+       const deUint32* const   refBuffer               = reinterpret_cast<const deUint32*>(datas[2]);
+       const deUint32                  expectedValue   = refBuffer[0];
+
+       return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, expectedValue);
+}
+
+enum OpType // Operations covered by the basic subgroup tests; getOpTypeName() maps each to a name.
+{
+       OPTYPE_ELECT = 0, // "subgroupElect"
+       OPTYPE_SUBGROUP_BARRIER, // "subgroupBarrier"
+       OPTYPE_SUBGROUP_MEMORY_BARRIER, // "subgroupMemoryBarrier"
+       OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER, // "subgroupMemoryBarrierBuffer"
+       OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED, // "subgroupMemoryBarrierShared"
+       OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE, // "subgroupMemoryBarrierImage"
+       OPTYPE_LAST // has no name in getOpTypeName(); presumably an end-of-range sentinel - confirm against callers
+};
+
+std::string getOpTypeName(int opType)
+{
+       switch (opType)
+       {
+               default:
+                       DE_FATAL("Unsupported op type");
+                       return "";
+               case OPTYPE_ELECT:
+                       return "subgroupElect";
+               case OPTYPE_SUBGROUP_BARRIER:
+                       return "subgroupBarrier";
+               case OPTYPE_SUBGROUP_MEMORY_BARRIER:
+                       return "subgroupMemoryBarrier";
+               case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
+                       return "subgroupMemoryBarrierBuffer";
+               case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED:
+                       return "subgroupMemoryBarrierShared";
+               case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
+                       return "subgroupMemoryBarrierImage";
+       }
+}
+
+// Parameters that identify one test case variant. The program-building code
+// in this file branches on both fields to choose which shaders to register.
+struct CaseDefinition
+{
+       // Operation under test; compared against OpType enumerators such as
+       // OPTYPE_ELECT (stored as a plain int).
+       int                                     opType;
+       // Shader stage under test; compared against single
+       // VK_SHADER_STAGE_*_BIT values.
+       VkShaderStageFlags      shaderStage;
+};
+
+void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       const vk::ShaderBuildOptions    buildOptions    (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       const vk::SpirVAsmBuildOptions  buildOptionsSpr (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3);
+
+       if(VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
+       {
+               /*
+                       "layout(location = 0) in vec4 in_color;\n"
+                       "layout(location = 0) out vec4 out_color;\n"
+                       "void main()\n"
+                       {\n"
+                       "       out_color = in_color;\n"
+                       "}\n";
+               */
+               const string fragment =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 13\n"
+                       "; Schema: 0\n"
+                       "OpCapability Shader\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint Fragment %4 \"main\" %9 %11\n"
+                       "OpExecutionMode %4 OriginUpperLeft\n"
+                       "OpDecorate %9 Location 0\n"
+                       "OpDecorate %11 Location 0\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeFloat 32\n"
+                       "%7 = OpTypeVector %6 4\n"
+                       "%8 = OpTypePointer Output %7\n"
+                       "%9 = OpVariable %8 Output\n"
+                       "%10 = OpTypePointer Input %7\n"
+                       "%11 = OpVariable %10 Input\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%12 = OpLoad %7 %11\n"
+                       "OpStore %9 %12\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("fragment") << fragment;
+       }
+       if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
+       {
+               /*
+                       "#version 450\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  vec2 uv = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2);\n"
+                       "  gl_Position = vec4(uv * 2.0f + -1.0f, 0.0f, 1.0f);\n"
+                       "  gl_PointSize = 1.0f;\n"
+                       "}\n";
+               */
+               const string vertex =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 44\n"
+                       "; Schema: 0\n"
+                       "OpCapability Shader\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint Vertex %4 \"main\" %12 %29\n"
+                       "OpDecorate %12 BuiltIn VertexIndex\n"
+                       "OpMemberDecorate %27 0 BuiltIn Position\n"
+                       "OpMemberDecorate %27 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %27 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %27 3 BuiltIn CullDistance\n"
+                       "OpDecorate %27 Block\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeFloat 32\n"
+                       "%7 = OpTypeVector %6 2\n"
+                       "%8 = OpTypePointer Function %7\n"
+                       "%10 = OpTypeInt 32 1\n"
+                       "%11 = OpTypePointer Input %10\n"
+                       "%12 = OpVariable %11 Input\n"
+                       "%14 = OpConstant %10 1\n"
+                       "%16 = OpConstant %10 2\n"
+                       "%23 = OpTypeVector %6 4\n"
+                       "%24 = OpTypeInt 32 0\n"
+                       "%25 = OpConstant %24 1\n"
+                       "%26 = OpTypeArray %6 %25\n"
+                       "%27 = OpTypeStruct %23 %6 %26 %26\n"
+                       "%28 = OpTypePointer Output %27\n"
+                       "%29 = OpVariable %28 Output\n"
+                       "%30 = OpConstant %10 0\n"
+                       "%32 = OpConstant %6 2\n"
+                       "%34 = OpConstant %6 -1\n"
+                       "%37 = OpConstant %6 0\n"
+                       "%38 = OpConstant %6 1\n"
+                       "%42 = OpTypePointer Output %23\n"
+                       "%44 = OpTypePointer Output %6\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%9 = OpVariable %8 Function\n"
+                       "%13 = OpLoad %10 %12\n"
+                       "%15 = OpShiftLeftLogical %10 %13 %14\n"
+                       "%17 = OpBitwiseAnd %10 %15 %16\n"
+                       "%18 = OpConvertSToF %6 %17\n"
+                       "%19 = OpLoad %10 %12\n"
+                       "%20 = OpBitwiseAnd %10 %19 %16\n"
+                       "%21 = OpConvertSToF %6 %20\n"
+                       "%22 = OpCompositeConstruct %7 %18 %21\n"
+                       "OpStore %9 %22\n"
+                       "%31 = OpLoad %7 %9\n"
+                       "%33 = OpVectorTimesScalar %7 %31 %32\n"
+                       "%35 = OpCompositeConstruct %7 %34 %34\n"
+                       "%36 = OpFAdd %7 %33 %35\n"
+                       "%39 = OpCompositeExtract %6 %36 0\n"
+                       "%40 = OpCompositeExtract %6 %36 1\n"
+                       "%41 = OpCompositeConstruct %23 %39 %40 %37 %38\n"
+                       "%43 = OpAccessChain %42 %29 %30\n"
+                       "OpStore %43 %41\n"
+                       "%45 = OpAccessChain %44 %29 %14\n"
+                       "OpStore %45 %38\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("vert") << vertex;
+       }
+       else if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+               subgroups::setVertexShaderFrameBuffer(programCollection);
+
+       if (OPTYPE_ELECT == caseDef.opType)
+       {
+               std::ostringstream electedValue ;
+               std::ostringstream unelectedValue;
+               electedValue << ELECTED_VALUE;
+               unelectedValue << UNELECTED_VALUE;
+
+               if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+               {
+                       /*
+                               "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               "layout(location = 0) out vec4 out_color;\n"
+                               "layout(location = 0) in highp vec4 in_position;\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  if (subgroupElect())\n"
+                               "  {\n"
+                               "    out_color.r = " << ELECTED_VALUE << ";\n"
+                               "    out_color.g = 1.0f;\n"
+                               "  }\n"
+                               "  else\n"
+                               "  {\n"
+                               "    out_color.r = " << UNELECTED_VALUE << ";\n"
+                               "    out_color.g = 0.0f;\n"
+                               "  }\n"
+                               "  gl_Position = in_position;\n"
+                               "  gl_PointSize = 1.0f;\n"
+                               "}\n";
+                       */
+                       const string vertex =
+                               "; SPIR-V\n"
+                               "; Version: 1.3\n"
+                               "; Generator: Khronos Glslang Reference Front End; 2\n"
+                               "; Bound: 38\n"
+                               "; Schema: 0\n"
+                               "OpCapability Shader\n"
+                               "OpCapability GroupNonUniform\n"
+                               "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                               "OpMemoryModel Logical GLSL450\n"
+                               "OpEntryPoint Vertex %4 \"main\" %15 %31 %35\n"
+                               "OpDecorate %15 Location 0\n"
+                               "OpMemberDecorate %29 0 BuiltIn Position\n"
+                               "OpMemberDecorate %29 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
+                               "OpDecorate %29 Block\n"
+                               "OpDecorate %35 Location 0\n"
+                               "%2 = OpTypeVoid\n"
+                               "%3 = OpTypeFunction %2\n"
+                               "%6 = OpTypeBool\n"
+                               "%7 = OpTypeInt 32 0\n"
+                               "%8 = OpConstant %7 3\n"
+                               "%12 = OpTypeFloat 32\n"
+                               "%13 = OpTypeVector %12 4\n"
+                               "%14 = OpTypePointer Output %13\n"
+                               "%15 = OpVariable %14 Output\n"
+                               "%16 = OpConstant %12 " + electedValue.str() + "\n"
+                               "%17 = OpConstant %7 0\n"
+                               "%18 = OpTypePointer Output %12\n"
+                               "%20 = OpConstant %12 1\n"
+                               "%21 = OpConstant %7 1\n"
+                               "%24 = OpConstant %12 " + unelectedValue.str() + "\n"
+                               "%26 = OpConstant %12 0\n"
+                               "%28 = OpTypeArray %12 %21\n"
+                               "%29 = OpTypeStruct %13 %12 %28 %28\n"
+                               "%30 = OpTypePointer Output %29\n"
+                               "%31 = OpVariable %30 Output\n"
+                               "%32 = OpTypeInt 32 1\n"
+                               "%33 = OpConstant %32 0\n"
+                               "%34 = OpTypePointer Input %13\n"
+                               "%35 = OpVariable %34 Input\n"
+                               "%38 = OpConstant %32 1\n"
+                               "%4 = OpFunction %2 None %3\n"
+                               "%5 = OpLabel\n"
+                               "%9 = OpGroupNonUniformElect %6 %8\n"
+                               "OpSelectionMerge %11 None\n"
+                               "OpBranchConditional %9 %10 %23\n"
+                               "%10 = OpLabel\n"
+                               "%19 = OpAccessChain %18 %15 %17\n"
+                               "OpStore %19 %16\n"
+                               "%22 = OpAccessChain %18 %15 %21\n"
+                               "OpStore %22 %20\n"
+                               "OpBranch %11\n"
+                               "%23 = OpLabel\n"
+                               "%25 = OpAccessChain %18 %15 %17\n"
+                               "OpStore %25 %24\n"
+                               "%27 = OpAccessChain %18 %15 %21\n"
+                               "OpStore %27 %26\n"
+                               "OpBranch %11\n"
+                               "%11 = OpLabel\n"
+                               "%36 = OpLoad %13 %35\n"
+                               "%37 = OpAccessChain %14 %31 %33\n"
+                               "OpStore %37 %36\n"
+                               "%39 = OpAccessChain %18 %31 %38\n"
+                               "OpStore %39 %20\n"
+                               "OpReturn\n"
+                               "OpFunctionEnd\n";
+                       programCollection.spirvAsmSources.add("vert") << vertex << buildOptionsSpr;
+               }
+               else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+               {
+                       /*
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               "layout(points) in;\n"
+                               "layout(points, max_vertices = 1) out;\n"
+                               "layout(location = 0) out vec4 out_color;\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  if (subgroupElect())\n"
+                               "  {\n"
+                               "    out_color.r = " << ELECTED_VALUE << ";\n"
+                               "    out_color.g = 1.0f;\n"
+                               "  }\n"
+                               "  else\n"
+                               "  {\n"
+                               "    out_color.r = " << UNELECTED_VALUE << ";\n"
+                               "    out_color.g = 0.0f;\n"
+                               "  }\n"
+                               "  gl_Position = gl_in[0].gl_Position;\n"
+                               "  EmitVertex();\n"
+                               "  EndPrimitive();\n"
+                               "}\n";
+                       */
+                       const string geometry =
+                               "; SPIR-V\n"
+                               "; Version: 1.3\n"
+                               "; Generator: Khronos Glslang Reference Front End; 2\n"
+                               "; Bound: 42\n"
+                               "; Schema: 0\n"
+                               "OpCapability Geometry\n"
+                               "OpCapability GroupNonUniform\n"
+                               "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                               "OpMemoryModel Logical GLSL450\n"
+                               "OpEntryPoint Geometry %4 \"main\" %15 %31 %37\n"
+                               "OpExecutionMode %4 InputPoints\n"
+                               "OpExecutionMode %4 Invocations 1\n"
+                               "OpExecutionMode %4 OutputPoints\n"
+                               "OpExecutionMode %4 OutputVertices 1\n"
+                               "OpDecorate %15 Location 0\n"
+                               "OpMemberDecorate %29 0 BuiltIn Position\n"
+                               "OpMemberDecorate %29 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
+                               "OpDecorate %29 Block\n"
+                               "OpMemberDecorate %34 0 BuiltIn Position\n"
+                               "OpMemberDecorate %34 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
+                               "OpDecorate %34 Block\n"
+                               "%2 = OpTypeVoid\n"
+                               "%3 = OpTypeFunction %2\n"
+                               "%6 = OpTypeBool\n"
+                               "%7 = OpTypeInt 32 0\n"
+                               "%8 = OpConstant %7 3\n"
+                               "%12 = OpTypeFloat 32\n"
+                               "%13 = OpTypeVector %12 4\n"
+                               "%14 = OpTypePointer Output %13\n"
+                               "%15 = OpVariable %14 Output\n"
+                               "%16 = OpConstant %12 " + electedValue.str() + "\n"
+                               "%17 = OpConstant %7 0\n"
+                               "%18 = OpTypePointer Output %12\n"
+                               "%20 = OpConstant %12 1\n"
+                               "%21 = OpConstant %7 1\n"
+                               "%24 = OpConstant %12 " + unelectedValue.str() + "\n"
+                               "%26 = OpConstant %12 0\n"
+                               "%28 = OpTypeArray %12 %21\n"
+                               "%29 = OpTypeStruct %13 %12 %28 %28\n"
+                               "%30 = OpTypePointer Output %29\n"
+                               "%31 = OpVariable %30 Output\n"
+                               "%32 = OpTypeInt 32 1\n"
+                               "%33 = OpConstant %32 0\n"
+                               "%34 = OpTypeStruct %13 %12 %28 %28\n"
+                               "%35 = OpTypeArray %34 %21\n"
+                               "%36 = OpTypePointer Input %35\n"
+                               "%37 = OpVariable %36 Input\n"
+                               "%38 = OpTypePointer Input %13\n"
+                               "%4 = OpFunction %2 None %3\n"
+                               "%5 = OpLabel\n"
+                               "%9 = OpGroupNonUniformElect %6 %8\n"
+                               "OpSelectionMerge %11 None\n"
+                               "OpBranchConditional %9 %10 %23\n"
+                               "%10 = OpLabel\n"
+                               "%19 = OpAccessChain %18 %15 %17\n"
+                               "OpStore %19 %16\n"
+                               "%22 = OpAccessChain %18 %15 %21\n"
+                               "OpStore %22 %20\n"
+                               "OpBranch %11\n"
+                               "%23 = OpLabel\n"
+                               "%25 = OpAccessChain %18 %15 %17\n"
+                               "OpStore %25 %24\n"
+                               "%27 = OpAccessChain %18 %15 %21\n"
+                               "OpStore %27 %26\n"
+                               "OpBranch %11\n"
+                               "%11 = OpLabel\n"
+                               "%39 = OpAccessChain %38 %37 %33 %33\n"
+                               "%40 = OpLoad %13 %39\n"
+                               "%41 = OpAccessChain %14 %31 %33\n"
+                               "OpStore %41 %40\n"
+                               "OpEmitVertex\n"
+                               "OpEndPrimitive\n"
+                               "OpReturn\n"
+                               "OpFunctionEnd\n";
+                       programCollection.spirvAsmSources.add("geometry") << geometry << buildOptionsSpr;
+               }
+               else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+               {
+                       /*
+                               << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               << "#extension GL_EXT_tessellation_shader : require\n"
+                               << "layout(vertices = 2) out;\n"
+                               << "void main (void)\n"
+                               << "{\n"
+                               << "  if (gl_InvocationID == 0)\n"
+                               <<"  {\n"
+                               << "    gl_TessLevelOuter[0] = 1.0f;\n"
+                               << "    gl_TessLevelOuter[1] = 1.0f;\n"
+                               << "  }\n"
+                               << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                               << "}\n";
+                       */
+                       const string controlSource =
+                               "; SPIR-V\n"
+                               "; Version: 1.3\n"
+                               "; Generator: Khronos Glslang Reference Front End; 2\n"
+                               "; Bound: 46\n"
+                               "; Schema: 0\n"
+                               "OpCapability Tessellation\n"
+                               "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                               "OpMemoryModel Logical GLSL450\n"
+                               "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
+                               "OpExecutionMode %4 OutputVertices 2\n"
+                               "OpDecorate %8 BuiltIn InvocationId\n"
+                               "OpDecorate %20 Patch\n"
+                               "OpDecorate %20 BuiltIn TessLevelOuter\n"
+                               "OpMemberDecorate %29 0 BuiltIn Position\n"
+                               "OpMemberDecorate %29 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
+                               "OpDecorate %29 Block\n"
+                               "OpMemberDecorate %35 0 BuiltIn Position\n"
+                               "OpMemberDecorate %35 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
+                               "OpDecorate %35 Block\n"
+                               "%2 = OpTypeVoid\n"
+                               "%3 = OpTypeFunction %2\n"
+                               "%6 = OpTypeInt 32 1\n"
+                               "%7 = OpTypePointer Input %6\n"
+                               "%8 = OpVariable %7 Input\n"
+                               "%10 = OpConstant %6 0\n"
+                               "%11 = OpTypeBool\n"
+                               "%15 = OpTypeFloat 32\n"
+                               "%16 = OpTypeInt 32 0\n"
+                               "%17 = OpConstant %16 4\n"
+                               "%18 = OpTypeArray %15 %17\n"
+                               "%19 = OpTypePointer Output %18\n"
+                               "%20 = OpVariable %19 Output\n"
+                               "%21 = OpConstant %15 1\n"
+                               "%22 = OpTypePointer Output %15\n"
+                               "%24 = OpConstant %6 1\n"
+                               "%26 = OpTypeVector %15 4\n"
+                               "%27 = OpConstant %16 1\n"
+                               "%28 = OpTypeArray %15 %27\n"
+                               "%29 = OpTypeStruct %26 %15 %28 %28\n"
+                               "%30 = OpConstant %16 2\n"
+                               "%31 = OpTypeArray %29 %30\n"
+                               "%32 = OpTypePointer Output %31\n"
+                               "%33 = OpVariable %32 Output\n"
+                               "%35 = OpTypeStruct %26 %15 %28 %28\n"
+                               "%36 = OpConstant %16 32\n"
+                               "%37 = OpTypeArray %35 %36\n"
+                               "%38 = OpTypePointer Input %37\n"
+                               "%39 = OpVariable %38 Input\n"
+                               "%41 = OpTypePointer Input %26\n"
+                               "%44 = OpTypePointer Output %26\n"
+                               "%4 = OpFunction %2 None %3\n"
+                               "%5 = OpLabel\n"
+                               "%9 = OpLoad %6 %8\n"
+                               "%12 = OpIEqual %11 %9 %10\n"
+                               "OpSelectionMerge %14 None\n"
+                               "OpBranchConditional %12 %13 %14\n"
+                               "%13 = OpLabel\n"
+                               "%23 = OpAccessChain %22 %20 %10\n"
+                               "OpStore %23 %21\n"
+                               "%25 = OpAccessChain %22 %20 %24\n"
+                               "OpStore %25 %21\n"
+                               "OpBranch %14\n"
+                               "%14 = OpLabel\n"
+                               "%34 = OpLoad %6 %8\n"
+                               "%40 = OpLoad %6 %8\n"
+                               "%42 = OpAccessChain %41 %39 %40 %10\n"
+                               "%43 = OpLoad %26 %42\n"
+                               "%45 = OpAccessChain %44 %33 %34 %10\n"
+                               "OpStore %45 %43\n"
+                               "OpReturn\n"
+                               "OpFunctionEnd\n";
+                       programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
+
+                       /*
+                               "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               "#extension GL_EXT_tessellation_shader : require\n"
+                               "layout(isolines, equal_spacing, ccw ) in;\n"
+                               "layout(location = 0) out vec4 out_color;\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  if (subgroupElect())\n"
+                               "  {\n"
+                               "    out_color.r = " << 2 * ELECTED_VALUE - UNELECTED_VALUE << ";\n"
+                               "    out_color.g = 2.0f;\n"
+                               "  }\n"
+                               "  else\n"
+                               "  {\n"
+                               "    out_color.r = " << UNELECTED_VALUE << ";\n"
+                               "    out_color.g = 0.0f;\n"
+                               "  }\n"
+                               "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                               "}\n";
+                       */
+
+                       const string evaluationSource =
+                               "; SPIR-V\n"
+                               "; Version: 1.3\n"
+                               "; Generator: Khronos Glslang Reference Front End; 2\n"
+                               "; Bound: 54\n"
+                               "; Schema: 0\n"
+                               "OpCapability Tessellation\n"
+                               "OpCapability GroupNonUniform\n"
+                               "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                               "OpMemoryModel Logical GLSL450\n"
+                               "OpEntryPoint TessellationEvaluation %4 \"main\" %15 %31 %38 %47\n"
+                               "OpExecutionMode %4 Isolines\n"
+                               "OpExecutionMode %4 SpacingEqual\n"
+                               "OpExecutionMode %4 VertexOrderCcw\n"
+                               "OpDecorate %15 Location 0\n"
+                               "OpMemberDecorate %29 0 BuiltIn Position\n"
+                               "OpMemberDecorate %29 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
+                               "OpDecorate %29 Block\n"
+                               "OpMemberDecorate %34 0 BuiltIn Position\n"
+                               "OpMemberDecorate %34 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
+                               "OpDecorate %34 Block\n"
+                               "OpDecorate %47 BuiltIn TessCoord\n"
+                               "%2 = OpTypeVoid\n"
+                               "%3 = OpTypeFunction %2\n"
+                               "%6 = OpTypeBool\n"
+                               "%7 = OpTypeInt 32 0\n"
+                               "%8 = OpConstant %7 3\n"
+                               "%12 = OpTypeFloat 32\n"
+                               "%13 = OpTypeVector %12 4\n"
+                               "%14 = OpTypePointer Output %13\n"
+                               "%15 = OpVariable %14 Output\n"
+                               "%16 = OpConstant %12 71\n"//electedValue
+                               "%17 = OpConstant %7 0\n"
+                               "%18 = OpTypePointer Output %12\n"
+                               "%20 = OpConstant %12 2\n"
+                               "%21 = OpConstant %7 1\n"
+                               "%24 = OpConstant %12 " + unelectedValue.str() + "\n"
+                               "%26 = OpConstant %12 0\n"
+                               "%28 = OpTypeArray %12 %21\n"
+                               "%29 = OpTypeStruct %13 %12 %28 %28\n"
+                               "%30 = OpTypePointer Output %29\n"
+                               "%31 = OpVariable %30 Output\n"
+                               "%32 = OpTypeInt 32 1\n"
+                               "%33 = OpConstant %32 0\n"
+                               "%34 = OpTypeStruct %13 %12 %28 %28\n"
+                               "%35 = OpConstant %7 32\n"
+                               "%36 = OpTypeArray %34 %35\n"
+                               "%37 = OpTypePointer Input %36\n"
+                               "%38 = OpVariable %37 Input\n"
+                               "%39 = OpTypePointer Input %13\n"
+                               "%42 = OpConstant %32 1\n"
+                               "%45 = OpTypeVector %12 3\n"
+                               "%46 = OpTypePointer Input %45\n"
+                               "%47 = OpVariable %46 Input\n"
+                               "%48 = OpTypePointer Input %12\n"
+                               "%4 = OpFunction %2 None %3\n"
+                               "%5 = OpLabel\n"
+                               "%9 = OpGroupNonUniformElect %6 %8\n"
+                               "OpSelectionMerge %11 None\n"
+                               "OpBranchConditional %9 %10 %23\n"
+                               "%10 = OpLabel\n"
+                               "%19 = OpAccessChain %18 %15 %17\n"
+                               "OpStore %19 %16\n"
+                               "%22 = OpAccessChain %18 %15 %21\n"
+                               "OpStore %22 %20\n"
+                               "OpBranch %11\n"
+                               "%23 = OpLabel\n"
+                               "%25 = OpAccessChain %18 %15 %17\n"
+                               "OpStore %25 %24\n"
+                               "%27 = OpAccessChain %18 %15 %21\n"
+                               "OpStore %27 %26\n"
+                               "OpBranch %11\n"
+                               "%11 = OpLabel\n"
+                               "%40 = OpAccessChain %39 %38 %33 %33\n"
+                               "%41 = OpLoad %13 %40\n"
+                               "%43 = OpAccessChain %39 %38 %42 %33\n"
+                               "%44 = OpLoad %13 %43\n"
+                               "%49 = OpAccessChain %48 %47 %17\n"
+                               "%50 = OpLoad %12 %49\n"
+                               "%51 = OpCompositeConstruct %13 %50 %50 %50 %50\n"
+                               "%52 = OpExtInst %13 %1 FMix %41 %44 %51\n"
+                               "%53 = OpAccessChain %14 %31 %33\n"
+                               "OpStore %53 %52\n"
+                               "OpReturn\n"
+                               "OpFunctionEnd\n";
+
+                       programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
+               }
+               else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+               {
+                       /*
+                               "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               "#extension GL_EXT_tessellation_shader : require\n"
+                               "layout(vertices = 2) out;\n"
+                               "layout(location = 0) out vec4 out_color[];\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  if (gl_InvocationID == 0)\n"
+                               "  {\n"
+                               "    gl_TessLevelOuter[0] = 1.0f;\n"
+                               "    gl_TessLevelOuter[1] = 1.0f;\n"
+                               "  }\n"
+                               "  if (subgroupElect())\n"
+                               "  {\n"
+                               "    out_color[gl_InvocationID].r = " << ELECTED_VALUE << ";\n"
+                               "    out_color[gl_InvocationID].g = 1.0f;\n"
+                               "  }\n"
+                               "  else\n"
+                               "  {\n"
+                               "    out_color[gl_InvocationID].r = " << UNELECTED_VALUE << ";\n"
+                               "    out_color[gl_InvocationID].g = 0.0f;\n"
+                               "  }\n"
+                               "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                               "}\n";
+                       */
+                       const string  controlSource =
+                               "; SPIR-V\n"
+                               "; Version: 1.3\n"
+                               "; Generator: Khronos Glslang Reference Front End; 2\n"
+                               "; Bound: 66\n"
+                               "; Schema: 0\n"
+                               "OpCapability Tessellation\n"
+                               "OpCapability GroupNonUniform\n"
+                               "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                               "OpMemoryModel Logical GLSL450\n"
+                               "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %34 %53 %59\n"
+                               "OpExecutionMode %4 OutputVertices 2\n"
+                               "OpDecorate %8 BuiltIn InvocationId\n"
+                               "OpDecorate %20 Patch\n"
+                               "OpDecorate %20 BuiltIn TessLevelOuter\n"
+                               "OpDecorate %34 Location 0\n"
+                               "OpMemberDecorate %50 0 BuiltIn Position\n"
+                               "OpMemberDecorate %50 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %50 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %50 3 BuiltIn CullDistance\n"
+                               "OpDecorate %50 Block\n"
+                               "OpMemberDecorate %55 0 BuiltIn Position\n"
+                               "OpMemberDecorate %55 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %55 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %55 3 BuiltIn CullDistance\n"
+                               "OpDecorate %55 Block\n"
+                               "%2 = OpTypeVoid\n"
+                               "%3 = OpTypeFunction %2\n"
+                               "%6 = OpTypeInt 32 1\n"
+                               "%7 = OpTypePointer Input %6\n"
+                               "%8 = OpVariable %7 Input\n"
+                               "%10 = OpConstant %6 0\n"
+                               "%11 = OpTypeBool\n"
+                               "%15 = OpTypeFloat 32\n"
+                               "%16 = OpTypeInt 32 0\n"
+                               "%17 = OpConstant %16 4\n"
+                               "%18 = OpTypeArray %15 %17\n"
+                               "%19 = OpTypePointer Output %18\n"
+                               "%20 = OpVariable %19 Output\n"
+                               "%21 = OpConstant %15 1\n"
+                               "%22 = OpTypePointer Output %15\n"
+                               "%24 = OpConstant %6 1\n"
+                               "%26 = OpConstant %16 3\n"
+                               "%30 = OpTypeVector %15 4\n"
+                               "%31 = OpConstant %16 2\n"
+                               "%32 = OpTypeArray %30 %31\n"
+                               "%33 = OpTypePointer Output %32\n"
+                               "%34 = OpVariable %33 Output\n"
+                               "%36 = OpConstant %15 " + electedValue.str() + "\n"
+                               "%37 = OpConstant %16 0\n"
+                               "%40 = OpConstant %16 1\n"
+                               "%44 = OpConstant %15 " + unelectedValue.str() + "\n"
+                               "%47 = OpConstant %15 0\n"
+                               "%49 = OpTypeArray %15 %40\n"
+                               "%50 = OpTypeStruct %30 %15 %49 %49\n"
+                               "%51 = OpTypeArray %50 %31\n"
+                               "%52 = OpTypePointer Output %51\n"
+                               "%53 = OpVariable %52 Output\n"
+                               "%55 = OpTypeStruct %30 %15 %49 %49\n"
+                               "%56 = OpConstant %16 32\n"
+                               "%57 = OpTypeArray %55 %56\n"
+                               "%58 = OpTypePointer Input %57\n"
+                               "%59 = OpVariable %58 Input\n"
+                               "%61 = OpTypePointer Input %30\n"
+                               "%64 = OpTypePointer Output %30\n"
+                               "%4 = OpFunction %2 None %3\n"
+                               "%5 = OpLabel\n"
+                               "%9 = OpLoad %6 %8\n"
+                               "%12 = OpIEqual %11 %9 %10\n"
+                               "OpSelectionMerge %14 None\n"
+                               "OpBranchConditional %12 %13 %14\n"
+                               "%13 = OpLabel\n"
+                               "%23 = OpAccessChain %22 %20 %10\n"
+                               "OpStore %23 %21\n"
+                               "%25 = OpAccessChain %22 %20 %24\n"
+                               "OpStore %25 %21\n"
+                               "OpBranch %14\n"
+                               "%14 = OpLabel\n"
+                               "%27 = OpGroupNonUniformElect %11 %26\n"
+                               "OpSelectionMerge %29 None\n"
+                               "OpBranchConditional %27 %28 %42\n"
+                               "%28 = OpLabel\n"
+                               "%35 = OpLoad %6 %8\n"
+                               "%38 = OpAccessChain %22 %34 %35 %37\n"
+                               "OpStore %38 %36\n"
+                               "%39 = OpLoad %6 %8\n"
+                               "%41 = OpAccessChain %22 %34 %39 %40\n"
+                               "OpStore %41 %21\n"
+                               "OpBranch %29\n"
+                               "%42 = OpLabel\n"
+                               "%43 = OpLoad %6 %8\n"
+                               "%45 = OpAccessChain %22 %34 %43 %37\n"
+                               "OpStore %45 %44\n"
+                               "%46 = OpLoad %6 %8\n"
+                               "%48 = OpAccessChain %22 %34 %46 %40\n"
+                               "OpStore %48 %47\n"
+                               "OpBranch %29\n"
+                               "%29 = OpLabel\n"
+                               "%54 = OpLoad %6 %8\n"
+                               "%60 = OpLoad %6 %8\n"
+                               "%62 = OpAccessChain %61 %59 %60 %10\n"
+                               "%63 = OpLoad %30 %62\n"
+                               "%65 = OpAccessChain %64 %53 %54 %10\n"
+                               "OpStore %65 %63\n"
+                               "OpReturn\n"
+                               "OpFunctionEnd\n";
+                       programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
+
+                       /*
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "#extension GL_EXT_tessellation_shader : require\n"
+                               "layout(isolines, equal_spacing, ccw ) in;\n"
+                               "layout(location = 0) in vec4 in_color[];\n"
+                               "layout(location = 0) out vec4 out_color;\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                               "  out_color = in_color[0];\n"
+                               "}\n";
+                       */
+
+                       const string evaluationSource =
+                               "; SPIR-V\n"
+                               "; Version: 1.3\n"
+                               "; Generator: Khronos Glslang Reference Front End; 2\n"
+                               "; Bound: 44\n"
+                               "; Schema: 0\n"
+                               "OpCapability Tessellation\n"
+                               "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                               "OpMemoryModel Logical GLSL450\n"
+                               "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %38 %41\n"
+                               "OpExecutionMode %4 Isolines\n"
+                               "OpExecutionMode %4 SpacingEqual\n"
+                               "OpExecutionMode %4 VertexOrderCcw\n"
+                               "OpMemberDecorate %11 0 BuiltIn Position\n"
+                               "OpMemberDecorate %11 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
+                               "OpDecorate %11 Block\n"
+                               "OpMemberDecorate %16 0 BuiltIn Position\n"
+                               "OpMemberDecorate %16 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
+                               "OpDecorate %16 Block\n"
+                               "OpDecorate %29 BuiltIn TessCoord\n"
+                               "OpDecorate %38 Location 0\n"
+                               "OpDecorate %41 Location 0\n"
+                               "%2 = OpTypeVoid\n"
+                               "%3 = OpTypeFunction %2\n"
+                               "%6 = OpTypeFloat 32\n"
+                               "%7 = OpTypeVector %6 4\n"
+                               "%8 = OpTypeInt 32 0\n"
+                               "%9 = OpConstant %8 1\n"
+                               "%10 = OpTypeArray %6 %9\n"
+                               "%11 = OpTypeStruct %7 %6 %10 %10\n"
+                               "%12 = OpTypePointer Output %11\n"
+                               "%13 = OpVariable %12 Output\n"
+                               "%14 = OpTypeInt 32 1\n"
+                               "%15 = OpConstant %14 0\n"
+                               "%16 = OpTypeStruct %7 %6 %10 %10\n"
+                               "%17 = OpConstant %8 32\n"
+                               "%18 = OpTypeArray %16 %17\n"
+                               "%19 = OpTypePointer Input %18\n"
+                               "%20 = OpVariable %19 Input\n"
+                               "%21 = OpTypePointer Input %7\n"
+                               "%24 = OpConstant %14 1\n"
+                               "%27 = OpTypeVector %6 3\n"
+                               "%28 = OpTypePointer Input %27\n"
+                               "%29 = OpVariable %28 Input\n"
+                               "%30 = OpConstant %8 0\n"
+                               "%31 = OpTypePointer Input %6\n"
+                               "%36 = OpTypePointer Output %7\n"
+                               "%38 = OpVariable %36 Output\n"
+                               "%39 = OpTypeArray %7 %17\n"
+                               "%40 = OpTypePointer Input %39\n"
+                               "%41 = OpVariable %40 Input\n"
+                               "%4 = OpFunction %2 None %3\n"
+                               "%5 = OpLabel\n"
+                               "%22 = OpAccessChain %21 %20 %15 %15\n"
+                               "%23 = OpLoad %7 %22\n"
+                               "%25 = OpAccessChain %21 %20 %24 %15\n"
+                               "%26 = OpLoad %7 %25\n"
+                               "%32 = OpAccessChain %31 %29 %30\n"
+                               "%33 = OpLoad %6 %32\n"
+                               "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
+                               "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
+                               "%37 = OpAccessChain %36 %13 %15\n"
+                               "OpStore %37 %35\n"
+                               "%42 = OpAccessChain %21 %41 %15\n"
+                               "%43 = OpLoad %7 %42\n"
+                               "OpStore %38 %43\n"
+                               "OpReturn\n"
+                               "OpFunctionEnd\n";
+                       programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
+               }
+               else
+               {
+                       DE_FATAL("Unsupported shader stage");
+               }
+       }
+       else
+       {
+               std::ostringstream bdy;
+               string color = (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage) ? "out_color[gl_InvocationID].b = 1.0f;\n" : "out_color.b = 1.0f;\n";
+               switch (caseDef.opType)
+               {
+                       default:
+                               DE_FATAL("Unhandled op type!");
+                               break;
+                       case OPTYPE_SUBGROUP_BARRIER:
+                       case OPTYPE_SUBGROUP_MEMORY_BARRIER:
+                       case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
+                       {
+                               bdy << " tempResult2 = tempBuffer[id];\n"
+                                       << "  if (subgroupElect())\n"
+                                       << "  {\n"
+                                       << "    tempResult = value;\n"
+                                       << "    " << color
+                                       << "  }\n"
+                                        << "  else\n"
+                                       << "  {\n"
+                                       << "    tempResult = tempBuffer[id];\n"
+                                       << "  }\n"
+                                       << "  " << getOpTypeName(caseDef.opType) << "();\n";
+                               break;
+                       }
+                       case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
+                               bdy <<"tempResult2 = imageLoad(tempImage, ivec2(id, 0)).x;\n"
+                                       << "  if (subgroupElect())\n"
+                                       << "  {\n"
+                                       << "    tempResult = value;\n"
+                                       << "     " << color
+                                       << "  }\n"
+                                       << "  else\n"
+                                       << "  {\n"
+                                       << "    tempResult = imageLoad(tempImage, ivec2(id, 0)).x;\n"
+                                       << "  }\n"
+                                       << "  subgroupMemoryBarrierImage();\n";
+
+                               break;
+               }
+
+               if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
+               {
+                       std::ostringstream      fragment;
+                       fragment        << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                               << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               << "layout(location = 0) out vec4 out_color;\n"
+                               << "\n"
+                               << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                               << "{\n"
+                               << "  uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
+                               << "};\n"
+                               << "\n"
+                               << "layout(set = 0, binding = 1) uniform Buffer2\n"
+                               << "{\n"
+                               << "  uint value;\n"
+                               << "};\n"
+                               << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
+                               << "void main (void)\n"
+                               << "{\n"
+                               << "  if (gl_HelperInvocation) return;\n"
+                               << "  uint id = 0;\n"
+                               << "  if (subgroupElect())\n"
+                               << "  {\n"
+                               << "    id = uint(gl_FragCoord.x);\n"
+                               << "  }\n"
+                               << "  id = subgroupBroadcastFirst(id);\n"
+                               << "  uint localId = id;\n"
+                               << "  uint tempResult = 0u;\n"
+                               << "  uint tempResult2 = 0u;\n"
+                               << "  out_color.b = 0.0f;\n"
+                               << bdy.str()
+                               << "  out_color.r = float(tempResult);\n"
+                               << "  out_color.g = float(value);\n"
+                               << "  out_color.a = float(tempResult2);\n"
+                               << "}\n";
+                       programCollection.glslSources.add("fragment")
+                               << glu::FragmentSource(fragment.str()) << buildOptions;
+               }
+               else if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+               {
+                       std::ostringstream      vertex;
+                       vertex  << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                               << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               <<"\n"
+                               << "layout(location = 0) out vec4 out_color;\n"
+                               << "layout(location = 0) in highp vec4 in_position;\n"
+                               << "\n"
+                               << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                               << "{\n"
+                               << "  uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
+                               << "};\n"
+                               << "\n"
+                               << "layout(set = 0, binding = 1) uniform Buffer2\n"
+                               << "{\n"
+                               << "  uint value;\n"
+                               << "};\n"
+                               << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
+                               << "void main (void)\n"
+                               << "{\n"
+                               << "  uint id = 0;\n"
+                               << "  if (subgroupElect())\n"
+                               << "  {\n"
+                               << "    id = gl_VertexIndex;\n"
+                               << "  }\n"
+                               << "  id = subgroupBroadcastFirst(id);\n"
+                               << "  uint tempResult = 0u;\n"
+                               << "  uint tempResult2 = 0u;\n"
+                               << "  out_color.b = 0.0f;\n"
+                               << bdy.str()
+                               << "  out_color.r = float(tempResult);\n"
+                               << "  out_color.g = float(value);\n"
+                               << "  out_color.a = float(tempResult2);\n"
+                               << "  gl_Position = in_position;\n"
+                               << "  gl_PointSize = 1.0f;\n"
+                               << "}\n";
+                       programCollection.glslSources.add("vert")
+                               << glu::VertexSource(vertex.str()) << buildOptions;
+               }
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+               {
+                       std::ostringstream geometry;
+
+                       geometry << "#version 450\n"
+                                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                                       << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                                       << "layout(points) in;\n"
+                                       << "layout(points, max_vertices = 1) out;\n"
+                                       << "layout(location = 0) out vec4 out_color;\n"
+                                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                                       << "{\n"
+                                       << "  uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
+                                       << "};\n"
+                                       << "\n"
+                                       << "layout(set = 0, binding = 1) uniform Buffer2\n"
+                                       << "{\n"
+                                       << "  uint value;\n"
+                                       << "};\n"
+                                       << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
+                                       << "void main (void)\n"
+                                       << "{\n"
+                                       << "  uint id = 0;\n"
+                                       << "  if (subgroupElect())\n"
+                                       << "  {\n"
+                                       << "    id = gl_InvocationID;\n"
+                                       << "  }\n"
+                                       << "  id = subgroupBroadcastFirst(id);\n"
+                                       << "  uint tempResult = 0u;\n"
+                                       << "  uint tempResult2 = 0u;\n"
+                                       << "  out_color.b = 0.0f;\n"
+                                       << bdy.str()
+                                       << "  out_color.r = float(tempResult);\n"
+                                       << "  out_color.g = float(value);\n"
+                                       << "  out_color.a = float(tempResult2);\n"
+                                       << "  gl_Position = gl_in[0].gl_Position;\n"
+                                       << "  EmitVertex();\n"
+                                       << "  EndPrimitive();\n"
+                                       << "}\n";
+
+                       programCollection.glslSources.add("geometry")
+                               << glu::GeometrySource(geometry.str()) << buildOptions;
+               }
+               else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+               {
+                       std::ostringstream controlSource;
+                       std::ostringstream evaluationSource;
+
+                       controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                               << "#extension GL_EXT_tessellation_shader : require\n"
+                               << "layout(vertices = 2) out;\n"
+                               << "void main (void)\n"
+                               << "{\n"
+                               << "  if (gl_InvocationID == 0)\n"
+                               <<"  {\n"
+                               << "    gl_TessLevelOuter[0] = 1.0f;\n"
+                               << "    gl_TessLevelOuter[1] = 1.0f;\n"
+                               << "  }\n"
+                               << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                               << "}\n";
+
+                       evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                               << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               << "#extension GL_EXT_tessellation_shader : require\n"
+                               << "layout(isolines, equal_spacing, ccw ) in;\n"
+                               << "layout(location = 0) out vec4 out_color;\n"
+                               << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                               << "{\n"
+                               << "  uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
+                               << "};\n"
+                               << "\n"
+                               << "layout(set = 0, binding = 1) uniform Buffer2\n"
+                               << "{\n"
+                               << "  uint value;\n"
+                               << "};\n"
+                               << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
+                               << "void main (void)\n"
+                               << "{\n"
+                               << "  uint id = 0;\n"
+                               << "  if (subgroupElect())\n"
+                               << "  {\n"
+                               << "    id = gl_PrimitiveID;\n"
+                               << "  }\n"
+                               << "  id = subgroupBroadcastFirst(id);\n"
+                               << "  uint tempResult = 0u;\n"
+                               << "  uint tempResult2 = 0u;\n"
+                               << "  out_color.b = 0.0f;\n"
+                               << bdy.str()
+                               << "  out_color.r = float(tempResult);\n"
+                               << "  out_color.g = float(value);\n"
+                               << "  out_color.a = float(tempResult2);\n"
+                               << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                               << "}\n";
+
+                       programCollection.glslSources.add("tesc")
+                               << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+                       programCollection.glslSources.add("tese")
+                               << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+               }
+               else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+               {
+                       std::ostringstream controlSource;
+                       std::ostringstream evaluationSource;
+
+                       controlSource  << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                               << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               << "#extension GL_EXT_tessellation_shader : require\n"
+                               << "layout(vertices = 2) out;\n"
+                               << "layout(location = 0) out vec4 out_color[];\n"
+                               << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                               << "{\n"
+                               << "  uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
+                               << "};\n"
+                               << "\n"
+                               << "layout(set = 0, binding = 1) uniform Buffer2\n"
+                               << "{\n"
+                               << "  uint value;\n"
+                               << "};\n"
+                               << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
+                               << "void main (void)\n"
+                               << "{\n"
+                               << "  uint id = 0;\n"
+                               << "  if (gl_InvocationID == 0)\n"
+                               <<"  {\n"
+                               << "    gl_TessLevelOuter[0] = 1.0f;\n"
+                               << "    gl_TessLevelOuter[1] = 1.0f;\n"
+                               << "  }\n"
+                               << "  if (subgroupElect())\n"
+                               << "  {\n"
+                               << "    id = gl_InvocationID;\n"
+                               << "  }\n"
+                               << "  id = subgroupBroadcastFirst(id);\n"
+                               << "  uint tempResult = 0u;\n"
+                               << "  uint tempResult2 = 0u;\n"
+                               << "  out_color[gl_InvocationID].b = 0.0f;\n"
+                               << bdy.str()
+                               << "  out_color[gl_InvocationID].r = float(tempResult);\n"
+                               << "  out_color[gl_InvocationID].g = float(value);\n"
+                               << "  out_color[gl_InvocationID].a = float(tempResult2);\n"
+                               << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                               << "}\n";
+
+                       evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                               << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               << "#extension GL_EXT_tessellation_shader : require\n"
+                               << "layout(isolines, equal_spacing, ccw ) in;\n"
+                               << "layout(location = 0) in vec4 in_color[];\n"
+                               << "layout(location = 0) out vec4 out_color;\n"
+                               << "\n"
+                               << "void main (void)\n"
+                               << "{\n"
+                               << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                               << "  out_color = in_color[0];\n"
+                               << "}\n";
+
+                       programCollection.glslSources.add("tesc")
+                               << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+                       programCollection.glslSources.add("tese")
+                               << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+               }
+               else
+               {
+                       DE_FATAL("Unsupported shader stage");
+               }
+       }
+}
+
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef) // Registers the GLSL sources for caseDef.opType: a single comp source for the compute stage, otherwise one source per graphics stage.
+{ // programCollection receives the sources; caseDef supplies opType (which test body) and shaderStage (compute vs. graphics).
+       if (OPTYPE_ELECT == caseDef.opType) // elect tests: each shader records whether subgroupElect() picked the invocation
+       {
+               if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+               {
+                       std::ostringstream src;
+
+                       src << "#version 450\n"
+                               << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+                               "local_size_z_id = 2) in;\n"
+                               << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                               << "{\n"
+                               << "  uint result[];\n"
+                               << "};\n"
+                               << "\n"
+                               << subgroups::getSharedMemoryBallotHelper() // presumably emits the GLSL definition of sharedMemoryBallot() called below - confirm in the subgroups helper
+                               << "void main (void)\n"
+                               << "{\n"
+                               << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+                               << "  highp uint offset = globalSize.x * ((globalSize.y * " // offset: linear index of this invocation across the whole dispatch
+                               "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+                               "gl_GlobalInvocationID.x;\n"
+                               << "  uint value = " << UNELECTED_VALUE << ";\n"
+                               << "  if (subgroupElect())\n"
+                               << "  {\n"
+                               << "    value = " << ELECTED_VALUE << ";\n"
+                               << "  }\n"
+                               << "  uvec4 bits = bitCount(sharedMemoryBallot(value == " << ELECTED_VALUE << "));\n" // per-component count of bits set in the ballot of invocations holding ELECTED_VALUE
+                               << "  result[offset] = bits.x + bits.y + bits.z + bits.w;\n" // every invocation stores the summed bit count
+                               << "}\n";
+
+                       programCollection.glslSources.add("comp")
+                                       << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u); // build options request SPIR-V 1.3
+               }
+               else // graphics: result buffers use bindings 0-3 (vert, tesc, tese, geom), numSubgroupsExecuted counters use bindings 4-8
+               {
+                       {
+                               std::ostringstream  vertex;
+                               vertex  << "#version 450\n"
+                                               << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                                               << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                                               << "{\n"
+                                               << "  uint result[];\n"
+                                               << "};\n"
+                                               << "layout(set = 0, binding = 4, std430) buffer Buffer2\n"
+                                               << "{\n"
+                                               << "  uint numSubgroupsExecuted;\n"
+                                               << "};\n"
+                                               << "\n"
+                                               << "void main (void)\n"
+                                               << "{\n"
+                                               << "  if (subgroupElect())\n"
+                                               << "  {\n"
+                                               << "    result[gl_VertexIndex] = " << ELECTED_VALUE << ";\n" // one result slot per vertex
+                                               << "    atomicAdd(numSubgroupsExecuted, 1);\n" // only the elected invocation bumps the counter
+                                               << "  }\n"
+                                               << "  else\n"
+                                               << "  {\n"
+                                               << "    result[gl_VertexIndex] = " << UNELECTED_VALUE << ";\n"
+                                               << "  }\n"
+                                               << "  float pixelSize = 2.0f/1024.0f;\n"
+                                               << "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+                                               << "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n" // x = -1 + (gl_VertexIndex + 0.5) * 2/1024; presumably the centre of pixel gl_VertexIndex in a 1024-wide target - confirm
+                                               << "  gl_PointSize = 1.0f;\n"
+                                               << "}\n";
+                               programCollection.glslSources.add("vert")
+                                       << glu::VertexSource(vertex.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+                       }
+
+                       {
+                               std::ostringstream tesc;
+                               tesc    << "#version 450\n"
+                                               << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                                               << "layout(vertices=1) out;\n"
+                                               << "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+                                               << "{\n"
+                                               << "  uint result[];\n"
+                                               << "};\n"
+                                               << "layout(set = 0, binding = 5, std430) buffer Buffer2\n"
+                                               << "{\n"
+                                               << "  uint numSubgroupsExecuted;\n"
+                                               << "};\n"
+                                               << "\n"
+                                               << "void main (void)\n"
+                                               << "{\n"
+                                               << "  if (subgroupElect())\n"
+                                               << "  {\n"
+                                               << "    result[gl_PrimitiveID] = " << ELECTED_VALUE << ";\n" // one result slot per patch
+                                               << "    atomicAdd(numSubgroupsExecuted, 1);\n"
+                                               << "  }\n"
+                                               << "  else\n"
+                                               << "  {\n"
+                                               << "    result[gl_PrimitiveID] = " << UNELECTED_VALUE << ";\n"
+                                               << "  }\n"
+                                               << "  if (gl_InvocationID == 0)\n"
+                                               << "  {\n"
+                                               << "    gl_TessLevelOuter[0] = 1.0f;\n"
+                                               << "    gl_TessLevelOuter[1] = 1.0f;\n"
+                                               << "  }\n"
+                                               << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n" // position is passed through unchanged
+                                               << "}\n";
+                               programCollection.glslSources.add("tesc")
+                                       << glu::TessellationControlSource(tesc.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+                       }
+
+                       {
+                               std::ostringstream tese;
+                               tese    << "#version 450\n"
+                                               << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                                               << "layout(isolines) in;\n"
+                                               << "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+                                               << "{\n"
+                                               << "  uint result[];\n"
+                                               << "};\n"
+                                               << "layout(set = 0, binding = 6, std430) buffer Buffer2\n"
+                                               << "{\n"
+                                               << "  uint numSubgroupsExecuted;\n"
+                                               << "};\n"
+                                               << "\n"
+                                               << "void main (void)\n"
+                                               << "{\n"
+                                               << "  if (subgroupElect())\n"
+                                               << "  {\n"
+                                               << "    result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = " << ELECTED_VALUE << ";\n" // two slots per primitive, chosen by gl_TessCoord.x + 0.5 truncated to uint
+                                               << "    atomicAdd(numSubgroupsExecuted, 1);\n"
+                                               << "  }\n"
+                                               << "  else\n"
+                                               << "  {\n"
+                                               << "    result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = " << UNELECTED_VALUE << ";\n"
+                                               << "  }\n"
+                                               << "  float pixelSize = 2.0f/1024.0f;\n"
+                                               << "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+                                               << "}\n";
+                               programCollection.glslSources.add("tese")
+                                       << glu::TessellationEvaluationSource(tese.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+                       }
+
+                       {
+                               std::ostringstream geometry;
+                               geometry        << "#version 450\n"
+                                                       << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                                                       << "layout(${TOPOLOGY}) in;\n" // template placeholder; presumably substituted by addGeometryShadersFromTemplate below - confirm
+                                                       << "layout(points, max_vertices = 1) out;\n"
+                                                       << "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+                                                       << "{\n"
+                                                       << "  uint result[];\n"
+                                                       << "};\n"
+                                                       << "layout(set = 0, binding = 7, std430) buffer Buffer2\n"
+                                                       << "{\n"
+                                                       << "  uint numSubgroupsExecuted;\n"
+                                                       << "};\n"
+                                                       << "\n"
+                                                       << "void main (void)\n"
+                                                       << "{\n"
+                                                       << "  if (subgroupElect())\n"
+                                                       << "  {\n"
+                                                       << "    result[gl_PrimitiveIDIn] = " << ELECTED_VALUE << ";\n" // one result slot per input primitive
+                                                       << "    atomicAdd(numSubgroupsExecuted, 1);\n"
+                                                       << "  }\n"
+                                                       << "  else\n"
+                                                       << "  {\n"
+                                                       << "    result[gl_PrimitiveIDIn] = " << UNELECTED_VALUE << ";\n"
+                                                       << "  }\n"
+                                                       << "  gl_Position = gl_in[0].gl_Position;\n"
+                                                       << "  EmitVertex();\n"
+                                                       << "  EndPrimitive();\n"
+                                                       << "}\n";
+                               subgroups::addGeometryShadersFromTemplate(geometry.str(), vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
+                                                                                                                 programCollection.glslSources);
+                       }
+
+                       {
+                               std::ostringstream fragment;
+                               fragment        << "#version 450\n"
+                                                       << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                                                       << "layout(location = 0) out uint data;\n" // the fragment result goes to colour output 0 rather than a result buffer
+                                                       << "layout(set = 0, binding = 8, std430) buffer Buffer\n"
+                                                       << "{\n"
+                                                       << "  uint numSubgroupsExecuted;\n"
+                                                       << "};\n"
+                                                       << "void main (void)\n"
+                                                       << "{\n"
+                                                       << "  if (gl_HelperInvocation) return;\n" // helper invocations leave before the elect call
+                                                       << "  if (subgroupElect())\n"
+                                                       << "  {\n"
+                                                       << "    data = " << ELECTED_VALUE << ";\n"
+                                                       << "    atomicAdd(numSubgroupsExecuted, 1);\n"
+                                                       << "  }\n"
+                                                       << "  else\n"
+                                                       << "  {\n"
+                                                       << "    data = " << UNELECTED_VALUE << ";\n"
+                                                       << "  }\n"
+                                                       << "}\n";
+                               programCollection.glslSources.add("fragment")
+                                       << glu::FragmentSource(fragment.str())<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+                       }
+                       subgroups::addNoSubgroupShader(programCollection); // helper defined elsewhere; going by its name it registers an extra shader without subgroup ops - confirm
+               }
+       }
+       else // barrier-type ops: bdy holds a GLSL snippet that is spliced into main() of every stage below
+       {
+               std::ostringstream bdy;
+
+               switch (caseDef.opType)
+               {
+                       default:
+                               DE_FATAL("Unhandled op type!"); // bdy stays empty for an unknown op type
+                               break;
+                       case OPTYPE_SUBGROUP_BARRIER:
+                       case OPTYPE_SUBGROUP_MEMORY_BARRIER:
+                       case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER: // these three share the buffer-backed snippet; only the barrier call differs
+                               bdy << "  if (subgroupElect())\n"
+                                       << "  {\n"
+                                       << "    tempBuffer[id] = value;\n" // elected invocation writes
+                                       << "  }\n"
+                                       << "  " << getOpTypeName(caseDef.opType) << "();\n" // barrier function name comes from getOpTypeName
+                                       << "  tempResult = tempBuffer[id];\n"; // every invocation reads the slot back after the barrier
+                               break;
+                       case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED: // same pattern through the tempShared array, indexed by localId
+                               bdy << "  if (subgroupElect())\n"
+                                       << "  {\n"
+                                       << "    tempShared[localId] = value;\n"
+                                       << "  }\n"
+                                       << "  subgroupMemoryBarrierShared();\n"
+                                       << "  tempResult = tempShared[localId];\n";
+                               break;
+                       case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE: // same pattern through tempImage at texel (id, 0)
+                               bdy << "  if (subgroupElect())\n"
+                                       << "  {\n"
+                                       << "    imageStore(tempImage, ivec2(id, 0), ivec4(value));\n"
+                                       << "  }\n"
+                                       << "  subgroupMemoryBarrierImage();\n"
+                                       << "  tempResult = imageLoad(tempImage, ivec2(id, 0)).x;\n";
+                               break;
+               }
+
+               if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+               {
+                       std::ostringstream src;
+
+                       src << "#version 450\n"
+                               << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+                               "local_size_z_id = 2) in;\n"
+                               << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                               << "{\n"
+                               << "  uint result[];\n"
+                               << "};\n"
+                               << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
+                               << "{\n"
+                               << "  uint tempBuffer[];\n"
+                               << "};\n"
+                               << "layout(set = 0, binding = 2, std430) buffer Buffer3\n"
+                               << "{\n"
+                               << "  uint value;\n"
+                               << "};\n"
+                               << "layout(set = 0, binding = 3, r32ui) uniform uimage2D tempImage;\n"
+                               << "shared uint tempShared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n" // one shared slot per invocation of the workgroup
+                               << "\n"
+                               << "void main (void)\n"
+                               << "{\n"
+                               << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+                               << "  highp uint offset = globalSize.x * ((globalSize.y * "
+                               "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+                               "gl_GlobalInvocationID.x;\n"
+                               << "  uint localId = gl_SubgroupID;\n"
+                               << "  uint id = globalSize.x * ((globalSize.y * " // id: gl_WorkGroupID strided by globalSize, plus the subgroup index; used by the snippet to index tempBuffer / tempImage
+                               "gl_WorkGroupID.z) + gl_WorkGroupID.y) + "
+                               "gl_WorkGroupID.x + localId;\n"
+                               << "  uint tempResult = 0;\n"
+                               << bdy.str() // op-specific snippet built by the switch above
+                               << "  result[offset] = tempResult;\n"
+                               << "}\n";
+
+                       programCollection.glslSources.add("comp")
+                                       << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+               else // graphics: result buffers use bindings 0-3; tempBuffer / subgroupID / value / tempImage use 4-7 (vert), 8-11 (tesc), 12-15 (tese), 16-19 (geom), 20-23 (frag)
+               {
+                       {
+                               const string vertex =
+                                       "#version 450\n"
+                                       "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                                       "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                                       "{\n"
+                                       "  uint result[];\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 4, std430) buffer Buffer2\n"
+                                       "{\n"
+                                       "  uint tempBuffer[];\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 5, std430) buffer Buffer3\n"
+                                       "{\n"
+                                       "  uint subgroupID;\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 6, std430) buffer Buffer4\n"
+                                       "{\n"
+                                       "  uint value;\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 7, r32ui) uniform uimage2D tempImage;\n"
+                                       "void main (void)\n"
+                                       "{\n"
+                                       "  uint id = 0;\n"
+                                       "  if (subgroupElect())\n"
+                                       "  {\n"
+                                       "    id = atomicAdd(subgroupID, 1);\n" // elected invocation takes the next value of the shared counter
+                                       "  }\n"
+                                       "  id = subgroupBroadcastFirst(id);\n" // the id is then broadcast to the rest of the subgroup
+                                       "  uint localId = id;\n"
+                                       "  uint tempResult = 0;\n"
+                                       + bdy.str() + // op-specific snippet spliced into main()
+                                       "  result[gl_VertexIndex] = tempResult;\n"
+                                       "  float pixelSize = 2.0f/1024.0f;\n"
+                                       "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+                                       "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+                                       "  gl_PointSize = 1.0f;\n"
+                                       "}\n";
+                               programCollection.glslSources.add("vert")
+                                       << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+                       }
+
+                       {
+                               const string tesc =
+                                       "#version 450\n"
+                                       "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                                       "layout(vertices=1) out;\n"
+                                       "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+                                       "{\n"
+                                       "  uint result[];\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 8, std430) buffer Buffer2\n"
+                                       "{\n"
+                                       "  uint tempBuffer[];\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 9, std430) buffer Buffer3\n"
+                                       "{\n"
+                                       "  uint subgroupID;\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 10, std430) buffer Buffer4\n"
+                                       "{\n"
+                                       "  uint value;\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 11, r32ui) uniform uimage2D tempImage;\n"
+                                       "void main (void)\n"
+                                       "{\n"
+                                       "  uint id = 0;\n"
+                                       "  if (subgroupElect())\n"
+                                       "  {\n"
+                                       "    id = atomicAdd(subgroupID, 1);\n"
+                                       "  }\n"
+                                       "  id = subgroupBroadcastFirst(id);\n"
+                                       "  uint localId = id;\n"
+                                       "  uint tempResult = 0;\n"
+                                       + bdy.str() + // op-specific snippet spliced into main()
+                                       "  result[gl_PrimitiveID] = tempResult;\n"
+                                       "  if (gl_InvocationID == 0)\n"
+                                       "  {\n"
+                                       "    gl_TessLevelOuter[0] = 1.0f;\n"
+                                       "    gl_TessLevelOuter[1] = 1.0f;\n"
+                                       "  }\n"
+                                       "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                                       "}\n";
+                               programCollection.glslSources.add("tesc")
+                                       << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+                       }
+
+                       {
+                               const string tese =
+                                       "#version 450\n"
+                                       "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                                       "layout(isolines) in;\n"
+                                       "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+                                       "{\n"
+                                       "  uint result[];\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 12, std430) buffer Buffer2\n"
+                                       "{\n"
+                                       "  uint tempBuffer[];\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 13, std430) buffer Buffer3\n"
+                                       "{\n"
+                                       "  uint subgroupID;\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 14, std430) buffer Buffer4\n"
+                                       "{\n"
+                                       "  uint value;\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 15, r32ui) uniform uimage2D tempImage;\n"
+                                       "void main (void)\n"
+                                       "{\n"
+                                       "  uint id = 0;\n"
+                                       "  if (subgroupElect())\n"
+                                       "  {\n"
+                                       "    id = atomicAdd(subgroupID, 1);\n"
+                                       "  }\n"
+                                       "  id = subgroupBroadcastFirst(id);\n"
+                                       "  uint localId = id;\n"
+                                       "  uint tempResult = 0;\n"
+                                       + bdy.str() + // op-specific snippet spliced into main()
+                                       "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
+                                       "  float pixelSize = 2.0f/1024.0f;\n""  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n" // two adjacent literals share this line; they concatenate like the others
+                                       "}\n";
+                               programCollection.glslSources.add("tese")
+                                       << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+                       }
+
+                       {
+                               const string geometry =
+                                       "#version 450\n"
+                                       "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                                       "layout(${TOPOLOGY}) in;\n" // template placeholder; presumably substituted by addGeometryShadersFromTemplate below - confirm
+                                       "layout(points, max_vertices = 1) out;\n"
+                                       "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+                                       "{\n"
+                                       "  uint result[];\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 16, std430) buffer Buffer2\n"
+                                       "{\n"
+                                       "  uint tempBuffer[];\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 17, std430) buffer Buffer3\n"
+                                       "{\n"
+                                       "  uint subgroupID;\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 18, std430) buffer Buffer4\n"
+                                       "{\n"
+                                       "  uint value;\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 19, r32ui) uniform uimage2D tempImage;\n"
+                                       "void main (void)\n"
+                                       "{\n"
+                                       "  uint id = 0;\n"
+                                       "  if (subgroupElect())\n"
+                                       "  {\n"
+                                       "    id = atomicAdd(subgroupID, 1);\n"
+                                       "  }\n"
+                                       "  id = subgroupBroadcastFirst(id);\n"
+                                       "  uint localId = id;\n"
+                                       "  uint tempResult = 0;\n"
+                                        + bdy.str() + // op-specific snippet spliced into main()
+                                       "  result[gl_PrimitiveIDIn] = tempResult;\n"
+                                       "  gl_Position = gl_in[0].gl_Position;\n"
+                                       "  EmitVertex();\n"
+                                       "  EndPrimitive();\n"
+                                       "}\n";
+                               subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
+                                                                                                                 programCollection.glslSources);
+                       }
+
+                       {
+                               const string fragment =
+                                       "#version 450\n"
+                                       "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                                       "layout(location = 0) out uint result;\n" // the fragment result goes to colour output 0 rather than a result buffer
+                                       "layout(set = 0, binding = 20, std430) buffer Buffer1\n"
+                                       "{\n"
+                                       "  uint tempBuffer[];\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 21, std430) buffer Buffer2\n"
+                                       "{\n"
+                                       "  uint subgroupID;\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 22, std430) buffer Buffer3\n"
+                                       "{\n"
+                                       "  uint value;\n"
+                                       "};\n"
+                                       "layout(set = 0, binding = 23, r32ui) uniform uimage2D tempImage;\n"
+                                       "void main (void)\n"
+                                       "{\n"
+                                       "  if (gl_HelperInvocation) return;\n" // helper invocations leave before any subgroup operation
+                                       "  uint id = 0;\n"
+                                       "  if (subgroupElect())\n"
+                                       "  {\n"
+                                       "    id = atomicAdd(subgroupID, 1);\n"
+                                       "  }\n"
+                                       "  id = subgroupBroadcastFirst(id);\n"
+                                       "  uint localId = id;\n"
+                                       "  uint tempResult = 0;\n"
+                                       + bdy.str() + // op-specific snippet spliced into main()
+                                       "  result = tempResult;\n"
+                                       "}\n";
+                               programCollection.glslSources.add("fragment")
+                                       << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+                       }
+
+               subgroups::addNoSubgroupShader(programCollection); // helper defined elsewhere; going by its name it registers an extra shader without subgroup ops - confirm
+               }
+       }
+}
+
+// Support pre-check registered with every case created in this file.
+// Throws NotSupportedError when subgroups::isSubgroupSupported() reports
+// false for the given context. caseDef is not inspected.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+       DE_UNREF(caseDef);
+
+       const bool subgroupsAvailable = subgroups::isSubgroupSupported(context);
+       if (subgroupsAvailable)
+               return;
+
+       TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+}
+
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+       if (!subgroups::areSubgroupOperationsSupportedForStage(
+                               context, caseDef.shaderStage))
+       {
+               if (subgroups::areSubgroupOperationsRequiredForStage(
+                                       caseDef.shaderStage))
+               {
+                       return tcu::TestStatus::fail(
+                                          "Shader stage " +
+                                          subgroups::getShaderStageName(caseDef.shaderStage) +
+                                          " is required to support subgroup operations!");
+               }
+               else
+               {
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+               }
+       }
+
+       if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BASIC_BIT))
+       {
+               return tcu::TestStatus::fail(
+                                  "Subgroup feature " +
+                                  subgroups::getSubgroupFeatureName(VK_SUBGROUP_FEATURE_BASIC_BIT) +
+                                  " is a required capability!");
+       }
+
+       if (OPTYPE_ELECT != caseDef.opType && VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
+       {
+               if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
+               {
+                       TCU_THROW(NotSupportedError, "Subgroup basic operation non-compute stage test required that ballot operations are supported!");
+               }
+       }
+
+       const deUint32                                          inputDatasCount = OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? 3u : 2u;
+       std::vector<subgroups::SSBOData>        inputDatas              (inputDatasCount);
+
+       inputDatas[0].format = VK_FORMAT_R32_UINT;
+       inputDatas[0].numElements = SHADER_BUFFER_SIZE/4ull;
+       inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+       inputDatas[1].format = VK_FORMAT_R32_UINT;
+       inputDatas[1].numElements = 1ull;
+       inputDatas[1].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+       if(OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType )
+       {
+               inputDatas[2].format = VK_FORMAT_R32_UINT;
+               inputDatas[2].numElements = SHADER_BUFFER_SIZE;
+               inputDatas[2].initializeType = subgroups::SSBOData::InitializeNone;
+               inputDatas[2].isImage = true;
+       }
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+       {
+               if (OPTYPE_ELECT == caseDef.opType)
+                       return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO);
+               else
+                       return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, checkVertexPipelineStagesSubgroupBarriersNoSSBO);
+       }
+       else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
+       {
+               return subgroups::makeFragmentFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, checkFragmentSubgroupBarriersNoSSBO);
+       }
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+       {
+               if (OPTYPE_ELECT == caseDef.opType)
+                       return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO);
+               else
+                       return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, checkVertexPipelineStagesSubgroupBarriersNoSSBO);
+       }
+
+       if (OPTYPE_ELECT == caseDef.opType)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO, caseDef.shaderStage);
+
+       return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount,
+               (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)? checkVertexPipelineStagesSubgroupBarriersNoSSBO : checkTessellationEvaluationSubgroupBarriersNoSSBO,
+               caseDef.shaderStage);
+}
+
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+       if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BASIC_BIT))
+       {
+               return tcu::TestStatus::fail(
+                                       "Subgroup feature " +
+                                       subgroups::getSubgroupFeatureName(VK_SUBGROUP_FEATURE_BASIC_BIT) +
+                                       " is a required capability!");
+       }
+
+       if (OPTYPE_ELECT != caseDef.opType && VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
+       {
+               if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
+               {
+                       TCU_THROW(NotSupportedError, "Subgroup basic operation non-compute stage test required that ballot operations are supported!");
+               }
+       }
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+               {
+                               return tcu::TestStatus::fail("Shader stage " +
+                                                                               subgroups::getShaderStageName(caseDef.shaderStage) +
+                                                                               " is required to support subgroup operations!");
+               }
+
+               if (OPTYPE_ELECT == caseDef.opType)
+               {
+                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkComputeSubgroupElect);
+               }
+               else
+               {
+                       const deUint32 inputDatasCount = 3;
+                       subgroups::SSBOData inputDatas[inputDatasCount];
+                       inputDatas[0].format = VK_FORMAT_R32_UINT;
+                       inputDatas[0].numElements = SHADER_BUFFER_SIZE;
+                       inputDatas[0].initializeType = subgroups::SSBOData::InitializeNone;
+
+                       inputDatas[1].format = VK_FORMAT_R32_UINT;
+                       inputDatas[1].numElements = 1;
+                       inputDatas[1].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+                       inputDatas[2].format = VK_FORMAT_R32_UINT;
+                       inputDatas[2].numElements = SHADER_BUFFER_SIZE;
+                       inputDatas[2].initializeType = subgroups::SSBOData::InitializeNone;
+                       inputDatas[2].isImage = true;
+
+                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, checkComputeSubgroupBarriers);
+               }
+       }
+       else
+       {
+               if (!subgroups::isFragmentSSBOSupportedForDevice(context))
+               {
+                       TCU_THROW(NotSupportedError, "Subgroup basic operation require that the fragment stage be able to write to SSBOs!");
+               }
+
+               VkPhysicalDeviceSubgroupProperties subgroupProperties;
+               subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+               subgroupProperties.pNext = DE_NULL;
+
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage  & subgroupProperties.supportedStages);
+
+               if ( VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
+               {
+                       if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
+                               TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+                       else
+                               stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+               }
+
+               if ((VkShaderStageFlagBits)0u == stages)
+                       TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+               if (OPTYPE_ELECT == caseDef.opType)
+               {
+                       const deUint32 inputCount = 5u;
+                       subgroups::SSBOData inputData[inputCount];
+
+                       inputData[0].format                     = VK_FORMAT_R32_UINT;
+                       inputData[0].numElements        = 1;
+                       inputData[0].initializeType     = subgroups::SSBOData::InitializeZero;
+                       inputData[0].binding            = 4u;
+                       inputData[0].stages                     = VK_SHADER_STAGE_VERTEX_BIT;
+
+                       inputData[1].format                     = VK_FORMAT_R32_UINT;
+                       inputData[1].numElements        = 1;
+                       inputData[1].initializeType     = subgroups::SSBOData::InitializeZero;
+                       inputData[1].binding            = 5u;
+                       inputData[1].stages                     = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
+
+                       inputData[2].format                     = VK_FORMAT_R32_UINT;
+                       inputData[2].numElements        = 1;
+                       inputData[2].initializeType     = subgroups::SSBOData::InitializeZero;
+                       inputData[2].binding            = 6u;
+                       inputData[2].stages                     = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
+
+                       inputData[3].format                     = VK_FORMAT_R32_UINT;
+                       inputData[3].numElements        = 1;
+                       inputData[3].initializeType     = subgroups::SSBOData::InitializeZero;
+                       inputData[3].binding            = 7u;
+                       inputData[3].stages                     = VK_SHADER_STAGE_GEOMETRY_BIT;
+
+                       inputData[4].format                     = VK_FORMAT_R32_UINT;
+                       inputData[4].numElements        = 1;
+                       inputData[4].initializeType     = subgroups::SSBOData::InitializeZero;
+                       inputData[4].binding            = 8u;
+                       inputData[4].stages                     = VK_SHADER_STAGE_FRAGMENT_BIT;
+
+                       return subgroups::allStages(context, VK_FORMAT_R32_UINT, inputData, inputCount, checkVertexPipelineStagesSubgroupElect, stages);
+               }
+               else
+               {
+                       const VkShaderStageFlagBits stagesBits[] =
+                       {
+                               VK_SHADER_STAGE_VERTEX_BIT,
+                               VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+                               VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+                               VK_SHADER_STAGE_GEOMETRY_BIT,
+                               VK_SHADER_STAGE_FRAGMENT_BIT,
+                       };
+
+                       const deUint32 inputDatasCount = DE_LENGTH_OF_ARRAY(stagesBits) * 4u;
+                       subgroups::SSBOData inputDatas[inputDatasCount];
+
+                       for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(stagesBits); ++ndx)
+                       {
+                               const deUint32 index = ndx*4;
+                               inputDatas[index].format                                = VK_FORMAT_R32_UINT;
+                               inputDatas[index].numElements                   = SHADER_BUFFER_SIZE;
+                               inputDatas[index].initializeType                = subgroups::SSBOData::InitializeNonZero;
+                               inputDatas[index].binding                               = index + 4u;
+                               inputDatas[index].stages                                = stagesBits[ndx];
+
+                               inputDatas[index + 1].format                    = VK_FORMAT_R32_UINT;
+                               inputDatas[index + 1].numElements               = 1;
+                               inputDatas[index + 1].initializeType    = subgroups::SSBOData::InitializeZero;
+                               inputDatas[index + 1].binding                   = index + 5u;
+                               inputDatas[index + 1].stages                    = stagesBits[ndx];
+
+                               inputDatas[index + 2].format                    = VK_FORMAT_R32_UINT;
+                               inputDatas[index + 2].numElements               = 1;
+                               inputDatas[index + 2].initializeType    = subgroups::SSBOData::InitializeNonZero;
+                               inputDatas[index + 2].binding                   = index + 6u;
+                               inputDatas[index + 2].stages                    = stagesBits[ndx];
+
+                               inputDatas[index + 3].format                    = VK_FORMAT_R32_UINT;
+                               inputDatas[index + 3].numElements               = SHADER_BUFFER_SIZE;
+                               inputDatas[index + 3].initializeType    = subgroups::SSBOData::InitializeNone;
+                               inputDatas[index + 3].isImage                   = true;
+                               inputDatas[index + 3].binding                   = index + 7u;
+                               inputDatas[index + 3].stages                    = stagesBits[ndx];
+                       }
+
+                       return subgroups::allStages(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, checkVertexPipelineStagesSubgroupBarriers, stages);
+               }
+       }
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+tcu::TestCaseGroup* createSubgroupsBasicTests(tcu::TestContext& testCtx)
+{
+       de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+               testCtx, "graphics", "Subgroup basic category tests: graphics"));
+       de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+               testCtx, "compute", "Subgroup basic category tests: compute"));
+       de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+               testCtx, "framebuffer", "Subgroup basic category tests: framebuffer"));
+
+
+       const VkShaderStageFlags stages[] =
+       {
+               VK_SHADER_STAGE_FRAGMENT_BIT,
+               VK_SHADER_STAGE_VERTEX_BIT,
+               VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+               VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+               VK_SHADER_STAGE_GEOMETRY_BIT,
+       };
+
+       for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
+       {
+               const std::string op = de::toLower(getOpTypeName(opTypeIndex));
+
+               {
+                       const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT};
+                       addFunctionCaseWithPrograms(computeGroup.get(), op, "",
+                                                                               supportedCheck, initPrograms, test, caseDef);
+               }
+
+               if (OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED == opTypeIndex)
+               {
+                       // Shared isn't available in non compute shaders.
+                       continue;
+               }
+
+               {
+                       const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS};
+                       addFunctionCaseWithPrograms(graphicGroup.get(),
+                                                                               op, "",
+                                                                               supportedCheck, initPrograms, test, caseDef);
+               }
+
+               if (OPTYPE_ELECT == opTypeIndex)
+               {
+                       for (int stageIndex = 1; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
+                       {
+                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex]};
+                               addFunctionCaseWithPrograms(framebufferGroup.get(),
+                                                       op + "_" + getShaderStageName(caseDef.shaderStage), "",
+                                                       supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+                       }
+               }
+               else
+               {
+                       for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
+                       {
+                               const CaseDefinition caseDefFrag = {opTypeIndex, stages[stageIndex]};
+                               addFunctionCaseWithPrograms(framebufferGroup.get(),
+                                                       op + "_" + getShaderStageName(caseDefFrag.shaderStage), "",
+                                                       supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDefFrag);
+                       }
+               }
+
+       }
+
+       de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+               testCtx, "basic", "Subgroup basic category tests"));
+
+       group->addChild(graphicGroup.release());
+       group->addChild(computeGroup.release());
+       group->addChild(framebufferGroup.release());
+
+       return group.release();
+}
+
+} // subgroups
+} // vkt
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsBasicTests.hpp b/external/openglcts/modules/common/subgroups/glcSubgroupsBasicTests.hpp
new file mode 100644 (file)
index 0000000..5e3639a
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef _GLCSUBGROUPSBASICTESTS_HPP
+#define _GLCSUBGROUPSBASICTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+// NOTE: the include guard is named after this file (glcSubgroupsBasicTests.hpp).
+// The Vulkan header this file was copied from uses the _VKT... guard; sharing
+// it would make whichever header is included second expand to nothing.
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Creates the "basic" subgroup test group. The definition returns a pointer
+// released from a de::MovePtr, so the caller takes ownership of the group.
+tcu::TestCaseGroup* createSubgroupsBasicTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _GLCSUBGROUPSBASICTESTS_HPP
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsBuiltinMaskVarTests.cpp b/external/openglcts/modules/common/subgroups/glcSubgroupsBuiltinMaskVarTests.cpp
new file mode 100755 (executable)
index 0000000..9db45d0
--- /dev/null
@@ -0,0 +1,1486 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsBuiltinMaskVarTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Result checker for the vertex-pipeline stages: forwards the captured data
+// and width to check() with a fixed third argument of 1. The unnamed
+// trailing parameter is ignored.
+static bool checkVertexPipelineStages(std::vector<const void*> datas, deUint32 width, deUint32)
+{
+       const bool passed = check(datas, width, 1);
+       return passed;
+}
+
+static bool checkComputeStage(std::vector<const void*> datas,
+                                                const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                                                deUint32)
+{
+       return checkCompute(datas, numWorkgroups, localSize, 1);
+}
+
+namespace
+{
+struct CaseDefinition // Parameters of one builtin mask variable case; member order is relied on by aggregate initialisation.
+{
+       std::string                     varName;        //!< Name of the GLSL builtin under test, e.g. "gl_SubgroupEqMask"; also written verbatim into generated shader source.
+       VkShaderStageFlags      shaderStage;    //!< Shader stage the case targets; compared against VK_SHADER_STAGE_* bits.
+};
+}
+
+std::string subgroupComparison (const CaseDefinition& caseDef)
+{
+       if ("gl_SubgroupEqMask" == caseDef.varName)
+       {
+               if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+                       return "%56 = OpIEqual %11 %53 %55\n";
+               else
+                       return "%38 = OpIEqual %16 %35 %37\n";
+       }
+       else if ("gl_SubgroupGeMask" == caseDef.varName)
+       {
+               if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+                       return "%56 = OpUGreaterThanEqual %11 %53 %55\n";
+               else
+                       return "%38 = OpUGreaterThanEqual %16 %35 %37\n";
+       }
+       else if ("gl_SubgroupGtMask" == caseDef.varName)
+       {
+               if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+                       return "%56 = OpUGreaterThan %11 %53 %55\n";
+               else
+                       return "%38 = OpUGreaterThan %16 %35 %37\n";
+       }
+       else if ("gl_SubgroupLeMask" == caseDef.varName)
+       {
+               if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+                       return "%56 = OpULessThanEqual %11 %53 %55\n";
+               else
+                       return "%38 = OpULessThanEqual %16 %35 %37\n";
+       }
+       else if ("gl_SubgroupLtMask" == caseDef.varName)
+       {
+               if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+                       return "%56 = OpULessThan %11 %53 %55\n";
+               else
+                       return "%38 = OpULessThan %16 %35 %37\n";
+       }
+       return "";
+}
+
+std::string varSubgroupMask (const CaseDefinition& caseDef)
+{
+       if ("gl_SubgroupEqMask" == caseDef.varName)
+       {
+               if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+                       return "OpDecorate %40 BuiltIn SubgroupEqMask\n";
+               else
+                       return "OpDecorate %22 BuiltIn SubgroupEqMask\n";
+       }
+       else if ("gl_SubgroupGeMask" == caseDef.varName)
+       {
+               if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+                       return "OpDecorate %40 BuiltIn SubgroupGeMask\n";
+               else
+                       return "OpDecorate %22 BuiltIn SubgroupGeMask\n";
+       }
+       else if ("gl_SubgroupGtMask" == caseDef.varName)
+       {
+               if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+                       return "OpDecorate %40 BuiltIn SubgroupGtMask\n";
+               else
+                       return "OpDecorate %22 BuiltIn SubgroupGtMask\n";
+       }
+       else if ("gl_SubgroupLeMask" == caseDef.varName)
+       {
+               if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+                       return "OpDecorate %40 BuiltIn SubgroupLeMask\n";
+               else
+                       return "OpDecorate %22 BuiltIn SubgroupLeMask\n";
+       }
+       else if ("gl_SubgroupLtMask" == caseDef.varName)
+       {
+               if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+                       return "OpDecorate %40 BuiltIn SubgroupLtMask\n";
+               else
+                       return "OpDecorate %22 BuiltIn SubgroupLtMask\n";
+       }
+       return "";
+}
+
+std::string subgroupMask (const CaseDefinition& caseDef)
+{
+       std::ostringstream bdy;
+
+       bdy << "  uint tempResult = 0x1;\n"
+               << "  uint bit        = 0x1;\n"
+               << "  uint bitCount   = 0x0;\n"
+               << "  uvec4 mask = subgroupBallot(true);\n"
+               << "  const uvec4 var = " << caseDef.varName << ";\n"
+               << "  for (uint i = 0; i < gl_SubgroupSize; i++)\n"
+               << "  {\n";
+
+       if ("gl_SubgroupEqMask" == caseDef.varName)
+       {
+               bdy << "    if ((i == gl_SubgroupInvocationID) ^^ subgroupBallotBitExtract(var, i))\n"
+                       << "    {\n"
+                       << "      tempResult = 0;\n"
+                       << "    }\n";
+       }
+       else if ("gl_SubgroupGeMask" == caseDef.varName)
+       {
+               bdy << "    if ((i >= gl_SubgroupInvocationID) ^^ subgroupBallotBitExtract(var, i))\n"
+                       << "    {\n"
+                       << "      tempResult = 0;\n"
+                       << "    }\n";
+       }
+       else if ("gl_SubgroupGtMask" == caseDef.varName)
+       {
+               bdy << "    if ((i > gl_SubgroupInvocationID) ^^ subgroupBallotBitExtract(var, i))\n"
+                       << "    {\n"
+                       << "      tempResult = 0;\n"
+                       << "    }\n";
+       }
+       else if ("gl_SubgroupLeMask" == caseDef.varName)
+       {
+               bdy << "    if ((i <= gl_SubgroupInvocationID) ^^ subgroupBallotBitExtract(var, i))\n"
+                       << "    {\n"
+                       << "      tempResult = 0;\n"
+                       << "    }\n";
+       }
+       else if ("gl_SubgroupLtMask" == caseDef.varName)
+       {
+               bdy << "    if ((i < gl_SubgroupInvocationID) ^^ subgroupBallotBitExtract(var, i))\n"
+                       << "    {\n"
+                       << "      tempResult = 0;\n"
+                       << "    }\n";
+       }
+
+       bdy << "  }\n"
+               << "  for (uint i = 0; i < 32; i++)\n"
+               << "  {\n"
+               << "    if ((var.x & bit) > 0)\n"
+               << "    {\n"
+               << "      bitCount++;\n"
+               << "    }\n"
+               << "    if ((var.y & bit) > 0)\n"
+               << "    {\n"
+               << "      bitCount++;\n"
+               << "    }\n"
+               << "    if ((var.z & bit) > 0)\n"
+               << "    {\n"
+               << "      bitCount++;\n"
+               << "    }\n"
+               << "    if ((var.w & bit) > 0)\n"
+               << "    {\n"
+               << "      bitCount++;\n"
+               << "    }\n"
+               << "    bit = bit<<1;\n"
+               << "  }\n"
+               << "  if (subgroupBallotBitCount(var) != bitCount)\n"
+               << "  {\n"
+               << "    tempResult = 0;\n"
+               << "  }\n";
+       return bdy.str();
+}
+
+void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       const vk::SpirVAsmBuildOptions  buildOptionsSpr (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3);
+       const string                                    comparison              = subgroupComparison(caseDef);
+       const string                                    mask                    = varSubgroupMask(caseDef);
+
+       subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+       if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+               subgroups::setVertexShaderFrameBuffer(programCollection);
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+       {
+               /*
+                       const string bdy = subgroupMask(caseDef);
+                       const string vertex =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(location = 0) out float out_color;\n"
+                       "layout(location = 0) in highp vec4 in_position;\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       + bdy +
+                       "  out_color = float(tempResult);\n"
+                       "  gl_Position = in_position;\n"
+                       "  gl_PointSize = 1.0f;\n"
+                       "}\n";
+                       programCollection.glslSources.add("vert")
+                               << glu::VertexSource(vertex) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
+               */
+
+               const string vertex =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 123\n"
+                       "; Schema: 0\n"
+                       "OpCapability Shader\n"
+                       "OpCapability GroupNonUniform\n"
+                       "OpCapability GroupNonUniformBallot\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint Vertex %4 \"main\" %22 %32 %36 %107 %114 %117\n"
+                       + mask +
+                       "OpDecorate %32 RelaxedPrecision\n"
+                       "OpDecorate %32 BuiltIn SubgroupSize\n"
+                       "OpDecorate %33 RelaxedPrecision\n"
+                       "OpDecorate %36 RelaxedPrecision\n"
+                       "OpDecorate %36 BuiltIn SubgroupLocalInvocationId\n"
+                       "OpDecorate %37 RelaxedPrecision\n"
+                       "OpDecorate %107 Location 0\n"
+                       "OpMemberDecorate %112 0 BuiltIn Position\n"
+                       "OpMemberDecorate %112 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %112 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %112 3 BuiltIn CullDistance\n"
+                       "OpDecorate %112 Block\n"
+                       "OpDecorate %117 Location 0\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeInt 32 0\n"
+                       "%7 = OpTypePointer Function %6\n"
+                       "%9 = OpConstant %6 1\n"
+                       "%12 = OpConstant %6 0\n"
+                       "%13 = OpTypeVector %6 4\n"
+                       "%14 = OpTypePointer Function %13\n"
+                       "%16 = OpTypeBool\n"
+                       "%17 = OpConstantTrue %16\n"
+                       "%18 = OpConstant %6 3\n"
+                       "%21 = OpTypePointer Input %13\n"
+                       "%22 = OpVariable %21 Input\n"
+                       "%31 = OpTypePointer Input %6\n"
+                       "%32 = OpVariable %31 Input\n"
+                       "%36 = OpVariable %31 Input\n"
+                       "%46 = OpTypeInt 32 1\n"
+                       "%47 = OpConstant %46 1\n"
+                       "%56 = OpConstant %6 32\n"
+                       "%76 = OpConstant %6 2\n"
+                       "%105 = OpTypeFloat 32\n"
+                       "%106 = OpTypePointer Output %105\n"
+                       "%107 = OpVariable %106 Output\n"
+                       "%110 = OpTypeVector %105 4\n"
+                       "%111 = OpTypeArray %105 %9\n"
+                       "%112 = OpTypeStruct %110 %105 %111 %111\n"
+                       "%113 = OpTypePointer Output %112\n"
+                       "%114 = OpVariable %113 Output\n"
+                       "%115 = OpConstant %46 0\n"
+                       "%116 = OpTypePointer Input %110\n"
+                       "%117 = OpVariable %116 Input\n"
+                       "%119 = OpTypePointer Output %110\n"
+                       "%121 = OpConstant %105 1\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%8 = OpVariable %7 Function\n"
+                       "%10 = OpVariable %7 Function\n"
+                       "%11 = OpVariable %7 Function\n"
+                       "%15 = OpVariable %14 Function\n"
+                       "%20 = OpVariable %14 Function\n"
+                       "%24 = OpVariable %7 Function\n"
+                       "%49 = OpVariable %7 Function\n"
+                       "OpStore %8 %9\n"
+                       "OpStore %10 %9\n"
+                       "OpStore %11 %12\n"
+                       "%19 = OpGroupNonUniformBallot %13 %18 %17\n"
+                       "OpStore %15 %19\n"
+                       "%23 = OpLoad %13 %22\n"
+                       "OpStore %20 %23\n"
+                       "OpStore %24 %12\n"
+                       "OpBranch %25\n"
+                       "%25 = OpLabel\n"
+                       "OpLoopMerge %27 %28 None\n"
+                       "OpBranch %29\n"
+                       "%29 = OpLabel\n"
+                       "%30 = OpLoad %6 %24\n"
+                       "%33 = OpLoad %6 %32\n"
+                       "%34 = OpULessThan %16 %30 %33\n"
+                       "OpBranchConditional %34 %26 %27\n"
+                       "%26 = OpLabel\n"
+                       "%35 = OpLoad %6 %24\n"
+                       "%37 = OpLoad %6 %36\n"
+                       + comparison +
+                       "%39 = OpLoad %13 %20\n"
+                       "%40 = OpLoad %6 %24\n"
+                       "%41 = OpGroupNonUniformBallotBitExtract %16 %18 %39 %40\n"
+                       "%42 = OpLogicalNotEqual %16 %38 %41\n"
+                       "OpSelectionMerge %44 None\n"
+                       "OpBranchConditional %42 %43 %44\n"
+                       "%43 = OpLabel\n"
+                       "OpStore %8 %12\n"
+                       "OpBranch %44\n"
+                       "%44 = OpLabel\n"
+                       "OpBranch %28\n"
+                       "%28 = OpLabel\n"
+                       "%45 = OpLoad %6 %24\n"
+                       "%48 = OpIAdd %6 %45 %47\n"
+                       "OpStore %24 %48\n"
+                       "OpBranch %25\n"
+                       "%27 = OpLabel\n"
+                       "OpStore %49 %12\n"
+                       "OpBranch %50\n"
+                       "%50 = OpLabel\n"
+                       "OpLoopMerge %52 %53 None\n"
+                       "OpBranch %54\n"
+                       "%54 = OpLabel\n"
+                       "%55 = OpLoad %6 %49\n"
+                       "%57 = OpULessThan %16 %55 %56\n"
+                       "OpBranchConditional %57 %51 %52\n"
+                       "%51 = OpLabel\n"
+                       "%58 = OpAccessChain %7 %20 %12\n"
+                       "%59 = OpLoad %6 %58\n"
+                       "%60 = OpLoad %6 %10\n"
+                       "%61 = OpBitwiseAnd %6 %59 %60\n"
+                       "%62 = OpUGreaterThan %16 %61 %12\n"
+                       "OpSelectionMerge %64 None\n"
+                       "OpBranchConditional %62 %63 %64\n"
+                       "%63 = OpLabel\n"
+                       "%65 = OpLoad %6 %11\n"
+                       "%66 = OpIAdd %6 %65 %47\n"
+                       "OpStore %11 %66\n"
+                       "OpBranch %64\n"
+                       "%64 = OpLabel\n"
+                       "%67 = OpAccessChain %7 %20 %9\n"
+                       "%68 = OpLoad %6 %67\n"
+                       "%69 = OpLoad %6 %10\n"
+                       "%70 = OpBitwiseAnd %6 %68 %69\n"
+                       "%71 = OpUGreaterThan %16 %70 %12\n"
+                       "OpSelectionMerge %73 None\n"
+                       "OpBranchConditional %71 %72 %73\n"
+                       "%72 = OpLabel\n"
+                       "%74 = OpLoad %6 %11\n"
+                       "%75 = OpIAdd %6 %74 %47\n"
+                       "OpStore %11 %75\n"
+                       "OpBranch %73\n"
+                       "%73 = OpLabel\n"
+                       "%77 = OpAccessChain %7 %20 %76\n"
+                       "%78 = OpLoad %6 %77\n"
+                       "%79 = OpLoad %6 %10\n"
+                       "%80 = OpBitwiseAnd %6 %78 %79\n"
+                       "%81 = OpUGreaterThan %16 %80 %12\n"
+                       "OpSelectionMerge %83 None\n"
+                       "OpBranchConditional %81 %82 %83\n"
+                       "%82 = OpLabel\n"
+                       "%84 = OpLoad %6 %11\n"
+                       "%85 = OpIAdd %6 %84 %47\n"
+                       "OpStore %11 %85\n"
+                       "OpBranch %83\n"
+                       "%83 = OpLabel\n"
+                       "%86 = OpAccessChain %7 %20 %18\n"
+                       "%87 = OpLoad %6 %86\n"
+                       "%88 = OpLoad %6 %10\n"
+                       "%89 = OpBitwiseAnd %6 %87 %88\n"
+                       "%90 = OpUGreaterThan %16 %89 %12\n"
+                       "OpSelectionMerge %92 None\n"
+                       "OpBranchConditional %90 %91 %92\n"
+                       "%91 = OpLabel\n"
+                       "%93 = OpLoad %6 %11\n"
+                       "%94 = OpIAdd %6 %93 %47\n"
+                       "OpStore %11 %94\n"
+                       "OpBranch %92\n"
+                       "%92 = OpLabel\n"
+                       "%95 = OpLoad %6 %10\n"
+                       "%96 = OpShiftLeftLogical %6 %95 %47\n"
+                       "OpStore %10 %96\n"
+                       "OpBranch %53\n"
+                       "%53 = OpLabel\n"
+                       "%97 = OpLoad %6 %49\n"
+                       "%98 = OpIAdd %6 %97 %47\n"
+                       "OpStore %49 %98\n"
+                       "OpBranch %50\n"
+                       "%52 = OpLabel\n"
+                       "%99 = OpLoad %13 %20\n"
+                       "%100 = OpGroupNonUniformBallotBitCount %6 %18 Reduce %99\n"
+                       "%101 = OpLoad %6 %11\n"
+                       "%102 = OpINotEqual %16 %100 %101\n"
+                       "OpSelectionMerge %104 None\n"
+                       "OpBranchConditional %102 %103 %104\n"
+                       "%103 = OpLabel\n"
+                       "OpStore %8 %12\n"
+                       "OpBranch %104\n"
+                       "%104 = OpLabel\n"
+                       "%108 = OpLoad %6 %8\n"
+                       "%109 = OpConvertUToF %105 %108\n"
+                       "OpStore %107 %109\n"
+                       "%118 = OpLoad %110 %117\n"
+                       "%120 = OpAccessChain %119 %114 %115\n"
+                       "OpStore %120 %118\n"
+                       "%122 = OpAccessChain %106 %114 %47\n"
+                       "OpStore %122 %121\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("vert") << vertex << buildOptionsSpr;
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+       {
+               /*
+                       const string bdy = subgroupMask(caseDef);
+                       const string  evaluationSource =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "#extension GL_EXT_tessellation_shader : require\n"
+                       "layout(isolines, equal_spacing, ccw ) in;\n"
+                       "layout(location = 0) out float out_color;\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       + bdy +
+                       "  out_color = float(tempResult);\n"
+                       "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                       "}\n";
+                       programCollection.glslSources.add("tese")
+                               << glu::TessellationEvaluationSource(evaluationSource) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
+               */
+               const string evaluationSource =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 136\n"
+                       "; Schema: 0\n"
+                       "OpCapability Tessellation\n"
+                       "OpCapability GroupNonUniform\n"
+                       "OpCapability GroupNonUniformBallot\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint TessellationEvaluation %4 \"main\" %22 %32 %36 %107 %114 %120 %128\n"
+                       "OpExecutionMode %4 Isolines\n"
+                       "OpExecutionMode %4 SpacingEqual\n"
+                       "OpExecutionMode %4 VertexOrderCcw\n"
+                       + mask +
+                       "OpDecorate %32 RelaxedPrecision\n"
+                       "OpDecorate %32 BuiltIn SubgroupSize\n"
+                       "OpDecorate %33 RelaxedPrecision\n"
+                       "OpDecorate %36 RelaxedPrecision\n"
+                       "OpDecorate %36 BuiltIn SubgroupLocalInvocationId\n"
+                       "OpDecorate %37 RelaxedPrecision\n"
+                       "OpDecorate %107 Location 0\n"
+                       "OpMemberDecorate %112 0 BuiltIn Position\n"
+                       "OpMemberDecorate %112 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %112 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %112 3 BuiltIn CullDistance\n"
+                       "OpDecorate %112 Block\n"
+                       "OpMemberDecorate %116 0 BuiltIn Position\n"
+                       "OpMemberDecorate %116 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %116 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %116 3 BuiltIn CullDistance\n"
+                       "OpDecorate %116 Block\n"
+                       "OpDecorate %128 BuiltIn TessCoord\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeInt 32 0\n"
+                       "%7 = OpTypePointer Function %6\n"
+                       "%9 = OpConstant %6 1\n"
+                       "%12 = OpConstant %6 0\n"
+                       "%13 = OpTypeVector %6 4\n"
+                       "%14 = OpTypePointer Function %13\n"
+                       "%16 = OpTypeBool\n"
+                       "%17 = OpConstantTrue %16\n"
+                       "%18 = OpConstant %6 3\n"
+                       "%21 = OpTypePointer Input %13\n"
+                       "%22 = OpVariable %21 Input\n"
+                       "%31 = OpTypePointer Input %6\n"
+                       "%32 = OpVariable %31 Input\n"
+                       "%36 = OpVariable %31 Input\n"
+                       "%46 = OpTypeInt 32 1\n"
+                       "%47 = OpConstant %46 1\n"
+                       "%56 = OpConstant %6 32\n"
+                       "%76 = OpConstant %6 2\n"
+                       "%105 = OpTypeFloat 32\n"
+                       "%106 = OpTypePointer Output %105\n"
+                       "%107 = OpVariable %106 Output\n"
+                       "%110 = OpTypeVector %105 4\n"
+                       "%111 = OpTypeArray %105 %9\n"
+                       "%112 = OpTypeStruct %110 %105 %111 %111\n"
+                       "%113 = OpTypePointer Output %112\n"
+                       "%114 = OpVariable %113 Output\n"
+                       "%115 = OpConstant %46 0\n"
+                       "%116 = OpTypeStruct %110 %105 %111 %111\n"
+                       "%117 = OpConstant %6 32\n"
+                       "%118 = OpTypeArray %116 %117\n"
+                       "%119 = OpTypePointer Input %118\n"
+                       "%120 = OpVariable %119 Input\n"
+                       "%121 = OpTypePointer Input %110\n"
+                       "%126 = OpTypeVector %105 3\n"
+                       "%127 = OpTypePointer Input %126\n"
+                       "%128 = OpVariable %127 Input\n"
+                       "%129 = OpTypePointer Input %105\n"
+                       "%134 = OpTypePointer Output %110\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%8 = OpVariable %7 Function\n"
+                       "%10 = OpVariable %7 Function\n"
+                       "%11 = OpVariable %7 Function\n"
+                       "%15 = OpVariable %14 Function\n"
+                       "%20 = OpVariable %14 Function\n"
+                       "%24 = OpVariable %7 Function\n"
+                       "%49 = OpVariable %7 Function\n"
+                       "OpStore %8 %9\n"
+                       "OpStore %10 %9\n"
+                       "OpStore %11 %12\n"
+                       "%19 = OpGroupNonUniformBallot %13 %18 %17\n"
+                       "OpStore %15 %19\n"
+                       "%23 = OpLoad %13 %22\n"
+                       "OpStore %20 %23\n"
+                       "OpStore %24 %12\n"
+                       "OpBranch %25\n"
+                       "%25 = OpLabel\n"
+                       "OpLoopMerge %27 %28 None\n"
+                       "OpBranch %29\n"
+                       "%29 = OpLabel\n"
+                       "%30 = OpLoad %6 %24\n"
+                       "%33 = OpLoad %6 %32\n"
+                       "%34 = OpULessThan %16 %30 %33\n"
+                       "OpBranchConditional %34 %26 %27\n"
+                       "%26 = OpLabel\n"
+                       "%35 = OpLoad %6 %24\n"
+                       "%37 = OpLoad %6 %36\n"
+                       + comparison +
+                       "%39 = OpLoad %13 %20\n"
+                       "%40 = OpLoad %6 %24\n"
+                       "%41 = OpGroupNonUniformBallotBitExtract %16 %18 %39 %40\n"
+                       "%42 = OpLogicalNotEqual %16 %38 %41\n"
+                       "OpSelectionMerge %44 None\n"
+                       "OpBranchConditional %42 %43 %44\n"
+                       "%43 = OpLabel\n"
+                       "OpStore %8 %12\n"
+                       "OpBranch %44\n"
+                       "%44 = OpLabel\n"
+                       "OpBranch %28\n"
+                       "%28 = OpLabel\n"
+                       "%45 = OpLoad %6 %24\n"
+                       "%48 = OpIAdd %6 %45 %47\n"
+                       "OpStore %24 %48\n"
+                       "OpBranch %25\n"
+                       "%27 = OpLabel\n"
+                       "OpStore %49 %12\n"
+                       "OpBranch %50\n"
+                       "%50 = OpLabel\n"
+                       "OpLoopMerge %52 %53 None\n"
+                       "OpBranch %54\n"
+                       "%54 = OpLabel\n"
+                       "%55 = OpLoad %6 %49\n"
+                       "%57 = OpULessThan %16 %55 %56\n"
+                       "OpBranchConditional %57 %51 %52\n"
+                       "%51 = OpLabel\n"
+                       "%58 = OpAccessChain %7 %20 %12\n"
+                       "%59 = OpLoad %6 %58\n"
+                       "%60 = OpLoad %6 %10\n"
+                       "%61 = OpBitwiseAnd %6 %59 %60\n"
+                       "%62 = OpUGreaterThan %16 %61 %12\n"
+                       "OpSelectionMerge %64 None\n"
+                       "OpBranchConditional %62 %63 %64\n"
+                       "%63 = OpLabel\n"
+                       "%65 = OpLoad %6 %11\n"
+                       "%66 = OpIAdd %6 %65 %47\n"
+                       "OpStore %11 %66\n"
+                       "OpBranch %64\n"
+                       "%64 = OpLabel\n"
+                       "%67 = OpAccessChain %7 %20 %9\n"
+                       "%68 = OpLoad %6 %67\n"
+                       "%69 = OpLoad %6 %10\n"
+                       "%70 = OpBitwiseAnd %6 %68 %69\n"
+                       "%71 = OpUGreaterThan %16 %70 %12\n"
+                       "OpSelectionMerge %73 None\n"
+                       "OpBranchConditional %71 %72 %73\n"
+                       "%72 = OpLabel\n"
+                       "%74 = OpLoad %6 %11\n"
+                       "%75 = OpIAdd %6 %74 %47\n"
+                       "OpStore %11 %75\n"
+                       "OpBranch %73\n"
+                       "%73 = OpLabel\n"
+                       "%77 = OpAccessChain %7 %20 %76\n"
+                       "%78 = OpLoad %6 %77\n"
+                       "%79 = OpLoad %6 %10\n"
+                       "%80 = OpBitwiseAnd %6 %78 %79\n"
+                       "%81 = OpUGreaterThan %16 %80 %12\n"
+                       "OpSelectionMerge %83 None\n"
+                       "OpBranchConditional %81 %82 %83\n"
+                       "%82 = OpLabel\n"
+                       "%84 = OpLoad %6 %11\n"
+                       "%85 = OpIAdd %6 %84 %47\n"
+                       "OpStore %11 %85\n"
+                       "OpBranch %83\n"
+                       "%83 = OpLabel\n"
+                       "%86 = OpAccessChain %7 %20 %18\n"
+                       "%87 = OpLoad %6 %86\n"
+                       "%88 = OpLoad %6 %10\n"
+                       "%89 = OpBitwiseAnd %6 %87 %88\n"
+                       "%90 = OpUGreaterThan %16 %89 %12\n"
+                       "OpSelectionMerge %92 None\n"
+                       "OpBranchConditional %90 %91 %92\n"
+                       "%91 = OpLabel\n"
+                       "%93 = OpLoad %6 %11\n"
+                       "%94 = OpIAdd %6 %93 %47\n"
+                       "OpStore %11 %94\n"
+                       "OpBranch %92\n"
+                       "%92 = OpLabel\n"
+                       "%95 = OpLoad %6 %10\n"
+                       "%96 = OpShiftLeftLogical %6 %95 %47\n"
+                       "OpStore %10 %96\n"
+                       "OpBranch %53\n"
+                       "%53 = OpLabel\n"
+                       "%97 = OpLoad %6 %49\n"
+                       "%98 = OpIAdd %6 %97 %47\n"
+                       "OpStore %49 %98\n"
+                       "OpBranch %50\n"
+                       "%52 = OpLabel\n"
+                       "%99 = OpLoad %13 %20\n"
+                       "%100 = OpGroupNonUniformBallotBitCount %6 %18 Reduce %99\n"
+                       "%101 = OpLoad %6 %11\n"
+                       "%102 = OpINotEqual %16 %100 %101\n"
+                       "OpSelectionMerge %104 None\n"
+                       "OpBranchConditional %102 %103 %104\n"
+                       "%103 = OpLabel\n"
+                       "OpStore %8 %12\n"
+                       "OpBranch %104\n"
+                       "%104 = OpLabel\n"
+                       "%108 = OpLoad %6 %8\n"
+                       "%109 = OpConvertUToF %105 %108\n"
+                       "OpStore %107 %109\n"
+                       "%122 = OpAccessChain %121 %120 %115 %115\n"
+                       "%123 = OpLoad %110 %122\n"
+                       "%124 = OpAccessChain %121 %120 %47 %115\n"
+                       "%125 = OpLoad %110 %124\n"
+                       "%130 = OpAccessChain %129 %128 %12\n"
+                       "%131 = OpLoad %105 %130\n"
+                       "%132 = OpCompositeConstruct %110 %131 %131 %131 %131\n"
+                       "%133 = OpExtInst %110 %1 FMix %123 %125 %132\n"
+                       "%135 = OpAccessChain %134 %114 %115\n"
+                       "OpStore %135 %133\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
+               subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+       {
+               /*
+                       const string bdy = subgroupMask(caseDef);
+                       const string  controlSource =
+                       "#version 450\n"
+                       "#extension GL_EXT_tessellation_shader : require\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(vertices = 2) out;\n"
+                       "layout(location = 0) out float out_color[];\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  if (gl_InvocationID == 0)\n"
+                       "  {\n"
+                       "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       "  }\n"
+                       + bdy +
+                       "  out_color[gl_InvocationID] = float(tempResult);\n"
+                       "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       "}\n";
+                       programCollection.glslSources.add("tesc")
+                       << glu::TessellationControlSource(controlSource) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
+               */
+               const string controlSource =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 146\n"
+                       "; Schema: 0\n"
+                       "OpCapability Tessellation\n"
+                       "OpCapability GroupNonUniform\n"
+                       "OpCapability GroupNonUniformBallot\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %40 %50 %54 %123 %133 %139\n"
+                       "OpExecutionMode %4 OutputVertices 2\n"
+                       "OpDecorate %8 BuiltIn InvocationId\n"
+                       "OpDecorate %20 Patch\n"
+                       "OpDecorate %20 BuiltIn TessLevelOuter\n"
+                       + mask +
+                       "OpDecorate %50 RelaxedPrecision\n"
+                       "OpDecorate %50 BuiltIn SubgroupSize\n"
+                       "OpDecorate %51 RelaxedPrecision\n"
+                       "OpDecorate %54 RelaxedPrecision\n"
+                       "OpDecorate %54 BuiltIn SubgroupLocalInvocationId\n"
+                       "OpDecorate %55 RelaxedPrecision\n"
+                       "OpDecorate %123 Location 0\n"
+                       "OpMemberDecorate %130 0 BuiltIn Position\n"
+                       "OpMemberDecorate %130 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %130 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %130 3 BuiltIn CullDistance\n"
+                       "OpDecorate %130 Block\n"
+                       "OpMemberDecorate %135 0 BuiltIn Position\n"
+                       "OpMemberDecorate %135 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %135 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %135 3 BuiltIn CullDistance\n"
+                       "OpDecorate %135 Block\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeInt 32 1\n"
+                       "%7 = OpTypePointer Input %6\n"
+                       "%8 = OpVariable %7 Input\n"
+                       "%10 = OpConstant %6 0\n"
+                       "%11 = OpTypeBool\n"
+                       "%15 = OpTypeFloat 32\n"
+                       "%16 = OpTypeInt 32 0\n"
+                       "%17 = OpConstant %16 4\n"
+                       "%18 = OpTypeArray %15 %17\n"
+                       "%19 = OpTypePointer Output %18\n"
+                       "%20 = OpVariable %19 Output\n"
+                       "%21 = OpConstant %15 1\n"
+                       "%22 = OpTypePointer Output %15\n"
+                       "%24 = OpConstant %6 1\n"
+                       "%26 = OpTypePointer Function %16\n"
+                       "%28 = OpConstant %16 1\n"
+                       "%31 = OpConstant %16 0\n"
+                       "%32 = OpTypeVector %16 4\n"
+                       "%33 = OpTypePointer Function %32\n"
+                       "%35 = OpConstantTrue %11\n"
+                       "%36 = OpConstant %16 3\n"
+                       "%39 = OpTypePointer Input %32\n"
+                       "%40 = OpVariable %39 Input\n"
+                       "%49 = OpTypePointer Input %16\n"
+                       "%50 = OpVariable %49 Input\n"
+                       "%54 = OpVariable %49 Input\n"
+                       "%72 = OpConstant %16 32\n"
+                       "%92 = OpConstant %16 2\n"
+                       "%121 = OpTypeArray %15 %92\n"
+                       "%122 = OpTypePointer Output %121\n"
+                       "%123 = OpVariable %122 Output\n"
+                       "%128 = OpTypeVector %15 4\n"
+                       "%129 = OpTypeArray %15 %28\n"
+                       "%130 = OpTypeStruct %128 %15 %129 %129\n"
+                       "%131 = OpTypeArray %130 %92\n"
+                       "%132 = OpTypePointer Output %131\n"
+                       "%133 = OpVariable %132 Output\n"
+                       "%135 = OpTypeStruct %128 %15 %129 %129\n"
+                       "%136 = OpConstant %16 32\n"
+                       "%137 = OpTypeArray %135 %136\n"
+                       "%138 = OpTypePointer Input %137\n"
+                       "%139 = OpVariable %138 Input\n"
+                       "%141 = OpTypePointer Input %128\n"
+                       "%144 = OpTypePointer Output %128\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%27 = OpVariable %26 Function\n"
+                       "%29 = OpVariable %26 Function\n"
+                       "%30 = OpVariable %26 Function\n"
+                       "%34 = OpVariable %33 Function\n"
+                       "%38 = OpVariable %33 Function\n"
+                       "%42 = OpVariable %26 Function\n"
+                       "%65 = OpVariable %26 Function\n"
+                       "%9 = OpLoad %6 %8\n"
+                       "%12 = OpIEqual %11 %9 %10\n"
+                       "OpSelectionMerge %14 None\n"
+                       "OpBranchConditional %12 %13 %14\n"
+                       "%13 = OpLabel\n"
+                       "%23 = OpAccessChain %22 %20 %10\n"
+                       "OpStore %23 %21\n"
+                       "%25 = OpAccessChain %22 %20 %24\n"
+                       "OpStore %25 %21\n"
+                       "OpBranch %14\n"
+                       "%14 = OpLabel\n"
+                       "OpStore %27 %28\n"
+                       "OpStore %29 %28\n"
+                       "OpStore %30 %31\n"
+                       "%37 = OpGroupNonUniformBallot %32 %36 %35\n"
+                       "OpStore %34 %37\n"
+                       "%41 = OpLoad %32 %40\n"
+                       "OpStore %38 %41\n"
+                       "OpStore %42 %31\n"
+                       "OpBranch %43\n"
+                       "%43 = OpLabel\n"
+                       "OpLoopMerge %45 %46 None\n"
+                       "OpBranch %47\n"
+                       "%47 = OpLabel\n"
+                       "%48 = OpLoad %16 %42\n"
+                       "%51 = OpLoad %16 %50\n"
+                       "%52 = OpULessThan %11 %48 %51\n"
+                       "OpBranchConditional %52 %44 %45\n"
+                       "%44 = OpLabel\n"
+                       "%53 = OpLoad %16 %42\n"
+                       "%55 = OpLoad %16 %54\n"
+                       + comparison +
+                       "%57 = OpLoad %32 %38\n"
+                       "%58 = OpLoad %16 %42\n"
+                       "%59 = OpGroupNonUniformBallotBitExtract %11 %36 %57 %58\n"
+                       "%60 = OpLogicalNotEqual %11 %56 %59\n"
+                       "OpSelectionMerge %62 None\n"
+                       "OpBranchConditional %60 %61 %62\n"
+                       "%61 = OpLabel\n"
+                       "OpStore %27 %31\n"
+                       "OpBranch %62\n"
+                       "%62 = OpLabel\n"
+                       "OpBranch %46\n"
+                       "%46 = OpLabel\n"
+                       "%63 = OpLoad %16 %42\n"
+                       "%64 = OpIAdd %16 %63 %24\n"
+                       "OpStore %42 %64\n"
+                       "OpBranch %43\n"
+                       "%45 = OpLabel\n"
+                       "OpStore %65 %31\n"
+                       "OpBranch %66\n"
+                       "%66 = OpLabel\n"
+                       "OpLoopMerge %68 %69 None\n"
+                       "OpBranch %70\n"
+                       "%70 = OpLabel\n"
+                       "%71 = OpLoad %16 %65\n"
+                       "%73 = OpULessThan %11 %71 %72\n"
+                       "OpBranchConditional %73 %67 %68\n"
+                       "%67 = OpLabel\n"
+                       "%74 = OpAccessChain %26 %38 %31\n"
+                       "%75 = OpLoad %16 %74\n"
+                       "%76 = OpLoad %16 %29\n"
+                       "%77 = OpBitwiseAnd %16 %75 %76\n"
+                       "%78 = OpUGreaterThan %11 %77 %31\n"
+                       "OpSelectionMerge %80 None\n"
+                       "OpBranchConditional %78 %79 %80\n"
+                       "%79 = OpLabel\n"
+                       "%81 = OpLoad %16 %30\n"
+                       "%82 = OpIAdd %16 %81 %24\n"
+                       "OpStore %30 %82\n"
+                       "OpBranch %80\n"
+                       "%80 = OpLabel\n"
+                       "%83 = OpAccessChain %26 %38 %28\n"
+                       "%84 = OpLoad %16 %83\n"
+                       "%85 = OpLoad %16 %29\n"
+                       "%86 = OpBitwiseAnd %16 %84 %85\n"
+                       "%87 = OpUGreaterThan %11 %86 %31\n"
+                       "OpSelectionMerge %89 None\n"
+                       "OpBranchConditional %87 %88 %89\n"
+                       "%88 = OpLabel\n"
+                       "%90 = OpLoad %16 %30\n"
+                       "%91 = OpIAdd %16 %90 %24\n"
+                       "OpStore %30 %91\n"
+                       "OpBranch %89\n"
+                       "%89 = OpLabel\n"
+                       "%93 = OpAccessChain %26 %38 %92\n"
+                       "%94 = OpLoad %16 %93\n"
+                       "%95 = OpLoad %16 %29\n"
+                       "%96 = OpBitwiseAnd %16 %94 %95\n"
+                       "%97 = OpUGreaterThan %11 %96 %31\n"
+                       "OpSelectionMerge %99 None\n"
+                       "OpBranchConditional %97 %98 %99\n"
+                       "%98 = OpLabel\n"
+                       "%100 = OpLoad %16 %30\n"
+                       "%101 = OpIAdd %16 %100 %24\n"
+                       "OpStore %30 %101\n"
+                       "OpBranch %99\n"
+                       "%99 = OpLabel\n"
+                       "%102 = OpAccessChain %26 %38 %36\n"
+                       "%103 = OpLoad %16 %102\n"
+                       "%104 = OpLoad %16 %29\n"
+                       "%105 = OpBitwiseAnd %16 %103 %104\n"
+                       "%106 = OpUGreaterThan %11 %105 %31\n"
+                       "OpSelectionMerge %108 None\n"
+                       "OpBranchConditional %106 %107 %108\n"
+                       "%107 = OpLabel\n"
+                       "%109 = OpLoad %16 %30\n"
+                       "%110 = OpIAdd %16 %109 %24\n"
+                       "OpStore %30 %110\n"
+                       "OpBranch %108\n"
+                       "%108 = OpLabel\n"
+                       "%111 = OpLoad %16 %29\n"
+                       "%112 = OpShiftLeftLogical %16 %111 %24\n"
+                       "OpStore %29 %112\n"
+                       "OpBranch %69\n"
+                       "%69 = OpLabel\n"
+                       "%113 = OpLoad %16 %65\n"
+                       "%114 = OpIAdd %16 %113 %24\n"
+                       "OpStore %65 %114\n"
+                       "OpBranch %66\n"
+                       "%68 = OpLabel\n"
+                       "%115 = OpLoad %32 %38\n"
+                       "%116 = OpGroupNonUniformBallotBitCount %16 %36 Reduce %115\n"
+                       "%117 = OpLoad %16 %30\n"
+                       "%118 = OpINotEqual %11 %116 %117\n"
+                       "OpSelectionMerge %120 None\n"
+                       "OpBranchConditional %118 %119 %120\n"
+                       "%119 = OpLabel\n"
+                       "OpStore %27 %31\n"
+                       "OpBranch %120\n"
+                       "%120 = OpLabel\n"
+                       "%124 = OpLoad %6 %8\n"
+                       "%125 = OpLoad %16 %27\n"
+                       "%126 = OpConvertUToF %15 %125\n"
+                       "%127 = OpAccessChain %22 %123 %124\n"
+                       "OpStore %127 %126\n"
+                       "%134 = OpLoad %6 %8\n"
+                       "%140 = OpLoad %6 %8\n"
+                       "%142 = OpAccessChain %141 %139 %140 %10\n"
+                       "%143 = OpLoad %128 %142\n"
+                       "%145 = OpAccessChain %144 %133 %134 %10\n"
+                       "OpStore %145 %143\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
+               subgroups::setTesEvalShaderFrameBuffer(programCollection);
+       }
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+       {
+               /*
+               const string bdy = subgroupMask(caseDef);
+               const string geometry =
+               "#version 450\n"
+               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+               "layout(points) in;\n"
+               "layout(points, max_vertices = 1) out;\n"
+               "layout(location = 0) out float out_color;\n"
+               "\n"
+               "void main (void)\n"
+               "{\n"
+               + bdy +
+               "  out_color = float(tempResult);\n"
+               "  gl_Position = gl_in[0].gl_Position;\n"
+               "  EmitVertex();\n"
+               "  EndPrimitive();\n"
+               "}\n";
+               programCollection.glslSources.add("geometry")
+                       << glu::GeometrySource(geometry) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
+               */
+
+               const string geometry =
+               "; SPIR-V\n"
+               "; Version: 1.3\n"
+               "; Generator: Khronos Glslang Reference Front End; 2\n"
+               "; Bound: 125\n"
+               "; Schema: 0\n"
+               "OpCapability Geometry\n"
+               "OpCapability GroupNonUniform\n"
+               "OpCapability GroupNonUniformBallot\n"
+               "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+               "OpMemoryModel Logical GLSL450\n"
+               "OpEntryPoint Geometry %4 \"main\" %22 %32 %36 %107 %114 %119\n"
+               "OpExecutionMode %4 InputPoints\n"
+               "OpExecutionMode %4 Invocations 1\n"
+               "OpExecutionMode %4 OutputPoints\n"
+               "OpExecutionMode %4 OutputVertices 1\n"
+               + mask +
+               "OpDecorate %32 RelaxedPrecision\n"
+               "OpDecorate %32 BuiltIn SubgroupSize\n"
+               "OpDecorate %33 RelaxedPrecision\n"
+               "OpDecorate %36 RelaxedPrecision\n"
+               "OpDecorate %36 BuiltIn SubgroupLocalInvocationId\n"
+               "OpDecorate %37 RelaxedPrecision\n"
+               "OpDecorate %107 Location 0\n"
+               "OpMemberDecorate %112 0 BuiltIn Position\n"
+               "OpMemberDecorate %112 1 BuiltIn PointSize\n"
+               "OpMemberDecorate %112 2 BuiltIn ClipDistance\n"
+               "OpMemberDecorate %112 3 BuiltIn CullDistance\n"
+               "OpDecorate %112 Block\n"
+               "OpMemberDecorate %116 0 BuiltIn Position\n"
+               "OpMemberDecorate %116 1 BuiltIn PointSize\n"
+               "OpMemberDecorate %116 2 BuiltIn ClipDistance\n"
+               "OpMemberDecorate %116 3 BuiltIn CullDistance\n"
+               "OpDecorate %116 Block\n"
+               "%2 = OpTypeVoid\n"
+               "%3 = OpTypeFunction %2\n"
+               "%6 = OpTypeInt 32 0\n"
+               "%7 = OpTypePointer Function %6\n"
+               "%9 = OpConstant %6 1\n"
+               "%12 = OpConstant %6 0\n"
+               "%13 = OpTypeVector %6 4\n"
+               "%14 = OpTypePointer Function %13\n"
+               "%16 = OpTypeBool\n"
+               "%17 = OpConstantTrue %16\n"
+               "%18 = OpConstant %6 3\n"
+               "%21 = OpTypePointer Input %13\n"
+               "%22 = OpVariable %21 Input\n"
+               "%31 = OpTypePointer Input %6\n"
+               "%32 = OpVariable %31 Input\n"
+               "%36 = OpVariable %31 Input\n"
+               "%46 = OpTypeInt 32 1\n"
+               "%47 = OpConstant %46 1\n"
+               "%56 = OpConstant %6 32\n"
+               "%76 = OpConstant %6 2\n"
+               "%105 = OpTypeFloat 32\n"
+               "%106 = OpTypePointer Output %105\n"
+               "%107 = OpVariable %106 Output\n"
+               "%110 = OpTypeVector %105 4\n"
+               "%111 = OpTypeArray %105 %9\n"
+               "%112 = OpTypeStruct %110 %105 %111 %111\n"
+               "%113 = OpTypePointer Output %112\n"
+               "%114 = OpVariable %113 Output\n"
+               "%115 = OpConstant %46 0\n"
+               "%116 = OpTypeStruct %110 %105 %111 %111\n"
+               "%117 = OpTypeArray %116 %9\n"
+               "%118 = OpTypePointer Input %117\n"
+               "%119 = OpVariable %118 Input\n"
+               "%120 = OpTypePointer Input %110\n"
+               "%123 = OpTypePointer Output %110\n"
+               "%4 = OpFunction %2 None %3\n"
+               "%5 = OpLabel\n"
+               "%8 = OpVariable %7 Function\n"
+               "%10 = OpVariable %7 Function\n"
+               "%11 = OpVariable %7 Function\n"
+               "%15 = OpVariable %14 Function\n"
+               "%20 = OpVariable %14 Function\n"
+               "%24 = OpVariable %7 Function\n"
+               "%49 = OpVariable %7 Function\n"
+               "OpStore %8 %9\n"
+               "OpStore %10 %9\n"
+               "OpStore %11 %12\n"
+               "%19 = OpGroupNonUniformBallot %13 %18 %17\n"
+               "OpStore %15 %19\n"
+               "%23 = OpLoad %13 %22\n"
+               "OpStore %20 %23\n"
+               "OpStore %24 %12\n"
+               "OpBranch %25\n"
+               "%25 = OpLabel\n"
+               "OpLoopMerge %27 %28 None\n"
+               "OpBranch %29\n"
+               "%29 = OpLabel\n"
+               "%30 = OpLoad %6 %24\n"
+               "%33 = OpLoad %6 %32\n"
+               "%34 = OpULessThan %16 %30 %33\n"
+               "OpBranchConditional %34 %26 %27\n"
+               "%26 = OpLabel\n"
+               "%35 = OpLoad %6 %24\n"
+               "%37 = OpLoad %6 %36\n"
+               + comparison +
+               "%39 = OpLoad %13 %20\n"
+               "%40 = OpLoad %6 %24\n"
+               "%41 = OpGroupNonUniformBallotBitExtract %16 %18 %39 %40\n"
+               "%42 = OpLogicalNotEqual %16 %38 %41\n"
+               "OpSelectionMerge %44 None\n"
+               "OpBranchConditional %42 %43 %44\n"
+               "%43 = OpLabel\n"
+               "OpStore %8 %12\n"
+               "OpBranch %44\n"
+               "%44 = OpLabel\n"
+               "OpBranch %28\n"
+               "%28 = OpLabel\n"
+               "%45 = OpLoad %6 %24\n"
+               "%48 = OpIAdd %6 %45 %47\n"
+               "OpStore %24 %48\n"
+               "OpBranch %25\n"
+               "%27 = OpLabel\n"
+               "OpStore %49 %12\n"
+               "OpBranch %50\n"
+               "%50 = OpLabel\n"
+               "OpLoopMerge %52 %53 None\n"
+               "OpBranch %54\n"
+               "%54 = OpLabel\n"
+               "%55 = OpLoad %6 %49\n"
+               "%57 = OpULessThan %16 %55 %56\n"
+               "OpBranchConditional %57 %51 %52\n"
+               "%51 = OpLabel\n"
+               "%58 = OpAccessChain %7 %20 %12\n"
+               "%59 = OpLoad %6 %58\n"
+               "%60 = OpLoad %6 %10\n"
+               "%61 = OpBitwiseAnd %6 %59 %60\n"
+               "%62 = OpUGreaterThan %16 %61 %12\n"
+               "OpSelectionMerge %64 None\n"
+               "OpBranchConditional %62 %63 %64\n"
+               "%63 = OpLabel\n"
+               "%65 = OpLoad %6 %11\n"
+               "%66 = OpIAdd %6 %65 %47\n"
+               "OpStore %11 %66\n"
+               "OpBranch %64\n"
+               "%64 = OpLabel\n"
+               "%67 = OpAccessChain %7 %20 %9\n"
+               "%68 = OpLoad %6 %67\n"
+               "%69 = OpLoad %6 %10\n"
+               "%70 = OpBitwiseAnd %6 %68 %69\n"
+               "%71 = OpUGreaterThan %16 %70 %12\n"
+               "OpSelectionMerge %73 None\n"
+               "OpBranchConditional %71 %72 %73\n"
+               "%72 = OpLabel\n"
+               "%74 = OpLoad %6 %11\n"
+               "%75 = OpIAdd %6 %74 %47\n"
+               "OpStore %11 %75\n"
+               "OpBranch %73\n"
+               "%73 = OpLabel\n"
+               "%77 = OpAccessChain %7 %20 %76\n"
+               "%78 = OpLoad %6 %77\n"
+               "%79 = OpLoad %6 %10\n"
+               "%80 = OpBitwiseAnd %6 %78 %79\n"
+               "%81 = OpUGreaterThan %16 %80 %12\n"
+               "OpSelectionMerge %83 None\n"
+               "OpBranchConditional %81 %82 %83\n"
+               "%82 = OpLabel\n"
+               "%84 = OpLoad %6 %11\n"
+               "%85 = OpIAdd %6 %84 %47\n"
+               "OpStore %11 %85\n"
+               "OpBranch %83\n"
+               "%83 = OpLabel\n"
+               "%86 = OpAccessChain %7 %20 %18\n"
+               "%87 = OpLoad %6 %86\n"
+               "%88 = OpLoad %6 %10\n"
+               "%89 = OpBitwiseAnd %6 %87 %88\n"
+               "%90 = OpUGreaterThan %16 %89 %12\n"
+               "OpSelectionMerge %92 None\n"
+               "OpBranchConditional %90 %91 %92\n"
+               "%91 = OpLabel\n"
+               "%93 = OpLoad %6 %11\n"
+               "%94 = OpIAdd %6 %93 %47\n"
+               "OpStore %11 %94\n"
+               "OpBranch %92\n"
+               "%92 = OpLabel\n"
+               "%95 = OpLoad %6 %10\n"
+               "%96 = OpShiftLeftLogical %6 %95 %47\n"
+               "OpStore %10 %96\n"
+               "OpBranch %53\n"
+               "%53 = OpLabel\n"
+               "%97 = OpLoad %6 %49\n"
+               "%98 = OpIAdd %6 %97 %47\n"
+               "OpStore %49 %98\n"
+               "OpBranch %50\n"
+               "%52 = OpLabel\n"
+               "%99 = OpLoad %13 %20\n"
+               "%100 = OpGroupNonUniformBallotBitCount %6 %18 Reduce %99\n"
+               "%101 = OpLoad %6 %11\n"
+               "%102 = OpINotEqual %16 %100 %101\n"
+               "OpSelectionMerge %104 None\n"
+               "OpBranchConditional %102 %103 %104\n"
+               "%103 = OpLabel\n"
+               "OpStore %8 %12\n"
+               "OpBranch %104\n"
+               "%104 = OpLabel\n"
+               "%108 = OpLoad %6 %8\n"
+               "%109 = OpConvertUToF %105 %108\n"
+               "OpStore %107 %109\n"
+               "%121 = OpAccessChain %120 %119 %115 %115\n"
+               "%122 = OpLoad %110 %121\n"
+               "%124 = OpAccessChain %123 %114 %115\n"
+               "OpStore %124 %122\n"
+               "OpEmitVertex\n"
+               "OpEndPrimitive\n"
+               "OpReturn\n"
+               "OpFunctionEnd\n";
+       programCollection.spirvAsmSources.add("geometry") << geometry << buildOptionsSpr;
+       }
+       else
+       {
+               DE_FATAL("Unsupported shader stage");
+       }
+}
+
+
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       const string bdy = subgroupMask(caseDef);
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream src;
+
+               src << "#version 450\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+                       "local_size_z_id = 2) in;\n"
+                       << "layout(set = 0, binding = 0, std430) buffer Output\n"
+                       << "{\n"
+                       << "  uint result[];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+                       << "  highp uint offset = globalSize.x * ((globalSize.y * "
+                       "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+                       "gl_GlobalInvocationID.x;\n"
+                       << bdy
+                       << "  result[offset] = tempResult;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("comp")
+                               << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+       else
+       {
+               {
+                       const string vertex =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(set = 0, binding = 0, std430) buffer Output\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               + bdy +
+                               "  result[gl_VertexIndex] = tempResult;\n"
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+                               "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+                               "  gl_PointSize = 1.0f;\n"
+                               "}\n";
+                       programCollection.glslSources.add("vert")
+                               << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const string tesc =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(vertices=1) out;\n"
+                               "layout(set = 0, binding = 1, std430) buffer Output\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               + bdy +
+                               "  result[gl_PrimitiveID] = tempResult;\n"
+                               "  if (gl_InvocationID == 0)\n"
+                               "  {\n"
+                               "    gl_TessLevelOuter[0] = 1.0f;\n"
+                               "    gl_TessLevelOuter[1] = 1.0f;\n"
+                               "  }\n"
+                               "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                               "}\n";
+                       programCollection.glslSources.add("tesc")
+                                       << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const string tese =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(isolines) in;\n"
+                               "layout(set = 0, binding = 2, std430) buffer Output\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               + bdy +
+                               "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+                               "}\n";
+
+                       programCollection.glslSources.add("tese")
+                                       << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const string geometry =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(${TOPOLOGY}) in;\n"
+                               "layout(points, max_vertices = 1) out;\n"
+                               "layout(set = 0, binding = 3, std430) buffer Output\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               + bdy +
+                               "  result[gl_PrimitiveIDIn] = tempResult;\n"
+                               "  gl_Position = gl_in[0].gl_Position;\n"
+                               "  EmitVertex();\n"
+                               "  EndPrimitive();\n"
+                               "}\n";
+
+                       subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
+                                                                                                         programCollection.glslSources);
+               }
+
+               {
+                       const string fragment =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(location = 0) out uint result;\n"
+                               "void main (void)\n"
+                               "{\n"
+                               + bdy +
+                               "  result = tempResult;\n"
+                               "}\n";
+
+                       programCollection.glslSources.add("fragment")
+                               << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               subgroups::addNoSubgroupShader(programCollection);
+       }
+}
+
+// Support gate used by every case in this file. The case definition is not
+// consulted; NotSupportedError is thrown unless
+// subgroups::isSubgroupSupported(context) reports support.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+       DE_UNREF(caseDef);
+
+       const bool subgroupsAvailable = subgroups::isSubgroupSupported(context);
+       if (subgroupsAvailable)
+               return;
+
+       TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+}
+
+tcu::TestStatus noSSBOtest(Context& context, const CaseDefinition caseDef)
+{
+       if (!areSubgroupOperationsSupportedForStage(
+                               context, caseDef.shaderStage))
+       {
+               if (areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+               {
+                       return tcu::TestStatus::fail(
+                                          "Shader stage " + getShaderStageName(caseDef.shaderStage) +
+                                          " is required to support subgroup operations!");
+               }
+               else
+               {
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+               }
+       }
+
+       if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
+       {
+               TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
+       }
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+               return makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+       else if ((VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) & caseDef.shaderStage )
+               return makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+
+       return makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+}
+
+
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+       if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
+       {
+               TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
+       }
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               if (!areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+               {
+                               return tcu::TestStatus::fail(
+                                                  "Shader stage " + getShaderStageName(caseDef.shaderStage) +
+                                                  " is required to support subgroup operations!");
+               }
+               return makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkComputeStage);
+       }
+       else
+       {
+               VkPhysicalDeviceSubgroupProperties subgroupProperties;
+               subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+               subgroupProperties.pNext = DE_NULL;
+
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage  & subgroupProperties.supportedStages);
+
+               if ( VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
+               {
+                       if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
+                               TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+                       else
+                               stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+               }
+
+               if ((VkShaderStageFlagBits)0u == stages)
+                       TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, stages);
+       }
+}
+
+tcu::TestCaseGroup* createSubgroupsBuiltinMaskVarTests(tcu::TestContext& testCtx)
+{
+de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+               testCtx, "graphics", "Subgroup builtin mask category    tests: graphics"));
+       de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+               testCtx, "compute", "Subgroup builtin mask category tests: compute"));
+       de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+               testCtx, "framebuffer", "Subgroup builtin mask category tests: framebuffer"));
+
+       const char* const all_stages_vars[] =
+       {
+               "SubgroupEqMask",
+               "SubgroupGeMask",
+               "SubgroupGtMask",
+               "SubgroupLeMask",
+               "SubgroupLtMask",
+       };
+
+       const VkShaderStageFlags stages[] =
+       {
+               VK_SHADER_STAGE_VERTEX_BIT,
+               VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+               VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+               VK_SHADER_STAGE_GEOMETRY_BIT,
+       };
+
+
+       for (int a = 0; a < DE_LENGTH_OF_ARRAY(all_stages_vars); ++a)
+       {
+               const std::string var = all_stages_vars[a];
+               const std::string varLower = de::toLower(var);
+
+               {
+                       const CaseDefinition caseDef = {"gl_" + var, VK_SHADER_STAGE_ALL_GRAPHICS};
+                       addFunctionCaseWithPrograms(graphicGroup.get(),
+                                                                               varLower, "",
+                                                                               supportedCheck, initPrograms, test, caseDef);
+               }
+
+               {
+                       const CaseDefinition caseDef = {"gl_" + var, VK_SHADER_STAGE_COMPUTE_BIT};
+                       addFunctionCaseWithPrograms(computeGroup.get(),
+                                                                               varLower, "",
+                                                                               supportedCheck, initPrograms, test, caseDef);
+               }
+
+               for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
+               {
+                       const CaseDefinition caseDef = {"gl_" + var, stages[stageIndex]};
+                       addFunctionCaseWithPrograms(framebufferGroup.get(),
+                                               varLower + "_" +
+                                               getShaderStageName(caseDef.shaderStage), "",
+                                               supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+               }
+       }
+
+       de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+               testCtx, "builtin_mask_var", "Subgroup builtin mask variable tests"));
+
+       group->addChild(graphicGroup.release());
+       group->addChild(computeGroup.release());
+       group->addChild(framebufferGroup.release());
+
+       return group.release();
+}
+} // subgroups
+} // vkt
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsBuiltinMaskVarTests.hpp b/external/openglcts/modules/common/subgroups/glcSubgroupsBuiltinMaskVarTests.hpp
new file mode 100644 (file)
index 0000000..98585ba
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef _GLCSUBGROUPSBUILTINMASKVARTESTS_HPP
+#define _GLCSUBGROUPSBUILTINMASKVARTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+// NOTE: the include guard is derived from this file's own name
+// (glcSubgroupsBuiltinMaskVarTests.hpp) so that it cannot collide with the
+// guard of the Vulkan header this file was copied from.
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Creates the "builtin_mask_var" test group; the caller takes ownership of
+// the returned pointer.
+tcu::TestCaseGroup* createSubgroupsBuiltinMaskVarTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _GLCSUBGROUPSBUILTINMASKVARTESTS_HPP
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsBuiltinVarTests.cpp b/external/openglcts/modules/common/subgroups/glcSubgroupsBuiltinVarTests.cpp
new file mode 100755 (executable)
index 0000000..b825a74
--- /dev/null
@@ -0,0 +1,1756 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsBuiltinVarTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Result check for the subgroup size in the vertex-pipeline stages.
+//
+// datas[0] is read as an array of 32-bit unsigned words laid out in groups
+// of four. For each of the first `width` groups, word 0 must be equal to
+// subgroupSize.
+//
+// Returns true when every inspected group matches, false on the first
+// group that does not.
+bool checkVertexPipelineStagesSubgroupSize(std::vector<const void*> datas,
+               deUint32 width, deUint32 subgroupSize)
+{
+       const deUint32* const words = static_cast<const deUint32*>(datas[0]);
+
+       deUint32 element = 0;
+       while (element < width)
+       {
+               // Word 0 of the four-word group holds the value under test.
+               if (words[element * 4] != subgroupSize)
+                       return false;
+
+               ++element;
+       }
+
+       return true;
+}
+
+// Result check for the subgroup invocation ID in the vertex-pipeline stages.
+//
+// datas[0] is read as an array of 32-bit unsigned words laid out in groups
+// of four; word 1 of each of the first `width` groups is taken as an
+// invocation ID. Every ID must be strictly less than subgroupSize. The
+// occurrences of each ID are counted and the counts must add up to `width`.
+//
+// Returns false as soon as an out-of-range ID is seen or if the final
+// total differs from `width`; true otherwise.
+bool checkVertexPipelineStagesSubgroupInvocationID(std::vector<const void*> datas,
+               deUint32 width, deUint32 subgroupSize)
+{
+       const deUint32* const words = static_cast<const deUint32*>(datas[0]);
+       vector<deUint32> hitsPerID(subgroupSize, 0);
+
+       for (deUint32 element = 0; element != width; ++element)
+       {
+               const deUint32 invocationID = words[(element * 4) + 1];
+
+               // Range check must come first: the ID is used as an index below.
+               if (!(invocationID < subgroupSize))
+                       return false;
+
+               hitsPerID[invocationID] += 1;
+       }
+
+       // Add up the per-ID counters and compare against the element count.
+       deUint32 hitTotal = 0;
+       for (vector<deUint32>::const_iterator hit = hitsPerID.begin(); hit != hitsPerID.end(); ++hit)
+               hitTotal += *hit;
+
+       return hitTotal == width;
+}
+
+// Result check for the subgroup size in the compute stage.
+//
+// datas[0] is read as an array of 32-bit unsigned words, four per
+// invocation. Every invocation of every workgroup is visited; its slot is
+//   extentX * (extentY * globalZ + globalY) + globalX
+// where extentX/extentY are numWorkgroups[i] * localSize[i] and global* is
+// workgroup index * localSize + local index. Word 0 of each slot must be
+// equal to subgroupSize.
+//
+// Returns true when all slots match, false on the first mismatch.
+static bool checkComputeSubgroupSize(std::vector<const void*> datas,
+               const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+               deUint32 subgroupSize)
+{
+       const deUint32* const words = static_cast<const deUint32*>(datas[0]);
+
+       // Size of the whole dispatch, in invocations, along X and Y.
+       const deUint32 extentX = numWorkgroups[0] * localSize[0];
+       const deUint32 extentY = numWorkgroups[1] * localSize[1];
+
+       for (deUint32 groupX = 0; groupX < numWorkgroups[0]; ++groupX)
+       {
+               for (deUint32 groupY = 0; groupY < numWorkgroups[1]; ++groupY)
+               {
+                       for (deUint32 groupZ = 0; groupZ < numWorkgroups[2]; ++groupZ)
+                       {
+                               for (deUint32 localX = 0; localX < localSize[0]; ++localX)
+                               {
+                                       for (deUint32 localY = 0; localY < localSize[1]; ++localY)
+                                       {
+                                               for (deUint32 localZ = 0; localZ < localSize[2]; ++localZ)
+                                               {
+                                                       const deUint32 globalX = groupX * localSize[0] + localX;
+                                                       const deUint32 globalY = groupY * localSize[1] + localY;
+                                                       const deUint32 globalZ = groupZ * localSize[2] + localZ;
+
+                                                       const deUint32 slot = extentX * ((extentY * globalZ) + globalY) + globalX;
+
+                                                       if (words[slot * 4] != subgroupSize)
+                                                               return false;
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       return true;
+}
+
+// Result check for the subgroup invocation ID in the compute stage.
+//
+// datas[0] is read as an array of 32-bit unsigned words, four per
+// invocation, addressed by
+//   extentX * (extentY * globalZ + globalY) + globalX.
+// For each workgroup, word 1 of every invocation's slot is taken as an
+// invocation ID: it must be strictly less than subgroupSize, and the
+// per-ID occurrence counts gathered over the workgroup must add up to
+// localSize[0] * localSize[1] * localSize[2].
+//
+// Returns false on the first out-of-range ID or on the first workgroup
+// whose total does not match; true otherwise.
+static bool checkComputeSubgroupInvocationID(std::vector<const void*> datas,
+               const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+               deUint32 subgroupSize)
+{
+       const deUint32* const words = static_cast<const deUint32*>(datas[0]);
+
+       // Size of the whole dispatch, in invocations, along X and Y.
+       const deUint32 extentX = numWorkgroups[0] * localSize[0];
+       const deUint32 extentY = numWorkgroups[1] * localSize[1];
+
+       // Number of invocations in a single workgroup.
+       const deUint32 invocationsPerGroup = localSize[0] * localSize[1] * localSize[2];
+
+       for (deUint32 groupX = 0; groupX < numWorkgroups[0]; ++groupX)
+       {
+               for (deUint32 groupY = 0; groupY < numWorkgroups[1]; ++groupY)
+               {
+                       for (deUint32 groupZ = 0; groupZ < numWorkgroups[2]; ++groupZ)
+                       {
+                               // Fresh set of counters for every workgroup.
+                               vector<deUint32> hitsPerID(subgroupSize, 0);
+
+                               for (deUint32 localX = 0; localX < localSize[0]; ++localX)
+                               {
+                                       for (deUint32 localY = 0; localY < localSize[1]; ++localY)
+                                       {
+                                               for (deUint32 localZ = 0; localZ < localSize[2]; ++localZ)
+                                               {
+                                                       const deUint32 globalX = groupX * localSize[0] + localX;
+                                                       const deUint32 globalY = groupY * localSize[1] + localY;
+                                                       const deUint32 globalZ = groupZ * localSize[2] + localZ;
+
+                                                       const deUint32 slot = extentX * ((extentY * globalZ) + globalY) + globalX;
+                                                       const deUint32 invocationID = words[(slot * 4) + 1];
+
+                                                       // Range check must come first: the ID indexes the counters.
+                                                       if (!(invocationID < subgroupSize))
+                                                               return false;
+
+                                                       hitsPerID[invocationID] += 1;
+                                               }
+                                       }
+                               }
+
+                               deUint32 hitTotal = 0;
+                               for (vector<deUint32>::const_iterator hit = hitsPerID.begin(); hit != hitsPerID.end(); ++hit)
+                                       hitTotal += *hit;
+
+                               if (hitTotal != invocationsPerGroup)
+                                       return false;
+                       }
+               }
+       }
+
+       return true;
+}
+
+// Result check for the number of subgroups in the compute stage.
+//
+// datas[0] is read as an array of 32-bit unsigned words, four per
+// invocation, addressed by
+//   extentX * (extentY * globalZ + globalY) + globalX.
+// Word 2 of every invocation's slot is taken as a subgroup count and must
+// not exceed localSize[0] * localSize[1] * localSize[2].
+//
+// The fourth (unnamed) parameter is not used by this check.
+//
+// Returns true when every slot satisfies the bound, false on the first
+// one that does not.
+static bool checkComputeNumSubgroups   (std::vector<const void*>       datas,
+                                        const deUint32                 numWorkgroups[3],
+                                        const deUint32                 localSize[3],
+                                        deUint32)
+{
+       const deUint32* const words = static_cast<const deUint32*>(datas[0]);
+
+       // Size of the whole dispatch, in invocations, along X and Y.
+       const deUint32 extentX = numWorkgroups[0] * localSize[0];
+       const deUint32 extentY = numWorkgroups[1] * localSize[1];
+
+       // Number of invocations in a single workgroup: the upper bound checked below.
+       const deUint32 invocationsPerGroup = localSize[0] * localSize[1] * localSize[2];
+
+       for (deUint32 groupX = 0; groupX < numWorkgroups[0]; ++groupX)
+       {
+               for (deUint32 groupY = 0; groupY < numWorkgroups[1]; ++groupY)
+               {
+                       for (deUint32 groupZ = 0; groupZ < numWorkgroups[2]; ++groupZ)
+                       {
+                               for (deUint32 localX = 0; localX < localSize[0]; ++localX)
+                               {
+                                       for (deUint32 localY = 0; localY < localSize[1]; ++localY)
+                                       {
+                                               for (deUint32 localZ = 0; localZ < localSize[2]; ++localZ)
+                                               {
+                                                       const deUint32 globalX = groupX * localSize[0] + localX;
+                                                       const deUint32 globalY = groupY * localSize[1] + localY;
+                                                       const deUint32 globalZ = groupZ * localSize[2] + localZ;
+
+                                                       const deUint32 slot = extentX * ((extentY * globalZ) + globalY) + globalX;
+                                                       const deUint32 subgroupCount = words[(slot * 4) + 2];
+
+                                                       if (subgroupCount > invocationsPerGroup)
+                                                               return false;
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       return true;
+}
+
+// Result check for the subgroup ID in the compute stage.
+//
+// datas[0] is read as an array of 32-bit unsigned words, four per
+// invocation, addressed by
+//   extentX * (extentY * globalZ + globalY) + globalX.
+// For every invocation, word 3 of its slot (the subgroup ID) must be
+// strictly less than word 2 of the same slot (the subgroup count).
+//
+// The fourth (unnamed) parameter is not used by this check.
+//
+// Returns true when every slot satisfies the relation, false on the first
+// one that does not.
+static bool checkComputeSubgroupID     (std::vector<const void*>       datas,
+                                        const deUint32                 numWorkgroups[3],
+                                        const deUint32                 localSize[3],
+                                        deUint32)
+{
+       const deUint32* const words = static_cast<const deUint32*>(datas[0]);
+
+       // Size of the whole dispatch, in invocations, along X and Y.
+       const deUint32 extentX = numWorkgroups[0] * localSize[0];
+       const deUint32 extentY = numWorkgroups[1] * localSize[1];
+
+       for (deUint32 groupX = 0; groupX < numWorkgroups[0]; ++groupX)
+       {
+               for (deUint32 groupY = 0; groupY < numWorkgroups[1]; ++groupY)
+               {
+                       for (deUint32 groupZ = 0; groupZ < numWorkgroups[2]; ++groupZ)
+                       {
+                               for (deUint32 localX = 0; localX < localSize[0]; ++localX)
+                               {
+                                       for (deUint32 localY = 0; localY < localSize[1]; ++localY)
+                                       {
+                                               for (deUint32 localZ = 0; localZ < localSize[2]; ++localZ)
+                                               {
+                                                       const deUint32 globalX = groupX * localSize[0] + localX;
+                                                       const deUint32 globalY = groupY * localSize[1] + localY;
+                                                       const deUint32 globalZ = groupZ * localSize[2] + localZ;
+
+                                                       const deUint32 slot = extentX * ((extentY * globalZ) + globalY) + globalX;
+
+                                                       const deUint32 subgroupCount = words[(slot * 4) + 2];
+                                                       const deUint32 subgroupIndex = words[(slot * 4) + 3];
+
+                                                       if (!(subgroupIndex < subgroupCount))
+                                                               return false;
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       return true;
+}
+
+namespace
+{
+// Parameters describing a single test case in this file.
+struct CaseDefinition
+{
+       // NOTE(review): presumably the name of the builtin variable under
+       // test - confirm against the code that fills this in.
+       std::string varName;
+       // Shader stage of the case; initFrameBufferPrograms compares it
+       // against individual VK_SHADER_STAGE_*_BIT values to decide which
+       // shaders to add.
+       VkShaderStageFlags shaderStage;
+};
+}
+
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       const vk::ShaderBuildOptions    buildOptions    (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       const vk::SpirVAsmBuildOptions  buildOptionsSpr (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3);
+
+       {
+               /*
+                       "layout(location = 0) in vec4 in_color;\n"
+                       "layout(location = 0) out uvec4 out_color;\n"
+                       "void main()\n"
+                       "{\n"
+                        "      out_color = uvec4(in_color);\n"
+                        "}\n";
+               */
+               const string fragment =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 16\n"
+                       "; Schema: 0\n"
+                       "OpCapability Shader\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint Fragment %4 \"main\" %9 %13\n"
+                       "OpExecutionMode %4 OriginUpperLeft\n"
+                       "OpDecorate %9 Location 0\n"
+                       "OpDecorate %13 Location 0\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeInt 32 0\n"
+                       "%7 = OpTypeVector %6 4\n"
+                       "%8 = OpTypePointer Output %7\n"
+                       "%9 = OpVariable %8 Output\n"
+                       "%10 = OpTypeFloat 32\n"
+                       "%11 = OpTypeVector %10 4\n"
+                       "%12 = OpTypePointer Input %11\n"
+                       "%13 = OpVariable %12 Input\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%14 = OpLoad %11 %13\n"
+                       "%15 = OpConvertFToU %7 %14\n"
+                       "OpStore %9 %15\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("fragment") << fragment << buildOptionsSpr;
+       }
+
+       if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+               subgroups::setVertexShaderFrameBuffer(programCollection);
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+       {
+               /*
+                       "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                       "layout(location = 0) out vec4 out_color;\n"
+                       "layout(location = 0) in highp vec4 in_position;\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  out_color = vec4(gl_SubgroupSize, gl_SubgroupInvocationID, 1.0f, 1.0f);\n"
+                       "  gl_Position = in_position;\n"
+                       "  gl_PointSize = 1.0f;\n"
+                       "}\n";
+               */
+               const string vertex =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 31\n"
+                       "; Schema: 0\n"
+                       "OpCapability Shader\n"
+                       "OpCapability GroupNonUniform\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint Vertex %4 \"main\" %9 %12 %15 %24 %28\n"
+                       "OpDecorate %9 Location 0\n"
+                       "OpDecorate %12 RelaxedPrecision\n"
+                       "OpDecorate %12 BuiltIn SubgroupSize\n"
+                       "OpDecorate %13 RelaxedPrecision\n"
+                       "OpDecorate %15 RelaxedPrecision\n"
+                       "OpDecorate %15 BuiltIn SubgroupLocalInvocationId\n"
+                       "OpDecorate %16 RelaxedPrecision\n"
+                       "OpMemberDecorate %22 0 BuiltIn Position\n"
+                       "OpMemberDecorate %22 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %22 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %22 3 BuiltIn CullDistance\n"
+                       "OpDecorate %22 Block\n"
+                       "OpDecorate %28 Location 0\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeFloat 32\n"
+                       "%7 = OpTypeVector %6 4\n"
+                       "%8 = OpTypePointer Output %7\n"
+                       "%9 = OpVariable %8 Output\n"
+                       "%10 = OpTypeInt 32 0\n"
+                       "%11 = OpTypePointer Input %10\n"
+                       "%12 = OpVariable %11 Input\n"
+                       "%15 = OpVariable %11 Input\n"
+                       "%18 = OpConstant %6 1\n"
+                       "%20 = OpConstant %10 1\n"
+                       "%21 = OpTypeArray %6 %20\n"
+                       "%22 = OpTypeStruct %7 %6 %21 %21\n"
+                       "%23 = OpTypePointer Output %22\n"
+                       "%24 = OpVariable %23 Output\n"
+                       "%25 = OpTypeInt 32 1\n"
+                       "%26 = OpConstant %25 0\n"
+                       "%27 = OpTypePointer Input %7\n"
+                       "%28 = OpVariable %27 Input\n"
+                       "%31 = OpConstant %25 1\n"
+                       "%32 = OpTypePointer Output %6\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%13 = OpLoad %10 %12\n"
+                       "%14 = OpConvertUToF %6 %13\n"
+                       "%16 = OpLoad %10 %15\n"
+                       "%17 = OpConvertUToF %6 %16\n"
+                       "%19 = OpCompositeConstruct %7 %14 %17 %18 %18\n"
+                       "OpStore %9 %19\n"
+                       "%29 = OpLoad %7 %28\n"
+                       "%30 = OpAccessChain %8 %24 %26\n"
+                       "OpStore %30 %29\n"
+                       "%33 = OpAccessChain %32 %24 %31\n"
+                       "OpStore %33 %18\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("vert") << vertex << buildOptionsSpr;
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+       {
+               /*
+                       "#extension GL_EXT_tessellation_shader : require\n"
+                       "layout(vertices = 2) out;\n"
+                       "layout(location = 0) out vec4 out_color[];\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  if (gl_InvocationID == 0)\n"
+                         {\n"
+                       "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       "  }\n"
+                       "  out_color[gl_InvocationID] = vec4(0.0f);\n"
+                       "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       "}\n";
+               */
+               const string controlSource =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 53\n"
+                       "; Schema: 0\n"
+                       "OpCapability Tessellation\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %30 %41 %47\n"
+                       "OpExecutionMode %4 OutputVertices 2\n"
+                       "OpDecorate %8 BuiltIn InvocationId\n"
+                       "OpDecorate %20 Patch\n"
+                       "OpDecorate %20 BuiltIn TessLevelOuter\n"
+                       "OpDecorate %30 Location 0\n"
+                       "OpMemberDecorate %38 0 BuiltIn Position\n"
+                       "OpMemberDecorate %38 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %38 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %38 3 BuiltIn CullDistance\n"
+                       "OpDecorate %38 Block\n"
+                       "OpMemberDecorate %43 0 BuiltIn Position\n"
+                       "OpMemberDecorate %43 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %43 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %43 3 BuiltIn CullDistance\n"
+                       "OpDecorate %43 Block\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeInt 32 1\n"
+                       "%7 = OpTypePointer Input %6\n"
+                       "%8 = OpVariable %7 Input\n"
+                       "%10 = OpConstant %6 0\n"
+                       "%11 = OpTypeBool\n"
+                       "%15 = OpTypeFloat 32\n"
+                       "%16 = OpTypeInt 32 0\n"
+                       "%17 = OpConstant %16 4\n"
+                       "%18 = OpTypeArray %15 %17\n"
+                       "%19 = OpTypePointer Output %18\n"
+                       "%20 = OpVariable %19 Output\n"
+                       "%21 = OpConstant %15 1\n"
+                       "%22 = OpTypePointer Output %15\n"
+                       "%24 = OpConstant %6 1\n"
+                       "%26 = OpTypeVector %15 4\n"
+                       "%27 = OpConstant %16 2\n"
+                       "%28 = OpTypeArray %26 %27\n"
+                       "%29 = OpTypePointer Output %28\n"
+                       "%30 = OpVariable %29 Output\n"
+                       "%32 = OpConstant %15 0\n"
+                       "%33 = OpConstantComposite %26 %32 %32 %32 %32\n"
+                       "%34 = OpTypePointer Output %26\n"
+                       "%36 = OpConstant %16 1\n"
+                       "%37 = OpTypeArray %15 %36\n"
+                       "%38 = OpTypeStruct %26 %15 %37 %37\n"
+                       "%39 = OpTypeArray %38 %27\n"
+                       "%40 = OpTypePointer Output %39\n"
+                       "%41 = OpVariable %40 Output\n"
+                       "%43 = OpTypeStruct %26 %15 %37 %37\n"
+                       "%44 = OpConstant %16 32\n"
+                       "%45 = OpTypeArray %43 %44\n"
+                       "%46 = OpTypePointer Input %45\n"
+                       "%47 = OpVariable %46 Input\n"
+                       "%49 = OpTypePointer Input %26\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%9 = OpLoad %6 %8\n"
+                       "%12 = OpIEqual %11 %9 %10\n"
+                       "OpSelectionMerge %14 None\n"
+                       "OpBranchConditional %12 %13 %14\n"
+                       "%13 = OpLabel\n"
+                       "%23 = OpAccessChain %22 %20 %10\n"
+                       "OpStore %23 %21\n"
+                       "%25 = OpAccessChain %22 %20 %24\n"
+                       "OpStore %25 %21\n"
+                       "OpBranch %14\n"
+                       "%14 = OpLabel\n"
+                       "%31 = OpLoad %6 %8\n"
+                       "%35 = OpAccessChain %34 %30 %31\n"
+                       "OpStore %35 %33\n"
+                       "%42 = OpLoad %6 %8\n"
+                       "%48 = OpLoad %6 %8\n"
+                       "%50 = OpAccessChain %49 %47 %48 %10\n"
+                       "%51 = OpLoad %26 %50\n"
+                       "%52 = OpAccessChain %34 %41 %42 %10\n"
+                       "OpStore %52 %51\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
+
+               /*
+                       "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                       "#extension GL_EXT_tessellation_shader : require\n"
+                       "layout(isolines, equal_spacing, ccw ) in;\n"
+                       "layout(location = 0) in vec4 in_color[];\n"
+                       "layout(location = 0) out vec4 out_color;\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                       "  out_color = vec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0.0f, 0.0f);\n"
+                       "}\n";
+               */
+               const string evaluationSource =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 51\n"
+                       "; Schema: 0\n"
+                       "OpCapability Tessellation\n"
+                       "OpCapability GroupNonUniform\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %38 %40 %43 %50\n"
+                       "OpExecutionMode %4 Isolines\n"
+                       "OpExecutionMode %4 SpacingEqual\n"
+                       "OpExecutionMode %4 VertexOrderCcw\n"
+                       "OpMemberDecorate %11 0 BuiltIn Position\n"
+                       "OpMemberDecorate %11 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
+                       "OpDecorate %11 Block\n"
+                       "OpMemberDecorate %16 0 BuiltIn Position\n"
+                       "OpMemberDecorate %16 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
+                       "OpDecorate %16 Block\n"
+                       "OpDecorate %29 BuiltIn TessCoord\n"
+                       "OpDecorate %38 Location 0\n"
+                       "OpDecorate %40 RelaxedPrecision\n"
+                       "OpDecorate %40 BuiltIn SubgroupSize\n"
+                       "OpDecorate %41 RelaxedPrecision\n"
+                       "OpDecorate %43 RelaxedPrecision\n"
+                       "OpDecorate %43 BuiltIn SubgroupLocalInvocationId\n"
+                       "OpDecorate %44 RelaxedPrecision\n"
+                       "OpDecorate %50 Location 0\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeFloat 32\n"
+                       "%7 = OpTypeVector %6 4\n"
+                       "%8 = OpTypeInt 32 0\n"
+                       "%9 = OpConstant %8 1\n"
+                       "%10 = OpTypeArray %6 %9\n"
+                       "%11 = OpTypeStruct %7 %6 %10 %10\n"
+                       "%12 = OpTypePointer Output %11\n"
+                       "%13 = OpVariable %12 Output\n"
+                       "%14 = OpTypeInt 32 1\n"
+                       "%15 = OpConstant %14 0\n"
+                       "%16 = OpTypeStruct %7 %6 %10 %10\n"
+                       "%17 = OpConstant %8 32\n"
+                       "%18 = OpTypeArray %16 %17\n"
+                       "%19 = OpTypePointer Input %18\n"
+                       "%20 = OpVariable %19 Input\n"
+                       "%21 = OpTypePointer Input %7\n"
+                       "%24 = OpConstant %14 1\n"
+                       "%27 = OpTypeVector %6 3\n"
+                       "%28 = OpTypePointer Input %27\n"
+                       "%29 = OpVariable %28 Input\n"
+                       "%30 = OpConstant %8 0\n"
+                       "%31 = OpTypePointer Input %6\n"
+                       "%36 = OpTypePointer Output %7\n"
+                       "%38 = OpVariable %36 Output\n"
+                       "%39 = OpTypePointer Input %8\n"
+                       "%40 = OpVariable %39 Input\n"
+                       "%43 = OpVariable %39 Input\n"
+                       "%46 = OpConstant %6 0\n"
+                       "%48 = OpTypeArray %7 %17\n"
+                       "%49 = OpTypePointer Input %48\n"
+                       "%50 = OpVariable %49 Input\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%22 = OpAccessChain %21 %20 %15 %15\n"
+                       "%23 = OpLoad %7 %22\n"
+                       "%25 = OpAccessChain %21 %20 %24 %15\n"
+                       "%26 = OpLoad %7 %25\n"
+                       "%32 = OpAccessChain %31 %29 %30\n"
+                       "%33 = OpLoad %6 %32\n"
+                       "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
+                       "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
+                       "%37 = OpAccessChain %36 %13 %15\n"
+                       "OpStore %37 %35\n"
+                       "%41 = OpLoad %8 %40\n"
+                       "%42 = OpConvertUToF %6 %41\n"
+                       "%44 = OpLoad %8 %43\n"
+                       "%45 = OpConvertUToF %6 %44\n"
+                       "%47 = OpCompositeConstruct %7 %42 %45 %46 %46\n"
+                       "OpStore %38 %47\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+
+               programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+       {
+               /*
+                       "#extension GL_EXT_tessellation_shader : require\n"
+                       "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                       "layout(vertices = 2) out;\n"
+                       "layout(location = 0) out vec4 out_color[];\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  if (gl_InvocationID == 0)\n"
+                       "  {\n"
+                       "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       "  }\n"
+                       "  out_color[gl_InvocationID] = vec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0, 0);\n"
+                       "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       "}\n";
+               */
+               const string controlSource =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 60\n"
+                       "; Schema: 0\n"
+                       "OpCapability Tessellation\n"
+                       "OpCapability GroupNonUniform\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %30 %33 %36 %48 %54\n"
+                       "OpExecutionMode %4 OutputVertices 2\n"
+                       "OpDecorate %8 BuiltIn InvocationId\n"
+                       "OpDecorate %20 Patch\n"
+                       "OpDecorate %20 BuiltIn TessLevelOuter\n"
+                       "OpDecorate %30 Location 0\n"
+                       "OpDecorate %33 RelaxedPrecision\n"
+                       "OpDecorate %33 BuiltIn SubgroupSize\n"
+                       "OpDecorate %34 RelaxedPrecision\n"
+                       "OpDecorate %36 RelaxedPrecision\n"
+                       "OpDecorate %36 BuiltIn SubgroupLocalInvocationId\n"
+                       "OpDecorate %37 RelaxedPrecision\n"
+                       "OpMemberDecorate %45 0 BuiltIn Position\n"
+                       "OpMemberDecorate %45 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %45 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %45 3 BuiltIn CullDistance\n"
+                       "OpDecorate %45 Block\n"
+                       "OpMemberDecorate %50 0 BuiltIn Position\n"
+                       "OpMemberDecorate %50 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %50 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %50 3 BuiltIn CullDistance\n"
+                       "OpDecorate %50 Block\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeInt 32 1\n"
+                       "%7 = OpTypePointer Input %6\n"
+                       "%8 = OpVariable %7 Input\n"
+                       "%10 = OpConstant %6 0\n"
+                       "%11 = OpTypeBool\n"
+                       "%15 = OpTypeFloat 32\n"
+                       "%16 = OpTypeInt 32 0\n"
+                       "%17 = OpConstant %16 4\n"
+                       "%18 = OpTypeArray %15 %17\n"
+                       "%19 = OpTypePointer Output %18\n"
+                       "%20 = OpVariable %19 Output\n"
+                       "%21 = OpConstant %15 1\n"
+                       "%22 = OpTypePointer Output %15\n"
+                       "%24 = OpConstant %6 1\n"
+                       "%26 = OpTypeVector %15 4\n"
+                       "%27 = OpConstant %16 2\n"
+                       "%28 = OpTypeArray %26 %27\n"
+                       "%29 = OpTypePointer Output %28\n"
+                       "%30 = OpVariable %29 Output\n"
+                       "%32 = OpTypePointer Input %16\n"
+                       "%33 = OpVariable %32 Input\n"
+                       "%36 = OpVariable %32 Input\n"
+                       "%39 = OpConstant %15 0\n"
+                       "%41 = OpTypePointer Output %26\n"
+                       "%43 = OpConstant %16 1\n"
+                       "%44 = OpTypeArray %15 %43\n"
+                       "%45 = OpTypeStruct %26 %15 %44 %44\n"
+                       "%46 = OpTypeArray %45 %27\n"
+                       "%47 = OpTypePointer Output %46\n"
+                       "%48 = OpVariable %47 Output\n"
+                       "%50 = OpTypeStruct %26 %15 %44 %44\n"
+                       "%51 = OpConstant %16 32\n"
+                       "%52 = OpTypeArray %50 %51\n"
+                       "%53 = OpTypePointer Input %52\n"
+                       "%54 = OpVariable %53 Input\n"
+                       "%56 = OpTypePointer Input %26\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%9 = OpLoad %6 %8\n"
+                       "%12 = OpIEqual %11 %9 %10\n"
+                       "OpSelectionMerge %14 None\n"
+                       "OpBranchConditional %12 %13 %14\n"
+                       "%13 = OpLabel\n"
+                       "%23 = OpAccessChain %22 %20 %10\n"
+                       "OpStore %23 %21\n"
+                       "%25 = OpAccessChain %22 %20 %24\n"
+                       "OpStore %25 %21\n"
+                       "OpBranch %14\n"
+                       "%14 = OpLabel\n"
+                       "%31 = OpLoad %6 %8\n"
+                       "%34 = OpLoad %16 %33\n"
+                       "%35 = OpConvertUToF %15 %34\n"
+                       "%37 = OpLoad %16 %36\n"
+                       "%38 = OpConvertUToF %15 %37\n"
+                       "%40 = OpCompositeConstruct %26 %35 %38 %39 %39\n"
+                       "%42 = OpAccessChain %41 %30 %31\n"
+                       "OpStore %42 %40\n"
+                       "%49 = OpLoad %6 %8\n"
+                       "%55 = OpLoad %6 %8\n"
+                       "%57 = OpAccessChain %56 %54 %55 %10\n"
+                       "%58 = OpLoad %26 %57\n"
+                       "%59 = OpAccessChain %41 %48 %49 %10\n"
+                       "OpStore %59 %58\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
+
+               /*
+                       "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                       "#extension GL_EXT_tessellation_shader : require\n"
+                       "layout(isolines, equal_spacing, ccw ) in;\n"
+                       "layout(location = 0) in vec4 in_color[];\n"
+                       "layout(location = 0) out vec4 out_color;\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                       "  out_color = in_color[0];\n"
+                       "}\n";
+               */
+               const string  evaluationSource =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 44\n"
+                       "; Schema: 0\n"
+                       "OpCapability Tessellation\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %38 %41\n"
+                       "OpExecutionMode %4 Isolines\n"
+                       "OpExecutionMode %4 SpacingEqual\n"
+                       "OpExecutionMode %4 VertexOrderCcw\n"
+                       "OpMemberDecorate %11 0 BuiltIn Position\n"
+                       "OpMemberDecorate %11 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
+                       "OpDecorate %11 Block\n"
+                       "OpMemberDecorate %16 0 BuiltIn Position\n"
+                       "OpMemberDecorate %16 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
+                       "OpDecorate %16 Block\n"
+                       "OpDecorate %29 BuiltIn TessCoord\n"
+                       "OpDecorate %38 Location 0\n"
+                       "OpDecorate %41 Location 0\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeFloat 32\n"
+                       "%7 = OpTypeVector %6 4\n"
+                       "%8 = OpTypeInt 32 0\n"
+                       "%9 = OpConstant %8 1\n"
+                       "%10 = OpTypeArray %6 %9\n"
+                       "%11 = OpTypeStruct %7 %6 %10 %10\n"
+                       "%12 = OpTypePointer Output %11\n"
+                       "%13 = OpVariable %12 Output\n"
+                       "%14 = OpTypeInt 32 1\n"
+                       "%15 = OpConstant %14 0\n"
+                       "%16 = OpTypeStruct %7 %6 %10 %10\n"
+                       "%17 = OpConstant %8 32\n"
+                       "%18 = OpTypeArray %16 %17\n"
+                       "%19 = OpTypePointer Input %18\n"
+                       "%20 = OpVariable %19 Input\n"
+                       "%21 = OpTypePointer Input %7\n"
+                       "%24 = OpConstant %14 1\n"
+                       "%27 = OpTypeVector %6 3\n"
+                       "%28 = OpTypePointer Input %27\n"
+                       "%29 = OpVariable %28 Input\n"
+                       "%30 = OpConstant %8 0\n"
+                       "%31 = OpTypePointer Input %6\n"
+                       "%36 = OpTypePointer Output %7\n"
+                       "%38 = OpVariable %36 Output\n"
+                       "%39 = OpTypeArray %7 %17\n"
+                       "%40 = OpTypePointer Input %39\n"
+                       "%41 = OpVariable %40 Input\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%22 = OpAccessChain %21 %20 %15 %15\n"
+                       "%23 = OpLoad %7 %22\n"
+                       "%25 = OpAccessChain %21 %20 %24 %15\n"
+                       "%26 = OpLoad %7 %25\n"
+                       "%32 = OpAccessChain %31 %29 %30\n"
+                       "%33 = OpLoad %6 %32\n"
+                       "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
+                       "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
+                       "%37 = OpAccessChain %36 %13 %15\n"
+                       "OpStore %37 %35\n"
+                       "%42 = OpAccessChain %21 %41 %15\n"
+                       "%43 = OpLoad %7 %42\n"
+                       "OpStore %38 %43\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
+       }
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+       {
+               /*
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                       "layout(points) in;\n"
+                       "layout(points, max_vertices = 1) out;\n"
+                       "layout(location = 0) out vec4 out_color;\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  out_color = vec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0, 0);\n"
+                       "  gl_Position = gl_in[0].gl_Position;\n"
+                       "  EmitVertex();\n"
+                       "  EndPrimitive();\n"
+                       "}\n";
+               */
+               const string geometry =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 35\n"
+                       "; Schema: 0\n"
+                       "OpCapability Geometry\n"
+                       "OpCapability GroupNonUniform\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint Geometry %4 \"main\" %9 %12 %15 %24 %30\n"
+                       "OpExecutionMode %4 InputPoints\n"
+                       "OpExecutionMode %4 Invocations 1\n"
+                       "OpExecutionMode %4 OutputPoints\n"
+                       "OpExecutionMode %4 OutputVertices 1\n"
+                       "OpDecorate %9 Location 0\n"
+                       "OpDecorate %12 RelaxedPrecision\n"
+                       "OpDecorate %12 BuiltIn SubgroupSize\n"
+                       "OpDecorate %13 RelaxedPrecision\n"
+                       "OpDecorate %15 RelaxedPrecision\n"
+                       "OpDecorate %15 BuiltIn SubgroupLocalInvocationId\n"
+                       "OpDecorate %16 RelaxedPrecision\n"
+                       "OpMemberDecorate %22 0 BuiltIn Position\n"
+                       "OpMemberDecorate %22 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %22 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %22 3 BuiltIn CullDistance\n"
+                       "OpDecorate %22 Block\n"
+                       "OpMemberDecorate %27 0 BuiltIn Position\n"
+                       "OpMemberDecorate %27 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %27 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %27 3 BuiltIn CullDistance\n"
+                       "OpDecorate %27 Block\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeFloat 32\n"
+                       "%7 = OpTypeVector %6 4\n"
+                       "%8 = OpTypePointer Output %7\n"
+                       "%9 = OpVariable %8 Output\n"
+                       "%10 = OpTypeInt 32 0\n"
+                       "%11 = OpTypePointer Input %10\n"
+                       "%12 = OpVariable %11 Input\n"
+                       "%15 = OpVariable %11 Input\n"
+                       "%18 = OpConstant %6 0\n"
+                       "%20 = OpConstant %10 1\n"
+                       "%21 = OpTypeArray %6 %20\n"
+                       "%22 = OpTypeStruct %7 %6 %21 %21\n"
+                       "%23 = OpTypePointer Output %22\n"
+                       "%24 = OpVariable %23 Output\n"
+                       "%25 = OpTypeInt 32 1\n"
+                       "%26 = OpConstant %25 0\n"
+                       "%27 = OpTypeStruct %7 %6 %21 %21\n"
+                       "%28 = OpTypeArray %27 %20\n"
+                       "%29 = OpTypePointer Input %28\n"
+                       "%30 = OpVariable %29 Input\n"
+                       "%31 = OpTypePointer Input %7\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%13 = OpLoad %10 %12\n"
+                       "%14 = OpConvertUToF %6 %13\n"
+                       "%16 = OpLoad %10 %15\n"
+                       "%17 = OpConvertUToF %6 %16\n"
+                       "%19 = OpCompositeConstruct %7 %14 %17 %18 %18\n"
+                       "OpStore %9 %19\n"
+                       "%32 = OpAccessChain %31 %30 %26 %26\n"
+                       "%33 = OpLoad %7 %32\n"
+                       "%34 = OpAccessChain %8 %24 %26\n"
+                       "OpStore %34 %33\n"
+                       "OpEmitVertex\n"
+                       "OpEndPrimitive\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("geometry") << geometry << buildOptionsSpr;
+       }
+       else
+       {
+               DE_FATAL("Unsupported shader stage");
+       }
+}
+
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream src;
+
+               src << "#version 450\n"
+                       << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                       << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+                       "local_size_z_id = 2) in;\n"
+                       << "layout(set = 0, binding = 0, std430) buffer Output\n"
+                       << "{\n"
+                       << "  uvec4 result[];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+                       << "  highp uint offset = globalSize.x * ((globalSize.y * "
+                       "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+                       "gl_GlobalInvocationID.x;\n"
+                       << "  result[offset] = uvec4(gl_SubgroupSize, gl_SubgroupInvocationID, gl_NumSubgroups, gl_SubgroupID);\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("comp")
+                               << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+       else
+       {
+               {
+                       /*
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               "layout(set = 0, binding = 0, std430) buffer Output\n"
+                               "{\n"
+                               "  uvec4 result[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  result[gl_VertexIndex] = uvec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0, 0);\n"
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+                               "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+                               "  gl_PointSize = 1.0f;\n"
+                               "}\n";
+                       */
+                       const string vertex =
+                               "; SPIR-V\n"
+                               "; Version: 1.3\n"
+                               "; Generator: Khronos Glslang Reference Front End; 1\n"
+                               "; Bound: 52\n"
+                               "; Schema: 0\n"
+                               "OpCapability Shader\n"
+                               "OpCapability GroupNonUniform\n"
+                               "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                               "OpMemoryModel Logical GLSL450\n"
+                               "OpEntryPoint Vertex %4 \"main\" %15 %18 %20 %41\n"
+                               "OpDecorate %8 ArrayStride 16\n"
+                               "OpMemberDecorate %9 0 Offset 0\n"
+                               "OpDecorate %9 BufferBlock\n"
+                               "OpDecorate %11 DescriptorSet 0\n"
+                               "OpDecorate %11 Binding 0\n"
+                               "OpDecorate %15 BuiltIn VertexIndex\n"
+                               "OpDecorate %18 RelaxedPrecision\n"
+                               "OpDecorate %18 BuiltIn SubgroupSize\n"
+                               "OpDecorate %19 RelaxedPrecision\n"
+                               "OpDecorate %20 RelaxedPrecision\n"
+                               "OpDecorate %20 BuiltIn SubgroupLocalInvocationId\n"
+                               "OpDecorate %21 RelaxedPrecision\n"
+                               "OpMemberDecorate %39 0 BuiltIn Position\n"
+                               "OpMemberDecorate %39 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %39 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %39 3 BuiltIn CullDistance\n"
+                               "OpDecorate %39 Block\n"
+                               "%2 = OpTypeVoid\n"
+                               "%3 = OpTypeFunction %2\n"
+                               "%6 = OpTypeInt 32 0\n"
+                               "%7 = OpTypeVector %6 4\n"
+                               "%8 = OpTypeRuntimeArray %7\n"
+                               "%9 = OpTypeStruct %8\n"
+                               "%10 = OpTypePointer Uniform %9\n"
+                               "%11 = OpVariable %10 Uniform\n"
+                               "%12 = OpTypeInt 32 1\n"
+                               "%13 = OpConstant %12 0\n"
+                               "%14 = OpTypePointer Input %12\n"
+                               "%15 = OpVariable %14 Input\n"
+                               "%17 = OpTypePointer Input %6\n"
+                               "%18 = OpVariable %17 Input\n"
+                               "%20 = OpVariable %17 Input\n"
+                               "%22 = OpConstant %6 0\n"
+                               "%24 = OpTypePointer Uniform %7\n"
+                               "%26 = OpTypeFloat 32\n"
+                               "%27 = OpTypePointer Function %26\n"
+                               "%29 = OpConstant %26 0.00195313\n"
+                               "%32 = OpConstant %26 2\n"
+                               "%34 = OpConstant %26 1\n"
+                               "%36 = OpTypeVector %26 4\n"
+                               "%37 = OpConstant %6 1\n"
+                               "%38 = OpTypeArray %26 %37\n"
+                               "%39 = OpTypeStruct %36 %26 %38 %38\n"
+                               "%40 = OpTypePointer Output %39\n"
+                               "%41 = OpVariable %40 Output\n"
+                               "%48 = OpConstant %26 0\n"
+                               "%50 = OpTypePointer Output %36\n"
+                               "%52 = OpConstant %12 1\n"
+                               "%53 = OpTypePointer Output %26\n"
+                               "%4 = OpFunction %2 None %3\n"
+                               "%5 = OpLabel\n"
+                               "%28 = OpVariable %27 Function\n"
+                               "%30 = OpVariable %27 Function\n"
+                               "%16 = OpLoad %12 %15\n"
+                               "%19 = OpLoad %6 %18\n"
+                               "%21 = OpLoad %6 %20\n"
+                               "%23 = OpCompositeConstruct %7 %19 %21 %22 %22\n"
+                               "%25 = OpAccessChain %24 %11 %13 %16\n"
+                               "OpStore %25 %23\n"
+                               "OpStore %28 %29\n"
+                               "%31 = OpLoad %26 %28\n"
+                               "%33 = OpFDiv %26 %31 %32\n"
+                               "%35 = OpFSub %26 %33 %34\n"
+                               "OpStore %30 %35\n"
+                               "%42 = OpLoad %12 %15\n"
+                               "%43 = OpConvertSToF %26 %42\n"
+                               "%44 = OpLoad %26 %28\n"
+                               "%45 = OpFMul %26 %43 %44\n"
+                               "%46 = OpLoad %26 %30\n"
+                               "%47 = OpFAdd %26 %45 %46\n"
+                               "%49 = OpCompositeConstruct %36 %47 %48 %48 %34\n"
+                               "%51 = OpAccessChain %50 %41 %13\n"
+                               "OpStore %51 %49\n"
+                               "%54 = OpAccessChain %53 %41 %52\n"
+                               "OpStore %54 %34\n"
+                               "OpReturn\n"
+                               "OpFunctionEnd\n";
+                               programCollection.spirvAsmSources.add("vert") << vertex << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3);
+               }
+
+               {
+                       /*
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               "layout(vertices=1) out;\n"
+                               "layout(set = 0, binding = 1, std430) buffer Output\n"
+                               "{\n"
+                               "  uvec4 result[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  result[gl_PrimitiveID] = uvec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0, 0);\n"
+                               "  if (gl_InvocationID == 0)\n"
+                               "  {\n"
+                               "    gl_TessLevelOuter[0] = 1.0f;\n"
+                               "    gl_TessLevelOuter[1] = 1.0f;\n"
+                               "  }\n"
+                               "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                               "}\n";
+                       */
+                       const string tesc =
+                               "; SPIR-V\n"
+                               "; Version: 1.3\n"
+                               "; Generator: Khronos Glslang Reference Front End; 1\n"
+                               "; Bound: 61\n"
+                               "; Schema: 0\n"
+                               "OpCapability Tessellation\n"
+                               "OpCapability GroupNonUniform\n"
+                               "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                               "OpMemoryModel Logical GLSL450\n"
+                               "OpEntryPoint TessellationControl %4 \"main\" %15 %18 %20 %26 %36 %48 %54\n"
+                               "OpExecutionMode %4 OutputVertices 1\n"
+                               "OpDecorate %8 ArrayStride 16\n"
+                               "OpMemberDecorate %9 0 Offset 0\n"
+                               "OpDecorate %9 BufferBlock\n"
+                               "OpDecorate %11 DescriptorSet 0\n"
+                               "OpDecorate %11 Binding 1\n"
+                               "OpDecorate %15 BuiltIn PrimitiveId\n"
+                               "OpDecorate %18 RelaxedPrecision\n"
+                               "OpDecorate %18 BuiltIn SubgroupSize\n"
+                               "OpDecorate %19 RelaxedPrecision\n"
+                               "OpDecorate %20 RelaxedPrecision\n"
+                               "OpDecorate %20 BuiltIn SubgroupLocalInvocationId\n"
+                               "OpDecorate %21 RelaxedPrecision\n"
+                               "OpDecorate %26 BuiltIn InvocationId\n"
+                               "OpDecorate %36 Patch\n"
+                               "OpDecorate %36 BuiltIn TessLevelOuter\n"
+                               "OpMemberDecorate %45 0 BuiltIn Position\n"
+                               "OpMemberDecorate %45 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %45 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %45 3 BuiltIn CullDistance\n"
+                               "OpDecorate %45 Block\n"
+                               "OpMemberDecorate %50 0 BuiltIn Position\n"
+                               "OpMemberDecorate %50 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %50 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %50 3 BuiltIn CullDistance\n"
+                               "OpDecorate %50 Block\n"
+                               "%2 = OpTypeVoid\n"
+                               "%3 = OpTypeFunction %2\n"
+                               "%6 = OpTypeInt 32 0\n"
+                               "%7 = OpTypeVector %6 4\n"
+                               "%8 = OpTypeRuntimeArray %7\n"
+                               "%9 = OpTypeStruct %8\n"
+                               "%10 = OpTypePointer Uniform %9\n"
+                               "%11 = OpVariable %10 Uniform\n"
+                               "%12 = OpTypeInt 32 1\n"
+                               "%13 = OpConstant %12 0\n"
+                               "%14 = OpTypePointer Input %12\n"
+                               "%15 = OpVariable %14 Input\n"
+                               "%17 = OpTypePointer Input %6\n"
+                               "%18 = OpVariable %17 Input\n"
+                               "%20 = OpVariable %17 Input\n"
+                               "%22 = OpConstant %6 0\n"
+                               "%24 = OpTypePointer Uniform %7\n"
+                               "%26 = OpVariable %14 Input\n"
+                               "%28 = OpTypeBool\n"
+                               "%32 = OpTypeFloat 32\n"
+                               "%33 = OpConstant %6 4\n"
+                               "%34 = OpTypeArray %32 %33\n"
+                               "%35 = OpTypePointer Output %34\n"
+                               "%36 = OpVariable %35 Output\n"
+                               "%37 = OpConstant %32 1\n"
+                               "%38 = OpTypePointer Output %32\n"
+                               "%40 = OpConstant %12 1\n"
+                               "%42 = OpTypeVector %32 4\n"
+                               "%43 = OpConstant %6 1\n"
+                               "%44 = OpTypeArray %32 %43\n"
+                               "%45 = OpTypeStruct %42 %32 %44 %44\n"
+                               "%46 = OpTypeArray %45 %43\n"
+                               "%47 = OpTypePointer Output %46\n"
+                               "%48 = OpVariable %47 Output\n"
+                               "%50 = OpTypeStruct %42 %32 %44 %44\n"
+                               "%51 = OpConstant %6 32\n"
+                               "%52 = OpTypeArray %50 %51\n"
+                               "%53 = OpTypePointer Input %52\n"
+                               "%54 = OpVariable %53 Input\n"
+                               "%56 = OpTypePointer Input %42\n"
+                               "%59 = OpTypePointer Output %42\n"
+                               "%4 = OpFunction %2 None %3\n"
+                               "%5 = OpLabel\n"
+                               "%16 = OpLoad %12 %15\n"
+                               "%19 = OpLoad %6 %18\n"
+                               "%21 = OpLoad %6 %20\n"
+                               "%23 = OpCompositeConstruct %7 %19 %21 %22 %22\n"
+                               "%25 = OpAccessChain %24 %11 %13 %16\n"
+                               "OpStore %25 %23\n"
+                               "%27 = OpLoad %12 %26\n"
+                               "%29 = OpIEqual %28 %27 %13\n"
+                               "OpSelectionMerge %31 None\n"
+                               "OpBranchConditional %29 %30 %31\n"
+                               "%30 = OpLabel\n"
+                               "%39 = OpAccessChain %38 %36 %13\n"
+                               "OpStore %39 %37\n"
+                               "%41 = OpAccessChain %38 %36 %40\n"
+                               "OpStore %41 %37\n"
+                               "OpBranch %31\n"
+                               "%31 = OpLabel\n"
+                               "%49 = OpLoad %12 %26\n"
+                               "%55 = OpLoad %12 %26\n"
+                               "%57 = OpAccessChain %56 %54 %55 %13\n"
+                               "%58 = OpLoad %42 %57\n"
+                               "%60 = OpAccessChain %59 %48 %49 %13\n"
+                               "OpStore %60 %58\n"
+                               "OpReturn\n"
+                               "OpFunctionEnd\n";
+                               programCollection.spirvAsmSources.add("tesc") << tesc << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3);
+               }
+
+               {
+                       /*
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               "layout(isolines) in;\n"
+                               "layout(set = 0, binding = 2, std430) buffer Output\n"
+                               "{\n"
+                               "  uvec4 result[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = uvec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0, 0);\n"
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+                               "}\n";
+                       */
+                       const string tese =
+                               "; SPIR - V\n"
+                               "; Version: 1.3\n"
+                               "; Generator: Khronos Glslang Reference Front End; 2\n"
+                               "; Bound: 67\n"
+                               "; Schema: 0\n"
+                               "OpCapability Tessellation\n"
+                               "OpCapability GroupNonUniform\n"
+                               "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                               "OpMemoryModel Logical GLSL450\n"
+                               "OpEntryPoint TessellationEvaluation %4 \"main\" %15 %23 %33 %35 %48 %53\n"
+                               "OpExecutionMode %4 Isolines\n"
+                               "OpExecutionMode %4 SpacingEqual\n"
+                               "OpExecutionMode %4 VertexOrderCcw\n"
+                               "OpDecorate %8 ArrayStride 16\n"
+                               "OpMemberDecorate %9 0 Offset 0\n"
+                               "OpDecorate %9 BufferBlock\n"
+                               "OpDecorate %11 DescriptorSet 0\n"
+                               "OpDecorate %11 Binding 2\n"
+                               "OpDecorate %15 BuiltIn PrimitiveId\n"
+                               "OpDecorate %23 BuiltIn TessCoord\n"
+                               "OpDecorate %33 RelaxedPrecision\n"
+                               "OpDecorate %33 BuiltIn SubgroupSize\n"
+                               "OpDecorate %34 RelaxedPrecision\n"
+                               "OpDecorate %35 RelaxedPrecision\n"
+                               "OpDecorate %35 BuiltIn SubgroupLocalInvocationId\n"
+                               "OpDecorate %36 RelaxedPrecision\n"
+                               "OpMemberDecorate %46 0 BuiltIn Position\n"
+                               "OpMemberDecorate %46 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %46 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %46 3 BuiltIn CullDistance\n"
+                               "OpDecorate %46 Block\n"
+                               "OpMemberDecorate %49 0 BuiltIn Position\n"
+                               "OpMemberDecorate %49 1 BuiltIn PointSize\n"
+                               "OpMemberDecorate %49 2 BuiltIn ClipDistance\n"
+                               "OpMemberDecorate %49 3 BuiltIn CullDistance\n"
+                               "OpDecorate %49 Block\n"
+                               "%2 = OpTypeVoid\n"
+                               "%3 = OpTypeFunction %2\n"
+                               "%6 = OpTypeInt 32 0\n"
+                               "%7 = OpTypeVector %6 4\n"
+                               "%8 = OpTypeRuntimeArray %7\n"
+                               "%9 = OpTypeStruct %8\n"
+                               "%10 = OpTypePointer Uniform %9\n"
+                               "%11 = OpVariable %10 Uniform\n"
+                               "%12 = OpTypeInt 32 1\n"
+                               "%13 = OpConstant %12 0\n"
+                               "%14 = OpTypePointer Input %12\n"
+                               "%15 = OpVariable %14 Input\n"
+                               "%17 = OpConstant %12 2\n"
+                               "%20 = OpTypeFloat 32\n"
+                               "%21 = OpTypeVector %20 3\n"
+                               "%22 = OpTypePointer Input %21\n"
+                               "%23 = OpVariable %22 Input\n"
+                               "%24 = OpConstant %6 0\n"
+                               "%25 = OpTypePointer Input %20\n"
+                               "%28 = OpConstant %20 0.5\n"
+                               "%32 = OpTypePointer Input %6\n"
+                               "%33 = OpVariable %32 Input\n"
+                               "%35 = OpVariable %32 Input\n"
+                               "%38 = OpTypePointer Uniform %7\n"
+                               "%40 = OpTypePointer Function %20\n"
+                               "%42 = OpConstant %20 0.00195313\n"
+                               "%43 = OpTypeVector %20 4\n"
+                               "%44 = OpConstant %6 1\n"
+                               "%45 = OpTypeArray %20 %44\n"
+                               "%46 = OpTypeStruct %43 %20 %45 %45\n"
+                               "%47 = OpTypePointer Output %46\n"
+                               "%48 = OpVariable %47 Output\n"
+                               "%49 = OpTypeStruct %43 %20 %45 %45\n"
+                               "%50 = OpConstant %6 32\n"
+                               "%51 = OpTypeArray %49 %50\n"
+                               "%52 = OpTypePointer Input %51\n"
+                               "%53 = OpVariable %52 Input\n"
+                               "%54 = OpTypePointer Input %43\n"
+                               "%61 = OpConstant %20 2\n"
+                               "%65 = OpTypePointer Output %43\n"
+                               "%4 = OpFunction %2 None %3\n"
+                               "%5 = OpLabel\n"
+                               "%41 = OpVariable %40 Function\n"
+                               "%16 = OpLoad %12 %15\n"
+                               "%18 = OpIMul %12 %16 %17\n"
+                               "%19 = OpBitcast %6 %18\n"
+                               "%26 = OpAccessChain %25 %23 %24\n"
+                               "%27 = OpLoad %20 %26\n"
+                               "%29 = OpFAdd %20 %27 %28\n"
+                               "%30 = OpConvertFToU %6 %29\n"
+                               "%31 = OpIAdd %6 %19 %30\n"
+                               "%34 = OpLoad %6 %33\n"
+                               "%36 = OpLoad %6 %35\n"
+                               "%37 = OpCompositeConstruct %7 %34 %36 %24 %24\n"
+                               "%39 = OpAccessChain %38 %11 %13 %31\n"
+                               "OpStore %39 %37\n"
+                               "OpStore %41 %42\n"
+                               "%55 = OpAccessChain %54 %53 %13 %13\n"
+                               "%56 = OpLoad %43 %55\n"
+                               "%57 = OpAccessChain %25 %23 %24\n"
+                               "%58 = OpLoad %20 %57\n"
+                               "%59 = OpLoad %20 %41\n"
+                               "%60 = OpFMul %20 %58 %59\n"
+                               "%62 = OpFDiv %20 %60 %61\n"
+                               "%63 = OpCompositeConstruct %43 %62 %62 %62 %62\n"
+                               "%64 = OpFAdd %43 %56 %63\n"
+                               "%66 = OpAccessChain %65 %48 %13\n"
+                               "OpStore %66 %64\n"
+                               "OpReturn\n"
+                               "OpFunctionEnd\n";
+                               programCollection.spirvAsmSources.add("tese") << tese << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3);
+               }
+
+               {
+                       /*
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               "// Note: ${TOPOLOGY} variable is substituted manually at SPIR-V ASM level\n"
+                               "layout(${TOPOLOGY}) in;\n"
+                               "layout(points, max_vertices = 1) out;\n"
+                               "layout(set = 0, binding = 3, std430) buffer Output\n"
+                               "{\n"
+                               "  uvec4 result[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  result[gl_PrimitiveIDIn] = uvec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0, 0);\n"
+                               "  gl_Position = gl_in[0].gl_Position;\n"
+                               "  EmitVertex();\n"
+                               "  EndPrimitive();\n"
+                               "}\n";
+                       */
+                       const string geometry =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 1\n"
+                       "; Bound: 42\n"
+                       "; Schema: 0\n"
+                       "OpCapability Geometry\n"
+                       "OpCapability GroupNonUniform\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint Geometry %4 \"main\" %15 %18 %20 %32 %36\n"
+                       "OpExecutionMode %4 ${TOPOLOGY}\n"
+                       "OpExecutionMode %4 Invocations 1\n"
+                       "OpExecutionMode %4 OutputPoints\n"
+                       "OpExecutionMode %4 OutputVertices 1\n"
+                       "OpDecorate %8 ArrayStride 16\n"
+                       "OpMemberDecorate %9 0 Offset 0\n"
+                       "OpDecorate %9 BufferBlock\n"
+                       "OpDecorate %11 DescriptorSet 0\n"
+                       "OpDecorate %11 Binding 3\n"
+                       "OpDecorate %15 BuiltIn PrimitiveId\n"
+                       "OpDecorate %18 RelaxedPrecision\n"
+                       "OpDecorate %18 BuiltIn SubgroupSize\n"
+                       "OpDecorate %19 RelaxedPrecision\n"
+                       "OpDecorate %20 RelaxedPrecision\n"
+                       "OpDecorate %20 BuiltIn SubgroupLocalInvocationId\n"
+                       "OpDecorate %21 RelaxedPrecision\n"
+                       "OpMemberDecorate %30 0 BuiltIn Position\n"
+                       "OpMemberDecorate %30 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %30 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %30 3 BuiltIn CullDistance\n"
+                       "OpDecorate %30 Block\n"
+                       "OpMemberDecorate %33 0 BuiltIn Position\n"
+                       "OpMemberDecorate %33 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %33 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %33 3 BuiltIn CullDistance\n"
+                       "OpDecorate %33 Block\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeInt 32 0\n"
+                       "%7 = OpTypeVector %6 4\n"
+                       "%8 = OpTypeRuntimeArray %7\n"
+                       "%9 = OpTypeStruct %8\n"
+                       "%10 = OpTypePointer Uniform %9\n"
+                       "%11 = OpVariable %10 Uniform\n"
+                       "%12 = OpTypeInt 32 1\n"
+                       "%13 = OpConstant %12 0\n"
+                       "%14 = OpTypePointer Input %12\n"
+                       "%15 = OpVariable %14 Input\n"
+                       "%17 = OpTypePointer Input %6\n"
+                       "%18 = OpVariable %17 Input\n"
+                       "%20 = OpVariable %17 Input\n"
+                       "%22 = OpConstant %6 0\n"
+                       "%24 = OpTypePointer Uniform %7\n"
+                       "%26 = OpTypeFloat 32\n"
+                       "%27 = OpTypeVector %26 4\n"
+                       "%28 = OpConstant %6 1\n"
+                       "%29 = OpTypeArray %26 %28\n"
+                       "%30 = OpTypeStruct %27 %26 %29 %29\n"
+                       "%31 = OpTypePointer Output %30\n"
+                       "%32 = OpVariable %31 Output\n"
+                       "%33 = OpTypeStruct %27 %26 %29 %29\n"
+                       "%34 = OpTypeArray %33 %28\n"
+                       "%35 = OpTypePointer Input %34\n"
+                       "%36 = OpVariable %35 Input\n"
+                       "%37 = OpTypePointer Input %27\n"
+                       "%40 = OpTypePointer Output %27\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%16 = OpLoad %12 %15\n"
+                       "%19 = OpLoad %6 %18\n"
+                       "%21 = OpLoad %6 %20\n"
+                       "%23 = OpCompositeConstruct %7 %19 %21 %22 %22\n"
+                       "%25 = OpAccessChain %24 %11 %13 %16\n"
+                       "OpStore %25 %23\n"
+                       "%38 = OpAccessChain %37 %36 %13 %13\n"
+                       "%39 = OpLoad %27 %38\n"
+                       "%41 = OpAccessChain %40 %32 %13\n"
+                       "OpStore %41 %39\n"
+                       "OpEmitVertex\n"
+                       "OpEndPrimitive\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+                       addGeometryShadersFromTemplate(geometry, SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3), programCollection.spirvAsmSources);
+               }
+
+               {
+                       /*
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_basic: enable\n"
+                               "layout(location = 0) out uvec4 data;\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  data = uvec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0, 0);\n"
+                               "}\n";
+                       */
+                       const string fragment =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 1\n"
+                       "; Bound: 17\n"
+                       "; Schema: 0\n"
+                       "OpCapability Shader\n"
+                       "OpCapability GroupNonUniform\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint Fragment %4 \"main\" %9 %11 %13\n"
+                       "OpExecutionMode %4 OriginUpperLeft\n"
+                       "OpDecorate %9 Location 0\n"
+                       "OpDecorate %11 RelaxedPrecision\n"
+                       "OpDecorate %11 Flat\n"
+                       "OpDecorate %11 BuiltIn SubgroupSize\n"
+                       "OpDecorate %12 RelaxedPrecision\n"
+                       "OpDecorate %13 RelaxedPrecision\n"
+                       "OpDecorate %13 Flat\n"
+                       "OpDecorate %13 BuiltIn SubgroupLocalInvocationId\n"
+                       "OpDecorate %14 RelaxedPrecision\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeInt 32 0\n"
+                       "%7 = OpTypeVector %6 4\n"
+                       "%8 = OpTypePointer Output %7\n"
+                       "%9 = OpVariable %8 Output\n"
+                       "%10 = OpTypePointer Input %6\n"
+                       "%11 = OpVariable %10 Input\n"
+                       "%13 = OpVariable %10 Input\n"
+                       "%15 = OpConstant %6 0\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%12 = OpLoad %6 %11\n"
+                       "%14 = OpLoad %6 %13\n"
+                       "%16 = OpCompositeConstruct %7 %12 %14 %15 %15\n"
+                       "OpStore %9 %16\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+
+                       programCollection.spirvAsmSources.add("fragment") << fragment << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3);
+               }
+
+               subgroups::addNoSubgroupShader(programCollection);
+       }
+}
+
+// Support gate for the builtin variable cases: throws NotSupportedError
+// unless subgroups::isSubgroupSupported() accepts the context.
+// The case definition is accepted only to satisfy the callback signature
+// and is not consulted.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+       DE_UNREF(caseDef);
+
+       if (subgroups::isSubgroupSupported(context))
+               return;
+
+       TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+}
+
+// Runs the builtin variable check for a single stage through the
+// make*FrameBufferTest helpers (vertex, tessellation, geometry).
+//
+// Returns a failure when the stage is required to support subgroup
+// operations but does not, or when caseDef.varName is not one of the two
+// variables handled here. Throws NotSupportedError when an optional stage
+// lacks support, and InternalError for a stage that has no branch below.
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+       if (!areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+       {
+               // Missing support is only a failure for stages that mandate it.
+               if (!areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+
+               return tcu::TestStatus::fail(
+                       "Shader stage " + getShaderStageName(caseDef.shaderStage) +
+                       " is required to support subgroup operations!");
+       }
+
+       const bool wantsSize         = ("gl_SubgroupSize" == caseDef.varName);
+       const bool wantsInvocationID = ("gl_SubgroupInvocationID" == caseDef.varName);
+
+       const bool isTessellation    = ((VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) & caseDef.shaderStage) != 0;
+       const bool isGeometry        = (VK_SHADER_STAGE_GEOMETRY_BIT & caseDef.shaderStage) != 0;
+
+       // The stage is resolved before the variable name, so an unknown stage
+       // always raises InternalError regardless of the variable requested.
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+       {
+               if (wantsSize)
+                       return makeVertexFrameBufferTest(
+                               context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupSize);
+
+               if (wantsInvocationID)
+                       return makeVertexFrameBufferTest(
+                               context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupInvocationID);
+       }
+       else if (isTessellation)
+       {
+               // Both tessellation stages go through the evaluation-stage helper.
+               if (wantsSize)
+                       return makeTessellationEvaluationFrameBufferTest(
+                               context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupSize);
+
+               if (wantsInvocationID)
+                       return makeTessellationEvaluationFrameBufferTest(
+                               context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupInvocationID);
+       }
+       else if (isGeometry)
+       {
+               if (wantsSize)
+                       return makeGeometryFrameBufferTest(
+                               context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupSize);
+
+               if (wantsInvocationID)
+                       return makeGeometryFrameBufferTest(
+                               context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupInvocationID);
+       }
+       else
+       {
+               TCU_THROW(InternalError, "Unhandled shader stage");
+       }
+
+       // Known stage, but no checker exists for the requested variable.
+       return tcu::TestStatus::fail(
+               caseDef.varName + " failed (unhandled error checking case " +
+               caseDef.varName + ")!");
+}
+
+
+// Entry point for the compute cases and the all-graphics-stages cases.
+//
+// Compute: fails outright when the compute stage lacks subgroup support,
+// otherwise dispatches to makeComputeTest with the checker matching
+// caseDef.varName.
+// Graphics: intersects the requested stages with the stages reported in
+// VkPhysicalDeviceSubgroupProperties::supportedStages and runs
+// subgroups::allStages on the result.
+//
+// Any variable name without a checker yields a failing TestStatus.
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+       const bool wantsSize         = ("gl_SubgroupSize" == caseDef.varName);
+       const bool wantsInvocationID = ("gl_SubgroupInvocationID" == caseDef.varName);
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               if (!areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+               {
+                       return tcu::TestStatus::fail(
+                               "Shader stage " + getShaderStageName(caseDef.shaderStage) +
+                               " is required to support subgroup operations!");
+               }
+
+               if (wantsSize)
+                       return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkComputeSubgroupSize);
+
+               if (wantsInvocationID)
+                       return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkComputeSubgroupInvocationID);
+
+               if ("gl_NumSubgroups" == caseDef.varName)
+                       return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkComputeNumSubgroups);
+
+               if ("gl_SubgroupID" == caseDef.varName)
+                       return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkComputeSubgroupID);
+       }
+       else
+       {
+               // Query the subgroup properties by chaining them onto the
+               // generic physical-device properties structure.
+               VkPhysicalDeviceSubgroupProperties subgroupProps;
+               subgroupProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+               subgroupProps.pNext = DE_NULL;
+
+               VkPhysicalDeviceProperties2 deviceProps;
+               deviceProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               deviceProps.pNext = &subgroupProps;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &deviceProps);
+
+               VkShaderStageFlagBits usableStages = (VkShaderStageFlagBits)(caseDef.shaderStage  & subgroupProps.supportedStages);
+
+               // The vertex-SSBO query is only made when something other than
+               // "fragment only" remains; without that capability the run is
+               // narrowed to the fragment stage, or skipped if fragment is not
+               // among the usable stages.
+               if (VK_SHADER_STAGE_FRAGMENT_BIT != usableStages)
+               {
+                       if (!subgroups::isVertexSSBOSupportedForDevice(context))
+                       {
+                               if ((usableStages & VK_SHADER_STAGE_FRAGMENT_BIT) != 0)
+                                       usableStages = VK_SHADER_STAGE_FRAGMENT_BIT;
+                               else
+                                       TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+                       }
+               }
+
+               if ((VkShaderStageFlagBits)0u == usableStages)
+                       TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+               if (wantsSize)
+                       return subgroups::allStages(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupSize, usableStages);
+
+               if (wantsInvocationID)
+                       return subgroups::allStages(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupInvocationID, usableStages);
+       }
+
+       // No checker is registered for the requested variable on this path.
+       return tcu::TestStatus::fail(
+               caseDef.varName + " failed (unhandled error checking case " +
+               caseDef.varName + ")!");
+}
+
+// Builds the "builtin_var" group with three children:
+//   graphics    - one all-graphics-stages case per any-stage builtin
+//   compute     - one compute case per any-stage and per compute-only builtin
+//   framebuffer - one case per (any-stage builtin, single stage) pair
+// The returned group is released from its MovePtr, so the caller receives
+// the raw pointer.
+tcu::TestCaseGroup* createSubgroupsBuiltinVarTests(tcu::TestContext& testCtx)
+{
+       // Builtins registered in all three child groups.
+       const char* const anyStageVars[] =
+       {
+               "SubgroupSize",
+               "SubgroupInvocationID"
+       };
+
+       // Builtins registered in the compute group only.
+       const char* const computeOnlyVars[] =
+       {
+               "NumSubgroups",
+               "SubgroupID"
+       };
+
+       // Stages that each get an individual framebuffer case.
+       const VkShaderStageFlags singleStages[] =
+       {
+               VK_SHADER_STAGE_VERTEX_BIT,
+               VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+               VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+               VK_SHADER_STAGE_GEOMETRY_BIT,
+       };
+
+       de::MovePtr<tcu::TestCaseGroup> root(new tcu::TestCaseGroup(
+               testCtx, "builtin_var", "Subgroup builtin variable tests"));
+       de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+               testCtx, "graphics", "Subgroup builtin variable tests: graphics"));
+       de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+               testCtx, "compute", "Subgroup builtin variable tests: compute"));
+       de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+               testCtx, "framebuffer", "Subgroup builtin variable tests: framebuffer"));
+
+       for (int varNdx = 0; varNdx < DE_LENGTH_OF_ARRAY(anyStageVars); ++varNdx)
+       {
+               const std::string builtin  = anyStageVars[varNdx];
+               const std::string glslName = "gl_" + builtin;
+               const std::string caseStem = de::toLower(builtin);
+
+               const CaseDefinition graphicsCase = { glslName, VK_SHADER_STAGE_ALL_GRAPHICS };
+               addFunctionCaseWithPrograms(graphicGroup.get(),
+                       caseStem, "",
+                       supportedCheck, initPrograms, test, graphicsCase);
+
+               const CaseDefinition computeCase = { glslName, VK_SHADER_STAGE_COMPUTE_BIT };
+               addFunctionCaseWithPrograms(computeGroup.get(),
+                       caseStem + "_" + getShaderStageName(computeCase.shaderStage), "",
+                       supportedCheck, initPrograms, test, computeCase);
+
+               for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(singleStages); ++stageNdx)
+               {
+                       const CaseDefinition stageCase = { glslName, singleStages[stageNdx] };
+                       addFunctionCaseWithPrograms(framebufferGroup.get(),
+                               caseStem + "_" + getShaderStageName(stageCase.shaderStage), "",
+                               supportedCheck, initFrameBufferPrograms, noSSBOtest, stageCase);
+               }
+       }
+
+       // Compute-only builtins are named without a stage suffix.
+       for (int varNdx = 0; varNdx < DE_LENGTH_OF_ARRAY(computeOnlyVars); ++varNdx)
+       {
+               const std::string    builtin     = computeOnlyVars[varNdx];
+               const CaseDefinition computeCase = { "gl_" + builtin, VK_SHADER_STAGE_COMPUTE_BIT };
+
+               addFunctionCaseWithPrograms(computeGroup.get(), de::toLower(builtin), "",
+                       supportedCheck, initPrograms, test, computeCase);
+       }
+
+       root->addChild(graphicGroup.release());
+       root->addChild(computeGroup.release());
+       root->addChild(framebufferGroup.release());
+
+       return root.release();
+}
+
+} // subgroups
+} // vkt
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsBuiltinVarTests.hpp b/external/openglcts/modules/common/subgroups/glcSubgroupsBuiltinVarTests.hpp
new file mode 100644 (file)
index 0000000..f7a1dc8
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef _VKTSUBGROUPSBUILTINVARTESTS_HPP
+#define _VKTSUBGROUPSBUILTINVARTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+/*--------------------------------------------------------------------*//*!
+ * \brief Create the subgroup builtin variable test group
+ *
+ * Builds a group named "builtin_var" containing "graphics", "compute" and
+ * "framebuffer" child groups.
+ *
+ * \param testCtx Test context the groups are created in.
+ * \return Newly allocated group; the pointer is released from its owning
+ *         smart pointer before being returned, so the caller is expected
+ *         to take ownership.
+ *//*--------------------------------------------------------------------*/
+tcu::TestCaseGroup* createSubgroupsBuiltinVarTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSBUILTINVARTESTS_HPP
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsClusteredTests.cpp b/external/openglcts/modules/common/subgroups/glcSubgroupsClusteredTests.cpp
new file mode 100755 (executable)
index 0000000..718be21
--- /dev/null
@@ -0,0 +1,884 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsClusteredTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+enum OpType // Clustered subgroup operations; getOpTypeName() maps each value to its GLSL builtin.
+{
+       OPTYPE_CLUSTERED_ADD = 0,       // subgroupClusteredAdd
+       OPTYPE_CLUSTERED_MUL,           // subgroupClusteredMul
+       OPTYPE_CLUSTERED_MIN,           // subgroupClusteredMin
+       OPTYPE_CLUSTERED_MAX,           // subgroupClusteredMax
+       OPTYPE_CLUSTERED_AND,           // subgroupClusteredAnd
+       OPTYPE_CLUSTERED_OR,            // subgroupClusteredOr
+       OPTYPE_CLUSTERED_XOR,           // subgroupClusteredXor
+       OPTYPE_CLUSTERED_LAST           // Not a real operation; presumably the iteration bound when cases are generated (TODO confirm).
+};
+/*--------------------------------------------------------------------*//*!
+ * \brief Result verification helper forwarding to vkt::subgroups::check.
+ *
+ * The third (unnamed) parameter is ignored; the constant 1 is passed on in
+ * its place. NOTE(review): 1 is presumably the value each result element
+ * must hold, matching the 1/0 the shaders derive from tempResult - confirm
+ * against vkt::subgroups::check.
+ *
+ * \param datas Buffers handed through unchanged.
+ * \param width Handed through unchanged.
+ * \return The value returned by vkt::subgroups::check.
+ *//*--------------------------------------------------------------------*/
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+                                                                         deUint32 width, deUint32)
+{
+       return vkt::subgroups::check(datas, width, 1);
+}
+/*--------------------------------------------------------------------*//*!
+ * \brief Result verification helper forwarding to vkt::subgroups::checkCompute.
+ *
+ * The fourth (unnamed) parameter is ignored; the constant 1 is passed on in
+ * its place. NOTE(review): 1 is presumably the expected per-invocation
+ * result value - confirm against vkt::subgroups::checkCompute.
+ *
+ * \param datas         Buffers handed through unchanged.
+ * \param numWorkgroups Three-element array, handed through unchanged.
+ * \param localSize     Three-element array, handed through unchanged.
+ * \return The value returned by vkt::subgroups::checkCompute.
+ *//*--------------------------------------------------------------------*/
+static bool checkCompute(std::vector<const void*> datas,
+                                                const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                                                deUint32)
+{
+       return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
+}
+
+std::string getOpTypeName(int opType)
+{
+       switch (opType)
+       {
+               default:
+                       DE_FATAL("Unsupported op type");
+                       return "";
+               case OPTYPE_CLUSTERED_ADD:
+                       return "subgroupClusteredAdd";
+               case OPTYPE_CLUSTERED_MUL:
+                       return "subgroupClusteredMul";
+               case OPTYPE_CLUSTERED_MIN:
+                       return "subgroupClusteredMin";
+               case OPTYPE_CLUSTERED_MAX:
+                       return "subgroupClusteredMax";
+               case OPTYPE_CLUSTERED_AND:
+                       return "subgroupClusteredAnd";
+               case OPTYPE_CLUSTERED_OR:
+                       return "subgroupClusteredOr";
+               case OPTYPE_CLUSTERED_XOR:
+                       return "subgroupClusteredXor";
+       }
+}
+
+std::string getOpTypeOperation(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
+{
+       switch (opType)
+       {
+               default:
+                       DE_FATAL("Unsupported op type");
+                       return "";
+               case OPTYPE_CLUSTERED_ADD:
+                       return lhs + " + " + rhs;
+               case OPTYPE_CLUSTERED_MUL:
+                       return lhs + " * " + rhs;
+               case OPTYPE_CLUSTERED_MIN:
+                       switch (format)
+                       {
+                               default:
+                                       return "min(" + lhs + ", " + rhs + ")";
+                               case VK_FORMAT_R32_SFLOAT:
+                               case VK_FORMAT_R64_SFLOAT:
+                                       return "(isnan(" + lhs + ") ? " + rhs + " : (isnan(" + rhs + ") ? " + lhs + " : min(" + lhs + ", " + rhs + ")))";
+                               case VK_FORMAT_R32G32_SFLOAT:
+                               case VK_FORMAT_R32G32B32_SFLOAT:
+                               case VK_FORMAT_R32G32B32A32_SFLOAT:
+                               case VK_FORMAT_R64G64_SFLOAT:
+                               case VK_FORMAT_R64G64B64_SFLOAT:
+                               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                                       return "mix(mix(min(" + lhs + ", " + rhs + "), " + lhs + ", isnan(" + rhs + ")), " + rhs + ", isnan(" + lhs + "))";
+                       }
+               case OPTYPE_CLUSTERED_MAX:
+                       switch (format)
+                       {
+                               default:
+                                       return "max(" + lhs + ", " + rhs + ")";
+                               case VK_FORMAT_R32_SFLOAT:
+                               case VK_FORMAT_R64_SFLOAT:
+                                       return "(isnan(" + lhs + ") ? " + rhs + " : (isnan(" + rhs + ") ? " + lhs + " : max(" + lhs + ", " + rhs + ")))";
+                               case VK_FORMAT_R32G32_SFLOAT:
+                               case VK_FORMAT_R32G32B32_SFLOAT:
+                               case VK_FORMAT_R32G32B32A32_SFLOAT:
+                               case VK_FORMAT_R64G64_SFLOAT:
+                               case VK_FORMAT_R64G64B64_SFLOAT:
+                               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                                       return "mix(mix(max(" + lhs + ", " + rhs + "), " + lhs + ", isnan(" + rhs + ")), " + rhs + ", isnan(" + lhs + "))";
+                       }
+               case OPTYPE_CLUSTERED_AND:
+                       switch (format)
+                       {
+                               default:
+                                       return lhs + " & " + rhs;
+                               case VK_FORMAT_R8_USCALED:
+                                       return lhs + " && " + rhs;
+                               case VK_FORMAT_R8G8_USCALED:
+                                       return "bvec2(" + lhs + ".x && " + rhs + ".x, " + lhs + ".y && " + rhs + ".y)";
+                               case VK_FORMAT_R8G8B8_USCALED:
+                                       return "bvec3(" + lhs + ".x && " + rhs + ".x, " + lhs + ".y && " + rhs + ".y, " + lhs + ".z && " + rhs + ".z)";
+                               case VK_FORMAT_R8G8B8A8_USCALED:
+                                       return "bvec4(" + lhs + ".x && " + rhs + ".x, " + lhs + ".y && " + rhs + ".y, " + lhs + ".z && " + rhs + ".z, " + lhs + ".w && " + rhs + ".w)";
+                       }
+               case OPTYPE_CLUSTERED_OR:
+                       switch (format)
+                       {
+                               default:
+                                       return lhs + " | " + rhs;
+                               case VK_FORMAT_R8_USCALED:
+                                       return lhs + " || " + rhs;
+                               case VK_FORMAT_R8G8_USCALED:
+                                       return "bvec2(" + lhs + ".x || " + rhs + ".x, " + lhs + ".y || " + rhs + ".y)";
+                               case VK_FORMAT_R8G8B8_USCALED:
+                                       return "bvec3(" + lhs + ".x || " + rhs + ".x, " + lhs + ".y || " + rhs + ".y, " + lhs + ".z || " + rhs + ".z)";
+                               case VK_FORMAT_R8G8B8A8_USCALED:
+                                       return "bvec4(" + lhs + ".x || " + rhs + ".x, " + lhs + ".y || " + rhs + ".y, " + lhs + ".z || " + rhs + ".z, " + lhs + ".w || " + rhs + ".w)";
+                       }
+               case OPTYPE_CLUSTERED_XOR:
+                       switch (format)
+                       {
+                               default:
+                                       return lhs + " ^ " + rhs;
+                               case VK_FORMAT_R8_USCALED:
+                                       return lhs + " ^^ " + rhs;
+                               case VK_FORMAT_R8G8_USCALED:
+                                       return "bvec2(" + lhs + ".x ^^ " + rhs + ".x, " + lhs + ".y ^^ " + rhs + ".y)";
+                               case VK_FORMAT_R8G8B8_USCALED:
+                                       return "bvec3(" + lhs + ".x ^^ " + rhs + ".x, " + lhs + ".y ^^ " + rhs + ".y, " + lhs + ".z ^^ " + rhs + ".z)";
+                               case VK_FORMAT_R8G8B8A8_USCALED:
+                                       return "bvec4(" + lhs + ".x ^^ " + rhs + ".x, " + lhs + ".y ^^ " + rhs + ".y, " + lhs + ".z ^^ " + rhs + ".z, " + lhs + ".w ^^ " + rhs + ".w)";
+                       }
+       }
+}
+
+std::string getIdentity(int opType, vk::VkFormat format)
+{
+       bool isFloat = false;
+       bool isInt = false;
+       bool isUnsigned = false;
+
+       switch (format)
+       {
+               default:
+                       DE_FATAL("Unhandled format!");
+                       break;
+               case VK_FORMAT_R32_SINT:
+               case VK_FORMAT_R32G32_SINT:
+               case VK_FORMAT_R32G32B32_SINT:
+               case VK_FORMAT_R32G32B32A32_SINT:
+                       isInt = true;
+                       break;
+               case VK_FORMAT_R32_UINT:
+               case VK_FORMAT_R32G32_UINT:
+               case VK_FORMAT_R32G32B32_UINT:
+               case VK_FORMAT_R32G32B32A32_UINT:
+                       isUnsigned = true;
+                       break;
+               case VK_FORMAT_R32_SFLOAT:
+               case VK_FORMAT_R32G32_SFLOAT:
+               case VK_FORMAT_R32G32B32_SFLOAT:
+               case VK_FORMAT_R32G32B32A32_SFLOAT:
+               case VK_FORMAT_R64_SFLOAT:
+               case VK_FORMAT_R64G64_SFLOAT:
+               case VK_FORMAT_R64G64B64_SFLOAT:
+               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                       isFloat = true;
+                       break;
+               case VK_FORMAT_R8_USCALED:
+               case VK_FORMAT_R8G8_USCALED:
+               case VK_FORMAT_R8G8B8_USCALED:
+               case VK_FORMAT_R8G8B8A8_USCALED:
+                       break; // bool types are not anything
+       }
+
+       switch (opType)
+       {
+               default:
+                       DE_FATAL("Unsupported op type");
+                       return "";
+               case OPTYPE_CLUSTERED_ADD:
+                       return subgroups::getFormatNameForGLSL(format) + "(0)";
+               case OPTYPE_CLUSTERED_MUL:
+                       return subgroups::getFormatNameForGLSL(format) + "(1)";
+               case OPTYPE_CLUSTERED_MIN:
+                       if (isFloat)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0x7f800000))";
+                       }
+                       else if (isInt)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(0x7fffffff)";
+                       }
+                       else if (isUnsigned)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(0xffffffffu)";
+                       }
+                       else
+                       {
+                               DE_FATAL("Unhandled case");
+                               return "";
+                       }
+               case OPTYPE_CLUSTERED_MAX:
+                       if (isFloat)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0xff800000))";
+                       }
+                       else if (isInt)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(0x80000000)";
+                       }
+                       else if (isUnsigned)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(0)";
+                       }
+                       else
+                       {
+                               DE_FATAL("Unhandled case");
+                               return "";
+                       }
+               case OPTYPE_CLUSTERED_AND:
+                       return subgroups::getFormatNameForGLSL(format) + "(~0)";
+               case OPTYPE_CLUSTERED_OR:
+                       return subgroups::getFormatNameForGLSL(format) + "(0)";
+               case OPTYPE_CLUSTERED_XOR:
+                       return subgroups::getFormatNameForGLSL(format) + "(0)";
+       }
+}
+
+std::string getCompare(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
+{
+       std::string formatName = subgroups::getFormatNameForGLSL(format);
+       switch (format)
+       {
+               default:
+                       return "all(equal(" + lhs + ", " + rhs + "))";
+               case VK_FORMAT_R8_USCALED:
+               case VK_FORMAT_R32_UINT:
+               case VK_FORMAT_R32_SINT:
+                       return "(" + lhs + " == " + rhs + ")";
+               case VK_FORMAT_R32_SFLOAT:
+               case VK_FORMAT_R64_SFLOAT:
+                       switch (opType)
+                       {
+                               default:
+                                       return "(abs(" + lhs + " - " + rhs + ") < 0.00001)";
+                               case OPTYPE_CLUSTERED_MIN:
+                               case OPTYPE_CLUSTERED_MAX:
+                                       return "(" + lhs + " == " + rhs + ")";
+                       }
+               case VK_FORMAT_R32G32_SFLOAT:
+               case VK_FORMAT_R32G32B32_SFLOAT:
+               case VK_FORMAT_R32G32B32A32_SFLOAT:
+               case VK_FORMAT_R64G64_SFLOAT:
+               case VK_FORMAT_R64G64B64_SFLOAT:
+               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                       switch (opType)
+                       {
+                               default:
+                                       return "all(lessThan(abs(" + lhs + " - " + rhs + "), " + formatName + "(0.00001)))";
+                               case OPTYPE_CLUSTERED_MIN:
+                               case OPTYPE_CLUSTERED_MAX:
+                                       return "all(equal(" + lhs + ", " + rhs + "))";
+                       }
+       }
+}
+
+struct CaseDefinition // Parameters describing a single generated test case.
+{
+       int                                     opType; // OPTYPE_CLUSTERED_* value selecting the operation under test.
+       VkShaderStageFlags      shaderStage; // Compared against VK_SHADER_STAGE_*_BIT values to pick the shaders to generate.
+       VkFormat                        format; // Format of the data[] elements; mapped to a GLSL type via getFormatNameForGLSL().
+};
+
+std::string getBodySource(CaseDefinition caseDef)
+{
+       std::ostringstream bdy;
+       bdy << "  bool tempResult = true;\n";
+
+       for (deUint32 i = 1; i <= subgroups::maxSupportedSubgroupSize(); i *= 2)
+       {
+               bdy     << "  {\n"
+                       << "    const uint clusterSize = " << i << ";\n"
+                       << "    if (clusterSize <= gl_SubgroupSize)\n"
+                       << "    {\n"
+                       << "      " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
+                       << getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID], clusterSize);\n"
+                       << "      for (uint clusterOffset = 0; clusterOffset < gl_SubgroupSize; clusterOffset += clusterSize)\n"
+                       << "      {\n"
+                       << "        " << subgroups::getFormatNameForGLSL(caseDef.format) << " ref = "
+                       << getIdentity(caseDef.opType, caseDef.format) << ";\n"
+                       << "        for (uint index = clusterOffset; index < (clusterOffset + clusterSize); index++)\n"
+                       << "        {\n"
+                       << "          if (subgroupBallotBitExtract(mask, index))\n"
+                       << "          {\n"
+                       << "            ref = " << getOpTypeOperation(caseDef.opType, caseDef.format, "ref", "data[index]") << ";\n"
+                       << "          }\n"
+                       << "        }\n"
+                       << "        if ((clusterOffset <= gl_SubgroupInvocationID) && (gl_SubgroupInvocationID < (clusterOffset + clusterSize)))\n"
+                       << "        {\n"
+                       << "          if (!" << getCompare(caseDef.opType, caseDef.format, "ref", "op") << ")\n"
+                       << "          {\n"
+                       << "            tempResult = false;\n"
+                       << "          }\n"
+                       << "        }\n"
+                       << "      }\n"
+                       << "    }\n"
+                       << "  }\n";
+       }
+       return bdy.str();
+}
+/*--------------------------------------------------------------------*//*!
+ * \brief Add the GLSL sources for a test case whose shaders report through out_color.
+ *
+ * Depending on caseDef.shaderStage exactly one of the vertex, geometry,
+ * tessellation control or tessellation evaluation branches builds a shader
+ * that embeds getBodySource(caseDef), reads its input from the uniform
+ * block Buffer1 and writes 1.0 or 0.0 to out_color according to tempResult.
+ * Any other stage value triggers DE_FATAL.
+ *
+ * \param programCollection Collection whose glslSources receives the shaders.
+ * \param caseDef           Operation, stage and format of the test case.
+ *//*--------------------------------------------------------------------*/
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       const vk::ShaderBuildOptions    buildOptions    (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u); // shared by every shader added in this function
+
+       subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+       if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage) // the vertex case adds its own "vert" source below
+               subgroups::setVertexShaderFrameBuffer(programCollection);
+
+       std::string bdy = getBodySource(caseDef); // verification statements embedded in the stage under test
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream                              vertexSrc;
+               vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450 )<< "\n"
+                       << "#extension GL_KHR_shader_subgroup_clustered: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(location = 0) in highp vec4 in_position;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << bdy
+                       << "  out_color = float(tempResult ? 1 : 0);\n"
+                       << "  gl_Position = in_position;\n"
+                       << "  gl_PointSize = 1.0f;\n"
+                       << "}\n";
+               programCollection.glslSources.add("vert")
+                       << glu::VertexSource(vertexSrc.str()) <<buildOptions;
+       }
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream geometry;
+
+               geometry  << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_clustered: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(points) in;\n"
+                       << "layout(points, max_vertices = 1) out;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << bdy
+                       << "  out_color = tempResult ? 1.0 : 0.0;\n"
+                       << "  gl_Position = gl_in[0].gl_Position;\n"
+                       << "  EmitVertex();\n"
+                       << "  EndPrimitive();\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("geometry")
+                       << glu::GeometrySource(geometry.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream controlSource;
+
+               controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_clustered: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(vertices = 2) out;\n"
+                       << "layout(location = 0) out float out_color[];\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  if (gl_InvocationID == 0)\n"
+                       <<"  {\n"
+                       << "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       << "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       << "  }\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << bdy
+                       << "  out_color[gl_InvocationID] = tempResult ? 1.0 : 0.0;\n"
+                       << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("tesc")
+                       << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+               subgroups::setTesEvalShaderFrameBuffer(programCollection); // the evaluation stage comes from the shared helper
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream evaluationSource;
+
+               evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_clustered: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(isolines, equal_spacing, ccw ) in;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << bdy
+                       << "  out_color = tempResult ? 1.0 : 0.0;\n"
+                       << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                       << "}\n";
+
+               subgroups::setTesCtrlShaderFrameBuffer(programCollection); // the control stage comes from the shared helper
+               programCollection.glslSources.add("tese")
+                       << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+       }
+       else
+       {
+               DE_FATAL("Unsupported shader stage");
+       }
+}
+/*--------------------------------------------------------------------*//*!
+ * \brief Add the GLSL sources for a test case whose shaders report through storage buffers.
+ *
+ * For VK_SHADER_STAGE_COMPUTE_BIT a single "comp" shader is added that
+ * writes tempResult (1 or 0) to result[] at the flattened global invocation
+ * index. For any other stage value, vertex, tessellation control,
+ * tessellation evaluation, geometry and fragment shaders are all added.
+ * Each embeds getBodySource(caseDef) and reads data[] from binding 4. The
+ * first four write to result[] at bindings 0 to 3 respectively; the
+ * fragment shader writes its result to the output at location 0.
+ *
+ * \param programCollection Collection whose glslSources receives the shaders.
+ * \param caseDef           Operation, stage and format of the test case.
+ *//*--------------------------------------------------------------------*/
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       std::string bdy = getBodySource(caseDef); // verification statements shared by every shader below
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream src;
+
+               src << "#version 450\n"
+                       << "#extension GL_KHR_shader_subgroup_clustered: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+                       "local_size_z_id = 2) in;\n"
+                       << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                       << "{\n"
+                       << "  uint result[];\n"
+                       << "};\n"
+                       << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+                       << "  highp uint offset = globalSize.x * ((globalSize.y * "
+                       "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+                       "gl_GlobalInvocationID.x;\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << bdy
+                       << "  result[offset] = tempResult ? 1 : 0;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("comp")
+                               << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+       else // every non-compute stage value gets the complete set of graphics shaders
+       {
+               {
+                       const string vertex =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_clustered: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + bdy +
+                               "  result[gl_VertexIndex] = tempResult ? 1 : 0;\n"
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+                               "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+                               "}\n";
+
+                       programCollection.glslSources.add("vert")
+                               << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const string tesc =
+                       "#version 450\n"
+                       "#extension GL_KHR_shader_subgroup_clustered: enable\n"
+                       "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       "layout(vertices=1) out;\n"
+                       "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+                       "{\n"
+                       "  uint result[];\n"
+                       "};\n"
+                       "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                       "{\n"
+                       "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  uvec4 mask = subgroupBallot(true);\n"
+                       + bdy +
+                       "  result[gl_PrimitiveID] = tempResult ? 1 : 0;\n"
+                       "  if (gl_InvocationID == 0)\n"
+                       "  {\n"
+                       "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       "  }\n"
+                       "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       "}\n";
+
+                       programCollection.glslSources.add("tesc")
+                                       << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const string tese =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_clustered: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(isolines) in;\n"
+                               "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + bdy +
+                               "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult ? 1 : 0;\n"
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+                               "}\n";
+                       programCollection.glslSources.add("tese")
+                                       << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const string geometry = // template text: ${TOPOLOGY} is left for addGeometryShadersFromTemplate (presumably substituted there - confirm)
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_clustered: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(${TOPOLOGY}) in;\n"
+                               "layout(points, max_vertices = 1) out;\n"
+                               "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + bdy +
+                               "  result[gl_PrimitiveIDIn] = tempResult ? 1 : 0;\n"
+                               "  gl_Position = gl_in[0].gl_Position;\n"
+                               "  EmitVertex();\n"
+                               "  EndPrimitive();\n"
+                               "}\n";
+                       subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u), programCollection.glslSources);
+               }
+
+               {
+                       const string fragment =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_clustered: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(location = 0) out uint result;\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + bdy +
+                               "  result = tempResult ? 1 : 0;\n"
+                               "}\n";
+                       programCollection.glslSources.add("fragment")
+                               << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               subgroups::addNoSubgroupShader(programCollection); // NOTE(review): presumably adds shader variants without subgroup operations - confirm in the utils
+       }
+}
+
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+       if (!subgroups::isSubgroupSupported(context))
+               TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+
+       if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_CLUSTERED_BIT))
+               TCU_THROW(NotSupportedError, "Device does not support subgroup clustered operations");
+
+       if (subgroups::isDoubleFormat(caseDef.format) &&
+                       !subgroups::isDoubleSupportedForDevice(context))
+       {
+               TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+       }
+}
+
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+       if (!subgroups::areSubgroupOperationsSupportedForStage(
+                               context, caseDef.shaderStage))
+       {
+               if (subgroups::areSubgroupOperationsRequiredForStage(
+                                       caseDef.shaderStage))
+               {
+                       return tcu::TestStatus::fail(
+                                          "Shader stage " +
+                                          subgroups::getShaderStageName(caseDef.shaderStage) +
+                                          " is required to support subgroup operations!");
+               }
+               else
+               {
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+               }
+       }
+
+       subgroups::SSBOData inputData;
+       inputData.format = caseDef.format;
+       inputData.numElements = subgroups::maxSupportedSubgroupSize();
+       inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context,  VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+       else
+               TCU_THROW(InternalError, "Unhandled shader stage");
+}
+
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+               {
+                               return tcu::TestStatus::fail(
+                                                  "Shader stage " +
+                                                  subgroups::getShaderStageName(caseDef.shaderStage) +
+                                                  " is required to support subgroup operations!");
+               }
+               subgroups::SSBOData inputData;
+               inputData.format = caseDef.format;
+               inputData.numElements = subgroups::maxSupportedSubgroupSize();
+               inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
+
+               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkCompute);
+       }
+       else
+       {
+               VkPhysicalDeviceSubgroupProperties subgroupProperties;
+               subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+               subgroupProperties.pNext = DE_NULL;
+
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage  & subgroupProperties.supportedStages);
+
+               if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
+               {
+                       if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
+                               TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+                       else
+                               stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+               }
+
+               if ((VkShaderStageFlagBits)0u == stages)
+                       TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+               subgroups::SSBOData inputData;
+               inputData.format                        = caseDef.format;
+               inputData.numElements           = subgroups::maxSupportedSubgroupSize();
+               inputData.initializeType        = subgroups::SSBOData::InitializeNonZero;
+               inputData.binding                       = 4u;
+               inputData.stages                        = stages;
+
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, stages);
+       }
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+tcu::TestCaseGroup* createSubgroupsClusteredTests(tcu::TestContext& testCtx)
+{
+       de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+               testCtx, "graphics", "Subgroup clustered category tests: graphics"));
+       de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+               testCtx, "compute", "Subgroup clustered category tests: compute"));
+       de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+               testCtx, "framebuffer", "Subgroup clustered category tests: framebuffer"));
+
+       const VkShaderStageFlags stages[] =
+       {
+               VK_SHADER_STAGE_VERTEX_BIT,
+               VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+               VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+               VK_SHADER_STAGE_GEOMETRY_BIT
+       };
+
+       const VkFormat formats[] =
+       {
+               VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
+               VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
+               VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
+               VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
+               VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
+               VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
+               VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
+               VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED,
+               VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
+       };
+
+       for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
+       {
+               const VkFormat format = formats[formatIndex];
+
+               for (int opTypeIndex = 0; opTypeIndex < OPTYPE_CLUSTERED_LAST; ++opTypeIndex)
+               {
+                       bool isBool = false;
+                       bool isFloat = false;
+
+                       switch (format)
+                       {
+                               default:
+                                       break;
+                               case VK_FORMAT_R32_SFLOAT:
+                               case VK_FORMAT_R32G32_SFLOAT:
+                               case VK_FORMAT_R32G32B32_SFLOAT:
+                               case VK_FORMAT_R32G32B32A32_SFLOAT:
+                               case VK_FORMAT_R64_SFLOAT:
+                               case VK_FORMAT_R64G64_SFLOAT:
+                               case VK_FORMAT_R64G64B64_SFLOAT:
+                               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                                       isFloat = true;
+                                       break;
+                               case VK_FORMAT_R8_USCALED:
+                               case VK_FORMAT_R8G8_USCALED:
+                               case VK_FORMAT_R8G8B8_USCALED:
+                               case VK_FORMAT_R8G8B8A8_USCALED:
+                                       isBool = true;
+                                       break;
+                       }
+
+                       bool isBitwiseOp = false;
+
+                       switch (opTypeIndex)
+                       {
+                               default:
+                                       break;
+                               case OPTYPE_CLUSTERED_AND:
+                               case OPTYPE_CLUSTERED_OR:
+                               case OPTYPE_CLUSTERED_XOR:
+                                       isBitwiseOp = true;
+                                       break;
+                       }
+
+                       if (isFloat && isBitwiseOp)
+                       {
+                               // Skip float with bitwise category.
+                               continue;
+                       }
+
+                       if (isBool && !isBitwiseOp)
+                       {
+                               // Skip bool when its not the bitwise category.
+                               continue;
+                       }
+
+                       const std::string name = de::toLower(getOpTypeName(opTypeIndex))
+                               +"_" + subgroups::getFormatNameForGLSL(format);
+
+                       {
+                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format};
+                               addFunctionCaseWithPrograms(computeGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
+                       }
+
+                       {
+                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format};
+                               addFunctionCaseWithPrograms(graphicGroup.get(), name,
+                                                                               "", supportedCheck, initPrograms, test, caseDef);
+                       }
+
+                       for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
+                       {
+                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
+                               addFunctionCaseWithPrograms(framebufferGroup.get(), name +"_" + getShaderStageName(caseDef.shaderStage), "",
+                                                                                       supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+                       }
+               }
+       }
+       de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+               testCtx, "clustered", "Subgroup clustered category tests"));
+
+       group->addChild(graphicGroup.release());
+       group->addChild(computeGroup.release());
+       group->addChild(framebufferGroup.release());
+
+       return group.release();
+}
+
+} // subgroups
+} // vkt
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsClusteredTests.hpp b/external/openglcts/modules/common/subgroups/glcSubgroupsClusteredTests.hpp
new file mode 100644 (file)
index 0000000..d1f518c
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef _VKTSUBGROUPSCLUSTEREDTESTS_HPP
+#define _VKTSUBGROUPSCLUSTEREDTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Creates the "clustered" subgroup test group, which holds the "graphics",
+// "compute" and "framebuffer" child groups. The definition returns the group
+// by releasing it from its owning MovePtr, so the raw pointer is handed to
+// the caller.
+tcu::TestCaseGroup* createSubgroupsClusteredTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSCLUSTEREDTESTS_HPP
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsPartitionedTests.cpp b/external/openglcts/modules/common/subgroups/glcSubgroupsPartitionedTests.cpp
new file mode 100755 (executable)
index 0000000..c1a658a
--- /dev/null
@@ -0,0 +1,1053 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ * Copyright (c) 2018 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsPartitionedTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Operations covered by the partitioned tests. Values are passed around as
+// plain ints (see getOpTypeName / getOpTypeNamePartitioned / getIdentity).
+// Keep the enumerators contiguous and in this order: getTestString detects
+// exclusive scans with a range check from OPTYPE_EXCLUSIVE_ADD to
+// OPTYPE_EXCLUSIVE_XOR.
+enum OpType
+{
+       // Reductions (subgroupAdd, subgroupMul, ...).
+       OPTYPE_ADD = 0,
+       OPTYPE_MUL,
+       OPTYPE_MIN,
+       OPTYPE_MAX,
+       OPTYPE_AND,
+       OPTYPE_OR,
+       OPTYPE_XOR,
+       // Inclusive scans (subgroupInclusiveAdd, ...).
+       OPTYPE_INCLUSIVE_ADD,
+       OPTYPE_INCLUSIVE_MUL,
+       OPTYPE_INCLUSIVE_MIN,
+       OPTYPE_INCLUSIVE_MAX,
+       OPTYPE_INCLUSIVE_AND,
+       OPTYPE_INCLUSIVE_OR,
+       OPTYPE_INCLUSIVE_XOR,
+       // Exclusive scans (subgroupExclusiveAdd, ...).
+       OPTYPE_EXCLUSIVE_ADD,
+       OPTYPE_EXCLUSIVE_MUL,
+       OPTYPE_EXCLUSIVE_MIN,
+       OPTYPE_EXCLUSIVE_MAX,
+       OPTYPE_EXCLUSIVE_AND,
+       OPTYPE_EXCLUSIVE_OR,
+       OPTYPE_EXCLUSIVE_XOR,
+       // Number of operations; not a valid operation itself.
+       OPTYPE_LAST
+};
+
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+                                                                         deUint32 width, deUint32)
+{
+       const deUint32* data =
+               reinterpret_cast<const deUint32*>(datas[0]);
+       for (deUint32 x = 0; x < width; ++x)
+       {
+               deUint32 val = data[x];
+
+               if (0xFFFFFF != val)
+               {
+                       return false;
+               }
+       }
+
+       return true;
+}
+
+static bool checkCompute(std::vector<const void*> datas,
+                                                const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                                                deUint32)
+{
+       const deUint32* data =
+               reinterpret_cast<const deUint32*>(datas[0]);
+
+       for (deUint32 nX = 0; nX < numWorkgroups[0]; ++nX)
+       {
+               for (deUint32 nY = 0; nY < numWorkgroups[1]; ++nY)
+               {
+                       for (deUint32 nZ = 0; nZ < numWorkgroups[2]; ++nZ)
+                       {
+                               for (deUint32 lX = 0; lX < localSize[0]; ++lX)
+                               {
+                                       for (deUint32 lY = 0; lY < localSize[1]; ++lY)
+                                       {
+                                               for (deUint32 lZ = 0; lZ < localSize[2];
+                                                               ++lZ)
+                                               {
+                                                       const deUint32 globalInvocationX =
+                                                               nX * localSize[0] + lX;
+                                                       const deUint32 globalInvocationY =
+                                                               nY * localSize[1] + lY;
+                                                       const deUint32 globalInvocationZ =
+                                                               nZ * localSize[2] + lZ;
+
+                                                       const deUint32 globalSizeX =
+                                                               numWorkgroups[0] * localSize[0];
+                                                       const deUint32 globalSizeY =
+                                                               numWorkgroups[1] * localSize[1];
+
+                                                       const deUint32 offset =
+                                                               globalSizeX *
+                                                               ((globalSizeY *
+                                                                 globalInvocationZ) +
+                                                                globalInvocationY) +
+                                                               globalInvocationX;
+
+                                                       if (0xFFFFFF != data[offset])
+                                                       {
+                                                               return false;
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+
+       return true;
+}
+
+std::string getOpTypeName(int opType)
+{
+       switch (opType)
+       {
+               default:
+                       DE_FATAL("Unsupported op type");
+                       return "";
+               case OPTYPE_ADD:
+                       return "subgroupAdd";
+               case OPTYPE_MUL:
+                       return "subgroupMul";
+               case OPTYPE_MIN:
+                       return "subgroupMin";
+               case OPTYPE_MAX:
+                       return "subgroupMax";
+               case OPTYPE_AND:
+                       return "subgroupAnd";
+               case OPTYPE_OR:
+                       return "subgroupOr";
+               case OPTYPE_XOR:
+                       return "subgroupXor";
+               case OPTYPE_INCLUSIVE_ADD:
+                       return "subgroupInclusiveAdd";
+               case OPTYPE_INCLUSIVE_MUL:
+                       return "subgroupInclusiveMul";
+               case OPTYPE_INCLUSIVE_MIN:
+                       return "subgroupInclusiveMin";
+               case OPTYPE_INCLUSIVE_MAX:
+                       return "subgroupInclusiveMax";
+               case OPTYPE_INCLUSIVE_AND:
+                       return "subgroupInclusiveAnd";
+               case OPTYPE_INCLUSIVE_OR:
+                       return "subgroupInclusiveOr";
+               case OPTYPE_INCLUSIVE_XOR:
+                       return "subgroupInclusiveXor";
+               case OPTYPE_EXCLUSIVE_ADD:
+                       return "subgroupExclusiveAdd";
+               case OPTYPE_EXCLUSIVE_MUL:
+                       return "subgroupExclusiveMul";
+               case OPTYPE_EXCLUSIVE_MIN:
+                       return "subgroupExclusiveMin";
+               case OPTYPE_EXCLUSIVE_MAX:
+                       return "subgroupExclusiveMax";
+               case OPTYPE_EXCLUSIVE_AND:
+                       return "subgroupExclusiveAnd";
+               case OPTYPE_EXCLUSIVE_OR:
+                       return "subgroupExclusiveOr";
+               case OPTYPE_EXCLUSIVE_XOR:
+                       return "subgroupExclusiveXor";
+       }
+}
+
+std::string getOpTypeNamePartitioned(int opType)
+{
+       switch (opType)
+       {
+               default:
+                       DE_FATAL("Unsupported op type");
+                       return "";
+               case OPTYPE_ADD:
+                       return "subgroupPartitionedAddNV";
+               case OPTYPE_MUL:
+                       return "subgroupPartitionedMulNV";
+               case OPTYPE_MIN:
+                       return "subgroupPartitionedMinNV";
+               case OPTYPE_MAX:
+                       return "subgroupPartitionedMaxNV";
+               case OPTYPE_AND:
+                       return "subgroupPartitionedAndNV";
+               case OPTYPE_OR:
+                       return "subgroupPartitionedOrNV";
+               case OPTYPE_XOR:
+                       return "subgroupPartitionedXorNV";
+               case OPTYPE_INCLUSIVE_ADD:
+                       return "subgroupPartitionedInclusiveAddNV";
+               case OPTYPE_INCLUSIVE_MUL:
+                       return "subgroupPartitionedInclusiveMulNV";
+               case OPTYPE_INCLUSIVE_MIN:
+                       return "subgroupPartitionedInclusiveMinNV";
+               case OPTYPE_INCLUSIVE_MAX:
+                       return "subgroupPartitionedInclusiveMaxNV";
+               case OPTYPE_INCLUSIVE_AND:
+                       return "subgroupPartitionedInclusiveAndNV";
+               case OPTYPE_INCLUSIVE_OR:
+                       return "subgroupPartitionedInclusiveOrNV";
+               case OPTYPE_INCLUSIVE_XOR:
+                       return "subgroupPartitionedInclusiveXorNV";
+               case OPTYPE_EXCLUSIVE_ADD:
+                       return "subgroupPartitionedExclusiveAddNV";
+               case OPTYPE_EXCLUSIVE_MUL:
+                       return "subgroupPartitionedExclusiveMulNV";
+               case OPTYPE_EXCLUSIVE_MIN:
+                       return "subgroupPartitionedExclusiveMinNV";
+               case OPTYPE_EXCLUSIVE_MAX:
+                       return "subgroupPartitionedExclusiveMaxNV";
+               case OPTYPE_EXCLUSIVE_AND:
+                       return "subgroupPartitionedExclusiveAndNV";
+               case OPTYPE_EXCLUSIVE_OR:
+                       return "subgroupPartitionedExclusiveOrNV";
+               case OPTYPE_EXCLUSIVE_XOR:
+                       return "subgroupPartitionedExclusiveXorNV";
+       }
+}
+
+std::string getIdentity(int opType, vk::VkFormat format)
+{
+       bool isFloat = false;
+       bool isInt = false;
+       bool isUnsigned = false;
+
+       switch (format)
+       {
+               default:
+                       DE_FATAL("Unhandled format!");
+                       return "";
+               case VK_FORMAT_R32_SINT:
+               case VK_FORMAT_R32G32_SINT:
+               case VK_FORMAT_R32G32B32_SINT:
+               case VK_FORMAT_R32G32B32A32_SINT:
+                       isInt = true;
+                       break;
+               case VK_FORMAT_R32_UINT:
+               case VK_FORMAT_R32G32_UINT:
+               case VK_FORMAT_R32G32B32_UINT:
+               case VK_FORMAT_R32G32B32A32_UINT:
+                       isUnsigned = true;
+                       break;
+               case VK_FORMAT_R32_SFLOAT:
+               case VK_FORMAT_R32G32_SFLOAT:
+               case VK_FORMAT_R32G32B32_SFLOAT:
+               case VK_FORMAT_R32G32B32A32_SFLOAT:
+               case VK_FORMAT_R64_SFLOAT:
+               case VK_FORMAT_R64G64_SFLOAT:
+               case VK_FORMAT_R64G64B64_SFLOAT:
+               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                       isFloat = true;
+                       break;
+               case VK_FORMAT_R8_USCALED:
+               case VK_FORMAT_R8G8_USCALED:
+               case VK_FORMAT_R8G8B8_USCALED:
+               case VK_FORMAT_R8G8B8A8_USCALED:
+                       break; // bool types are not anything
+       }
+
+       switch (opType)
+       {
+               default:
+                       DE_FATAL("Unsupported op type");
+                       return "";
+               case OPTYPE_ADD:
+               case OPTYPE_INCLUSIVE_ADD:
+               case OPTYPE_EXCLUSIVE_ADD:
+                       return subgroups::getFormatNameForGLSL(format) + "(0)";
+               case OPTYPE_MUL:
+               case OPTYPE_INCLUSIVE_MUL:
+               case OPTYPE_EXCLUSIVE_MUL:
+                       return subgroups::getFormatNameForGLSL(format) + "(1)";
+               case OPTYPE_MIN:
+               case OPTYPE_INCLUSIVE_MIN:
+               case OPTYPE_EXCLUSIVE_MIN:
+                       if (isFloat)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0x7f800000))";
+                       }
+                       else if (isInt)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(0x7fffffff)";
+                       }
+                       else if (isUnsigned)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(0xffffffffu)";
+                       }
+                       else
+                       {
+                               DE_FATAL("Unhandled case");
+                               return "";
+                       }
+               case OPTYPE_MAX:
+               case OPTYPE_INCLUSIVE_MAX:
+               case OPTYPE_EXCLUSIVE_MAX:
+                       if (isFloat)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0xff800000))";
+                       }
+                       else if (isInt)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(0x80000000)";
+                       }
+                       else if (isUnsigned)
+                       {
+                               return subgroups::getFormatNameForGLSL(format) + "(0)";
+                       }
+                       else
+                       {
+                               DE_FATAL("Unhandled case");
+                               return "";
+                       }
+               case OPTYPE_AND:
+               case OPTYPE_INCLUSIVE_AND:
+               case OPTYPE_EXCLUSIVE_AND:
+                       return subgroups::getFormatNameForGLSL(format) + "(~0)";
+               case OPTYPE_OR:
+               case OPTYPE_INCLUSIVE_OR:
+               case OPTYPE_EXCLUSIVE_OR:
+                       return subgroups::getFormatNameForGLSL(format) + "(0)";
+               case OPTYPE_XOR:
+               case OPTYPE_INCLUSIVE_XOR:
+               case OPTYPE_EXCLUSIVE_XOR:
+                       return subgroups::getFormatNameForGLSL(format) + "(0)";
+       }
+}
+
+std::string getCompare(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
+{
+       std::string formatName = subgroups::getFormatNameForGLSL(format);
+       switch (format)
+       {
+               default:
+                       return "all(equal(" + lhs + ", " + rhs + "))";
+               case VK_FORMAT_R8_USCALED:
+               case VK_FORMAT_R32_UINT:
+               case VK_FORMAT_R32_SINT:
+                       return "(" + lhs + " == " + rhs + ")";
+               case VK_FORMAT_R32_SFLOAT:
+               case VK_FORMAT_R64_SFLOAT:
+                       switch (opType)
+                       {
+                               default:
+                                       return "(abs(" + lhs + " - " + rhs + ") < 0.00001)";
+                               case OPTYPE_MIN:
+                               case OPTYPE_INCLUSIVE_MIN:
+                               case OPTYPE_EXCLUSIVE_MIN:
+                               case OPTYPE_MAX:
+                               case OPTYPE_INCLUSIVE_MAX:
+                               case OPTYPE_EXCLUSIVE_MAX:
+                                       return "(" + lhs + " == " + rhs + ")";
+                       }
+               case VK_FORMAT_R32G32_SFLOAT:
+               case VK_FORMAT_R32G32B32_SFLOAT:
+               case VK_FORMAT_R32G32B32A32_SFLOAT:
+               case VK_FORMAT_R64G64_SFLOAT:
+               case VK_FORMAT_R64G64B64_SFLOAT:
+               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                       switch (opType)
+                       {
+                               default:
+                                       return "all(lessThan(abs(" + lhs + " - " + rhs + "), " + formatName + "(0.00001)))";
+                               case OPTYPE_MIN:
+                               case OPTYPE_INCLUSIVE_MIN:
+                               case OPTYPE_EXCLUSIVE_MIN:
+                               case OPTYPE_MAX:
+                               case OPTYPE_INCLUSIVE_MAX:
+                               case OPTYPE_EXCLUSIVE_MAX:
+                                       return "all(equal(" + lhs + ", " + rhs + "))";
+                       }
+       }
+}
+
+// Parameters of a single partitioned test case.
+// NOTE(review): presumably aggregate-initialized positionally by the test
+// registration code (not visible here) - confirm before reordering members.
+struct CaseDefinition
+{
+       int                                     opType;                 // OpType value, kept as a plain int
+       VkShaderStageFlags      shaderStage;    // shader stage(s) the case targets
+       VkFormat                        format;                 // data format of the values fed to the operation
+};
+
+string getTestString(const CaseDefinition &caseDef) // Builds the GLSL statements shared by every stage: they run the partitioned checks and record pass bits in tempResult.
+{
+    // NOTE: tempResult can't have anything in bits 31:24 to avoid int->float
+    // conversion overflow in framebuffer tests.
+    string fmt = subgroups::getFormatNameForGLSL(caseDef.format); // GLSL type name used for every result variable below
+       string bdy =
+               "  uint tempResult = 0;\n"
+               "  uint id = gl_SubgroupInvocationID;\n"; // 'id' is the variable the idhash expression further down refers to
+
+    // Test the case where the partition has a single subset with all invocations in it.
+    // This should generate the same result as the non-partitioned function.
+    bdy +=
+        "  uvec4 allBallot = mask;\n" // 'mask' and 'data' are not declared here; the enclosing shader source must provide them
+        "  " + fmt + " allResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], allBallot);\n"
+        "  " + fmt + " refResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
+        "  if (" + getCompare(caseDef.opType, caseDef.format, "allResult", "refResult") + ") {\n"
+        "      tempResult |= 0x1;\n" // bit 0: whole-subgroup partition matches the non-partitioned op
+        "  }\n";
+
+    // The definition of a partition doesn't forbid bits corresponding to inactive
+    // invocations being in the subset with active invocations. In other words, test that
+    // bits corresponding to inactive invocations are ignored.
+    bdy +=
+           "  if (0 == (gl_SubgroupInvocationID % 2)) {\n" // only even invocation ids execute the operations in this branch
+        "    " + fmt + " allResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], allBallot);\n" // shadows the outer allResult in the nested GLSL scope
+        "    " + fmt + " refResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
+        "    if (" + getCompare(caseDef.opType, caseDef.format, "allResult", "refResult") + ") {\n"
+        "        tempResult |= 0x2;\n" // bit 1: same check, but with allBallot still naming the odd (non-participating) invocations
+        "    }\n"
+        "  } else {\n"
+        "    tempResult |= 0x2;\n" // odd invocation ids run no check and set bit 1 unconditionally
+        "  }\n";
+
+    // Test the case where the partition has each invocation in a unique subset. For
+    // exclusive ops, the result is identity. For reduce/inclusive, it's the original value.
+    string expectedSelfResult = "data[gl_SubgroupInvocationID]";
+    if (caseDef.opType >= OPTYPE_EXCLUSIVE_ADD && // NOTE(review): assumes all exclusive OPTYPE_* values are contiguous in the enum - confirm
+        caseDef.opType <= OPTYPE_EXCLUSIVE_XOR) {
+        expectedSelfResult = getIdentity(caseDef.opType, caseDef.format);
+    }
+
+    bdy +=
+        "  uvec4 selfBallot = subgroupPartitionNV(gl_SubgroupInvocationID);\n"
+        "  " + fmt + " selfResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], selfBallot);\n"
+        "  if (" + getCompare(caseDef.opType, caseDef.format, "selfResult", expectedSelfResult) + ") {\n"
+        "      tempResult |= 0x4;\n" // bit 2: one-invocation-per-subset partition gives the expected value
+        "  }\n";
+
+    // Test "random" partitions based on a hash of the invocation id.
+    // This "hash" function produces interesting/randomish partitions.
+    static const char *idhash = "((id%N)+(id%(N+1))-(id%2)+(id/2))%((N+1)/2)"; // GLSL expression; N is the loop variable of the emitted for-loops
+
+    bdy +=
+               "  for (uint N = 1; N < 16; ++N) {\n" // N = 1..15, one result bit per N
+               "    " + fmt + " idhashFmt = " + fmt + "(" + idhash + ");\n"
+               "    uvec4 partitionBallot = subgroupPartitionNV(idhashFmt) & mask;\n"
+               "    " + fmt + " partitionedResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], partitionBallot);\n"
+               "      for (uint i = 0; i < N; ++i) {\n"
+               "        " + fmt + " iFmt = " + fmt + "(i);\n"
+        "        if (" + getCompare(caseDef.opType, caseDef.format, "idhashFmt", "iFmt") + ") {\n" // selects the invocations whose hash equals i, so the reference op below runs per subset
+        "          " + fmt + " subsetResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
+        "          tempResult |= " + getCompare(caseDef.opType, caseDef.format, "partitionedResult", "subsetResult") + " ? (0x4 << N) : 0;\n" // 0x4 << N for N = 1..15 covers bits 3..17
+        "        }\n"
+        "      }\n"
+        "  }\n"
+        // tests in flow control:
+               "  if (1 == (gl_SubgroupInvocationID % 2)) {\n" // only odd invocation ids execute this branch
+        "    for (uint N = 1; N < 7; ++N) {\n" // N = 1..6, one result bit per N
+               "      " + fmt + " idhashFmt = " + fmt + "(" + idhash + ");\n"
+               "      uvec4 partitionBallot = subgroupPartitionNV(idhashFmt) & mask;\n"
+        "      " + fmt + " partitionedResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], partitionBallot);\n"
+        "        for (uint i = 0; i < N; ++i) {\n"
+               "          " + fmt + " iFmt = " + fmt + "(i);\n"
+        "          if (" + getCompare(caseDef.opType, caseDef.format, "idhashFmt", "iFmt") + ") {\n"
+        "            " + fmt + " subsetResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
+        "            tempResult |= " + getCompare(caseDef.opType, caseDef.format, "partitionedResult", "subsetResult") + " ? (0x20000 << N) : 0;\n" // 0x20000 << N for N = 1..6 covers bits 18..23
+        "          }\n"
+        "        }\n"
+        "    }\n"
+        "  } else {\n"
+        "    tempResult |= 0xFC0000;\n" // even invocation ids set bits 18..23 directly; bit 23 is the highest bit ever written
+        "  }\n"
+        ;
+
+    return bdy;
+}
+
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       const vk::ShaderBuildOptions    buildOptions    (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       std::ostringstream                              bdy;
+
+       subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+       if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+               subgroups::setVertexShaderFrameBuffer(programCollection);
+
+       bdy << getTestString(caseDef);
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream vertexSrc;
+               vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(location = 0) in highp vec4 in_position;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << bdy.str()
+                       << "  out_color = float(tempResult);\n"
+                       << "  gl_Position = in_position;\n"
+                       << "  gl_PointSize = 1.0f;\n"
+                       << "}\n";
+               programCollection.glslSources.add("vert")
+                       << glu::VertexSource(vertexSrc.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream geometry;
+
+               geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(points) in;\n"
+                       << "layout(points, max_vertices = 1) out;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << bdy.str()
+                       << "  out_color = float(tempResult);\n"
+                       << "  gl_Position = gl_in[0].gl_Position;\n"
+                       << "  EmitVertex();\n"
+                       << "  EndPrimitive();\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("geometry")
+                               << glu::GeometrySource(geometry.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream controlSource;
+               controlSource  << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(vertices = 2) out;\n"
+                       << "layout(location = 0) out float out_color[];\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  if (gl_InvocationID == 0)\n"
+                       <<"  {\n"
+                       << "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       << "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       << "  }\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << bdy.str()
+                       << "  out_color[gl_InvocationID] = float(tempResult);"
+                       << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       << "}\n";
+
+
+               programCollection.glslSources.add("tesc")
+                       << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+               subgroups::setTesEvalShaderFrameBuffer(programCollection);
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+       {
+
+               std::ostringstream evaluationSource;
+               evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(isolines, equal_spacing, ccw ) in;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << bdy.str()
+                       << "  out_color = float(tempResult);\n"
+                       << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                       << "}\n";
+
+               subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+               programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+       }
+       else
+       {
+               DE_FATAL("Unsupported shader stage");
+       }
+}
+
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef) // Adds the GLSL sources for the SSBO-based cases: one compute shader, or otherwise one shader per graphics stage.
+{
+       const string bdy = getTestString(caseDef); // stage-independent test statements; they expect 'mask' and 'data' from the shaders below
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream src;
+
+               src << "#version 450\n"
+                       << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout (local_size_x_id = 0, local_size_y_id = 1, " // workgroup size is taken from specialization constants 0, 1 and 2
+                       "local_size_z_id = 2) in;\n"
+                       << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                       << "{\n"
+                       << "  uint result[];\n"
+                       << "};\n"
+                       << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+                       << "  highp uint offset = globalSize.x * ((globalSize.y * " // flattens the 3D global invocation id into one index into result[]
+                       "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+                       "gl_GlobalInvocationID.x;\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << bdy
+                       << "  result[offset] = tempResult;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("comp")
+                               << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+       else
+       {
+               { // vertex stage: results go to binding 0
+                       const std::string vertex =
+                               "#version 450\n"
+                               "#extension GL_NV_shader_subgroup_partitioned: enable\n"
+                           "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n" // binding 4 is the input binding that test() assigns to inputData
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + bdy+
+                               "  result[gl_VertexIndex] = tempResult;\n"
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+                               "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n" // each vertex gets its own x position, spaced pixelSize apart
+                               "  gl_PointSize = 1.0f;\n"
+                               "}\n";
+                       programCollection.glslSources.add("vert")
+                                       << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               { // tessellation control stage: results go to binding 1
+                       const std::string tesc =
+                               "#version 450\n"
+                               "#extension GL_NV_shader_subgroup_partitioned: enable\n"
+                           "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(vertices=1) out;\n"
+                               "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + bdy +
+                               "  result[gl_PrimitiveID] = tempResult;\n"
+                               "  if (gl_InvocationID == 0)\n"
+                               "  {\n"
+                               "    gl_TessLevelOuter[0] = 1.0f;\n"
+                               "    gl_TessLevelOuter[1] = 1.0f;\n"
+                               "  }\n"
+                               "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                               "}\n";
+                       programCollection.glslSources.add("tesc")
+                               << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               { // tessellation evaluation stage: results go to binding 2
+                       const std::string tese =
+                               "#version 450\n"
+                               "#extension GL_NV_shader_subgroup_partitioned: enable\n"
+                           "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(isolines) in;\n"
+                               "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + bdy +
+                               "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n" // two result slots per primitive, chosen by rounding gl_TessCoord.x
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+                               "}\n";
+                       programCollection.glslSources.add("tese")
+                               << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               { // geometry stage: results go to binding 3
+                       const std::string geometry =
+                               "#version 450\n"
+                               "#extension GL_NV_shader_subgroup_partitioned: enable\n"
+                           "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(${TOPOLOGY}) in;\n" // template placeholder; presumably substituted by addGeometryShadersFromTemplate - confirm
+                               "layout(points, max_vertices = 1) out;\n"
+                               "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                                + bdy +
+                               "  result[gl_PrimitiveIDIn] = tempResult;\n"
+                               "  gl_Position = gl_in[0].gl_Position;\n"
+                               "  EmitVertex();\n"
+                               "  EndPrimitive();\n"
+                               "}\n";
+                       subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
+                                                                                                         programCollection.glslSources);
+               }
+
+               { // fragment stage: the result is written to colour output 0 instead of an SSBO
+                       const std::string fragment =
+                               "#version 450\n"
+                               "#extension GL_NV_shader_subgroup_partitioned: enable\n"
+                           "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(location = 0) out uint result;\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + bdy +
+                               "  result = tempResult;\n"
+                               "}\n";
+                       programCollection.glslSources.add("fragment")
+                               << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+               subgroups::addNoSubgroupShader(programCollection); // NOTE(review): presumably adds subgroup-free fallback shaders for unsupported stages - confirm in the helper
+       }
+}
+
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+       if (!subgroups::isSubgroupSupported(context))
+               TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+
+       if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV))
+       {
+               TCU_THROW(NotSupportedError, "Device does not support subgroup partitioned operations");
+       }
+
+       if (subgroups::isDoubleFormat(caseDef.format) &&
+                       !subgroups::isDoubleSupportedForDevice(context))
+       {
+               TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+       }
+}
+
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+       if (!subgroups::areSubgroupOperationsSupportedForStage(
+                               context, caseDef.shaderStage))
+       {
+               if (subgroups::areSubgroupOperationsRequiredForStage(
+                                       caseDef.shaderStage))
+               {
+                       return tcu::TestStatus::fail(
+                                          "Shader stage " +
+                                          subgroups::getShaderStageName(caseDef.shaderStage) +
+                                          " is required to support subgroup operations!");
+               }
+               else
+               {
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+               }
+       }
+
+       subgroups::SSBOData inputData;
+       inputData.format = caseDef.format;
+       inputData.numElements = subgroups::maxSupportedSubgroupSize();
+       inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context,  VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+       else
+               TCU_THROW(InternalError, "Unhandled shader stage");
+}
+
+bool checkShaderStages (Context& context, const CaseDefinition& caseDef)
+{
+       if (!subgroups::areSubgroupOperationsSupportedForStage(
+                               context, caseDef.shaderStage))
+       {
+               if (subgroups::areSubgroupOperationsRequiredForStage(
+                                       caseDef.shaderStage))
+               {
+                       return false;
+               }
+               else
+               {
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+               }
+       }
+       return true;
+}
+
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               if(!checkShaderStages(context,caseDef))
+               {
+                       return tcu::TestStatus::fail(
+                                                       "Shader stage " +
+                                                       subgroups::getShaderStageName(caseDef.shaderStage) +
+                                                       " is required to support subgroup operations!");
+               }
+               subgroups::SSBOData inputData;
+               inputData.format = caseDef.format;
+               inputData.numElements = subgroups::maxSupportedSubgroupSize();
+               inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
+
+               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkCompute);
+       }
+       else
+       {
+               VkPhysicalDeviceSubgroupProperties subgroupProperties;
+               subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+               subgroupProperties.pNext = DE_NULL;
+
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage  & subgroupProperties.supportedStages);
+
+               if ( VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
+               {
+                       if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
+                               TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+                       else
+                               stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+               }
+
+               if ((VkShaderStageFlagBits)0u == stages)
+                       TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+               subgroups::SSBOData inputData;
+               inputData.format                        = caseDef.format;
+               inputData.numElements           = subgroups::maxSupportedSubgroupSize();
+               inputData.initializeType        = subgroups::SSBOData::InitializeNonZero;
+               inputData.binding                       = 4u;
+               inputData.stages                        = stages;
+
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData,
+                                                                                1, checkVertexPipelineStages, stages);
+       }
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+tcu::TestCaseGroup* createSubgroupsPartitionedTests(tcu::TestContext& testCtx) // Builds the "partitioned" group: compute, all-graphics and per-stage framebuffer cases for every accepted (format, operation) pair.
+{
+       de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+                       testCtx, "partitioned", "NV_shader_subgroup_partitioned category tests"));
+
+       const VkShaderStageFlags stages[] = // stages that each get a dedicated "_framebuffer" case
+       {
+               VK_SHADER_STAGE_VERTEX_BIT,
+               VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+               VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+               VK_SHADER_STAGE_GEOMETRY_BIT,
+       };
+
+       const VkFormat formats[] =
+       {
+               VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
+               VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
+               VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
+               VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
+               VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
+               VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
+               VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
+               VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED, // the *_USCALED formats are the ones classified as bool below
+               VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
+       };
+
+       for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
+       {
+               const VkFormat format = formats[formatIndex];
+
+               for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex) // visits every OPTYPE_* value below OPTYPE_LAST
+               {
+                       bool isBool = false;
+                       bool isFloat = false;
+
+                       switch (format) // classify the format; formats not listed stay neither float nor bool
+                       {
+                               default:
+                                       break;
+                               case VK_FORMAT_R32_SFLOAT:
+                               case VK_FORMAT_R32G32_SFLOAT:
+                               case VK_FORMAT_R32G32B32_SFLOAT:
+                               case VK_FORMAT_R32G32B32A32_SFLOAT:
+                               case VK_FORMAT_R64_SFLOAT:
+                               case VK_FORMAT_R64G64_SFLOAT:
+                               case VK_FORMAT_R64G64B64_SFLOAT:
+                               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                                       isFloat = true;
+                                       break;
+                               case VK_FORMAT_R8_USCALED:
+                               case VK_FORMAT_R8G8_USCALED:
+                               case VK_FORMAT_R8G8B8_USCALED:
+                               case VK_FORMAT_R8G8B8A8_USCALED:
+                                       isBool = true;
+                                       break;
+                       }
+
+                       bool isBitwiseOp = false;
+
+                       switch (opTypeIndex) // AND/OR/XOR in reduce, inclusive and exclusive form count as bitwise
+                       {
+                               default:
+                                       break;
+                               case OPTYPE_AND:
+                               case OPTYPE_INCLUSIVE_AND:
+                               case OPTYPE_EXCLUSIVE_AND:
+                               case OPTYPE_OR:
+                               case OPTYPE_INCLUSIVE_OR:
+                               case OPTYPE_EXCLUSIVE_OR:
+                               case OPTYPE_XOR:
+                               case OPTYPE_INCLUSIVE_XOR:
+                               case OPTYPE_EXCLUSIVE_XOR:
+                                       isBitwiseOp = true;
+                                       break;
+                       }
+
+                       if (isFloat && isBitwiseOp)
+                       {
+                               // Skip float with bitwise category.
+                               continue;
+                       }
+
+                       if (isBool && !isBitwiseOp)
+                       {
+                               // Skip bool when its not the bitwise category.
+                               continue;
+                       }
+                       std::string op = getOpTypeName(opTypeIndex); // lower-cased below to form the case-name prefix
+
+                       { // compute case, SSBO path (initPrograms + test)
+                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format};
+                               addFunctionCaseWithPrograms(group.get(),
+                                                                                       de::toLower(op) + "_" +
+                                                                                       subgroups::getFormatNameForGLSL(format) +
+                                                                                       "_" + getShaderStageName(caseDef.shaderStage),
+                                                                                       "", supportedCheck, initPrograms, test, caseDef);
+                       }
+
+                       { // "_graphic" case covering all graphics stages, SSBO path (initPrograms + test)
+                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format};
+                               addFunctionCaseWithPrograms(group.get(),
+                                                                                       de::toLower(op) + "_" +
+                                                                                       subgroups::getFormatNameForGLSL(format) +
+                                                                                       "_graphic",
+                                                                                       "", supportedCheck, initPrograms, test, caseDef);
+                       }
+
+                       for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex) // one framebuffer case per stage (initFrameBufferPrograms + noSSBOtest)
+                       {
+                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
+                               addFunctionCaseWithPrograms(group.get(), de::toLower(op) + "_" + subgroups::getFormatNameForGLSL(format) +
+                                                                                       "_" + getShaderStageName(caseDef.shaderStage) + "_framebuffer", "",
+                                                                                       supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+                       }
+               }
+       }
+
+       return group.release(); // the MovePtr gives up the group; the returned raw pointer is the caller's to manage
+}
+
+} // subgroups
+} // vkt
+
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsPartitionedTests.hpp b/external/openglcts/modules/common/subgroups/glcSubgroupsPartitionedTests.hpp
new file mode 100755 (executable)
index 0000000..c5e5dcf
--- /dev/null
@@ -0,0 +1,41 @@
+#ifndef _VKTSUBGROUPSPARTITIONEDTESTS_HPP
+#define _VKTSUBGROUPSPARTITIONEDTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ * Copyright (c) 2018 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Creates the test case group holding the partitioned subgroup tests for the given test context.
+// NOTE(review): the definition lives outside this header; ownership of the returned
+// group presumably passes to the caller - confirm against the implementation.
+tcu::TestCaseGroup* createSubgroupsPartitionedTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSPARTITIONEDTESTS_HPP
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsQuadTests.cpp b/external/openglcts/modules/common/subgroups/glcSubgroupsQuadTests.cpp
new file mode 100755 (executable)
index 0000000..1abe256
--- /dev/null
@@ -0,0 +1,761 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsQuadTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Quad operations exercised by this file. The enumerator values double as array
+// indices when the shader sources are generated, so their order must not change.
+enum OpType
+{
+       OPTYPE_QUAD_BROADCAST = 0,              // emitted as subgroupQuadBroadcast
+       OPTYPE_QUAD_SWAP_HORIZONTAL,    // emitted as subgroupQuadSwapHorizontal
+       OPTYPE_QUAD_SWAP_VERTICAL,              // emitted as subgroupQuadSwapVertical
+       OPTYPE_QUAD_SWAP_DIAGONAL,              // emitted as subgroupQuadSwapDiagonal
+       OPTYPE_LAST                                             // number of operations; used to size per-operation arrays
+};
+
+// Result checker used by the framebuffer test variants: forwards the captured data and
+// width to subgroups::check with a fixed last argument of 1. The third parameter is ignored.
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+                                                                         deUint32 width, deUint32 /* unused */)
+{
+       const bool allMatch = vkt::subgroups::check(datas, width, 1);
+       return allMatch;
+}
+
+static bool checkCompute(std::vector<const void*> datas,
+                                                const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                                                deUint32)
+{
+       return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
+}
+
+std::string getOpTypeName(int opType)
+{
+       switch (opType)
+       {
+               default:
+                       DE_FATAL("Unsupported op type");
+                       return "";
+               case OPTYPE_QUAD_BROADCAST:
+                       return "subgroupQuadBroadcast";
+               case OPTYPE_QUAD_SWAP_HORIZONTAL:
+                       return "subgroupQuadSwapHorizontal";
+               case OPTYPE_QUAD_SWAP_VERTICAL:
+                       return "subgroupQuadSwapVertical";
+               case OPTYPE_QUAD_SWAP_DIAGONAL:
+                       return "subgroupQuadSwapDiagonal";
+       }
+}
+
+// Parameters of a single quad test case, passed by value to the program
+// generators and support checks in this file.
+struct CaseDefinition
+{
+       int                                     opType;                 // OpType value selecting the quad built-in to test
+       VkShaderStageFlags      shaderStage;    // stage under test; compared against single VK_SHADER_STAGE_* bits
+       VkFormat                        format;                 // element format of the input data; mapped to a GLSL type name
+       int                                     direction;              // broadcast only: quad lane passed to subgroupQuadBroadcast
+};
+
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       const vk::ShaderBuildOptions    buildOptions    (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       std::string                     swapTable[OPTYPE_LAST];
+
+       subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+       if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+               subgroups::setVertexShaderFrameBuffer(programCollection);
+
+       swapTable[OPTYPE_QUAD_BROADCAST] = "";
+       swapTable[OPTYPE_QUAD_SWAP_HORIZONTAL] = "  const uint swapTable[4] = {1, 0, 3, 2};\n";
+       swapTable[OPTYPE_QUAD_SWAP_VERTICAL] = "  const uint swapTable[4] = {2, 3, 0, 1};\n";
+       swapTable[OPTYPE_QUAD_SWAP_DIAGONAL] = "  const uint swapTable[4] = {3, 2, 1, 0};\n";
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream      vertexSrc;
+               vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_quad: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(location = 0) in highp vec4 in_position;\n"
+                       << "layout(location = 0) out float result;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << swapTable[caseDef.opType];
+
+               if (OPTYPE_QUAD_BROADCAST == caseDef.opType)
+               {
+                       vertexSrc << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
+                               << getOpTypeName(caseDef.opType) << "(data[gl_SubgroupInvocationID], " << caseDef.direction << ");\n"
+                               << "  uint otherID = (gl_SubgroupInvocationID & ~0x3) + " << caseDef.direction << ";\n";
+               }
+               else
+               {
+                       vertexSrc << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
+                               << getOpTypeName(caseDef.opType) << "(data[gl_SubgroupInvocationID]);\n"
+                               << "  uint otherID = (gl_SubgroupInvocationID & ~0x3) + swapTable[gl_SubgroupInvocationID & 0x3];\n";
+               }
+
+               vertexSrc << "  if (subgroupBallotBitExtract(mask, otherID))\n"
+                       << "  {\n"
+                       << "    result = (op == data[otherID]) ? 1.0f : 0.0f;\n"
+                       << "  }\n"
+                       << "  else\n"
+                       << "  {\n"
+                       << "    result = 1.0f;\n" // Invocation we read from was inactive, so we can't verify results!
+                       << "  }\n"
+                       << "  gl_Position = in_position;\n"
+                       << "  gl_PointSize = 1.0f;\n"
+                       << "}\n";
+               programCollection.glslSources.add("vert")
+                       << glu::VertexSource(vertexSrc.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream geometry;
+
+               geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_quad: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(points) in;\n"
+                       << "layout(points, max_vertices = 1) out;\n"
+                       << "layout(location = 0) out float out_color;\n"
+
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << swapTable[caseDef.opType];
+
+               if (OPTYPE_QUAD_BROADCAST == caseDef.opType)
+               {
+                       geometry << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
+                               << getOpTypeName(caseDef.opType) << "(data[gl_SubgroupInvocationID], " << caseDef.direction << ");\n"
+                               << "  uint otherID = (gl_SubgroupInvocationID & ~0x3) + " << caseDef.direction << ";\n";
+               }
+               else
+               {
+                       geometry << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
+                               << getOpTypeName(caseDef.opType) << "(data[gl_SubgroupInvocationID]);\n"
+                               << "  uint otherID = (gl_SubgroupInvocationID & ~0x3) + swapTable[gl_SubgroupInvocationID & 0x3];\n";
+               }
+
+               geometry << "  if (subgroupBallotBitExtract(mask, otherID))\n"
+                       << "  {\n"
+                       << "    out_color = (op == data[otherID]) ? 1.0 : 0.0;\n"
+                       << "  }\n"
+                       << "  else\n"
+                       << "  {\n"
+                       << "    out_color = 1.0;\n" // Invocation we read from was inactive, so we can't verify results!
+                       << "  }\n"
+                       << "  gl_Position = gl_in[0].gl_Position;\n"
+                       << "  EmitVertex();\n"
+                       << "  EndPrimitive();\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("geometry")
+                       << glu::GeometrySource(geometry.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream controlSource;
+
+               controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_quad: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(vertices = 2) out;\n"
+                       << "layout(location = 0) out float out_color[];\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  if (gl_InvocationID == 0)\n"
+                       <<"  {\n"
+                       << "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       << "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       << "  }\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << swapTable[caseDef.opType];
+
+               if (OPTYPE_QUAD_BROADCAST == caseDef.opType)
+               {
+                       controlSource << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
+                               << getOpTypeName(caseDef.opType) << "(data[gl_SubgroupInvocationID], " << caseDef.direction << ");\n"
+                               << "  uint otherID = (gl_SubgroupInvocationID & ~0x3) + " << caseDef.direction << ";\n";
+               }
+               else
+               {
+                       controlSource << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
+                               << getOpTypeName(caseDef.opType) << "(data[gl_SubgroupInvocationID]);\n"
+                               << "  uint otherID = (gl_SubgroupInvocationID & ~0x3) + swapTable[gl_SubgroupInvocationID & 0x3];\n";
+               }
+
+               controlSource << "  if (subgroupBallotBitExtract(mask, otherID))\n"
+                       << "  {\n"
+                       << "    out_color[gl_InvocationID] = (op == data[otherID]) ? 1.0 : 0.0;\n"
+                       << "  }\n"
+                       << "  else\n"
+                       << "  {\n"
+                       << "    out_color[gl_InvocationID] = 1.0; \n"// Invocation we read from was inactive, so we can't verify results!
+                       << "  }\n"
+                       << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("tesc")
+                       << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+               subgroups::setTesEvalShaderFrameBuffer(programCollection);
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+       {
+               ostringstream evaluationSource;
+               evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_quad: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(isolines, equal_spacing, ccw ) in;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << swapTable[caseDef.opType];
+
+               if (OPTYPE_QUAD_BROADCAST == caseDef.opType)
+               {
+                       evaluationSource << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
+                               << getOpTypeName(caseDef.opType) << "(data[gl_SubgroupInvocationID], " << caseDef.direction << ");\n"
+                               << "  uint otherID = (gl_SubgroupInvocationID & ~0x3) + " << caseDef.direction << ";\n";
+               }
+               else
+               {
+                       evaluationSource << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
+                               << getOpTypeName(caseDef.opType) << "(data[gl_SubgroupInvocationID]);\n"
+                               << "  uint otherID = (gl_SubgroupInvocationID & ~0x3) + swapTable[gl_SubgroupInvocationID & 0x3];\n";
+               }
+
+               evaluationSource << "  if (subgroupBallotBitExtract(mask, otherID))\n"
+                       << "  {\n"
+                       << "    out_color = (op == data[otherID]) ? 1.0 : 0.0;\n"
+                       << "  }\n"
+                       << "  else\n"
+                       << "  {\n"
+                       << "    out_color = 1.0;\n" // Invocation we read from was inactive, so we can't verify results!
+                       << "  }\n"
+                       << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                       << "}\n";
+
+               subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+               programCollection.glslSources.add("tese")
+                               << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+       }
+       else
+       {
+               DE_FATAL("Unsupported shader stage");
+       }
+}
+
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       std::string swapTable[OPTYPE_LAST];
+       swapTable[OPTYPE_QUAD_BROADCAST] = "";
+       swapTable[OPTYPE_QUAD_SWAP_HORIZONTAL] = "  const uint swapTable[4] = {1, 0, 3, 2};\n";
+       swapTable[OPTYPE_QUAD_SWAP_VERTICAL] = "  const uint swapTable[4] = {2, 3, 0, 1};\n";
+       swapTable[OPTYPE_QUAD_SWAP_DIAGONAL] = "  const uint swapTable[4] = {3, 2, 1, 0};\n";
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream src;
+
+               src << "#version 450\n"
+                       << "#extension GL_KHR_shader_subgroup_quad: enable\n"
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+                       "local_size_z_id = 2) in;\n"
+                       << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                       << "{\n"
+                       << "  uint result[];\n"
+                       << "};\n"
+                       << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+                       << "  highp uint offset = globalSize.x * ((globalSize.y * "
+                       "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+                       "gl_GlobalInvocationID.x;\n"
+                       << "  uvec4 mask = subgroupBallot(true);\n"
+                       << swapTable[caseDef.opType];
+
+
+               if (OPTYPE_QUAD_BROADCAST == caseDef.opType)
+               {
+                       src << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
+                               << getOpTypeName(caseDef.opType) << "(data[gl_SubgroupInvocationID], " << caseDef.direction << ");\n"
+                               << "  uint otherID = (gl_SubgroupInvocationID & ~0x3) + " << caseDef.direction << ";\n";
+               }
+               else
+               {
+                       src << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
+                               << getOpTypeName(caseDef.opType) << "(data[gl_SubgroupInvocationID]);\n"
+                               << "  uint otherID = (gl_SubgroupInvocationID & ~0x3) + swapTable[gl_SubgroupInvocationID & 0x3];\n";
+               }
+
+               src << "  if (subgroupBallotBitExtract(mask, otherID))\n"
+                       << "  {\n"
+                       << "    result[offset] = (op == data[otherID]) ? 1 : 0;\n"
+                       << "  }\n"
+                       << "  else\n"
+                       << "  {\n"
+                       << "    result[offset] = 1; // Invocation we read from was inactive, so we can't verify results!\n"
+                       << "  }\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("comp")
+                               << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+       else
+       {
+               std::ostringstream src;
+               if (OPTYPE_QUAD_BROADCAST == caseDef.opType)
+               {
+                       src << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
+                               << getOpTypeName(caseDef.opType) << "(data[gl_SubgroupInvocationID], " << caseDef.direction << ");\n"
+                               << "  uint otherID = (gl_SubgroupInvocationID & ~0x3) + " << caseDef.direction << ";\n";
+               }
+               else
+               {
+                       src << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
+                               << getOpTypeName(caseDef.opType) << "(data[gl_SubgroupInvocationID]);\n"
+                               << "  uint otherID = (gl_SubgroupInvocationID & ~0x3) + swapTable[gl_SubgroupInvocationID & 0x3];\n";
+               }
+               const string sourceType = src.str();
+
+               {
+                       const string vertex =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_quad: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + swapTable[caseDef.opType]
+                               + sourceType +
+                               "  if (subgroupBallotBitExtract(mask, otherID))\n"
+                               "  {\n"
+                               "    result[gl_VertexIndex] = (op == data[otherID]) ? 1 : 0;\n"
+                               "  }\n"
+                               "  else\n"
+                               "  {\n"
+                               "    result[gl_VertexIndex] = 1; // Invocation we read from was inactive, so we can't verify results!\n"
+                               "  }\n"
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+                               "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+                               "}\n";
+                       programCollection.glslSources.add("vert")
+                               << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const string tesc =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_quad: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(vertices=1) out;\n"
+                               "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + swapTable[caseDef.opType]
+                               + sourceType +
+                               "  if (subgroupBallotBitExtract(mask, otherID))\n"
+                               "  {\n"
+                               "    result[gl_PrimitiveID] = (op == data[otherID]) ? 1 : 0;\n"
+                               "  }\n"
+                               "  else\n"
+                               "  {\n"
+                               "    result[gl_PrimitiveID] = 1; // Invocation we read from was inactive, so we can't verify results!\n"
+                               "  }\n"
+                               "  if (gl_InvocationID == 0)\n"
+                               "  {\n"
+                               "    gl_TessLevelOuter[0] = 1.0f;\n"
+                               "    gl_TessLevelOuter[1] = 1.0f;\n"
+                               "  }\n"
+                               "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                               "}\n";
+                       programCollection.glslSources.add("tesc")
+                                       << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const string tese =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_quad: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(isolines) in;\n"
+                               "layout(set = 0, binding = 2, std430)  buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + swapTable[caseDef.opType]
+                               + sourceType +
+                               "  if (subgroupBallotBitExtract(mask, otherID))\n"
+                               "  {\n"
+                               "    result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = (op == data[otherID]) ? 1 : 0;\n"
+                               "  }\n"
+                               "  else\n"
+                               "  {\n"
+                               "    result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = 1; // Invocation we read from was inactive, so we can't verify results!\n"
+                               "  }\n"
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+                               "}\n";
+                       programCollection.glslSources.add("tese")
+                                       << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const string geometry =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_quad: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(${TOPOLOGY}) in;\n"
+                               "layout(points, max_vertices = 1) out;\n"
+                               "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + swapTable[caseDef.opType]
+                               + sourceType +
+                               "  if (subgroupBallotBitExtract(mask, otherID))\n"
+                               "  {\n"
+                               "    result[gl_PrimitiveIDIn] = (op == data[otherID]) ? 1 : 0;\n"
+                               "  }\n"
+                               "  else\n"
+                               "  {\n"
+                               "    result[gl_PrimitiveIDIn] = 1; // Invocation we read from was inactive, so we can't verify results!\n"
+                               "  }\n"
+                               "  gl_Position = gl_in[0].gl_Position;\n"
+                               "  EmitVertex();\n"
+                               "  EndPrimitive();\n"
+                               "}\n";
+                       subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
+                                                                                                         programCollection.glslSources);
+               }
+
+               {
+                       const string fragment =
+                               "#version 450\n"
+                               "#extension GL_KHR_shader_subgroup_quad: enable\n"
+                               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                               "layout(location = 0) out uint result;\n"
+                               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+                               "{\n"
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+                               "};\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  uvec4 mask = subgroupBallot(true);\n"
+                               + swapTable[caseDef.opType]
+                               + sourceType +
+                               "  if (subgroupBallotBitExtract(mask, otherID))\n"
+                               "  {\n"
+                               "    result = (op == data[otherID]) ? 1 : 0;\n"
+                               "  }\n"
+                               "  else\n"
+                               "  {\n"
+                               "    result = 1; // Invocation we read from was inactive, so we can't verify results!\n"
+                               "  }\n"
+                               "}\n";
+                       programCollection.glslSources.add("fragment")
+                               << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+               subgroups::addNoSubgroupShader(programCollection);
+       }
+}
+
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+       if (!subgroups::isSubgroupSupported(context))
+               TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+
+       if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_QUAD_BIT))
+               TCU_THROW(NotSupportedError, "Device does not support subgroup quad operations");
+
+
+       if (subgroups::isDoubleFormat(caseDef.format) &&
+                       !subgroups::isDoubleSupportedForDevice(context))
+       {
+               TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+       }
+}
+
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+       if (!subgroups::areSubgroupOperationsSupportedForStage(
+                               context, caseDef.shaderStage))
+       {
+               if (subgroups::areSubgroupOperationsRequiredForStage(
+                                       caseDef.shaderStage))
+               {
+                       return tcu::TestStatus::fail(
+                                          "Shader stage " +
+                                          subgroups::getShaderStageName(caseDef.shaderStage) +
+                                          " is required to support subgroup operations!");
+               }
+               else
+               {
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+               }
+       }
+
+       subgroups::SSBOData inputData;
+       inputData.format = caseDef.format;
+       inputData.numElements = subgroups::maxSupportedSubgroupSize();
+       inputData.initializeType = subgroups::SSBOData::InitializeNonZero;;
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context,  VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+       else
+               TCU_THROW(InternalError, "Unhandled shader stage");
+}
+
+
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+               {
+                       return tcu::TestStatus::fail(
+                                          "Shader stage " +
+                                          subgroups::getShaderStageName(caseDef.shaderStage) +
+                                          " is required to support subgroup operations!");
+               }
+               subgroups::SSBOData inputData;
+               inputData.format = caseDef.format;
+               inputData.numElements = subgroups::maxSupportedSubgroupSize();
+               inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
+
+               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkCompute);
+       }
+       else
+       {
+               VkPhysicalDeviceSubgroupProperties subgroupProperties;
+               subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+               subgroupProperties.pNext = DE_NULL;
+
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage  & subgroupProperties.supportedStages);
+
+               if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
+               {
+                       if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
+                               TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+                       else
+                               stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+               }
+
+               if ((VkShaderStageFlagBits)0u == stages)
+                       TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+               subgroups::SSBOData inputData;
+               inputData.format                        = caseDef.format;
+               inputData.numElements           = subgroups::maxSupportedSubgroupSize();
+               inputData.initializeType        = subgroups::SSBOData::InitializeNonZero;
+               inputData.binding                       = 4u;
+               inputData.stages                        = stages;
+
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, stages);
+       }
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+tcu::TestCaseGroup* createSubgroupsQuadTests(tcu::TestContext& testCtx)
+{
+       de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+               testCtx, "graphics", "Subgroup arithmetic category tests: graphics"));
+       de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+               testCtx, "compute", "Subgroup arithmetic category tests: compute"));
+       de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+               testCtx, "framebuffer", "Subgroup arithmetic category tests: framebuffer"));
+
+       const VkFormat formats[] =
+       {
+               VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
+               VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
+               VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
+               VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
+               VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
+               VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
+               VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
+               VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED,
+               VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
+       };
+
+       const VkShaderStageFlags stages[] =
+       {
+               VK_SHADER_STAGE_VERTEX_BIT,
+               VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+               VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+               VK_SHADER_STAGE_GEOMETRY_BIT,
+       };
+
+       for (int direction = 0; direction < 4; ++direction)
+       {
+               for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
+               {
+                       const VkFormat format = formats[formatIndex];
+
+                       for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
+                       {
+                               const std::string op = de::toLower(getOpTypeName(opTypeIndex));
+                               std::ostringstream name;
+                               name << de::toLower(op);
+
+                               if (OPTYPE_QUAD_BROADCAST == opTypeIndex)
+                               {
+                                       name << "_" << direction;
+                               }
+                               else
+                               {
+                                       if (0 != direction)
+                                       {
+                                               // We don't need direction for swap operations.
+                                               continue;
+                                       }
+                               }
+
+                               name << "_" << subgroups::getFormatNameForGLSL(format);
+
+                               {
+                                       const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, direction};
+                                       addFunctionCaseWithPrograms(computeGroup.get(), name.str(), "", supportedCheck, initPrograms, test, caseDef);
+                               }
+
+                               {
+                                       const CaseDefinition caseDef =
+                                       {
+                                               opTypeIndex,
+                                               VK_SHADER_STAGE_ALL_GRAPHICS,
+                                               format,
+                                               direction
+                                       };
+                                       addFunctionCaseWithPrograms(graphicGroup.get(), name.str(), "", supportedCheck, initPrograms, test, caseDef);
+                               }
+                               for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
+                               {
+                                       const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, direction};
+                                       addFunctionCaseWithPrograms(framebufferGroup.get(), name.str()+"_"+ getShaderStageName(caseDef.shaderStage), "",
+                                                                                               supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+                               }
+
+                       }
+               }
+       }
+
+       de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+               testCtx, "quad", "Subgroup quad category tests"));
+
+       group->addChild(graphicGroup.release());
+       group->addChild(computeGroup.release());
+       group->addChild(framebufferGroup.release());
+
+       return group.release();
+}
+} // subgroups
+} // vkt
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsQuadTests.hpp b/external/openglcts/modules/common/subgroups/glcSubgroupsQuadTests.hpp
new file mode 100644 (file)
index 0000000..6bc23a6
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef _VKTSUBGROUPSQUADTESTS_HPP
+#define _VKTSUBGROUPSQUADTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+//! Creates the "quad" subgroup test case group and returns it as a raw pointer.
+tcu::TestCaseGroup* createSubgroupsQuadTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSQUADTESTS_HPP
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsShapeTests.cpp b/external/openglcts/modules/common/subgroups/glcSubgroupsShapeTests.cpp
new file mode 100755 (executable)
index 0000000..e32862d
--- /dev/null
@@ -0,0 +1,606 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsShapeTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Result checker for graphics stages: forwards to vkt::subgroups::check with 1 as the final argument; the third parameter is ignored.
+static bool checkVertexPipelineStages(std::vector<const void*> datas, deUint32 width, deUint32 /* unused */)
+{
+       return vkt::subgroups::check(datas, width, 1);
+}
+
+static bool checkCompute(std::vector<const void*> datas,
+                                                const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                                                deUint32)
+{
+       return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
+}
+
+enum OpType // Selects which subgroup operation the generated shape check is built on.
+{
+       OPTYPE_CLUSTERED = 0, // shader body uses subgroupClusteredOr
+       OPTYPE_QUAD, // shader body uses subgroupQuadBroadcast
+       OPTYPE_LAST // end marker, rejected by getOpTypeName; presumably the iteration bound (confirm at the use site)
+};
+
+std::string getOpTypeName(int opType)
+{
+       switch (opType)
+       {
+               default:
+                       DE_FATAL("Unsupported op type");
+                       return "";
+               case OPTYPE_CLUSTERED:
+                       return "clustered";
+               case OPTYPE_QUAD:
+                       return "quad";
+       }
+}
+
+struct CaseDefinition // Parameters of one shape test case.
+{
+       int                                     opType; // OpType value; compared against OPTYPE_CLUSTERED / OPTYPE_QUAD
+       VkShaderStageFlags      shaderStage; // stage the case targets; compared against VK_SHADER_STAGE_* bits
+};
+
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       const vk::ShaderBuildOptions    buildOptions    (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       std::ostringstream                              bdy;
+       std::string                                             extension = (OPTYPE_CLUSTERED == caseDef.opType) ?
+                                                                               "#extension GL_KHR_shader_subgroup_clustered: enable\n" :
+                                                                               "#extension GL_KHR_shader_subgroup_quad: enable\n";
+
+       subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+       if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+               subgroups::setVertexShaderFrameBuffer(programCollection);
+
+       extension += "#extension GL_KHR_shader_subgroup_ballot: enable\n";
+
+       bdy << "  uint tempResult = 0x1;\n"
+               << "  uvec4 mask = subgroupBallot(true);\n";
+
+       if (OPTYPE_CLUSTERED == caseDef.opType)
+       {
+               for (deUint32 i = 1; i <= subgroups::maxSupportedSubgroupSize(); i *= 2)
+               {
+                       bdy << "  if (gl_SubgroupSize >= " << i << ")\n"
+                               << "  {\n"
+                               << "    uvec4 contribution = uvec4(0);\n"
+                               << "    const uint modID = gl_SubgroupInvocationID % 32;\n"
+                               << "    switch (gl_SubgroupInvocationID / 32)\n"
+                               << "    {\n"
+                               << "    case 0: contribution.x = 1 << modID; break;\n"
+                               << "    case 1: contribution.y = 1 << modID; break;\n"
+                               << "    case 2: contribution.z = 1 << modID; break;\n"
+                               << "    case 3: contribution.w = 1 << modID; break;\n"
+                               << "    }\n"
+                               << "    uvec4 result = subgroupClusteredOr(contribution, " << i << ");\n"
+                               << "    uint rootID = gl_SubgroupInvocationID & ~(" << i - 1 << ");\n"
+                               << "    for (uint i = 0; i < " << i << "; i++)\n"
+                               << "    {\n"
+                               << "      uint nextID = rootID + i;\n"
+                               << "      if (subgroupBallotBitExtract(mask, nextID) ^^ subgroupBallotBitExtract(result, nextID))\n"
+                               << "      {\n"
+                               << "        tempResult = 0;\n"
+                               << "      }\n"
+                               << "    }\n"
+                               << "  }\n";
+               }
+       }
+       else
+       {
+               bdy << "  uint cluster[4] =\n"
+                       << "  {\n"
+                       << "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 0),\n"
+                       << "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 1),\n"
+                       << "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 2),\n"
+                       << "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 3)\n"
+                       << "  };\n"
+                       << "  uint rootID = gl_SubgroupInvocationID & ~0x3;\n"
+                       << "  for (uint i = 0; i < 4; i++)\n"
+                       << "  {\n"
+                       << "    uint nextID = rootID + i;\n"
+                       << "    if (subgroupBallotBitExtract(mask, nextID) && (cluster[i] != nextID))\n"
+                       << "    {\n"
+                       << "      tempResult = mask.x;\n"
+                       << "    }\n"
+                       << "  }\n";
+       }
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream vertexSrc;
+               vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << extension
+                       << "layout(location = 0) in highp vec4 in_position;\n"
+                       << "layout(location = 0) out float result;\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << bdy.str()
+                       << "  result = float(tempResult);\n"
+                       << "  gl_Position = in_position;\n"
+                       << "  gl_PointSize = 1.0f;\n"
+                       << "}\n";
+               programCollection.glslSources.add("vert")
+                       << glu::VertexSource(vertexSrc.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream geometry;
+
+               geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << extension
+                       << "layout(points) in;\n"
+                       << "layout(points, max_vertices = 1) out;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << bdy.str()
+                       << "  out_color = float(tempResult);\n"
+                       << "  gl_Position = gl_in[0].gl_Position;\n"
+                       << "  EmitVertex();\n"
+                       << "  EndPrimitive();\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("geometry")
+                       << glu::GeometrySource(geometry.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream controlSource;
+
+               controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << extension
+                       << "layout(vertices = 2) out;\n"
+                       << "layout(location = 0) out float out_color[];\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  if (gl_InvocationID == 0)\n"
+                       <<"  {\n"
+                       << "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       << "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       << "  }\n"
+                       << bdy.str()
+                       << "  out_color[gl_InvocationID] = float(tempResult);\n"
+                       << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("tesc")
+                       << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+               subgroups::setTesEvalShaderFrameBuffer(programCollection);
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream evaluationSource;
+
+               evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << extension
+                       << "layout(isolines, equal_spacing, ccw ) in;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << bdy.str()
+                       << "  out_color = float(tempResult);\n"
+                       << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                       << "}\n";
+
+               subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+               programCollection.glslSources.add("tese")
+                               << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+       }
+       else
+       {
+               DE_FATAL("Unsupported shader stage");
+       }
+}
+
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       std::string extension = (OPTYPE_CLUSTERED == caseDef.opType) ?
+                                                       "#extension GL_KHR_shader_subgroup_clustered: enable\n" :
+                                                       "#extension GL_KHR_shader_subgroup_quad: enable\n";
+
+       extension += "#extension GL_KHR_shader_subgroup_ballot: enable\n";
+
+       std::ostringstream bdy;
+
+       bdy << "  uint tempResult = 0x1;\n"
+               << "  uvec4 mask = subgroupBallot(true);\n";
+
+       if (OPTYPE_CLUSTERED == caseDef.opType)
+       {
+               for (deUint32 i = 1; i <= subgroups::maxSupportedSubgroupSize(); i *= 2)
+               {
+                       bdy << "  if (gl_SubgroupSize >= " << i << ")\n"
+                               << "  {\n"
+                               << "    uvec4 contribution = uvec4(0);\n"
+                               << "    const uint modID = gl_SubgroupInvocationID % 32;\n"
+                               << "    switch (gl_SubgroupInvocationID / 32)\n"
+                               << "    {\n"
+                               << "    case 0: contribution.x = 1 << modID; break;\n"
+                               << "    case 1: contribution.y = 1 << modID; break;\n"
+                               << "    case 2: contribution.z = 1 << modID; break;\n"
+                               << "    case 3: contribution.w = 1 << modID; break;\n"
+                               << "    }\n"
+                               << "    uvec4 result = subgroupClusteredOr(contribution, " << i << ");\n"
+                               << "    uint rootID = gl_SubgroupInvocationID & ~(" << i - 1 << ");\n"
+                               << "    for (uint i = 0; i < " << i << "; i++)\n"
+                               << "    {\n"
+                               << "      uint nextID = rootID + i;\n"
+                               << "      if (subgroupBallotBitExtract(mask, nextID) ^^ subgroupBallotBitExtract(result, nextID))\n"
+                               << "      {\n"
+                               << "        tempResult = 0;\n"
+                               << "      }\n"
+                               << "    }\n"
+                               << "  }\n";
+               }
+       }
+       else
+       {
+               bdy << "  uint cluster[4] =\n"
+                       << "  {\n"
+                       << "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 0),\n"
+                       << "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 1),\n"
+                       << "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 2),\n"
+                       << "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 3)\n"
+                       << "  };\n"
+                       << "  uint rootID = gl_SubgroupInvocationID & ~0x3;\n"
+                       << "  for (uint i = 0; i < 4; i++)\n"
+                       << "  {\n"
+                       << "    uint nextID = rootID + i;\n"
+                       << "    if (subgroupBallotBitExtract(mask, nextID) && (cluster[i] != nextID))\n"
+                       << "    {\n"
+                       << "      tempResult = mask.x;\n"
+                       << "    }\n"
+                       << "  }\n";
+       }
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream src;
+
+               src << "#version 450\n"
+                       << extension
+                       << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+                       "local_size_z_id = 2) in;\n"
+                       << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                       << "{\n"
+                       << "  uint result[];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+                       << "  highp uint offset = globalSize.x * ((globalSize.y * "
+                       "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+                       "gl_GlobalInvocationID.x;\n"
+                       << bdy.str()
+                       << "  result[offset] = tempResult;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("comp")
+                               << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+       else
+       {
+               {
+                       const string vertex =
+                               "#version 450\n"
+                               + extension +
+                               "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               + bdy.str() +
+                               "  result[gl_VertexIndex] = tempResult;\n"
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+                               "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+                               "}\n";
+
+                       programCollection.glslSources.add("vert")
+                               << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const string tesc =
+                               "#version 450\n"
+                               + extension +
+                               "layout(vertices=1) out;\n"
+                               "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               + bdy.str() +
+                               "  result[gl_PrimitiveID] = 1;\n"
+                               "  if (gl_InvocationID == 0)\n"
+                               "  {\n"
+                               "    gl_TessLevelOuter[0] = 1.0f;\n"
+                               "    gl_TessLevelOuter[1] = 1.0f;\n"
+                               "  }\n"
+                               "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                               "}\n";
+
+                       programCollection.glslSources.add("tesc")
+                                       << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const string tese =
+                               "#version 450\n"
+                               + extension +
+                               "layout(isolines) in;\n"
+                               "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               + bdy.str() +
+                               "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = 1;\n"
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+                               "}\n";
+
+                       programCollection.glslSources.add("tese")
+                                       << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const string geometry =
+                               "#version 450\n"
+                               + extension +
+                               "layout(${TOPOLOGY}) in;\n"
+                               "layout(points, max_vertices = 1) out;\n"
+                               "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               + bdy.str() +
+                               "  result[gl_PrimitiveIDIn] = tempResult;\n"
+                               "  gl_Position = gl_in[0].gl_Position;\n"
+                               "  EmitVertex();\n"
+                               "  EndPrimitive();\n"
+                               "}\n";
+
+                       subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
+                                                                                                         programCollection.glslSources);
+               }
+
+               {
+                       const string fragment =
+                               "#version 450\n"
+                               + extension +
+                               "layout(location = 0) out uint result;\n"
+                               "void main (void)\n"
+                               "{\n"
+                               + bdy.str() +
+                               "  result = tempResult;\n"
+                               "}\n";
+
+                       programCollection.glslSources.add("fragment")
+                               << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+               subgroups::addNoSubgroupShader(programCollection);
+       }
+}
+
+void supportedCheck (Context& context, CaseDefinition caseDef) // Support gate for one shape case: throws NotSupportedError when a capability the case needs is missing, otherwise returns normally.
+{
+       if (!subgroups::isSubgroupSupported(context)) // No subgroup support at all: nothing in this group can run.
+               TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+
+       if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT)) // Checked unconditionally, whatever caseDef.opType is.
+       {
+               TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
+       }
+
+       if (OPTYPE_CLUSTERED == caseDef.opType) // Clustered cases additionally need the clustered feature bit.
+       {
+               if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_CLUSTERED_BIT))
+               {
+                       TCU_THROW(NotSupportedError, "Subgroup shape tests require that clustered operations are supported!");
+               }
+       }
+
+       if (OPTYPE_QUAD == caseDef.opType) // Quad cases additionally need the quad feature bit.
+       {
+               if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_QUAD_BIT))
+               {
+                       TCU_THROW(NotSupportedError, "Subgroup shape tests require that quad operations are supported!");
+               }
+       }
+}
+
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef) // Framebuffer variant of the shape test: runs the case for the single stage held in caseDef.shaderStage and returns its status.
+{
+       if (!subgroups::areSubgroupOperationsSupportedForStage(
+                               context, caseDef.shaderStage))
+       {
+               if (subgroups::areSubgroupOperationsRequiredForStage( // Missing support is a hard failure only when the helper reports the stage as required...
+                                       caseDef.shaderStage))
+               {
+                       return tcu::TestStatus::fail(
+                                          "Shader stage " +
+                                          subgroups::getShaderStageName(caseDef.shaderStage) +
+                                          " is required to support subgroup operations!");
+               }
+               else // ...otherwise the case is reported as not supported.
+               {
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+               }
+       }
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage) // Dispatch on the stage under test. NOTE(review): (DE_NULL, 0) presumably means "no extra input buffers" - confirm against the helper signatures.
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage) // Both tessellation stages go through the same helper; the trailing stage flag tells it which one is under test.
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context,  VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+       else // Any other stage value is a programming error in the case list.
+               TCU_THROW(InternalError, "Unhandled shader stage");
+}
+
+
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+       if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BASIC_BIT))
+       {
+               return tcu::TestStatus::fail(
+                                  "Subgroup feature " +
+                                  subgroups::getShaderStageName(VK_SUBGROUP_FEATURE_BASIC_BIT) +
+                                  " is a required capability!");
+       }
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+               {
+                       return tcu::TestStatus::fail(
+                                          "Shader stage " +
+                                          subgroups::getShaderStageName(caseDef.shaderStage) +
+                                          " is required to support subgroup operations!");
+               }
+               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkCompute);
+       }
+       else
+       {
+               VkPhysicalDeviceSubgroupProperties subgroupProperties;
+               subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+               subgroupProperties.pNext = DE_NULL;
+
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage  & subgroupProperties.supportedStages);
+
+               if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
+               {
+                       if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
+                               TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+                       else
+                               stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+               }
+
+               if ((VkShaderStageFlagBits)0u == stages)
+                       TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, stages);
+       }
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+tcu::TestCaseGroup* createSubgroupsShapeTests(tcu::TestContext& testCtx) // Builds the "shape" test group with its "graphics", "compute" and "framebuffer" sub-groups and returns it as a released raw pointer.
+{
+       de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+               testCtx, "graphics", "Subgroup shape category tests: graphics"));
+       de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+               testCtx, "compute", "Subgroup shape category tests: compute"));
+       de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+               testCtx, "framebuffer", "Subgroup shape category tests: framebuffer"));
+
+       const VkShaderStageFlags stages[] = // Stages that each get their own framebuffer case, per op type.
+       {
+               VK_SHADER_STAGE_VERTEX_BIT,
+               VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+               VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+               VK_SHADER_STAGE_GEOMETRY_BIT,
+       };
+
+       for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex) // One compute case, one all-graphics case and one framebuffer case per stage for every op type.
+       {
+               const std::string op = de::toLower(getOpTypeName(opTypeIndex)); // Lower-cased op name is used as the case name.
+
+               {
+                       const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT};
+                       addFunctionCaseWithPrograms(computeGroup.get(), op, "", supportedCheck, initPrograms, test, caseDef);
+
+               }
+
+               {
+                       const CaseDefinition caseDef =
+                       {
+                               opTypeIndex,
+                               VK_SHADER_STAGE_ALL_GRAPHICS
+                       };
+                       addFunctionCaseWithPrograms(graphicGroup.get(),
+                                                                       op, "",
+                                                                       supportedCheck, initPrograms, test, caseDef);
+               }
+
+               for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
+               {
+                       const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex]};
+                       addFunctionCaseWithPrograms(framebufferGroup.get(),op + "_" + getShaderStageName(caseDef.shaderStage), "", // Case name is "<op>_<stage name>".
+                                                                               supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+               }
+       }
+
+       de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+               testCtx, "shape", "Subgroup shape category tests"));
+
+       group->addChild(graphicGroup.release()); // Each sub-group pointer is released from its MovePtr and handed to the parent group.
+       group->addChild(computeGroup.release());
+       group->addChild(framebufferGroup.release());
+
+       return group.release(); // The returned pointer is no longer owned by the MovePtr.
+}
+
+} // subgroups
+} // vkt
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsShapeTests.hpp b/external/openglcts/modules/common/subgroups/glcSubgroupsShapeTests.hpp
new file mode 100644 (file)
index 0000000..8626a66
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef _VKTSUBGROUPSSHAPETESTS_HPP
+#define _VKTSUBGROUPSSHAPETESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+tcu::TestCaseGroup* createSubgroupsShapeTests(tcu::TestContext& testCtx); // Creates the subgroup "shape" test group for testCtx and returns it as a raw pointer.
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSSHAPETESTS_HPP
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsShuffleTests.cpp b/external/openglcts/modules/common/subgroups/glcSubgroupsShuffleTests.cpp
new file mode 100755 (executable)
index 0000000..749b9b2
--- /dev/null
@@ -0,0 +1,666 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsShuffleTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+enum OpType // Shuffle operation under test; values are also used as array indices and are mapped to GLSL function names by getOpTypeName().
+{
+       OPTYPE_SHUFFLE = 0, // subgroupShuffle
+       OPTYPE_SHUFFLE_XOR, // subgroupShuffleXor
+       OPTYPE_SHUFFLE_UP, // subgroupShuffleUp
+       OPTYPE_SHUFFLE_DOWN, // subgroupShuffleDown
+       OPTYPE_LAST // Number of op types; not an operation itself.
+};
+
+static bool checkVertexPipelineStages(std::vector<const void*> datas, // Result checker for the graphics/framebuffer paths; forwards to vkt::subgroups::check.
+                                                                         deUint32 width, deUint32) // The unnamed third argument is ignored.
+{
+       return vkt::subgroups::check(datas, width, 1); // NOTE(review): the literal 1 is presumably the value every result element must hold - confirm against subgroups::check.
+}
+
+static bool checkCompute(std::vector<const void*> datas, // Result checker for the compute path; forwards to vkt::subgroups::checkCompute.
+                                                const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                                                deUint32) // The unnamed last argument is ignored.
+{
+       return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1); // NOTE(review): the literal 1 is presumably the value every result element must hold - confirm against subgroups::checkCompute.
+}
+
+std::string getOpTypeName(int opType) // Maps an OpType value to the name of the GLSL shuffle function it tests.
+{
+       switch (opType)
+       {
+               default: // Unknown value: DE_FATAL is invoked, and "" is returned if execution continues past it.
+                       DE_FATAL("Unsupported op type");
+                       return "";
+               case OPTYPE_SHUFFLE:
+                       return "subgroupShuffle";
+               case OPTYPE_SHUFFLE_XOR:
+                       return "subgroupShuffleXor";
+               case OPTYPE_SHUFFLE_UP:
+                       return "subgroupShuffleUp";
+               case OPTYPE_SHUFFLE_DOWN:
+                       return "subgroupShuffleDown";
+       }
+}
+
+struct CaseDefinition // Parameters of a single shuffle test case.
+{
+       int                                     opType; // One of the OpType values.
+       VkShaderStageFlags      shaderStage; // Stage (or stage mask) the case is generated and run for.
+       VkFormat                        format; // Format of the shuffled data; selects the GLSL type of data1 via getFormatNameForGLSL.
+};
+
+const std::string to_string(int x) { // Formats x as text by streaming it into an ostringstream.
+       std::ostringstream oss;
+       oss << x;
+       return oss.str();
+}
+
+const std::string DeclSource(CaseDefinition caseDef, int baseBinding) // Returns GLSL declarations of the two read-only std430 input buffers, placed at bindings baseBinding and baseBinding + 1.
+{
+       return
+               "layout(set = 0, binding = " + to_string(baseBinding) + ", std430) readonly buffer Buffer2\n"
+               "{\n"
+               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data1[];\n" // data1: values to shuffle, typed after caseDef.format.
+               "};\n"
+               "layout(set = 0, binding = " + to_string(baseBinding + 1) + ", std430) readonly buffer Buffer3\n"
+               "{\n"
+               "  uint data2[];\n" // data2: uint values that TestSource() turns into the shuffle id/delta argument.
+               "};\n";
+}
+
+const std::string TestSource(CaseDefinition caseDef) // Returns the GLSL statements shared by every stage: they run the shuffle op and leave the verdict in "temp_res".
+{
+       std::string                                             idTable[OPTYPE_LAST]; // Per op type: GLSL expression for the invocation id the shuffled value is compared against.
+       idTable[OPTYPE_SHUFFLE]                 = "id_in";
+       idTable[OPTYPE_SHUFFLE_XOR]             = "gl_SubgroupInvocationID ^ id_in";
+       idTable[OPTYPE_SHUFFLE_UP]              = "gl_SubgroupInvocationID - id_in";
+       idTable[OPTYPE_SHUFFLE_DOWN]    = "gl_SubgroupInvocationID + id_in";
+
+       const std::string testSource =
+               "  uint temp_res;\n"
+               "  uvec4 mask = subgroupBallot(true);\n" // Ballot of the invocations executing this statement.
+               "  uint id_in = data2[gl_SubgroupInvocationID] & (gl_SubgroupSize - 1);\n" // NOTE(review): the mask only keeps id_in below gl_SubgroupSize if the size is a power of two - confirm that is guaranteed.
+               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " op = "
+               + getOpTypeName(caseDef.opType) + "(data1[gl_SubgroupInvocationID], id_in);\n"
+               "  uint id = " + idTable[caseDef.opType] + ";\n"
+               "  if ((id < gl_SubgroupSize) && subgroupBallotBitExtract(mask, id))\n" // Verify only when the computed id is in range and its bit is set in the ballot.
+               "  {\n"
+               "    temp_res = (op == data1[id]) ? 1 : 0;\n"
+               "  }\n"
+               "  else\n"
+               "  {\n"
+               "    temp_res = 1; // Invocation we read from was inactive, so we can't verify results!\n"
+               "  }\n";
+
+       return testSource;
+}
+
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef) // Registers the GLSL sources for a framebuffer shuffle case whose stage under test is caseDef.shaderStage.
+{
+       const vk::ShaderBuildOptions    buildOptions    (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+
+       subgroups::setFragmentShaderFrameBuffer(programCollection); // The helper-provided fragment shader is added for every stage under test.
+
+       if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage) // The helper-provided vertex shader is added unless the vertex stage itself is under test.
+               subgroups::setVertexShaderFrameBuffer(programCollection);
+
+       const std::string extSource = // Shuffle/ShuffleXor enable the shuffle extension; every other op type enables shuffle_relative.
+       (OPTYPE_SHUFFLE == caseDef.opType || OPTYPE_SHUFFLE_XOR == caseDef.opType) ?
+               "#extension GL_KHR_shader_subgroup_shuffle: enable\n" :
+               "#extension GL_KHR_shader_subgroup_shuffle_relative: enable\n";
+
+       const std::string testSource = TestSource(caseDef); // Stage-independent test body; leaves its verdict in temp_res.
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream vertexSrc;
+               vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "layout(location = 0) in highp vec4 in_position;\n"
+                       << "layout(location = 0) out float result;\n"
+                       << extSource // NOTE(review): here the #extension lines follow the in/out declarations, while the other stages emit them right after the version line - confirm the target compilers accept this order.
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n" // Inputs are uniform blocks with arrays sized by maxSupportedSubgroupSize().
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "layout(set = 0, binding = 1) uniform Buffer2\n"
+                       << "{\n"
+                       << "  uint data2[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << testSource
+                       << "  result = temp_res;\n" // The verdict is passed on through the float output at location 0.
+                       << "  gl_Position = in_position;\n"
+                       << "  gl_PointSize = 1.0f;\n"
+                       << "}\n";
+               programCollection.glslSources.add("vert")
+                       << glu::VertexSource(vertexSrc.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream geometry;
+
+               geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << extSource
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(points) in;\n"
+                       << "layout(points, max_vertices = 1) out;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "layout(set = 0, binding = 1) uniform Buffer2\n"
+                       << "{\n"
+                       << "  uint data2[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << testSource
+                       << "  out_color = temp_res;\n"
+                       << "  gl_Position = gl_in[0].gl_Position;\n"
+                       << "  EmitVertex();\n"
+                       << "  EndPrimitive();\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("geometry")
+                       << glu::GeometrySource(geometry.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream controlSource;
+
+               controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << extSource
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(vertices = 2) out;\n"
+                       << "layout(location = 0) out float out_color[];\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "layout(set = 0, binding = 1) uniform Buffer2\n"
+                       << "{\n"
+                       << "  uint data2[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  if (gl_InvocationID == 0)\n" // Only invocation 0 writes the outer tessellation levels.
+                       <<"  {\n"
+                       << "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       << "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       << "  }\n"
+                       << testSource
+                       << "  out_color[gl_InvocationID] = temp_res;\n"
+                       << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("tesc")
+                       << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+               subgroups::setTesEvalShaderFrameBuffer(programCollection); // The tested control shader is paired with the helper-provided evaluation shader.
+
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream evaluationSource;
+               evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << extSource
+                       << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+                       << "layout(isolines, equal_spacing, ccw ) in;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "layout(set = 0, binding = 1) uniform Buffer2\n"
+                       << "{\n"
+                       << "  uint data2[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << testSource
+                       << "  out_color = temp_res;\n"
+                       << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                       << "}\n";
+
+               subgroups::setTesCtrlShaderFrameBuffer(programCollection); // The tested evaluation shader is paired with the helper-provided control shader.
+               programCollection.glslSources.add("tese")
+                               << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+       }
+       else // Only the four stages above have a framebuffer variant.
+       {
+               DE_FATAL("Unsupported shader stage");
+       }
+}
+
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef) // Registers the GLSL sources for an SSBO-based shuffle case: one compute shader, or one shader per graphics stage.
+{
+       const std::string vSource =
+               "#version 450\n"
+               "#extension GL_KHR_shader_subgroup_ballot: enable\n";
+       const std::string eSource = // Shuffle/ShuffleXor enable the shuffle extension; every other op type enables shuffle_relative.
+       (OPTYPE_SHUFFLE == caseDef.opType || OPTYPE_SHUFFLE_XOR == caseDef.opType) ?
+               "#extension GL_KHR_shader_subgroup_shuffle: enable\n" :
+               "#extension GL_KHR_shader_subgroup_shuffle_relative: enable\n";
+       const std::string extSource = vSource + eSource; // Common shader prologue: version line plus both extension enables.
+
+       const std::string testSource = TestSource(caseDef); // Stage-independent test body; leaves its verdict in temp_res.
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream src;
+
+       src << extSource
+                       << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n" // Local size is taken from specialization constants 0..2.
+                       << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                       << "{\n"
+                       << "  uint result[];\n"
+                       << "};\n"
+                       << DeclSource(caseDef, 1) // Input buffers at bindings 1 and 2.
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+                       << "  highp uint offset = globalSize.x * ((globalSize.y * " // Flattens the 3D global invocation id into an index into result[].
+                       "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+                       "gl_GlobalInvocationID.x;\n"
+                       << testSource
+                       << "  result[offset] = temp_res;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("comp")
+                               << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+       else
+       {
+               const std::string declSource = DeclSource(caseDef, 4); // Every graphics stage reads the same inputs at bindings 4 and 5; result buffers use bindings 0..3.
+
+               {
+                       const string vertex =
+                               extSource +
+                               "layout(set = 0, binding = 0, std430) buffer Buffer1\n" // Vertex results: binding 0.
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               + declSource +
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               + testSource +
+                               "  result[gl_VertexIndex] = temp_res;\n"
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+                               "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n" // Each vertex gets its own x position, pixelSize apart.
+                               "  gl_PointSize = 1.0f;\n"
+                               "}\n";
+
+                       programCollection.glslSources.add("vert")
+                               << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const string tesc =
+                               extSource +
+                               "layout(vertices=1) out;\n"
+                               "layout(set = 0, binding = 1, std430)  buffer Buffer1\n" // Tessellation control results: binding 1.
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               + declSource +
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               + testSource +
+                               "  result[gl_PrimitiveID] = temp_res;\n"
+                               "  if (gl_InvocationID == 0)\n"
+                               "  {\n"
+                               "    gl_TessLevelOuter[0] = 1.0f;\n"
+                               "    gl_TessLevelOuter[1] = 1.0f;\n"
+                               "  }\n"
+                               "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                               "}\n";
+
+                       programCollection.glslSources.add("tesc")
+                                       << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const string tese =
+                               extSource +
+                               "layout(isolines) in;\n"
+                               "layout(set = 0, binding = 2, std430) buffer Buffer1\n" // Tessellation evaluation results: binding 2.
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               + declSource +
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               + testSource +
+                               "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = temp_res;\n" // Two result slots per primitive, selected by gl_TessCoord.x.
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+                               "}\n";
+
+                       programCollection.glslSources.add("tese")
+                                       << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               {
+                       const string geometry =
+                               extSource +
+                               "layout(${TOPOLOGY}) in;\n" // NOTE(review): ${TOPOLOGY} is presumably substituted by addGeometryShadersFromTemplate - confirm in the helper.
+                               "layout(points, max_vertices = 1) out;\n"
+                               "layout(set = 0, binding = 3, std430) buffer Buffer1\n" // Geometry results: binding 3.
+                               "{\n"
+                               "  uint result[];\n"
+                               "};\n"
+                               + declSource +
+                               "\n"
+                               "void main (void)\n"
+                               "{\n"
+                               + testSource +
+                               "  result[gl_PrimitiveIDIn] = temp_res;\n"
+                               "  gl_Position = gl_in[0].gl_Position;\n"
+                               "  EmitVertex();\n"
+                               "  EndPrimitive();\n"
+                               "}\n";
+
+                       subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
+                                                                                                         programCollection.glslSources);
+               }
+               {
+                       const string fragment =
+                               extSource +
+                               "layout(location = 0) out uint result;\n" // The fragment stage writes its verdict to the colour output instead of a buffer.
+                               + declSource +
+                               "void main (void)\n"
+                               "{\n"
+                               + testSource +
+                               "  result = temp_res;\n"
+                               "}\n";
+
+                       programCollection.glslSources.add("fragment")
+                               << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+               }
+
+               subgroups::addNoSubgroupShader(programCollection); // NOTE(review): presumably adds shader variants without subgroup code for stages that cannot use subgroups - confirm in the helper.
+       }
+}
+
+void supportedCheck (Context& context, CaseDefinition caseDef) // Support gate for one shuffle case: throws NotSupportedError when a capability the case needs is missing, otherwise returns normally.
+{
+       if (!subgroups::isSubgroupSupported(context)) // No subgroup support at all: nothing in this group can run.
+               TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+
+       switch (caseDef.opType)
+       {
+               case OPTYPE_SHUFFLE: // Shuffle and ShuffleXor need the shuffle feature bit.
+               case OPTYPE_SHUFFLE_XOR:
+                       if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_SHUFFLE_BIT))
+                       {
+                               TCU_THROW(NotSupportedError, "Device does not support subgroup shuffle operations");
+                       }
+                       break;
+               default: // Every other op type needs the shuffle-relative feature bit.
+                       if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT))
+                       {
+                               TCU_THROW(NotSupportedError, "Device does not support subgroup shuffle relative operations");
+                       }
+                       break;
+       }
+
+       if (subgroups::isDoubleFormat(caseDef.format) && // Cases using a double format additionally need device support for doubles.
+                       !subgroups::isDoubleSupportedForDevice(context))
+               TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+}
+
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef) // Runs one shuffle case through the per-stage make*FrameBufferTest helper selected by caseDef.shaderStage.
+{
+       if (!subgroups::areSubgroupOperationsSupportedForStage( // Stage-level support check comes first.
+                               context, caseDef.shaderStage))
+       {
+               if (subgroups::areSubgroupOperationsRequiredForStage( // Missing support on a required stage is a failure, not a skip.
+                                       caseDef.shaderStage))
+               {
+                       return tcu::TestStatus::fail(
+                                          "Shader stage " +
+                                          subgroups::getShaderStageName(caseDef.shaderStage) +
+                                          " is required to support subgroup operations!");
+               }
+               else
+               {
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage"); // Optional stage: report as not supported.
+               }
+       }
+
+       subgroups::SSBOData inputData[2]; // Two inputs, both with maxSupportedSubgroupSize() elements.
+       inputData[0].format = caseDef.format; // [0]: elements in the format under test.
+       inputData[0].numElements = subgroups::maxSupportedSubgroupSize();
+       inputData[0].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+       inputData[1].format = VK_FORMAT_R32_UINT; // [1]: 32-bit unsigned elements; presumably the shuffle index/delta operand - confirm against the shader source.
+       inputData[1].numElements = inputData[0].numElements;
+       inputData[1].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage) // All branches use an R32_UINT result format and checkVertexPipelineStages as the verifier.
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 2, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 2, checkVertexPipelineStages);
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage) // NOTE(review): control reuses the ...Evaluation... helper; only the trailing stage argument differs.
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 2, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context,  VK_FORMAT_R32_UINT, inputData, 2, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+       else
+               TCU_THROW(InternalError, "Unhandled shader stage"); // Any other stage value is treated as a test-code error.
+}
+
+
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef) // Runs one shuffle case: makeComputeTest for the compute stage, allStages for everything else.
+{
+       switch (caseDef.opType) // Same per-op feature gating that supportedCheck performs.
+       {
+               case OPTYPE_SHUFFLE:
+               case OPTYPE_SHUFFLE_XOR:
+                       if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_SHUFFLE_BIT))
+                       {
+                               TCU_THROW(NotSupportedError, "Device does not support subgroup shuffle operations");
+                       }
+                       break;
+               default:
+                       if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT))
+                       {
+                               TCU_THROW(NotSupportedError, "Device does not support subgroup shuffle relative operations");
+                       }
+                       break;
+       }
+
+       if (subgroups::isDoubleFormat(caseDef.format) && !subgroups::isDoubleSupportedForDevice(context))
+       {
+               TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+       }
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage) // Compute path.
+       {
+               if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage)) // Unlike noSSBOtest there is no NotSupported branch: lack of compute support is always a failure.
+               {
+                       return tcu::TestStatus::fail(
+                                          "Shader stage " +
+                                          subgroups::getShaderStageName(caseDef.shaderStage) +
+                                          " is required to support subgroup operations!");
+               }
+               subgroups::SSBOData inputData[2]; // Same two inputs as noSSBOtest; binding/stages members are not assigned on this path.
+               inputData[0].format = caseDef.format;
+               inputData[0].numElements = subgroups::maxSupportedSubgroupSize();
+               inputData[0].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+               inputData[1].format = VK_FORMAT_R32_UINT;
+               inputData[1].numElements = inputData[0].numElements;
+               inputData[1].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputData, 2, checkCompute);
+       }
+
+       else // Graphics path.
+       {
+               VkPhysicalDeviceSubgroupProperties subgroupProperties; // Chained into properties.pNext below so the query fills it in.
+               subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+               subgroupProperties.pNext = DE_NULL;
+
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage  & subgroupProperties.supportedStages); // Keep only the requested stages that the device lists in supportedStages.
+
+               if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context)) // Anything other than fragment-only also needs vertex-stage SSBO support.
+               {
+                       if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0) // No fragment stage left to fall back to.
+                               TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+                       else
+                               stages = VK_SHADER_STAGE_FRAGMENT_BIT; // Fall back to testing the fragment stage alone.
+               }
+
+               if ((VkShaderStageFlagBits)0u == stages) // None of the requested stages is supported.
+                       TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+               subgroups::SSBOData inputData[2];
+               inputData[0].format                     = caseDef.format;
+               inputData[0].numElements        = subgroups::maxSupportedSubgroupSize();
+               inputData[0].initializeType     = subgroups::SSBOData::InitializeNonZero;
+               inputData[0].binding            = 4u; // NOTE(review): bindings 4 and 5 presumably sit after buffers declared by the shaders - confirm against initPrograms.
+               inputData[0].stages                     = stages;
+
+               inputData[1].format                     = VK_FORMAT_R32_UINT;
+               inputData[1].numElements        = inputData[0].numElements;
+               inputData[1].initializeType     = subgroups::SSBOData::InitializeNonZero;
+               inputData[1].binding            = 5u;
+               inputData[1].stages                     = stages;
+
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, inputData, 2, checkVertexPipelineStages, stages);
+       }
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+tcu::TestCaseGroup* createSubgroupsShuffleTests(tcu::TestContext& testCtx) // Builds the "shuffle" group with "graphics", "compute" and "framebuffer" child groups.
+{
+
+       de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+               testCtx, "graphics", "Subgroup shuffle category tests: graphics"));
+       de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+               testCtx, "compute", "Subgroup shuffle category tests: compute"));
+       de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+               testCtx, "framebuffer", "Subgroup shuffle category tests: framebuffer"));
+
+       const VkFormat formats[] = // Every entry is combined with every op type below.
+       {
+               VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
+               VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
+               VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
+               VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
+               VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
+               VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
+               VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
+               VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED, // The *_USCALED entries stand in for bool/bvec* (see the comment in getFormatSizeInBytes in the subgroups test utils).
+               VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
+       };
+
+       const VkShaderStageFlags stages[] = // Stages that each get their own case in the framebuffer group.
+       {
+               VK_SHADER_STAGE_VERTEX_BIT,
+               VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+               VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+               VK_SHADER_STAGE_GEOMETRY_BIT,
+       };
+
+       for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
+       {
+               const VkFormat format = formats[formatIndex];
+
+               for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex) // Iterates op types 0 .. OPTYPE_LAST-1.
+               {
+
+                       const string name = // Case name: "<lower-cased op type name>_<GLSL format name>".
+                               de::toLower(getOpTypeName(opTypeIndex)) +
+                               "_" + subgroups::getFormatNameForGLSL(format);
+
+                       {
+                               const CaseDefinition caseDef = // Graphics case: requests all graphics stages at once.
+                               {
+                                       opTypeIndex,
+                                       VK_SHADER_STAGE_ALL_GRAPHICS,
+                                       format
+                               };
+                               addFunctionCaseWithPrograms(graphicGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
+                       }
+
+                       {
+                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format}; // Compute case: same name, compute stage only.
+                               addFunctionCaseWithPrograms(computeGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
+                       }
+
+                       for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex) // Framebuffer cases: one per stage, name suffixed with the stage name.
+                       {
+                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
+                               addFunctionCaseWithPrograms(framebufferGroup.get(), name + "_" + getShaderStageName(caseDef.shaderStage), "",
+                                                                                       supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef); // Uses the framebuffer program set and noSSBOtest instead of initPrograms/test.
+                       }
+               }
+       }
+
+       de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+               testCtx, "shuffle", "Subgroup shuffle category tests"));
+
+       group->addChild(graphicGroup.release()); // Each child pointer is released from its MovePtr and handed to addChild.
+       group->addChild(computeGroup.release());
+       group->addChild(framebufferGroup.release());
+
+       return group.release(); // The returned raw pointer is no longer managed by the MovePtr.
+}
+
+} // subgroups
+} // vkt
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsShuffleTests.hpp b/external/openglcts/modules/common/subgroups/glcSubgroupsShuffleTests.hpp
new file mode 100644 (file)
index 0000000..9b8831b
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef _VKTSUBGROUPSSHUFFLETESTS_HPP
+#define _VKTSUBGROUPSSHUFFLETESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+tcu::TestCaseGroup* createSubgroupsShuffleTests(tcu::TestContext& testCtx); // Builds and returns the "shuffle" subgroup test group as a raw pointer (released from its de::MovePtr in the definition).
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSSHUFFLETESTS_HPP
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsTests.cpp b/external/openglcts/modules/common/subgroups/glcSubgroupsTests.cpp
new file mode 100755 (executable)
index 0000000..916f903
--- /dev/null
@@ -0,0 +1,77 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsTests.hpp"
+#include "vktSubgroupsBuiltinVarTests.hpp"
+#include "vktSubgroupsBuiltinMaskVarTests.hpp"
+#include "vktSubgroupsBasicTests.hpp"
+#include "vktSubgroupsVoteTests.hpp"
+#include "vktSubgroupsBallotTests.hpp"
+#include "vktSubgroupsBallotBroadcastTests.hpp"
+#include "vktSubgroupsBallotOtherTests.hpp"
+#include "vktSubgroupsArithmeticTests.hpp"
+#include "vktSubgroupsClusteredTests.hpp"
+#include "vktSubgroupsPartitionedTests.hpp"
+#include "vktSubgroupsShuffleTests.hpp"
+#include "vktSubgroupsQuadTests.hpp"
+#include "vktSubgroupsShapeTests.hpp"
+#include "vktTestGroupUtil.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+namespace
+{
+
+void createChildren(tcu::TestCaseGroup* subgroupsTests) // Populates subgroupsTests with one child group per subgroup test category.
+{
+       tcu::TestContext& testCtx = subgroupsTests->getTestContext(); // Children are created against the parent group's own test context.
+
+       subgroupsTests->addChild(createSubgroupsBuiltinVarTests(testCtx)); // Children are added in this fixed order.
+       subgroupsTests->addChild(createSubgroupsBuiltinMaskVarTests(testCtx));
+       subgroupsTests->addChild(createSubgroupsBasicTests(testCtx));
+       subgroupsTests->addChild(createSubgroupsVoteTests(testCtx));
+       subgroupsTests->addChild(createSubgroupsBallotTests(testCtx));
+       subgroupsTests->addChild(createSubgroupsBallotBroadcastTests(testCtx));
+       subgroupsTests->addChild(createSubgroupsBallotOtherTests(testCtx));
+       subgroupsTests->addChild(createSubgroupsArithmeticTests(testCtx));
+       subgroupsTests->addChild(createSubgroupsClusteredTests(testCtx));
+       subgroupsTests->addChild(createSubgroupsPartitionedTests(testCtx));
+       subgroupsTests->addChild(createSubgroupsShuffleTests(testCtx));
+       subgroupsTests->addChild(createSubgroupsQuadTests(testCtx));
+       subgroupsTests->addChild(createSubgroupsShapeTests(testCtx));
+}
+
+} // anonymous
+
+tcu::TestCaseGroup* createTests(tcu::TestContext& testCtx) // Entry point: returns the top-level "subgroups" test group.
+{
+       return createTestGroup( // createChildren is passed as the callback that fills the new group.
+                          testCtx, "subgroups", "Subgroups tests", createChildren);
+}
+
+} // subgroups
+} // vkt
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsTests.hpp b/external/openglcts/modules/common/subgroups/glcSubgroupsTests.hpp
new file mode 100644 (file)
index 0000000..37afda0
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef _VKTSUBGROUPSTESTS_HPP
+#define _VKTSUBGROUPSTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "tcuTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+tcu::TestCaseGroup* createTests(tcu::TestContext& testCtx); // Returns the top-level "subgroups" test group (the definition builds it with createTestGroup).
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSTESTS_HPP
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsTestsUtils.cpp b/external/openglcts/modules/common/subgroups/glcSubgroupsTestsUtils.cpp
new file mode 100644 (file)
index 0000000..b113799
--- /dev/null
@@ -0,0 +1,2833 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests Utils
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsTestsUtils.hpp"
+#include "deRandom.hpp"
+#include "tcuCommandLine.hpp"
+#include "tcuStringTemplate.hpp"
+#include "vkBarrierUtil.hpp"
+#include "vkImageUtil.hpp"
+#include "vkTypeUtil.hpp"
+#include "vkCmdUtil.hpp"
+#include "vkObjUtil.hpp"
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+deUint32 getFormatSizeInBytes(const VkFormat format) // Maps each VkFormat used by the subgroup tests to a per-element byte size.
+{
+       switch (format)
+       {
+               default: // Unlisted formats hit DE_FATAL; 0 is returned if execution continues past it.
+                       DE_FATAL("Unhandled format!");
+                       return 0;
+               case VK_FORMAT_R32_SINT:
+               case VK_FORMAT_R32_UINT:
+                       return sizeof(deInt32);
+               case VK_FORMAT_R32G32_SINT:
+               case VK_FORMAT_R32G32_UINT:
+                       return static_cast<deUint32>(sizeof(deInt32) * 2);
+               case VK_FORMAT_R32G32B32_SINT: // Three-component formats report the same size as four-component ones; presumably to match GLSL vec3 buffer alignment - confirm.
+               case VK_FORMAT_R32G32B32_UINT:
+               case VK_FORMAT_R32G32B32A32_SINT:
+               case VK_FORMAT_R32G32B32A32_UINT:
+                       return static_cast<deUint32>(sizeof(deInt32) * 4);
+               case VK_FORMAT_R32_SFLOAT:
+                       return 4;
+               case VK_FORMAT_R32G32_SFLOAT:
+                       return 8;
+               case VK_FORMAT_R32G32B32_SFLOAT: // Same size as the RGBA float format below.
+                       return 16;
+               case VK_FORMAT_R32G32B32A32_SFLOAT:
+                       return 16;
+               case VK_FORMAT_R64_SFLOAT:
+                       return 8;
+               case VK_FORMAT_R64G64_SFLOAT:
+                       return 16;
+               case VK_FORMAT_R64G64B64_SFLOAT: // Same size as the RGBA double format below.
+                       return 32;
+               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                       return 32;
+               // The below formats are used to represent bool and bvec* types. These
+               // types are passed to the shader as int and ivec* types, before the
+               // calculations are done as booleans. We need a distinct type here so
+               // that the shader generators can switch on it and generate the correct
+               // shader source for testing.
+               case VK_FORMAT_R8_USCALED: // Sized as deInt32 multiples, not bytes, per the note above.
+                       return sizeof(deInt32);
+               case VK_FORMAT_R8G8_USCALED:
+                       return static_cast<deUint32>(sizeof(deInt32) * 2);
+               case VK_FORMAT_R8G8B8_USCALED:
+               case VK_FORMAT_R8G8B8A8_USCALED:
+                       return static_cast<deUint32>(sizeof(deInt32) * 4);
+       }
+}
+
+Move<VkPipelineLayout> makePipelineLayout( // Creates a pipeline layout with exactly one descriptor set layout and no push-constant ranges.
+       Context& context, const VkDescriptorSetLayout descriptorSetLayout)
+{
+       const vk::VkPipelineLayoutCreateInfo pipelineLayoutParams = {
+               VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
+               DE_NULL,                          // const void*            pNext;
+               0u,                                       // VkPipelineLayoutCreateFlags    flags;
+               1u,                                       // deUint32             setLayoutCount;
+               &descriptorSetLayout, // const VkDescriptorSetLayout*   pSetLayouts;
+               0u,                                       // deUint32             pushConstantRangeCount;
+               DE_NULL, // const VkPushConstantRange*   pPushConstantRanges;
+       };
+       return createPipelineLayout(context.getDeviceInterface(), // Created on the context's device; the handle is returned wrapped in Move<>.
+                                                               context.getDevice(), &pipelineLayoutParams);
+}
+
+Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format) // Creates a render pass with one subpass and one color attachment of the given format.
+{
+       VkAttachmentReference colorReference = { // Attachment index 0, in COLOR_ATTACHMENT_OPTIMAL layout.
+               0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
+       };
+
+       const VkSubpassDescription subpassDescription = {0u, // Graphics subpass: one color attachment; no input, resolve, depth/stencil or preserve attachments.
+                                                                                                        VK_PIPELINE_BIND_POINT_GRAPHICS, 0, DE_NULL, 1, &colorReference,
+                                                                                                        DE_NULL, DE_NULL, 0, DE_NULL
+                                                                                                       };
+
+       const VkSubpassDependency subpassDependencies[2] = { // [0]: external -> subpass 0; [1]: subpass 0 -> external; both by-region.
+               {   VK_SUBPASS_EXTERNAL, 0u, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                       VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+                       VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+                       VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+                       VK_DEPENDENCY_BY_REGION_BIT
+               },
+               {   0u, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+                       VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                       VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+                       VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+                       VK_ACCESS_MEMORY_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT
+               },
+       };
+
+       VkAttachmentDescription attachmentDescription = {0u, format, // Single-sampled; cleared on load and stored; stencil load/store are don't-care.
+                                                                                                        VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR,
+                                                                                                        VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+                                                                                                        VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED,
+                                                                                                        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL // Final layout; presumably so the image can be copied out for checking - confirm with callers.
+                                                                                                       };
+
+       const VkRenderPassCreateInfo renderPassCreateInfo = { // Counts: 1 attachment, 1 subpass, 2 dependencies.
+               VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, DE_NULL, 0u, 1,
+               &attachmentDescription, 1, &subpassDescription, 2, subpassDependencies
+       };
+
+       return createRenderPass(context.getDeviceInterface(), context.getDevice(),
+                                                       &renderPassCreateInfo);
+}
+
+Move<VkFramebuffer> makeFramebuffer(Context& context, // Creates a framebuffer for renderPass with imageView as its only attachment.
+                                                                       const VkRenderPass renderPass, const VkImageView imageView, deUint32 width,
+                                                                       deUint32 height)
+{
+       const VkFramebufferCreateInfo framebufferCreateInfo = { // One attachment, width x height, one layer.
+               VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, DE_NULL, 0u, renderPass, 1,
+               &imageView, width, height, 1
+       };
+
+       return createFramebuffer(context.getDeviceInterface(), context.getDevice(),
+                                                        &framebufferCreateInfo);
+}
+
+Move<VkPipeline> makeGraphicsPipeline(Context&                                                                 context,
+                                                                         const VkPipelineLayout                                        pipelineLayout,
+                                                                         const VkShaderStageFlags                                      stages,
+                                                                         const VkShaderModule                                          vertexShaderModule,
+                                                                         const VkShaderModule                                          fragmentShaderModule,
+                                                                         const VkShaderModule                                          geometryShaderModule,
+                                                                         const VkShaderModule                                          tessellationControlModule,
+                                                                         const VkShaderModule                                          tessellationEvaluationModule,
+                                                                         const VkRenderPass                                            renderPass,
+                                                                         const VkPrimitiveTopology                                     topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+                                                                         const VkVertexInputBindingDescription*        vertexInputBindingDescription = DE_NULL,
+                                                                         const VkVertexInputAttributeDescription*      vertexInputAttributeDescriptions = DE_NULL,
+                                                                         const bool                                                            frameBufferTests = false,
+                                                                         const vk::VkFormat                                            attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT)
+{ // Thin wrapper over vk::makeGraphicsPipeline: builds vertex-input and color-blend state, then forwards the shader modules.
+       std::vector<VkViewport> noViewports; // Left empty and passed through; NOTE(review): presumably vk::makeGraphicsPipeline then treats viewport/scissor as dynamic - confirm.
+       std::vector<VkRect2D>   noScissors;
+
+       const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
+       {
+               VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,      // VkStructureType                                                              sType;
+               DE_NULL,                                                                                                        // const void*                                                                  pNext;
+               0u,                                                                                                                     // VkPipelineVertexInputStateCreateFlags                flags;
+               vertexInputBindingDescription == DE_NULL ? 0u : 1u,                     // deUint32                                                                             vertexBindingDescriptionCount;
+               vertexInputBindingDescription,                                                          // const VkVertexInputBindingDescription*               pVertexBindingDescriptions;
+               vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u,          // deUint32                                                                             vertexAttributeDescriptionCount;
+               vertexInputAttributeDescriptions,                                                       // const VkVertexInputAttributeDescription*             pVertexAttributeDescriptions;
+       };
+
+       const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order); // Channel count derived from the attachment format's channel order.
+       const VkColorComponentFlags colorComponent = // Write mask enables only the first numChannels of R, G, B, A.
+                                                                                               numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
+                                                                                               numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
+                                                                                               numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
+                                                                                               VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
+
+       const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
+       {
+               VK_FALSE, VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD, // blendEnable is VK_FALSE, i.e. blending is off for this attachment.
+               VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
+               colorComponent
+       };
+
+       const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
+       {
+               VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, DE_NULL, 0u,
+               VK_FALSE, VK_LOGIC_OP_CLEAR, 1, &colorBlendAttachmentState, // logicOpEnable is VK_FALSE; exactly one attachment state.
+               { 0.0f, 0.0f, 0.0f, 0.0f }
+       };
+
+       const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u; // Parsed as ((FRAGMENT_BIT & stages) && frameBufferTests); this is the only use of `stages` in this function.
+
+       return vk::makeGraphicsPipeline(context.getDeviceInterface(),   // const DeviceInterface&                        vk
+                                                                       context.getDevice(),                    // const VkDevice                                device
+                                                                       pipelineLayout,                                 // const VkPipelineLayout                        pipelineLayout
+                                                                       vertexShaderModule,                             // const VkShaderModule                          vertexShaderModule
+                                                                       tessellationControlModule,              // const VkShaderModule                          tessellationControlShaderModule
+                                                                       tessellationEvaluationModule,   // const VkShaderModule                          tessellationEvalShaderModule
+                                                                       geometryShaderModule,                   // const VkShaderModule                          geometryShaderModule
+                                                                       fragmentShaderModule,                   // const VkShaderModule                          fragmentShaderModule
+                                                                       renderPass,                                             // const VkRenderPass                            renderPass
+                                                                       noViewports,                                    // const std::vector<VkViewport>&                viewports
+                                                                       noScissors,                                             // const std::vector<VkRect2D>&                  scissors
+                                                                       topology,                                               // const VkPrimitiveTopology                     topology
+                                                                       0u,                                                             // const deUint32                                subpass
+                                                                       patchControlPoints,                             // const deUint32                                patchControlPoints
+                                                                       &vertexInputStateCreateInfo,    // const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
+                                                                       DE_NULL,                                                // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
+                                                                       DE_NULL,                                                // const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
+                                                                       DE_NULL,                                                // const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
+                                                                       &colorBlendStateCreateInfo);    // const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
+}
+
+Move<VkPipeline> makeComputePipeline(Context& context,
+                                                                        const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
+                                                                        deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ)
+{
+       const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
+
+       const vk::VkSpecializationMapEntry entries[3] =
+       {
+               {0, sizeof(deUint32) * 0, sizeof(deUint32)},
+               {1, sizeof(deUint32) * 1, sizeof(deUint32)},
+               {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
+       };
+
+       const vk::VkSpecializationInfo info =
+       {
+               /* mapEntryCount = */ 3,
+               /* pMapEntries   = */ entries,
+               /* dataSize      = */ sizeof(localSize),
+               /* pData         = */ localSize
+       };
+
+       const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
+       {
+               VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,    // VkStructureType                                      sType;
+               DE_NULL,                                                                                                // const void*                                          pNext;
+               0u,                                                                                                             // VkPipelineShaderStageCreateFlags     flags;
+               VK_SHADER_STAGE_COMPUTE_BIT,                                                    // VkShaderStageFlagBits                        stage;
+               shaderModule,                                                                                   // VkShaderModule                                       module;
+               "main",                                                                                                 // const char*                                          pName;
+               &info,                                                                                                  // const VkSpecializationInfo*          pSpecializationInfo;
+       };
+
+       const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
+       {
+               VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType      sType;
+               DE_NULL,                                                                                // const void*                                          pNext;
+               0u,                                                                                             // VkPipelineCreateFlags                        flags;
+               pipelineShaderStageParams,                                              // VkPipelineShaderStageCreateInfo      stage;
+               pipelineLayout,                                                                 // VkPipelineLayout                                     layout;
+               DE_NULL,                                                                                // VkPipeline                                           basePipelineHandle;
+               0,                                                                                              // deInt32                                                      basePipelineIndex;
+       };
+
+       return createComputePipeline(context.getDeviceInterface(),
+                                                                context.getDevice(), DE_NULL, &pipelineCreateInfo);
+}
+
+Move<VkDescriptorSet> makeDescriptorSet(Context& context,
+                                                                               const VkDescriptorPool descriptorPool,
+                                                                               const VkDescriptorSetLayout setLayout)
+{
+       const VkDescriptorSetAllocateInfo allocateParams =
+       {
+               VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // VkStructureType
+               // sType;
+               DE_NULL,                // const void*          pNext;
+               descriptorPool, // VkDescriptorPool       descriptorPool;
+               1u,                             // deUint32           setLayoutCount;
+               &setLayout,             // const VkDescriptorSetLayout* pSetLayouts;
+       };
+       return allocateDescriptorSet(
+                          context.getDeviceInterface(), context.getDevice(), &allocateParams);
+}
+
+Move<VkCommandPool> makeCommandPool(Context& context)
+{
+       const VkCommandPoolCreateInfo commandPoolParams =
+       {
+               VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, // VkStructureType sType;
+               DE_NULL,                                                                        // const void*        pNext;
+               VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, // VkCommandPoolCreateFlags
+               // flags;
+               context.getUniversalQueueFamilyIndex(), // deUint32 queueFamilyIndex;
+       };
+
+       return createCommandPool(
+                          context.getDeviceInterface(), context.getDevice(), &commandPoolParams);
+}
+
+Move<VkCommandBuffer> makeCommandBuffer(
+       Context& context, const VkCommandPool commandPool)
+{
+       const VkCommandBufferAllocateInfo bufferAllocateParams =
+       {
+               VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType              sType;
+               DE_NULL,                                                                                // const void*                  pNext;
+               commandPool,                                                                    // VkCommandPool                commandPool;
+               VK_COMMAND_BUFFER_LEVEL_PRIMARY,                                // VkCommandBufferLevel level;
+               1u,                                                                                             // deUint32                             bufferCount;
+       };
+       return allocateCommandBuffer(context.getDeviceInterface(),
+                                                                context.getDevice(), &bufferAllocateParams);
+}
+
+Move<VkFence> submitCommandBuffer(
+       Context& context, const VkCommandBuffer commandBuffer)
+{
+       const VkFenceCreateInfo fenceParams =
+       {
+               VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, // VkStructureType    sType;
+               DE_NULL,                                                         // const void*      pNext;
+               0u,                                                                      // VkFenceCreateFlags flags;
+       };
+
+       Move<VkFence> fence(createFence(
+                                                       context.getDeviceInterface(), context.getDevice(), &fenceParams));
+
+       const VkSubmitInfo submitInfo =
+       {
+               VK_STRUCTURE_TYPE_SUBMIT_INFO, // VkStructureType      sType;
+               DE_NULL,                                           // const void*        pNext;
+               0u,                                                        // deUint32         waitSemaphoreCount;
+               DE_NULL,                                           // const VkSemaphore*   pWaitSemaphores;
+               (const VkPipelineStageFlags*)DE_NULL,
+               1u,                             // deUint32         commandBufferCount;
+               &commandBuffer, // const VkCommandBuffer* pCommandBuffers;
+               0u,                             // deUint32         signalSemaphoreCount;
+               DE_NULL,                // const VkSemaphore*   pSignalSemaphores;
+       };
+
+       vk::VkResult result = (context.getDeviceInterface().queueSubmit(
+                                                          context.getUniversalQueue(), 1u, &submitInfo, *fence));
+       VK_CHECK(result);
+
+       return Move<VkFence>(fence);
+}
+
+// Waits on the given fence with waitAll = DE_TRUE and a timeout of ~0ull
+// (all bits set), and runs the result through VK_CHECK.
+// The fence is taken by value as a Move<VkFence>, so this function holds it
+// for the duration of the call.
+void waitFence(Context& context, Move<VkFence> fence)
+{
+       VK_CHECK(context.getDeviceInterface().waitForFences(
+                                context.getDevice(), 1u, &fence.get(), DE_TRUE, ~0ull));
+}
+
+struct Buffer;
+struct Image;
+
+struct BufferOrImage
+{
+       bool isImage() const
+       {
+               return m_isImage;
+       }
+
+       Buffer* getAsBuffer()
+       {
+               if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
+               return reinterpret_cast<Buffer* >(this);
+       }
+
+       Image* getAsImage()
+       {
+               if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
+               return reinterpret_cast<Image*>(this);
+       }
+
+       virtual VkDescriptorType getType() const
+       {
+               if (m_isImage)
+               {
+                       return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+               }
+               else
+               {
+                       return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+               }
+       }
+
+       Allocation& getAllocation() const
+       {
+               return *m_allocation;
+       }
+
+       virtual ~BufferOrImage() {}
+
+protected:
+       explicit BufferOrImage(bool image) : m_isImage(image) {}
+
+       bool m_isImage;
+       de::details::MovePtr<Allocation> m_allocation;
+};
+
+struct Buffer : public BufferOrImage
+{
+       explicit Buffer(
+               Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
+               : BufferOrImage         (false)
+               , m_sizeInBytes         (sizeInBytes)
+               , m_usage                       (usage)
+       {
+               const vk::VkBufferCreateInfo bufferCreateInfo =
+               {
+                       VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+                       DE_NULL,
+                       0u,
+                       sizeInBytes,
+                       m_usage,
+                       VK_SHARING_MODE_EXCLUSIVE,
+                       0u,
+                       DE_NULL,
+               };
+               m_buffer = createBuffer(context.getDeviceInterface(),
+                                                               context.getDevice(), &bufferCreateInfo);
+               vk::VkMemoryRequirements req = getBufferMemoryRequirements(
+                                                                                  context.getDeviceInterface(), context.getDevice(), *m_buffer);
+               req.size *= 2;
+               m_allocation = context.getDefaultAllocator().allocate(
+                                                  req, MemoryRequirement::HostVisible);
+               VK_CHECK(context.getDeviceInterface().bindBufferMemory(
+                                        context.getDevice(), *m_buffer, m_allocation->getMemory(),
+                                        m_allocation->getOffset()));
+       }
+
+       virtual VkDescriptorType getType() const
+       {
+               if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
+               {
+                       return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+               }
+               return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+       }
+
+       VkBuffer getBuffer() const {
+               return *m_buffer;
+       }
+
+       const VkBuffer* getBufferPtr() const {
+               return &(*m_buffer);
+       }
+
+       VkDeviceSize getSize() const {
+               return m_sizeInBytes;
+       }
+
+private:
+       Move<VkBuffer>                          m_buffer;
+       VkDeviceSize                            m_sizeInBytes;
+       const VkBufferUsageFlags        m_usage;
+};
+
+struct Image : public BufferOrImage
+{
+       explicit Image(Context& context, deUint32 width, deUint32 height,
+                                  VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
+               : BufferOrImage(true)
+       {
+               const VkImageCreateInfo imageCreateInfo =
+               {
+                       VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, DE_NULL, 0, VK_IMAGE_TYPE_2D,
+                       format, {width, height, 1}, 1, 1, VK_SAMPLE_COUNT_1_BIT,
+                       VK_IMAGE_TILING_OPTIMAL, usage,
+                       VK_SHARING_MODE_EXCLUSIVE, 0u, DE_NULL,
+                       VK_IMAGE_LAYOUT_UNDEFINED
+               };
+               m_image = createImage(context.getDeviceInterface(), context.getDevice(),
+                                                         &imageCreateInfo);
+               vk::VkMemoryRequirements req = getImageMemoryRequirements(
+                                                                                  context.getDeviceInterface(), context.getDevice(), *m_image);
+               req.size *= 2;
+               m_allocation =
+                       context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
+               VK_CHECK(context.getDeviceInterface().bindImageMemory(
+                                        context.getDevice(), *m_image, m_allocation->getMemory(),
+                                        m_allocation->getOffset()));
+
+               const VkComponentMapping componentMapping =
+               {
+                       VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
+                       VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
+               };
+
+               const VkImageViewCreateInfo imageViewCreateInfo =
+               {
+                       VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, DE_NULL, 0, *m_image,
+                       VK_IMAGE_VIEW_TYPE_2D, imageCreateInfo.format, componentMapping,
+                       {
+                               VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1,
+                       }
+               };
+
+               m_imageView = createImageView(context.getDeviceInterface(),
+                                                                         context.getDevice(), &imageViewCreateInfo);
+
+               const struct VkSamplerCreateInfo samplerCreateInfo =
+               {
+                       VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+                       DE_NULL,
+                       0u,
+                       VK_FILTER_NEAREST,
+                       VK_FILTER_NEAREST,
+                       VK_SAMPLER_MIPMAP_MODE_NEAREST,
+                       VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+                       VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+                       VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+                       0.0f,
+                       VK_FALSE,
+                       1.0f,
+                       DE_FALSE,
+                       VK_COMPARE_OP_ALWAYS,
+                       0.0f,
+                       0.0f,
+                       VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
+                       VK_FALSE,
+               };
+
+               m_sampler = createSampler(context.getDeviceInterface(), context.getDevice(), &samplerCreateInfo);
+       }
+
+       VkImage getImage() const {
+               return *m_image;
+       }
+
+       VkImageView getImageView() const {
+               return *m_imageView;
+       }
+
+       VkSampler getSampler() const {
+               return *m_sampler;
+       }
+
+private:
+       Move<VkImage> m_image;
+       Move<VkImageView> m_imageView;
+       Move<VkSampler> m_sampler;
+};
+}
+
+std::string vkt::subgroups::getSharedMemoryBallotHelper()
+{
+       return  "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
+                       "uvec4 sharedMemoryBallot(bool vote)\n"
+                       "{\n"
+                       "  uint groupOffset = gl_SubgroupID;\n"
+                       "  // One invocation in the group 0's the whole group's data\n"
+                       "  if (subgroupElect())\n"
+                       "  {\n"
+                       "    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
+                       "  }\n"
+                       "  subgroupMemoryBarrierShared();\n"
+                       "  if (vote)\n"
+                       "  {\n"
+                       "    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
+                       "    const highp uint bitToSet = 1u << invocationId;\n"
+                       "    switch (gl_SubgroupInvocationID / 32)\n"
+                       "    {\n"
+                       "    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
+                       "    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
+                       "    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
+                       "    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
+                       "    }\n"
+                       "  }\n"
+                       "  subgroupMemoryBarrierShared();\n"
+                       "  return superSecretComputeShaderHelper[groupOffset];\n"
+                       "}\n";
+}
+
+// Queries the physical device's subgroup properties through
+// getPhysicalDeviceProperties2 and returns the reported subgroupSize.
+deUint32 vkt::subgroups::getSubgroupSize(Context& context)
+{
+       VkPhysicalDeviceSubgroupProperties      subgroupProps;
+       VkPhysicalDeviceProperties2                     deviceProps2;
+
+       // Chain the subgroup structure behind the top-level properties query.
+       subgroupProps.sType     = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+       subgroupProps.pNext     = DE_NULL;
+
+       deviceProps2.sType      = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+       deviceProps2.pNext      = &subgroupProps;
+
+       context.getInstanceInterface().getPhysicalDeviceProperties2(
+               context.getPhysicalDevice(), &deviceProps2);
+
+       return subgroupProps.subgroupSize;
+}
+
+// Upper bound on subgroup size used by these tests; always returns 128.
+VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize()
+{
+       const VkDeviceSize maxSubgroupSize = 128u;
+       return maxSubgroupSize;
+}
+
+std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
+{
+       switch (stage)
+       {
+               default:
+                       DE_FATAL("Unhandled stage!");
+                       return "";
+               case VK_SHADER_STAGE_COMPUTE_BIT:
+                       return "compute";
+               case VK_SHADER_STAGE_FRAGMENT_BIT:
+                       return "fragment";
+               case VK_SHADER_STAGE_VERTEX_BIT:
+                       return "vertex";
+               case VK_SHADER_STAGE_GEOMETRY_BIT:
+                       return "geometry";
+               case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+                       return "tess_control";
+               case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+                       return "tess_eval";
+       }
+}
+
+std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
+{
+       switch (bit)
+       {
+               default:
+                       DE_FATAL("Unknown subgroup feature category!");
+                       return "";
+               case VK_SUBGROUP_FEATURE_BASIC_BIT:
+                       return "VK_SUBGROUP_FEATURE_BASIC_BIT";
+               case VK_SUBGROUP_FEATURE_VOTE_BIT:
+                       return "VK_SUBGROUP_FEATURE_VOTE_BIT";
+               case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
+                       return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
+               case VK_SUBGROUP_FEATURE_BALLOT_BIT:
+                       return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
+               case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
+                       return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
+               case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
+                       return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
+               case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
+                       return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
+               case VK_SUBGROUP_FEATURE_QUAD_BIT:
+                       return "VK_SUBGROUP_FEATURE_QUAD_BIT";
+       }
+}
+
+void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
+{
+       {
+       /*
+               "#version 450\n"
+               "void main (void)\n"
+               "{\n"
+               "  float pixelSize = 2.0f/1024.0f;\n"
+               "   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+               "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+               "  gl_PointSize = 1.0f;\n"
+               "}\n"
+       */
+               const std::string vertNoSubgroup =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 1\n"
+                       "; Bound: 37\n"
+                       "; Schema: 0\n"
+                       "OpCapability Shader\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
+                       "OpMemberDecorate %20 0 BuiltIn Position\n"
+                       "OpMemberDecorate %20 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
+                       "OpDecorate %20 Block\n"
+                       "OpDecorate %26 BuiltIn VertexIndex\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeFloat 32\n"
+                       "%7 = OpTypePointer Function %6\n"
+                       "%9 = OpConstant %6 0.00195313\n"
+                       "%12 = OpConstant %6 2\n"
+                       "%14 = OpConstant %6 1\n"
+                       "%16 = OpTypeVector %6 4\n"
+                       "%17 = OpTypeInt 32 0\n"
+                       "%18 = OpConstant %17 1\n"
+                       "%19 = OpTypeArray %6 %18\n"
+                       "%20 = OpTypeStruct %16 %6 %19 %19\n"
+                       "%21 = OpTypePointer Output %20\n"
+                       "%22 = OpVariable %21 Output\n"
+                       "%23 = OpTypeInt 32 1\n"
+                       "%24 = OpConstant %23 0\n"
+                       "%25 = OpTypePointer Input %23\n"
+                       "%26 = OpVariable %25 Input\n"
+                       "%33 = OpConstant %6 0\n"
+                       "%35 = OpTypePointer Output %16\n"
+                       "%37 = OpConstant %23 1\n"
+                       "%38 = OpTypePointer Output %6\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%8 = OpVariable %7 Function\n"
+                       "%10 = OpVariable %7 Function\n"
+                       "OpStore %8 %9\n"
+                       "%11 = OpLoad %6 %8\n"
+                       "%13 = OpFDiv %6 %11 %12\n"
+                       "%15 = OpFSub %6 %13 %14\n"
+                       "OpStore %10 %15\n"
+                       "%27 = OpLoad %23 %26\n"
+                       "%28 = OpConvertSToF %6 %27\n"
+                       "%29 = OpLoad %6 %8\n"
+                       "%30 = OpFMul %6 %28 %29\n"
+                       "%31 = OpLoad %6 %10\n"
+                       "%32 = OpFAdd %6 %30 %31\n"
+                       "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
+                       "%36 = OpAccessChain %35 %22 %24\n"
+                       "OpStore %36 %34\n"
+                       "%39 = OpAccessChain %38 %22 %37\n"
+                       "OpStore %39 %14\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
+       }
+
+       {
+       /*
+               "#version 450\n"
+               "layout(vertices=1) out;\n"
+               "\n"
+               "void main (void)\n"
+               "{\n"
+               "  if (gl_InvocationID == 0)\n"
+               "  {\n"
+               "    gl_TessLevelOuter[0] = 1.0f;\n"
+               "    gl_TessLevelOuter[1] = 1.0f;\n"
+               "  }\n"
+               "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+               "}\n"
+       */
+               const std::string tescNoSubgroup =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 1\n"
+                       "; Bound: 45\n"
+                       "; Schema: 0\n"
+                       "OpCapability Tessellation\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
+                       "OpExecutionMode %4 OutputVertices 1\n"
+                       "OpDecorate %8 BuiltIn InvocationId\n"
+                       "OpDecorate %20 Patch\n"
+                       "OpDecorate %20 BuiltIn TessLevelOuter\n"
+                       "OpMemberDecorate %29 0 BuiltIn Position\n"
+                       "OpMemberDecorate %29 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
+                       "OpDecorate %29 Block\n"
+                       "OpMemberDecorate %34 0 BuiltIn Position\n"
+                       "OpMemberDecorate %34 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
+                       "OpDecorate %34 Block\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeInt 32 1\n"
+                       "%7 = OpTypePointer Input %6\n"
+                       "%8 = OpVariable %7 Input\n"
+                       "%10 = OpConstant %6 0\n"
+                       "%11 = OpTypeBool\n"
+                       "%15 = OpTypeFloat 32\n"
+                       "%16 = OpTypeInt 32 0\n"
+                       "%17 = OpConstant %16 4\n"
+                       "%18 = OpTypeArray %15 %17\n"
+                       "%19 = OpTypePointer Output %18\n"
+                       "%20 = OpVariable %19 Output\n"
+                       "%21 = OpConstant %15 1\n"
+                       "%22 = OpTypePointer Output %15\n"
+                       "%24 = OpConstant %6 1\n"
+                       "%26 = OpTypeVector %15 4\n"
+                       "%27 = OpConstant %16 1\n"
+                       "%28 = OpTypeArray %15 %27\n"
+                       "%29 = OpTypeStruct %26 %15 %28 %28\n"
+                       "%30 = OpTypeArray %29 %27\n"
+                       "%31 = OpTypePointer Output %30\n"
+                       "%32 = OpVariable %31 Output\n"
+                       "%34 = OpTypeStruct %26 %15 %28 %28\n"
+                       "%35 = OpConstant %16 32\n"
+                       "%36 = OpTypeArray %34 %35\n"
+                       "%37 = OpTypePointer Input %36\n"
+                       "%38 = OpVariable %37 Input\n"
+                       "%40 = OpTypePointer Input %26\n"
+                       "%43 = OpTypePointer Output %26\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%9 = OpLoad %6 %8\n"
+                       "%12 = OpIEqual %11 %9 %10\n"
+                       "OpSelectionMerge %14 None\n"
+                       "OpBranchConditional %12 %13 %14\n"
+                       "%13 = OpLabel\n"
+                       "%23 = OpAccessChain %22 %20 %10\n"
+                       "OpStore %23 %21\n"
+                       "%25 = OpAccessChain %22 %20 %24\n"
+                       "OpStore %25 %21\n"
+                       "OpBranch %14\n"
+                       "%14 = OpLabel\n"
+                       "%33 = OpLoad %6 %8\n"
+                       "%39 = OpLoad %6 %8\n"
+                       "%41 = OpAccessChain %40 %38 %39 %10\n"
+                       "%42 = OpLoad %26 %41\n"
+                       "%44 = OpAccessChain %43 %32 %33 %10\n"
+                       "OpStore %44 %42\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
+       }
+
+       {
+       /*
+               "#version 450\n"
+               "layout(isolines) in;\n"
+               "\n"
+               "void main (void)\n"
+               "{\n"
+               "  float pixelSize = 2.0f/1024.0f;\n"
+               "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+               "}\n";
+       */
+               const std::string teseNoSubgroup =
+                       "; SPIR-V\n"
+                       "; Version: 1.3\n"
+                       "; Generator: Khronos Glslang Reference Front End; 2\n"
+                       "; Bound: 42\n"
+                       "; Schema: 0\n"
+                       "OpCapability Tessellation\n"
+                       "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+                       "OpMemoryModel Logical GLSL450\n"
+                       "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
+                       "OpExecutionMode %4 Isolines\n"
+                       "OpExecutionMode %4 SpacingEqual\n"
+                       "OpExecutionMode %4 VertexOrderCcw\n"
+                       "OpMemberDecorate %14 0 BuiltIn Position\n"
+                       "OpMemberDecorate %14 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
+                       "OpDecorate %14 Block\n"
+                       "OpMemberDecorate %19 0 BuiltIn Position\n"
+                       "OpMemberDecorate %19 1 BuiltIn PointSize\n"
+                       "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
+                       "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
+                       "OpDecorate %19 Block\n"
+                       "OpDecorate %29 BuiltIn TessCoord\n"
+                       "%2 = OpTypeVoid\n"
+                       "%3 = OpTypeFunction %2\n"
+                       "%6 = OpTypeFloat 32\n"
+                       "%7 = OpTypePointer Function %6\n"
+                       "%9 = OpConstant %6 0.00195313\n"
+                       "%10 = OpTypeVector %6 4\n"
+                       "%11 = OpTypeInt 32 0\n"
+                       "%12 = OpConstant %11 1\n"
+                       "%13 = OpTypeArray %6 %12\n"
+                       "%14 = OpTypeStruct %10 %6 %13 %13\n"
+                       "%15 = OpTypePointer Output %14\n"
+                       "%16 = OpVariable %15 Output\n"
+                       "%17 = OpTypeInt 32 1\n"
+                       "%18 = OpConstant %17 0\n"
+                       "%19 = OpTypeStruct %10 %6 %13 %13\n"
+                       "%20 = OpConstant %11 32\n"
+                       "%21 = OpTypeArray %19 %20\n"
+                       "%22 = OpTypePointer Input %21\n"
+                       "%23 = OpVariable %22 Input\n"
+                       "%24 = OpTypePointer Input %10\n"
+                       "%27 = OpTypeVector %6 3\n"
+                       "%28 = OpTypePointer Input %27\n"
+                       "%29 = OpVariable %28 Input\n"
+                       "%30 = OpConstant %11 0\n"
+                       "%31 = OpTypePointer Input %6\n"
+                       "%36 = OpConstant %6 2\n"
+                       "%40 = OpTypePointer Output %10\n"
+                       "%4 = OpFunction %2 None %3\n"
+                       "%5 = OpLabel\n"
+                       "%8 = OpVariable %7 Function\n"
+                       "OpStore %8 %9\n"
+                       "%25 = OpAccessChain %24 %23 %18 %18\n"
+                       "%26 = OpLoad %10 %25\n"
+                       "%32 = OpAccessChain %31 %29 %30\n"
+                       "%33 = OpLoad %6 %32\n"
+                       "%34 = OpLoad %6 %8\n"
+                       "%35 = OpFMul %6 %33 %34\n"
+                       "%37 = OpFDiv %6 %35 %36\n"
+                       "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
+                       "%39 = OpFAdd %10 %26 %38\n"
+                       "%41 = OpAccessChain %40 %16 %18\n"
+                       "OpStore %41 %39\n"
+                       "OpReturn\n"
+                       "OpFunctionEnd\n";
+               programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
+       }
+
+}
+
+
+std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
+{
+       switch (stage)
+       {
+               default:
+                       DE_FATAL("Unhandled stage!");
+                       return "";
+               case VK_SHADER_STAGE_FRAGMENT_BIT:
+                       return
+                               "#version 450\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "  float pixelSize = 2.0f/1024.0f;\n"
+                               "   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+                               "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+                               "}\n";
+               case VK_SHADER_STAGE_GEOMETRY_BIT:
+                       return
+                               "#version 450\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "}\n";
+               case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+               case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+                       return
+                               "#version 450\n"
+                               "void main (void)\n"
+                               "{\n"
+                               "}\n";
+       }
+}
+
+// Subgroup tests are gated on the context supporting API version 1.1.0.
+bool vkt::subgroups::isSubgroupSupported(Context& context)
+{
+       const bool supportsVersion11 = context.contextSupports(vk::ApiVersion(1, 1, 0));
+       return supportsVersion11;
+}
+
+bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
+       Context& context, const VkShaderStageFlags stage)
+{
+       VkPhysicalDeviceSubgroupProperties subgroupProperties;
+       subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+       subgroupProperties.pNext = DE_NULL;
+
+       VkPhysicalDeviceProperties2 properties;
+       properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+       properties.pNext = &subgroupProperties;
+
+       context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+       return (stage & subgroupProperties.supportedStages) ? true : false;
+}
+
+bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
+       VkShaderStageFlags stage)
+{
+       switch (stage)
+       {
+               default:
+                       return false;
+               case VK_SHADER_STAGE_COMPUTE_BIT:
+                       return true;
+       }
+}
+
+bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
+       Context& context,
+       VkSubgroupFeatureFlagBits bit) {
+       VkPhysicalDeviceSubgroupProperties subgroupProperties;
+       subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+       subgroupProperties.pNext = DE_NULL;
+
+       VkPhysicalDeviceProperties2 properties;
+       properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+       properties.pNext = &subgroupProperties;
+
+       context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+       return (bit & subgroupProperties.supportedOperations) ? true : false;
+}
+
+// Reports the device's fragmentStoresAndAtomics feature as a bool.
+bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
+{
+       return getPhysicalDeviceFeatures(context.getInstanceInterface(),
+                                                                        context.getPhysicalDevice()).fragmentStoresAndAtomics != 0u;
+}
+
+// Reports the device's vertexPipelineStoresAndAtomics feature as a bool.
+bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
+{
+       return getPhysicalDeviceFeatures(context.getInstanceInterface(),
+                                                                        context.getPhysicalDevice()).vertexPipelineStoresAndAtomics != 0u;
+}
+
+// Reports the device's shaderFloat64 feature as a bool.
+bool vkt::subgroups::isDoubleSupportedForDevice(Context& context)
+{
+       return getPhysicalDeviceFeatures(context.getInstanceInterface(),
+                                                                        context.getPhysicalDevice()).shaderFloat64 != 0u;
+}
+
+bool vkt::subgroups::isDoubleFormat(VkFormat format)
+{
+       switch (format)
+       {
+               default:
+                       return false;
+               case VK_FORMAT_R64_SFLOAT:
+               case VK_FORMAT_R64G64_SFLOAT:
+               case VK_FORMAT_R64G64B64_SFLOAT:
+               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                       return true;
+       }
+}
+
+std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
+{
+       switch (format)
+       {
+               default:
+                       DE_FATAL("Unhandled format!");
+                       return "";
+               case VK_FORMAT_R32_SINT:
+                       return "int";
+               case VK_FORMAT_R32G32_SINT:
+                       return "ivec2";
+               case VK_FORMAT_R32G32B32_SINT:
+                       return "ivec3";
+               case VK_FORMAT_R32G32B32A32_SINT:
+                       return "ivec4";
+               case VK_FORMAT_R32_UINT:
+                       return "uint";
+               case VK_FORMAT_R32G32_UINT:
+                       return "uvec2";
+               case VK_FORMAT_R32G32B32_UINT:
+                       return "uvec3";
+               case VK_FORMAT_R32G32B32A32_UINT:
+                       return "uvec4";
+               case VK_FORMAT_R32_SFLOAT:
+                       return "float";
+               case VK_FORMAT_R32G32_SFLOAT:
+                       return "vec2";
+               case VK_FORMAT_R32G32B32_SFLOAT:
+                       return "vec3";
+               case VK_FORMAT_R32G32B32A32_SFLOAT:
+                       return "vec4";
+               case VK_FORMAT_R64_SFLOAT:
+                       return "double";
+               case VK_FORMAT_R64G64_SFLOAT:
+                       return "dvec2";
+               case VK_FORMAT_R64G64B64_SFLOAT:
+                       return "dvec3";
+               case VK_FORMAT_R64G64B64A64_SFLOAT:
+                       return "dvec4";
+               case VK_FORMAT_R8_USCALED:
+                       return "bool";
+               case VK_FORMAT_R8G8_USCALED:
+                       return "bvec2";
+               case VK_FORMAT_R8G8B8_USCALED:
+                       return "bvec3";
+               case VK_FORMAT_R8G8B8A8_USCALED:
+                       return "bvec4";
+       }
+}
+
+void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
+{
+       /*
+               Registers the SPIR-V assembly below in programCollection under
+               the name "vert". The module loads a vec4 input decorated with
+               location 0 and stores it to the Position built-in member of its
+               output block.
+
+               GLSL this assembly corresponds to:
+               "layout(location = 0) in highp vec4 in_position;\n"
+               "void main (void)\n"
+               "{\n"
+               "  gl_Position = in_position;\n"
+               "}\n";
+       */
+       programCollection.spirvAsmSources.add("vert") <<
+               "; SPIR-V\n"
+               "; Version: 1.3\n"
+               "; Generator: Khronos Glslang Reference Front End; 2\n"
+               "; Bound: 21\n"
+               "; Schema: 0\n"
+               "OpCapability Shader\n"
+               "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+               "OpMemoryModel Logical GLSL450\n"
+               "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
+               "OpMemberDecorate %11 0 BuiltIn Position\n"
+               "OpMemberDecorate %11 1 BuiltIn PointSize\n"
+               "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
+               "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
+               "OpDecorate %11 Block\n"
+               "OpDecorate %17 Location 0\n"
+               "%2 = OpTypeVoid\n"
+               "%3 = OpTypeFunction %2\n"
+               "%6 = OpTypeFloat 32\n"
+               "%7 = OpTypeVector %6 4\n"
+               "%8 = OpTypeInt 32 0\n"
+               "%9 = OpConstant %8 1\n"
+               "%10 = OpTypeArray %6 %9\n"
+               "%11 = OpTypeStruct %7 %6 %10 %10\n"
+               "%12 = OpTypePointer Output %11\n"
+               "%13 = OpVariable %12 Output\n"
+               "%14 = OpTypeInt 32 1\n"
+               "%15 = OpConstant %14 0\n"
+               "%16 = OpTypePointer Input %7\n"
+               "%17 = OpVariable %16 Input\n"
+               "%19 = OpTypePointer Output %7\n"
+               "%4 = OpFunction %2 None %3\n"
+               "%5 = OpLabel\n"
+               "%18 = OpLoad %7 %17\n"
+               "%20 = OpAccessChain %19 %13 %15\n"
+               "OpStore %20 %18\n"
+               "OpReturn\n"
+               "OpFunctionEnd\n";
+}
+
+void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
+{
+       /*
+               Registers the SPIR-V assembly below in programCollection under
+               the name "fragment". The module loads a float input at location
+               0, converts it to an unsigned integer (OpConvertFToU) and stores
+               the result to the uint output at location 0.
+
+               GLSL this assembly corresponds to:
+               "layout(location = 0) in float in_color;\n"
+               "layout(location = 0) out uint out_color;\n"
+               "void main()\n"
+               "{\n"
+               "       out_color = uint(in_color);\n"
+               "}\n";
+       */
+       programCollection.spirvAsmSources.add("fragment") <<
+               "; SPIR-V\n"
+               "; Version: 1.3\n"
+               "; Generator: Khronos Glslang Reference Front End; 2\n"
+               "; Bound: 14\n"
+               "; Schema: 0\n"
+               "OpCapability Shader\n"
+               "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+               "OpMemoryModel Logical GLSL450\n"
+               "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
+               "OpExecutionMode %4 OriginUpperLeft\n"
+               "OpDecorate %8 Location 0\n"
+               "OpDecorate %11 Location 0\n"
+               "%2 = OpTypeVoid\n"
+               "%3 = OpTypeFunction %2\n"
+               "%6 = OpTypeInt 32 0\n"
+               "%7 = OpTypePointer Output %6\n"
+               "%8 = OpVariable %7 Output\n"
+               "%9 = OpTypeFloat 32\n"
+               "%10 = OpTypePointer Input %9\n"
+               "%11 = OpVariable %10 Input\n"
+               "%4 = OpFunction %2 None %3\n"
+               "%5 = OpLabel\n"
+               "%12 = OpLoad %9 %11\n"
+               "%13 = OpConvertFToU %6 %12\n"
+               "OpStore %8 %13\n"
+               "OpReturn\n"
+               "OpFunctionEnd\n";
+}
+
+void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
+{
+       /*
+               Registers the SPIR-V assembly below in programCollection under
+               the name "tesc". The module declares two output vertices.
+               Invocation 0 writes 1.0 to elements 0 and 1 of TessLevelOuter,
+               and every invocation copies the Position of its input vertex to
+               the Position of its output vertex. The assembly declares only
+               the Tessellation capability; no subgroup capability is used.
+
+               GLSL this assembly corresponds to:
+               "#extension GL_KHR_shader_subgroup_basic: enable\n"
+               "#extension GL_EXT_tessellation_shader : require\n"
+               "layout(vertices = 2) out;\n"
+               "void main (void)\n"
+               "{\n"
+               "  if (gl_InvocationID == 0)\n"
+               "  {\n"
+               "    gl_TessLevelOuter[0] = 1.0f;\n"
+               "    gl_TessLevelOuter[1] = 1.0f;\n"
+               "  }\n"
+               "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+               "}\n";
+       */
+       programCollection.spirvAsmSources.add("tesc") <<
+               "; SPIR-V\n"
+               "; Version: 1.3\n"
+               "; Generator: Khronos Glslang Reference Front End; 2\n"
+               "; Bound: 46\n"
+               "; Schema: 0\n"
+               "OpCapability Tessellation\n"
+               "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+               "OpMemoryModel Logical GLSL450\n"
+               "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
+               "OpExecutionMode %4 OutputVertices 2\n"
+               "OpDecorate %8 BuiltIn InvocationId\n"
+               "OpDecorate %20 Patch\n"
+               "OpDecorate %20 BuiltIn TessLevelOuter\n"
+               "OpMemberDecorate %29 0 BuiltIn Position\n"
+               "OpMemberDecorate %29 1 BuiltIn PointSize\n"
+               "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
+               "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
+               "OpDecorate %29 Block\n"
+               "OpMemberDecorate %35 0 BuiltIn Position\n"
+               "OpMemberDecorate %35 1 BuiltIn PointSize\n"
+               "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
+               "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
+               "OpDecorate %35 Block\n"
+               "%2 = OpTypeVoid\n"
+               "%3 = OpTypeFunction %2\n"
+               "%6 = OpTypeInt 32 1\n"
+               "%7 = OpTypePointer Input %6\n"
+               "%8 = OpVariable %7 Input\n"
+               "%10 = OpConstant %6 0\n"
+               "%11 = OpTypeBool\n"
+               "%15 = OpTypeFloat 32\n"
+               "%16 = OpTypeInt 32 0\n"
+               "%17 = OpConstant %16 4\n"
+               "%18 = OpTypeArray %15 %17\n"
+               "%19 = OpTypePointer Output %18\n"
+               "%20 = OpVariable %19 Output\n"
+               "%21 = OpConstant %15 1\n"
+               "%22 = OpTypePointer Output %15\n"
+               "%24 = OpConstant %6 1\n"
+               "%26 = OpTypeVector %15 4\n"
+               "%27 = OpConstant %16 1\n"
+               "%28 = OpTypeArray %15 %27\n"
+               "%29 = OpTypeStruct %26 %15 %28 %28\n"
+               "%30 = OpConstant %16 2\n"
+               "%31 = OpTypeArray %29 %30\n"
+               "%32 = OpTypePointer Output %31\n"
+               "%33 = OpVariable %32 Output\n"
+               "%35 = OpTypeStruct %26 %15 %28 %28\n"
+               "%36 = OpConstant %16 32\n"
+               "%37 = OpTypeArray %35 %36\n"
+               "%38 = OpTypePointer Input %37\n"
+               "%39 = OpVariable %38 Input\n"
+               "%41 = OpTypePointer Input %26\n"
+               "%44 = OpTypePointer Output %26\n"
+               "%4 = OpFunction %2 None %3\n"
+               "%5 = OpLabel\n"
+               "%9 = OpLoad %6 %8\n"
+               "%12 = OpIEqual %11 %9 %10\n"
+               "OpSelectionMerge %14 None\n"
+               "OpBranchConditional %12 %13 %14\n"
+               "%13 = OpLabel\n"
+               "%23 = OpAccessChain %22 %20 %10\n"
+               "OpStore %23 %21\n"
+               "%25 = OpAccessChain %22 %20 %24\n"
+               "OpStore %25 %21\n"
+               "OpBranch %14\n"
+               "%14 = OpLabel\n"
+               "%34 = OpLoad %6 %8\n"
+               "%40 = OpLoad %6 %8\n"
+               "%42 = OpAccessChain %41 %39 %40 %10\n"
+               "%43 = OpLoad %26 %42\n"
+               "%45 = OpAccessChain %44 %33 %34 %10\n"
+               "OpStore %45 %43\n"
+               "OpReturn\n"
+               "OpFunctionEnd\n";
+}
+
+void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
+{
+       /*
+               Registers the SPIR-V assembly below in programCollection under
+               the name "tese". The module uses the Isolines, SpacingEqual and
+               VertexOrderCcw execution modes. It writes to Position the FMix
+               of the positions of input vertices 0 and 1, weighted by the x
+               component of TessCoord, and copies element 0 of the location-0
+               input array to the location-0 output.
+
+               GLSL this assembly corresponds to:
+               "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+               "#extension GL_EXT_tessellation_shader : require\n"
+               "layout(isolines, equal_spacing, ccw ) in;\n"
+               "layout(location = 0) in float in_color[];\n"
+               "layout(location = 0) out float out_color;\n"
+               "\n"
+               "void main (void)\n"
+               "{\n"
+               "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+               "  out_color = in_color[0];\n"
+               "}\n";
+       */
+       programCollection.spirvAsmSources.add("tese") <<
+               "; SPIR-V\n"
+               "; Version: 1.3\n"
+               "; Generator: Khronos Glslang Reference Front End; 2\n"
+               "; Bound: 45\n"
+               "; Schema: 0\n"
+               "OpCapability Tessellation\n"
+               "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+               "OpMemoryModel Logical GLSL450\n"
+               "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
+               "OpExecutionMode %4 Isolines\n"
+               "OpExecutionMode %4 SpacingEqual\n"
+               "OpExecutionMode %4 VertexOrderCcw\n"
+               "OpMemberDecorate %11 0 BuiltIn Position\n"
+               "OpMemberDecorate %11 1 BuiltIn PointSize\n"
+               "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
+               "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
+               "OpDecorate %11 Block\n"
+               "OpMemberDecorate %16 0 BuiltIn Position\n"
+               "OpMemberDecorate %16 1 BuiltIn PointSize\n"
+               "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
+               "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
+               "OpDecorate %16 Block\n"
+               "OpDecorate %29 BuiltIn TessCoord\n"
+               "OpDecorate %39 Location 0\n"
+               "OpDecorate %42 Location 0\n"
+               "%2 = OpTypeVoid\n"
+               "%3 = OpTypeFunction %2\n"
+               "%6 = OpTypeFloat 32\n"
+               "%7 = OpTypeVector %6 4\n"
+               "%8 = OpTypeInt 32 0\n"
+               "%9 = OpConstant %8 1\n"
+               "%10 = OpTypeArray %6 %9\n"
+               "%11 = OpTypeStruct %7 %6 %10 %10\n"
+               "%12 = OpTypePointer Output %11\n"
+               "%13 = OpVariable %12 Output\n"
+               "%14 = OpTypeInt 32 1\n"
+               "%15 = OpConstant %14 0\n"
+               "%16 = OpTypeStruct %7 %6 %10 %10\n"
+               "%17 = OpConstant %8 32\n"
+               "%18 = OpTypeArray %16 %17\n"
+               "%19 = OpTypePointer Input %18\n"
+               "%20 = OpVariable %19 Input\n"
+               "%21 = OpTypePointer Input %7\n"
+               "%24 = OpConstant %14 1\n"
+               "%27 = OpTypeVector %6 3\n"
+               "%28 = OpTypePointer Input %27\n"
+               "%29 = OpVariable %28 Input\n"
+               "%30 = OpConstant %8 0\n"
+               "%31 = OpTypePointer Input %6\n"
+               "%36 = OpTypePointer Output %7\n"
+               "%38 = OpTypePointer Output %6\n"
+               "%39 = OpVariable %38 Output\n"
+               "%40 = OpTypeArray %6 %17\n"
+               "%41 = OpTypePointer Input %40\n"
+               "%42 = OpVariable %41 Input\n"
+               "%4 = OpFunction %2 None %3\n"
+               "%5 = OpLabel\n"
+               "%22 = OpAccessChain %21 %20 %15 %15\n"
+               "%23 = OpLoad %7 %22\n"
+               "%25 = OpAccessChain %21 %20 %24 %15\n"
+               "%26 = OpLoad %7 %25\n"
+               "%32 = OpAccessChain %31 %29 %30\n"
+               "%33 = OpLoad %6 %32\n"
+               "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
+               "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
+               "%37 = OpAccessChain %36 %13 %15\n"
+               "OpStore %37 %35\n"
+               "%43 = OpAccessChain %31 %42 %15\n"
+               "%44 = OpLoad %6 %43\n"
+               "OpStore %39 %44\n"
+               "OpReturn\n"
+               "OpFunctionEnd\n";
+}
+
+void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options,  vk::GlslSourceCollection& collection)
+{
+       tcu::StringTemplate geometryTemplate(glslTemplate);
+
+       map<string, string>             linesParams;
+       linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
+
+       map<string, string>             pointsParams;
+       pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
+
+       collection.add("geometry_lines")        << glu::GeometrySource(geometryTemplate.specialize(linesParams))        << options;
+       collection.add("geometry_points")       << glu::GeometrySource(geometryTemplate.specialize(pointsParams))       << options;
+}
+
+void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
+{
+       tcu::StringTemplate geometryTemplate(spirvTemplate);
+
+       map<string, string>             linesParams;
+       linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
+
+       map<string, string>             pointsParams;
+       pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
+
+       collection.add("geometry_lines")        << geometryTemplate.specialize(linesParams)             << options;
+       collection.add("geometry_points")       << geometryTemplate.specialize(pointsParams)    << options;
+}
+
+void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
+{
+       const vk::VkFormat format = data.format;
+       const vk::VkDeviceSize size = getFormatSizeInBytes(format) * data.numElements;
+       if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
+       {
+               de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
+
+               switch (format)
+               {
+                       default:
+                               DE_FATAL("Illegal buffer format");
+                               break;
+                       case VK_FORMAT_R8_USCALED:
+                       case VK_FORMAT_R8G8_USCALED:
+                       case VK_FORMAT_R8G8B8_USCALED:
+                       case VK_FORMAT_R8G8B8A8_USCALED:
+                       case VK_FORMAT_R32_SINT:
+                       case VK_FORMAT_R32G32_SINT:
+                       case VK_FORMAT_R32G32B32_SINT:
+                       case VK_FORMAT_R32G32B32A32_SINT:
+                       case VK_FORMAT_R32_UINT:
+                       case VK_FORMAT_R32G32_UINT:
+                       case VK_FORMAT_R32G32B32_UINT:
+                       case VK_FORMAT_R32G32B32A32_UINT:
+                       {
+                               deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
+
+                               for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
+                               {
+                                       ptr[k] = rnd.getUint32();
+                               }
+                       }
+                       break;
+                       case VK_FORMAT_R32_SFLOAT:
+                       case VK_FORMAT_R32G32_SFLOAT:
+                       case VK_FORMAT_R32G32B32_SFLOAT:
+                       case VK_FORMAT_R32G32B32A32_SFLOAT:
+                       {
+                               float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
+
+                               for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
+                               {
+                                       ptr[k] = rnd.getFloat();
+                               }
+                       }
+                       break;
+                       case VK_FORMAT_R64_SFLOAT:
+                       case VK_FORMAT_R64G64_SFLOAT:
+                       case VK_FORMAT_R64G64B64_SFLOAT:
+                       case VK_FORMAT_R64G64B64A64_SFLOAT:
+                       {
+                               double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
+
+                               for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
+                               {
+                                       ptr[k] = rnd.getDouble();
+                               }
+                       }
+                       break;
+               }
+       }
+       else if (subgroups::SSBOData::InitializeZero == data.initializeType)
+       {
+               deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
+
+               for (vk::VkDeviceSize k = 0; k < size / 4; k++)
+               {
+                       ptr[k] = 0;
+               }
+       }
+
+       if (subgroups::SSBOData::InitializeNone != data.initializeType)
+       {
+               flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
+       }
+}
+
+deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
+{
+       switch(shaderStage)
+       {
+               case VK_SHADER_STAGE_VERTEX_BIT:
+                       return 0u;
+                       break;
+               case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+                       return 1u;
+                       break;
+               case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+                       return 2u;
+                       break;
+               case VK_SHADER_STAGE_GEOMETRY_BIT:
+                       return 3u;
+                       break;
+               default:
+                       DE_ASSERT(0);
+                       return -1;
+       }
+       DE_ASSERT(0);
+       return -1;
+}
+
+tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
+       Context& context, VkFormat format, SSBOData* extraData,
+       deUint32 extraDataCount,
+       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       const VkShaderStageFlags shaderStage)
+{
+       const deUint32                                                  maxWidth                                = 1024u;
+       vector<de::SharedPtr<BufferOrImage> >   inputBuffers                    (extraDataCount);
+       DescriptorSetLayoutBuilder                              layoutBuilder;
+       DescriptorPoolBuilder                                   poolBuilder;
+       DescriptorSetUpdateBuilder                              updateBuilder;
+       Move <VkDescriptorPool>                                 descriptorPool;
+       Move <VkDescriptorSet>                                  descriptorSet;
+
+       const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule(context.getDeviceInterface(), context.getDevice(),
+                                                                                                                                               context.getBinaryCollection().get("vert"), 0u));
+       const Unique<VkShaderModule>                    teCtrlShaderModule              (createShaderModule(context.getDeviceInterface(), context.getDevice(),
+                                                                                                                                               context.getBinaryCollection().get("tesc"), 0u));
+       const Unique<VkShaderModule>                    teEvalShaderModule              (createShaderModule(context.getDeviceInterface(), context.getDevice(),
+                                                                                                                                               context.getBinaryCollection().get("tese"), 0u));
+       const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule(context.getDeviceInterface(), context.getDevice(),
+                                                                                                                                       context.getBinaryCollection().get("fragment"), 0u));
+       const Unique<VkRenderPass>                              renderPass                              (makeRenderPass(context, format));
+
+       const VkVertexInputBindingDescription   vertexInputBinding              =
+       {
+               0u,                                                                                     // binding;
+               static_cast<deUint32>(sizeof(tcu::Vec4)),       // stride;
+               VK_VERTEX_INPUT_RATE_VERTEX                                     // inputRate
+       };
+
+       const VkVertexInputAttributeDescription vertexInputAttribute    =
+       {
+               0u,
+               0u,
+               VK_FORMAT_R32G32B32A32_SFLOAT,
+               0u
+       };
+
+       for (deUint32 i = 0u; i < extraDataCount; i++)
+       {
+               if (extraData[i].isImage)
+               {
+                       inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
+               }
+               else
+               {
+                       vk::VkDeviceSize size = getFormatSizeInBytes(extraData[i].format) * extraData[i].numElements;
+                       inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
+               }
+               const Allocation& alloc = inputBuffers[i]->getAllocation();
+               initializeMemory(context, alloc, extraData[i]);
+       }
+
+       for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
+               layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
+
+       const Unique<VkDescriptorSetLayout>             descriptorSetLayout             (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
+
+       const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(context, *descriptorSetLayout));
+
+       const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
+                                                                                                                                       VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
+                                                                                                                                       VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+                                                                                                                                       *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
+                                                                                                                                       *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
+
+       for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
+               poolBuilder.addType(inputBuffers[ndx]->getType());
+
+       if (extraDataCount > 0)
+       {
+               descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
+                                                       VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
+               descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
+       }
+
+       for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
+       {
+               if (inputBuffers[buffersNdx]->isImage())
+               {
+                       VkDescriptorImageInfo info =
+                               makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
+                                                                               inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
+
+                       updateBuilder.writeSingle(*descriptorSet,
+                                                                               DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
+                                                                               inputBuffers[buffersNdx]->getType(), &info);
+               }
+               else
+               {
+                       VkDescriptorBufferInfo info =
+                               makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
+                                                                               0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
+
+                       updateBuilder.writeSingle(*descriptorSet,
+                                                                               DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
+                                                                               inputBuffers[buffersNdx]->getType(), &info);
+               }
+       }
+
+       updateBuilder.update(context.getDeviceInterface(), context.getDevice());
+
+       const Unique<VkCommandPool>                             cmdPool                                 (makeCommandPool(context));
+       const deUint32                                                  subgroupSize                    = getSubgroupSize(context);
+       const Unique<VkCommandBuffer>                   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
+       const vk::VkDeviceSize                                  vertexBufferSize                = 2ull * maxWidth * sizeof(tcu::Vec4);
+       Buffer                                                                  vertexBuffer                    (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
+       unsigned                                                                totalIterations                 = 0u;
+       unsigned                                                                failedIterations                = 0u;
+       Image                                                                   discardableImage                (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
+
+       {
+               const Allocation&               alloc                           = vertexBuffer.getAllocation();
+               std::vector<tcu::Vec4>  data                            (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
+               const float                             pixelSize                       = 2.0f / static_cast<float>(maxWidth);
+               float                                   leftHandPosition        = -1.0f;
+
+               for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
+               {
+                       data[ndx][0] = leftHandPosition;
+                       leftHandPosition += pixelSize;
+                       data[ndx+1][0] = leftHandPosition;
+               }
+
+               deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
+               flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
+       }
+
+       for (deUint32 width = 1u; width < maxWidth; ++width)
+       {
+               const Unique<VkFramebuffer>     framebuffer                     (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
+               const VkViewport                        viewport                        = makeViewport(maxWidth, 1u);
+               const VkRect2D                          scissor                         = makeRect2D(maxWidth, 1u);
+               const vk::VkDeviceSize          imageResultSize         = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
+               Buffer                                          imageBufferResult       (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+               const VkDeviceSize                      vertexBufferOffset      = 0u;
+
+               totalIterations++;
+
+               beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+               {
+
+                       context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
+                       context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
+
+                       beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
+
+                       context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+                       if (extraDataCount > 0)
+                       {
+                               context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
+                                       VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
+                                       &descriptorSet.get(), 0u, DE_NULL);
+                       }
+
+                       context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
+                       context.getDeviceInterface().cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
+
+                       endRenderPass(context.getDeviceInterface(), *cmdBuffer);
+
+                       copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+                       endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+
+                       Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
+                       waitFence(context, fence);
+               }
+
+               {
+                       const Allocation& allocResult = imageBufferResult.getAllocation();
+                       invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
+
+                       std::vector<const void*> datas;
+                       datas.push_back(allocResult.getHostPtr());
+                       if (!checkResult(datas, width/2u, subgroupSize))
+                               failedIterations++;
+               }
+       }
+
+       if (0 < failedIterations)
+       {
+               context.getTestContext().getLog()
+                               << TestLog::Message << (totalIterations - failedIterations) << " / "
+                               << totalIterations << " values passed" << TestLog::EndMessage;
+               return tcu::TestStatus::fail("Failed!");
+       }
+
+       return tcu::TestStatus::pass("OK");
+}
+
+bool vkt::subgroups::check(std::vector<const void*> datas,
+       deUint32 width, deUint32 ref)
+{
+       const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
+
+       for (deUint32 n = 0; n < width; ++n)
+       {
+               if (data[n] != ref)
+               {
+                       return false;
+               }
+       }
+
+       return true;
+}
+
+bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
+       const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+       deUint32 ref)
+{
+       const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
+       const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
+       const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
+
+       return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
+}
+
+// Runs a framebuffer-based test with a vertex + geometry + fragment pipeline.
+//
+// The pipeline is built from the "vert", "geometry" and "fragment" binaries of the
+// context's binary collection. One descriptor is created per extraData entry and is
+// made visible to the geometry stage only. For every width in [1, maxWidth) the
+// function draws `width` points into a maxWidth x 1 color attachment, copies the
+// attachment into a buffer, and passes that buffer's host pointer to checkResult
+// together with the width and the subgroup size.
+//
+// context        - test context supplying the device, binaries and the log.
+// format         - format of the color attachment and of the read-back pixels.
+// extraData      - array of extraDataCount input descriptions (image or buffer each).
+// extraDataCount - number of entries in extraData; may be 0.
+// checkResult    - callback validating the read-back data of a single iteration.
+//
+// Returns pass("OK") when checkResult accepted every iteration; otherwise logs how
+// many iterations passed and returns fail("Failed!").
+tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
+       Context& context, VkFormat format, SSBOData* extraData,
+       deUint32 extraDataCount,
+       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
+{
+       // Width (in pixels) of the render target and upper bound of the tested draw sizes.
+       const deUint32                                                  maxWidth                                = 1024u;
+       vector<de::SharedPtr<BufferOrImage> >   inputBuffers                    (extraDataCount);
+       DescriptorSetLayoutBuilder                              layoutBuilder;
+       DescriptorPoolBuilder                                   poolBuilder;
+       DescriptorSetUpdateBuilder                              updateBuilder;
+       // Pool and set stay empty unless extraDataCount > 0 (see below).
+       Move <VkDescriptorPool>                                 descriptorPool;
+       Move <VkDescriptorSet>                                  descriptorSet;
+
+       const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule(context.getDeviceInterface(), context.getDevice(),
+                                                                                                                                               context.getBinaryCollection().get("vert"), 0u));
+       const Unique<VkShaderModule>                    geometryShaderModule    (createShaderModule(context.getDeviceInterface(), context.getDevice(),
+                                                                                                                                               context.getBinaryCollection().get("geometry"), 0u));
+       const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule(context.getDeviceInterface(), context.getDevice(),
+                                                                                                                                       context.getBinaryCollection().get("fragment"), 0u));
+       const Unique<VkRenderPass>                              renderPass                              (makeRenderPass(context, format));
+       // One tightly packed Vec4 per vertex.
+       const VkVertexInputBindingDescription   vertexInputBinding              =
+       {
+               0u,                                                                                     // binding;
+               static_cast<deUint32>(sizeof(tcu::Vec4)),       // stride;
+               VK_VERTEX_INPUT_RATE_VERTEX                                     // inputRate
+       };
+
+       const VkVertexInputAttributeDescription vertexInputAttribute    =
+       {
+               0u,                                                             // location
+               0u,                                                             // binding
+               VK_FORMAT_R32G32B32A32_SFLOAT,  // format
+               0u                                                              // offset
+       };
+
+       // Create and fill one image or uniform buffer per extraData entry.
+       for (deUint32 i = 0u; i < extraDataCount; i++)
+       {
+               if (extraData[i].isImage)
+               {
+                       inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
+               }
+               else
+               {
+                       vk::VkDeviceSize size = getFormatSizeInBytes(extraData[i].format) * extraData[i].numElements;
+                       inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
+               }
+               const Allocation& alloc = inputBuffers[i]->getAllocation();
+               initializeMemory(context, alloc, extraData[i]);
+       }
+
+       // Bindings are added in extraData order and exposed to the geometry stage only.
+       for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
+               layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
+
+       const Unique<VkDescriptorSetLayout>             descriptorSetLayout             (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
+
+       const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(context, *descriptorSetLayout));
+
+       // No tessellation modules are supplied (DE_NULL); input topology is a point list.
+       const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
+                                                                                                                                       VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
+                                                                                                                                       *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
+                                                                                                                                       *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
+
+       for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
+               poolBuilder.addType(inputBuffers[ndx]->getType());
+
+       // The descriptor pool and set are only created when there is something to bind.
+       if (extraDataCount > 0)
+       {
+               descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
+                                                       VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
+               descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
+       }
+
+       // Queue one descriptor write per input; the binding index equals the input's index.
+       for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
+       {
+               if (inputBuffers[buffersNdx]->isImage())
+               {
+                       VkDescriptorImageInfo info =
+                               makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
+                                                                               inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
+
+                       updateBuilder.writeSingle(*descriptorSet,
+                                                                               DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
+                                                                               inputBuffers[buffersNdx]->getType(), &info);
+               }
+               else
+               {
+                       VkDescriptorBufferInfo info =
+                               makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
+                                                                               0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
+
+                       updateBuilder.writeSingle(*descriptorSet,
+                                                                               DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
+                                                                               inputBuffers[buffersNdx]->getType(), &info);
+               }
+       }
+
+       updateBuilder.update(context.getDeviceInterface(), context.getDevice());
+
+       const Unique<VkCommandPool>                             cmdPool                                 (makeCommandPool(context));
+       const deUint32                                                  subgroupSize                    = getSubgroupSize(context);
+       const Unique<VkCommandBuffer>                   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
+       // Room for one Vec4 position per possible point (maxWidth of them).
+       const vk::VkDeviceSize                                  vertexBufferSize                = maxWidth * sizeof(tcu::Vec4);
+       Buffer                                                                  vertexBuffer                    (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
+       unsigned                                                                totalIterations                 = 0u;
+       unsigned                                                                failedIterations                = 0u;
+       // maxWidth x 1 render target that is also the source of the read-back copy.
+       Image                                                                   discardableImage                (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
+
+       {
+               const Allocation&               alloc                           = vertexBuffer.getAllocation();
+               std::vector<tcu::Vec4>  data                            (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
+               // The x range [-1, 1] is split into maxWidth equal columns.
+               const float                             pixelSize                       = 2.0f / static_cast<float>(maxWidth);
+               float                                   leftHandPosition        = -1.0f;
+
+               // Place vertex ndx at the horizontal centre of column ndx.
+               for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
+               {
+                       data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
+                       leftHandPosition += pixelSize;
+               }
+
+               deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
+               flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
+       }
+
+       // One iteration per draw size; note that width == maxWidth itself is not exercised.
+       for (deUint32 width = 1u; width < maxWidth; width++)
+       {
+               totalIterations++;
+               // Framebuffer, viewport, scissor and the read-back buffer are recreated every iteration.
+               const Unique<VkFramebuffer>     framebuffer                     (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
+               const VkViewport                        viewport                        = makeViewport(maxWidth, 1u);
+               const VkRect2D                          scissor                         = makeRect2D(maxWidth, 1u);
+               const vk::VkDeviceSize          imageResultSize         = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
+               Buffer                                          imageBufferResult       (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+               const VkDeviceSize                      vertexBufferOffset      = 0u;
+
+               // Re-initialize every input from its extraData description before each draw.
+               for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
+               {
+                       const Allocation& alloc = inputBuffers[ndx]->getAllocation();
+                       initializeMemory(context, alloc, extraData[ndx]);
+               }
+
+               // The same command buffer is recorded again on every iteration.
+               beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+               {
+                       context.getDeviceInterface().cmdSetViewport(
+                               *cmdBuffer, 0, 1, &viewport);
+
+                       context.getDeviceInterface().cmdSetScissor(
+                               *cmdBuffer, 0, 1, &scissor);
+
+                       beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
+
+                       context.getDeviceInterface().cmdBindPipeline(
+                               *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+                       if (extraDataCount > 0)
+                       {
+                               context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
+                                       VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
+                                       &descriptorSet.get(), 0u, DE_NULL);
+                       }
+
+                       context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
+
+                       // Draw the first `width` points of the vertex buffer, single instance.
+                       context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
+
+                       endRenderPass(context.getDeviceInterface(), *cmdBuffer);
+
+                       // Read back the whole maxWidth x 1 attachment, not just the drawn part.
+                       copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+
+                       endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+                       // Submit and wait on the fence before the results are read on the host.
+                       Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
+                       waitFence(context, fence);
+               }
+
+               {
+                       const Allocation& allocResult = imageBufferResult.getAllocation();
+                       invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
+
+                       // checkResult receives a single data pointer: the read-back pixels.
+                       std::vector<const void*> datas;
+                       datas.push_back(allocResult.getHostPtr());
+                       if (!checkResult(datas, width, subgroupSize))
+                               failedIterations++;
+               }
+       }
+
+       // Any failed iteration fails the whole test; the log reports passed / total iterations.
+       if (0 < failedIterations)
+       {
+               context.getTestContext().getLog()
+                               << TestLog::Message << (totalIterations - failedIterations) << " / "
+                               << totalIterations << " values passed" << TestLog::EndMessage;
+               return tcu::TestStatus::fail("Failed!");
+       }
+
+       return tcu::TestStatus::pass("OK");
+}
+
+
+tcu::TestStatus vkt::subgroups::allStages(
+       Context& context, VkFormat format, SSBOData* extraDatas,
+       deUint32 extraDatasCount,
+       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       const VkShaderStageFlags shaderStageTested)
+{
+       const deUint32                                  maxWidth                        = 1024u;
+       vector<VkShaderStageFlagBits>   stagesVector;
+       VkShaderStageFlags                              shaderStageRequired     = (VkShaderStageFlags)0ull;
+
+       Move<VkShaderModule>                    vertexShaderModule;
+       Move<VkShaderModule>                    teCtrlShaderModule;
+       Move<VkShaderModule>                    teEvalShaderModule;
+       Move<VkShaderModule>                    geometryShaderModule;
+       Move<VkShaderModule>                    fragmentShaderModule;
+
+       if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
+       {
+               stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
+       }
+       if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
+       {
+               stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+               shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
+               shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
+       }
+       if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
+       {
+               stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+               shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
+               shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
+       }
+       if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
+       {
+               stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
+               const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
+               shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
+       }
+       if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
+       {
+               const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
+               shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
+       }
+
+       const deUint32  stagesCount     = static_cast<deUint32>(stagesVector.size());
+       const string    vert            = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT)                                    ? "vert_noSubgroup"             : "vert";
+       const string    tesc            = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)              ? "tesc_noSubgroup"             : "tesc";
+       const string    tese            = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)   ? "tese_noSubgroup"             : "tese";
+
+       shaderStageRequired = shaderStageTested | shaderStageRequired;
+
+       vertexShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(vert), 0u);
+       if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
+       {
+               teCtrlShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tesc), 0u);
+               teEvalShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tese), 0u);
+       }
+       if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
+       {
+               if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
+               {
+                       // tessellation shaders output line primitives
+                       geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_lines"), 0u);
+               }
+               else
+               {
+                       // otherwise points are processed by geometry shader
+                       geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_points"), 0u);
+               }
+       }
+       if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
+               fragmentShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u);
+
+       std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
+
+       DescriptorSetLayoutBuilder layoutBuilder;
+       // The implicit result SSBO we use to store our outputs from the shader
+       for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
+       {
+               const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
+               const VkDeviceSize size = getFormatSizeInBytes(format) * shaderSize;
+               inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
+
+               layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
+       }
+
+       for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
+       {
+               const deUint32 datasNdx = ndx - stagesCount;
+               if (extraDatas[datasNdx].isImage)
+               {
+                       inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
+               }
+               else
+               {
+                       const vk::VkDeviceSize size = getFormatSizeInBytes(extraDatas[datasNdx].format) * extraDatas[datasNdx].numElements;
+                       inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
+               }
+
+               const Allocation& alloc = inputBuffers[ndx]->getAllocation();
+               initializeMemory(context, alloc, extraDatas[datasNdx]);
+
+               layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
+                                                               extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
+       }
+
+       const Unique<VkDescriptorSetLayout> descriptorSetLayout(
+               layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
+
+       const Unique<VkPipelineLayout> pipelineLayout(
+               makePipelineLayout(context, *descriptorSetLayout));
+
+       const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
+       const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
+                                                                               shaderStageRequired,
+                                                                               *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
+                                                                               *renderPass,
+                                                                               (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST));
+
+       DescriptorPoolBuilder poolBuilder;
+
+       for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
+       {
+               poolBuilder.addType(inputBuffers[ndx]->getType());
+       }
+
+       const Unique<VkDescriptorPool> descriptorPool(
+               poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
+                                                 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
+
+       // Create descriptor set
+       const Unique<VkDescriptorSet> descriptorSet(
+               makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
+
+       DescriptorSetUpdateBuilder updateBuilder;
+
+       for (deUint32 ndx = 0u; ndx < stagesCount; ndx++)
+       {
+               if (inputBuffers[ndx]->isImage())
+               {
+                       VkDescriptorImageInfo info =
+                               makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
+                                                                               inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
+
+                       updateBuilder.writeSingle(*descriptorSet,
+                                                                         DescriptorSetUpdateBuilder::Location::binding(getResultBinding(stagesVector[ndx])),
+                                                                         inputBuffers[ndx]->getType(), &info);
+               }
+               else
+               {
+                       VkDescriptorBufferInfo info =
+                               makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
+                                                                                0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
+
+                       updateBuilder.writeSingle(*descriptorSet,
+                                                                         DescriptorSetUpdateBuilder::Location::binding(getResultBinding(stagesVector[ndx])),
+                                                                         inputBuffers[ndx]->getType(), &info);
+               }
+       }
+
+       for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ndx++)
+       {
+               if (inputBuffers[ndx]->isImage())
+               {
+                       VkDescriptorImageInfo info =
+                               makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
+                                                                               inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
+
+                       updateBuilder.writeSingle(*descriptorSet,
+                                                                         DescriptorSetUpdateBuilder::Location::binding(extraDatas[ndx -stagesCount].binding),
+                                                                         inputBuffers[ndx]->getType(), &info);
+               }
+               else
+               {
+                       VkDescriptorBufferInfo info =
+                               makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
+                                                                                0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
+
+                       updateBuilder.writeSingle(*descriptorSet,
+                                                                         DescriptorSetUpdateBuilder::Location::binding(extraDatas[ndx - stagesCount].binding),
+                                                                         inputBuffers[ndx]->getType(), &info);
+               }
+       }
+       updateBuilder.update(context.getDeviceInterface(), context.getDevice());
+
+       {
+               const Unique<VkCommandPool>             cmdPool                                 (makeCommandPool(context));
+               const deUint32                                  subgroupSize                    = getSubgroupSize(context);
+               const Unique<VkCommandBuffer>   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
+               unsigned                                                totalIterations                 = 0u;
+               unsigned                                                failedIterations                = 0u;
+               Image                                                   resultImage                             (context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
+               const Unique<VkFramebuffer>             framebuffer                             (makeFramebuffer(context, *renderPass, resultImage.getImageView(), maxWidth, 1));
+               const VkViewport                                viewport                                = makeViewport(maxWidth, 1u);
+               const VkRect2D                                  scissor                                 = makeRect2D(maxWidth, 1u);
+               const vk::VkDeviceSize                  imageResultSize                 = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
+               Buffer                                                  imageBufferResult               (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+               const VkImageSubresourceRange   subresourceRange                =
+               {
+                       VK_IMAGE_ASPECT_COLOR_BIT,                                                                                      //VkImageAspectFlags    aspectMask
+                       0u,                                                                                                                                     //deUint32                              baseMipLevel
+                       1u,                                                                                                                                     //deUint32                              levelCount
+                       0u,                                                                                                                                     //deUint32                              baseArrayLayer
+                       1u                                                                                                                                      //deUint32                              layerCount
+               };
+
+               const VkImageMemoryBarrier              colorAttachmentBarrier  = makeImageMemoryBarrier(
+                       (VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+                       VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+                       resultImage.getImage(), subresourceRange);
+
+               for (deUint32 width = 1u; width < maxWidth; width++)
+               {
+                       for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
+                       {
+                               // re-init the data
+                               const Allocation& alloc = inputBuffers[ndx]->getAllocation();
+                               initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
+                       }
+
+                       totalIterations++;
+
+                       beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+
+                       context.getDeviceInterface().cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
+
+                       context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
+
+                       context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
+
+                       beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
+
+                       context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+                       context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
+                                       VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
+                                       &descriptorSet.get(), 0u, DE_NULL);
+
+                       context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1, 0, 0);
+
+                       endRenderPass(context.getDeviceInterface(), *cmdBuffer);
+
+                       copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+
+                       endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+
+                       Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
+                       waitFence(context, fence);
+
+                       for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
+                       {
+                               std::vector<const void*> datas;
+                               if (!inputBuffers[ndx]->isImage())
+                               {
+                                       const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
+                                       invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
+                                       // we always have our result data first
+                                       datas.push_back(resultAlloc.getHostPtr());
+                               }
+
+                               for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
+                               {
+                                       const deUint32 datasNdx = index - stagesCount;
+                                       if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
+                                       {
+                                               const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
+                                               invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
+                                               // we always have our result data first
+                                               datas.push_back(resultAlloc.getHostPtr());
+                                       }
+                               }
+
+                               if (!checkResult(datas, (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width , subgroupSize))
+                                       failedIterations++;
+                       }
+                       if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
+                       {
+                               std::vector<const void*> datas;
+                               const Allocation& resultAlloc = imageBufferResult.getAllocation();
+                               invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
+
+                               // we always have our result data first
+                               datas.push_back(resultAlloc.getHostPtr());
+
+                               for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
+                               {
+                                       const deUint32 datasNdx = index - stagesCount;
+                                       if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
+                                       {
+                                               const Allocation& alloc = inputBuffers[index]->getAllocation();
+                                               invalidateAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
+                                               // we always have our result data first
+                                               datas.push_back(alloc.getHostPtr());
+                                       }
+                               }
+
+                               if (!checkResult(datas, width , subgroupSize))
+                                       failedIterations++;
+                       }
+
+                       context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
+               }
+
+               if (0 < failedIterations)
+               {
+                       context.getTestContext().getLog()
+                                       << TestLog::Message << (totalIterations - failedIterations) << " / "
+                                       << totalIterations << " values passed" << TestLog::EndMessage;
+                       return tcu::TestStatus::fail("Failed!");
+               }
+       }
+
+       return tcu::TestStatus::pass("OK");
+}
+
+tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
+       SSBOData* extraData, deUint32 extraDataCount,
+       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
+{
+       const deUint32                                                  maxWidth                                = 1024u;
+       vector<de::SharedPtr<BufferOrImage> >   inputBuffers                    (extraDataCount);
+       DescriptorSetLayoutBuilder                              layoutBuilder;
+       const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule
+                                                                                                                                               (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
+       const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule
+                                                                                                                                               (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
+       const Unique<VkRenderPass>                              renderPass                              (makeRenderPass(context, format));
+
+       const VkVertexInputBindingDescription   vertexInputBinding              =
+       {
+               0u,                                                                                     // binding;
+               static_cast<deUint32>(sizeof(tcu::Vec4)),       // stride;
+               VK_VERTEX_INPUT_RATE_VERTEX                                     // inputRate
+       };
+
+       const VkVertexInputAttributeDescription vertexInputAttribute    =
+       {
+               0u,
+               0u,
+               VK_FORMAT_R32G32B32A32_SFLOAT,
+               0u
+       };
+
+       for (deUint32 i = 0u; i < extraDataCount; i++)
+       {
+               if (extraData[i].isImage)
+               {
+                       inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
+               }
+               else
+               {
+                       vk::VkDeviceSize size = getFormatSizeInBytes(extraData[i].format) * extraData[i].numElements;
+                       inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
+               }
+               const Allocation& alloc = inputBuffers[i]->getAllocation();
+               initializeMemory(context, alloc, extraData[i]);
+       }
+
+       for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
+               layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
+
+       const Unique<VkDescriptorSetLayout>             descriptorSetLayout             (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
+
+       const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(context, *descriptorSetLayout));
+
+       const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
+                                                                                                                                               VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
+                                                                                                                                               *vertexShaderModule, *fragmentShaderModule,
+                                                                                                                                               DE_NULL, DE_NULL, DE_NULL,
+                                                                                                                                               *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
+                                                                                                                                               &vertexInputBinding, &vertexInputAttribute, true, format));
+       DescriptorPoolBuilder                                   poolBuilder;
+       DescriptorSetUpdateBuilder                              updateBuilder;
+
+
+       for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
+               poolBuilder.addType(inputBuffers[ndx]->getType());
+
+       Move <VkDescriptorPool>                                 descriptorPool;
+       Move <VkDescriptorSet>                                  descriptorSet;
+
+       if (extraDataCount > 0)
+       {
+               descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
+                                                       VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
+               descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
+       }
+
+       for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
+       {
+               const Allocation& alloc = inputBuffers[ndx]->getAllocation();
+               initializeMemory(context, alloc, extraData[ndx]);
+       }
+
+       for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
+       {
+               if (inputBuffers[buffersNdx]->isImage())
+               {
+                       VkDescriptorImageInfo info =
+                               makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
+                                                                               inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
+
+                       updateBuilder.writeSingle(*descriptorSet,
+                                                                               DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
+                                                                               inputBuffers[buffersNdx]->getType(), &info);
+               }
+               else
+               {
+                       VkDescriptorBufferInfo info =
+                               makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
+                                                                               0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
+
+                       updateBuilder.writeSingle(*descriptorSet,
+                                                                               DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
+                                                                               inputBuffers[buffersNdx]->getType(), &info);
+               }
+       }
+       updateBuilder.update(context.getDeviceInterface(), context.getDevice());
+
+       const Unique<VkCommandPool>                             cmdPool                                 (makeCommandPool(context));
+
+       const deUint32                                                  subgroupSize                    = getSubgroupSize(context);
+
+       const Unique<VkCommandBuffer>                   cmdBuffer                               (makeCommandBuffer(context, *cmdPool));
+
+       const vk::VkDeviceSize                                  vertexBufferSize                = maxWidth * sizeof(tcu::Vec4);
+       Buffer                                                                  vertexBuffer                    (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
+
+       unsigned                                                                totalIterations                 = 0u;
+       unsigned                                                                failedIterations                = 0u;
+
+       Image                                                                   discardableImage                (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
+
+       {
+               const Allocation&               alloc                           = vertexBuffer.getAllocation();
+               std::vector<tcu::Vec4>  data                            (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
+               const float                             pixelSize                       = 2.0f / static_cast<float>(maxWidth);
+               float                                   leftHandPosition        = -1.0f;
+
+               for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
+               {
+                       data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
+                       leftHandPosition += pixelSize;
+               }
+
+               deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
+               flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
+       }
+
+       for (deUint32 width = 1u; width < maxWidth; width++)
+       {
+               totalIterations++;
+               const Unique<VkFramebuffer>     framebuffer                     (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
+               const VkViewport                        viewport                        = makeViewport(maxWidth, 1u);
+               const VkRect2D                          scissor                         = makeRect2D(maxWidth, 1u);
+               const vk::VkDeviceSize          imageResultSize         = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
+               Buffer                                          imageBufferResult       (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+               const VkDeviceSize                      vertexBufferOffset      = 0u;
+
+               for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
+               {
+                       const Allocation& alloc = inputBuffers[ndx]->getAllocation();
+                       initializeMemory(context, alloc, extraData[ndx]);
+               }
+
+               beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+               {
+                       context.getDeviceInterface().cmdSetViewport(
+                               *cmdBuffer, 0, 1, &viewport);
+
+                       context.getDeviceInterface().cmdSetScissor(
+                               *cmdBuffer, 0, 1, &scissor);
+
+                       beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
+
+                       context.getDeviceInterface().cmdBindPipeline(
+                               *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+                       if (extraDataCount > 0)
+                       {
+                               context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
+                                       VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
+                                       &descriptorSet.get(), 0u, DE_NULL);
+                       }
+
+                       context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
+
+                       context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
+
+                       endRenderPass(context.getDeviceInterface(), *cmdBuffer);
+
+                       copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+
+                       endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+                       Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
+                       waitFence(context, fence);
+               }
+
+               {
+                       const Allocation& allocResult = imageBufferResult.getAllocation();
+                       invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
+
+                       std::vector<const void*> datas;
+                       datas.push_back(allocResult.getHostPtr());
+                       if (!checkResult(datas, width, subgroupSize))
+                               failedIterations++;
+               }
+       }
+
+       if (0 < failedIterations)
+       {
+               context.getTestContext().getLog()
+                               << TestLog::Message << (totalIterations - failedIterations) << " / "
+                               << totalIterations << " values passed" << TestLog::EndMessage;
+               return tcu::TestStatus::fail("Failed!");
+       }
+
+       return tcu::TestStatus::pass("OK");
+}
+
+
+tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest    (Context& context, VkFormat format, SSBOData* extraDatas,
+       deUint32 extraDatasCount,
+       bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
+                                               deUint32 height, deUint32 subgroupSize))
+{
+       const Unique<VkShaderModule>                    vertexShaderModule              (createShaderModule
+                                                                                                                                               (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
+       const Unique<VkShaderModule>                    fragmentShaderModule    (createShaderModule
+                                                                                                                                               (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
+
+       std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
+
+       for (deUint32 i = 0; i < extraDatasCount; i++)
+       {
+               if (extraDatas[i].isImage)
+               {
+                       inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
+                                                                               static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
+               }
+               else
+               {
+                       vk::VkDeviceSize size =
+                               getFormatSizeInBytes(extraDatas[i].format) * extraDatas[i].numElements;
+                       inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
+               }
+
+               const Allocation& alloc = inputBuffers[i]->getAllocation();
+               initializeMemory(context, alloc, extraDatas[i]);
+       }
+
+       DescriptorSetLayoutBuilder layoutBuilder;
+
+       for (deUint32 i = 0; i < extraDatasCount; i++)
+       {
+               layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
+                                                                VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
+       }
+
+       const Unique<VkDescriptorSetLayout> descriptorSetLayout(
+               layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
+
+       const Unique<VkPipelineLayout> pipelineLayout(
+               makePipelineLayout(context, *descriptorSetLayout));
+
+       const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
+       const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
+                                                                         VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
+                                                                         *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+                                                                         DE_NULL, DE_NULL, true));
+
+       DescriptorPoolBuilder poolBuilder;
+
+       // To stop validation complaining, always add at least one type to pool.
+       poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+       for (deUint32 i = 0; i < extraDatasCount; i++)
+       {
+               poolBuilder.addType(inputBuffers[i]->getType());
+       }
+
+       Move<VkDescriptorPool> descriptorPool;
+       // Create descriptor set
+       Move<VkDescriptorSet> descriptorSet;
+
+       if (extraDatasCount > 0)
+       {
+               descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
+                                                                                                       VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
+
+               descriptorSet   = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
+       }
+
+       DescriptorSetUpdateBuilder updateBuilder;
+
+       for (deUint32 i = 0; i < extraDatasCount; i++)
+       {
+               if (inputBuffers[i]->isImage())
+               {
+                       VkDescriptorImageInfo info =
+                               makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
+                                                                               inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
+
+                       updateBuilder.writeSingle(*descriptorSet,
+                                                                         DescriptorSetUpdateBuilder::Location::binding(i),
+                                                                         inputBuffers[i]->getType(), &info);
+               }
+               else
+               {
+                       VkDescriptorBufferInfo info =
+                               makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
+                                                                                0ull, inputBuffers[i]->getAsBuffer()->getSize());
+
+                       updateBuilder.writeSingle(*descriptorSet,
+                                                                         DescriptorSetUpdateBuilder::Location::binding(i),
+                                                                         inputBuffers[i]->getType(), &info);
+               }
+       }
+
+       if (extraDatasCount > 0)
+               updateBuilder.update(context.getDeviceInterface(), context.getDevice());
+
+       const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
+
+       const deUint32 subgroupSize = getSubgroupSize(context);
+
+       const Unique<VkCommandBuffer> cmdBuffer(
+               makeCommandBuffer(context, *cmdPool));
+
+       unsigned totalIterations = 0;
+       unsigned failedIterations = 0;
+
+       for (deUint32 width = 8; width <= subgroupSize; width *= 2)
+       {
+               for (deUint32 height = 8; height <= subgroupSize; height *= 2)
+               {
+                       totalIterations++;
+
+                       // re-init the data
+                       for (deUint32 i = 0; i < extraDatasCount; i++)
+                       {
+                               const Allocation& alloc = inputBuffers[i]->getAllocation();
+                               initializeMemory(context, alloc, extraDatas[i]);
+                       }
+
+                       VkDeviceSize formatSize = getFormatSizeInBytes(format);
+                       const VkDeviceSize resultImageSizeInBytes =
+                               width * height * formatSize;
+
+                       Image resultImage(context, width, height, format,
+                                                         VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+                                                         VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
+
+                       Buffer resultBuffer(context, resultImageSizeInBytes,
+                                                               VK_IMAGE_USAGE_TRANSFER_DST_BIT);
+
+                       const Unique<VkFramebuffer> framebuffer(makeFramebuffer(context,
+                                                                                                       *renderPass, resultImage.getImageView(), width, height));
+
+                       beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+
+                       VkViewport viewport = makeViewport(width, height);
+
+                       context.getDeviceInterface().cmdSetViewport(
+                               *cmdBuffer, 0, 1, &viewport);
+
+                       VkRect2D scissor = {{0, 0}, {width, height}};
+
+                       context.getDeviceInterface().cmdSetScissor(
+                               *cmdBuffer, 0, 1, &scissor);
+
+                       beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
+
+                       context.getDeviceInterface().cmdBindPipeline(
+                               *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+                       if (extraDatasCount > 0)
+                       {
+                               context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
+                                               VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
+                                               &descriptorSet.get(), 0u, DE_NULL);
+                       }
+
+                       context.getDeviceInterface().cmdDraw(*cmdBuffer, 4, 1, 0, 0);
+
+                       endRenderPass(context.getDeviceInterface(), *cmdBuffer);
+
+                       copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+
+                       endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+
+                       Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
+
+                       waitFence(context, fence);
+
+                       std::vector<const void*> datas;
+                       {
+                               const Allocation& resultAlloc = resultBuffer.getAllocation();
+                               invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
+
+                               // we always have our result data first
+                               datas.push_back(resultAlloc.getHostPtr());
+                       }
+
+                       if (!checkResult(datas, width, height, subgroupSize))
+                       {
+                               failedIterations++;
+                       }
+
+                       context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
+               }
+       }
+
+       if (0 < failedIterations)
+       {
+               context.getTestContext().getLog()
+                               << TestLog::Message << (totalIterations - failedIterations) << " / "
+                               << totalIterations << " values passed" << TestLog::EndMessage;
+               return tcu::TestStatus::fail("Failed!");
+       }
+
+       return tcu::TestStatus::pass("OK");
+}
+
+// Runs the "comp" compute shader from the binary collection once for each
+// entry of a fixed table of local workgroup sizes, dispatching a 4x2x2 grid
+// of workgroups every time, and validates each run with checkResult.
+//
+// context      - test context supplying the device, device interface,
+//                shader binaries and the test log.
+// format       - format of one result element; its byte size scales the
+//                result buffer.
+// inputs       - array of inputsCount input descriptions; each one becomes
+//                an Image or a Buffer bound at binding (index + 1).
+// inputsCount  - number of entries in inputs.
+// checkResult  - callback receiving host pointers (result buffer first, then
+//                every non-image input in order), the workgroup counts, the
+//                local size that was run and the subgroup size; returns true
+//                when the run produced the expected data.
+//
+// Returns pass("OK") when every iteration passes; otherwise logs how many
+// iterations passed and returns fail("Failed!").
+tcu::TestStatus vkt::subgroups::makeComputeTest(
+       Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount,
+       bool (*checkResult)(std::vector<const void*> datas,
+                                               const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                                               deUint32 subgroupSize))
+{
+       VkDeviceSize elementSize = getFormatSizeInBytes(format);
+
+       // The result buffer holds maxSupportedSubgroupSize()^3 elements of the
+       // requested format.
+       const VkDeviceSize resultBufferSize = maxSupportedSubgroupSize() *
+                                                                                 maxSupportedSubgroupSize() *
+                                                                                 maxSupportedSubgroupSize();
+       const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
+
+       Buffer resultBuffer(
+               context, resultBufferSizeInBytes);
+
+       std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
+
+       // Create one resource per input description: a (numElements x 1) image
+       // when isImage is set, otherwise a buffer of numElements elements.
+       for (deUint32 i = 0; i < inputsCount; i++)
+       {
+               if (inputs[i].isImage)
+               {
+                       inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
+                                                                               static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
+               }
+               else
+               {
+                       vk::VkDeviceSize size =
+                               getFormatSizeInBytes(inputs[i].format) * inputs[i].numElements;
+                       inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
+               }
+
+               // NOTE(review): presumably fills the memory according to
+               // inputs[i].initializeType - initializeMemory() is defined elsewhere.
+               const Allocation& alloc = inputBuffers[i]->getAllocation();
+               initializeMemory(context, alloc, inputs[i]);
+       }
+
+       // Descriptor set layout: the result buffer is added first, followed by
+       // one binding per input, all visible to the compute stage.
+       DescriptorSetLayoutBuilder layoutBuilder;
+       layoutBuilder.addBinding(
+               resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
+
+       for (deUint32 i = 0; i < inputsCount; i++)
+       {
+               layoutBuilder.addBinding(
+                       inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
+       }
+
+       const Unique<VkDescriptorSetLayout> descriptorSetLayout(
+               layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
+
+       const Unique<VkShaderModule> shaderModule(
+               createShaderModule(context.getDeviceInterface(), context.getDevice(),
+                                                  context.getBinaryCollection().get("comp"), 0u));
+       const Unique<VkPipelineLayout> pipelineLayout(
+               makePipelineLayout(context, *descriptorSetLayout));
+
+       // Descriptor pool with one descriptor per resource and room for one set.
+       DescriptorPoolBuilder poolBuilder;
+
+       poolBuilder.addType(resultBuffer.getType());
+
+       for (deUint32 i = 0; i < inputsCount; i++)
+       {
+               poolBuilder.addType(inputBuffers[i]->getType());
+       }
+
+       const Unique<VkDescriptorPool> descriptorPool(
+               poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
+                                                 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
+
+       // Create descriptor set
+       const Unique<VkDescriptorSet> descriptorSet(
+               makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
+
+       DescriptorSetUpdateBuilder updateBuilder;
+
+       // Binding 0 is the result storage buffer.
+       const VkDescriptorBufferInfo resultDescriptorInfo =
+               makeDescriptorBufferInfo(
+                       resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
+
+       updateBuilder.writeSingle(*descriptorSet,
+                                                         DescriptorSetUpdateBuilder::Location::binding(0u),
+                                                         VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
+
+       // Input i is written to binding (i + 1), as an image or buffer descriptor.
+       for (deUint32 i = 0; i < inputsCount; i++)
+       {
+               if (inputBuffers[i]->isImage())
+               {
+                       VkDescriptorImageInfo info =
+                               makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
+                                                                               inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
+
+                       updateBuilder.writeSingle(*descriptorSet,
+                                                                         DescriptorSetUpdateBuilder::Location::binding(i + 1),
+                                                                         inputBuffers[i]->getType(), &info);
+               }
+               else
+               {
+                       vk::VkDeviceSize size =
+                               getFormatSizeInBytes(inputs[i].format) * inputs[i].numElements;
+                       VkDescriptorBufferInfo info =
+                               makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
+
+                       updateBuilder.writeSingle(*descriptorSet,
+                                                                         DescriptorSetUpdateBuilder::Location::binding(i + 1),
+                                                                         inputBuffers[i]->getType(), &info);
+               }
+       }
+
+       updateBuilder.update(context.getDeviceInterface(), context.getDevice());
+
+       const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
+
+       unsigned totalIterations = 0;
+       unsigned failedIterations = 0;
+
+       const deUint32 subgroupSize = getSubgroupSize(context);
+
+       const Unique<VkCommandBuffer> cmdBuffer(
+               makeCommandBuffer(context, *cmdPool));
+
+       // Every iteration dispatches the same grid of workgroups.
+       const deUint32 numWorkgroups[3] = {4, 2, 2};
+
+       // Local sizes to run. The trailing {1, 1, 1} entry is never dispatched;
+       // it only keeps localSizesToTest[index + 1] valid on the last iteration.
+       const deUint32 localSizesToTestCount = 15;
+       deUint32 localSizesToTest[localSizesToTestCount][3] =
+       {
+               {1, 1, 1},
+               {32, 4, 1},
+               {32, 1, 4},
+               {1, 32, 4},
+               {1, 4, 32},
+               {4, 1, 32},
+               {4, 32, 1},
+               {subgroupSize, 1, 1},
+               {1, subgroupSize, 1},
+               {1, 1, subgroupSize},
+               {3, 5, 7},
+               {128, 1, 1},
+               {1, 128, 1},
+               {1, 1, 64},
+               {1, 1, 1} // Isn't used, just here to make double buffering checks easier
+       };
+
+       // Pipeline for the first local size; each iteration builds the pipeline
+       // for the following one.
+       Move<VkPipeline> lastPipeline(
+               makeComputePipeline(context, *pipelineLayout, *shaderModule,
+                                                       localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2]));
+
+       for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
+       {
+               const deUint32 nextX = localSizesToTest[index + 1][0];
+               const deUint32 nextY = localSizesToTest[index + 1][1];
+               const deUint32 nextZ = localSizesToTest[index + 1][2];
+
+               // we are running one test
+               totalIterations++;
+
+               beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+
+               context.getDeviceInterface().cmdBindPipeline(
+                       *cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *lastPipeline);
+
+               context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
+                               VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
+                               &descriptorSet.get(), 0u, DE_NULL);
+
+               context.getDeviceInterface().cmdDispatch(*cmdBuffer,
+                               numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
+
+               endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+
+               Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
+
+               // The pipeline for the next local size is built after submission and
+               // before waiting on the fence (presumably so that pipeline creation
+               // overlaps with the dispatch in flight).
+               Move<VkPipeline> nextPipeline(
+                       makeComputePipeline(context, *pipelineLayout, *shaderModule,
+                                                               nextX, nextY, nextZ));
+
+               waitFence(context, fence);
+
+               std::vector<const void*> datas;
+
+               {
+                       const Allocation& resultAlloc = resultBuffer.getAllocation();
+                       invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
+
+                       // we always have our result data first
+                       datas.push_back(resultAlloc.getHostPtr());
+               }
+
+               for (deUint32 i = 0; i < inputsCount; i++)
+               {
+                       if (!inputBuffers[i]->isImage())
+                       {
+                               const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
+                               invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
+
+                               // input buffers follow the result data, in input order; images are skipped
+                               datas.push_back(resultAlloc.getHostPtr());
+                       }
+               }
+
+               // localSizesToTest[index] is the local size lastPipeline was built with.
+               if (!checkResult(datas, numWorkgroups, localSizesToTest[index], subgroupSize))
+               {
+                       failedIterations++;
+               }
+
+               // The command buffer is recorded again at the top of the next iteration.
+               context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
+
+               // The pipeline built above is the one the next iteration dispatches.
+               lastPipeline = nextPipeline;
+       }
+
+       if (0 < failedIterations)
+       {
+               context.getTestContext().getLog()
+                               << TestLog::Message << (totalIterations - failedIterations) << " / "
+                               << totalIterations << " values passed" << TestLog::EndMessage;
+               return tcu::TestStatus::fail("Failed!");
+       }
+
+       return tcu::TestStatus::pass("OK");
+}
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsTestsUtils.hpp b/external/openglcts/modules/common/subgroups/glcSubgroupsTestsUtils.hpp
new file mode 100644 (file)
index 0000000..d9edae6
--- /dev/null
@@ -0,0 +1,167 @@
+#ifndef _VKTSUBGROUPSTESTSUTILS_HPP
+#define _VKTSUBGROUPSTESTSUTILS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups tests utility classes
+ */ /*--------------------------------------------------------------------*/
+
+#include "vkBuilderUtil.hpp"
+#include "vkDefs.hpp"
+#include "vkDeviceUtil.hpp"
+#include "vkMemUtil.hpp"
+#include "vkPlatform.hpp"
+#include "vkPrograms.hpp"
+#include "vkQueryUtil.hpp"
+#include "vkRef.hpp"
+#include "vkRefUtil.hpp"
+#include "vkStrUtil.hpp"
+#include "vkTypeUtil.hpp"
+#include "vktTestCase.hpp"
+#include "vktTestCaseUtil.hpp"
+
+#include "tcuFormatUtil.hpp"
+#include "tcuTestLog.hpp"
+#include "tcuVectorUtil.hpp"
+
+#include "gluShaderUtil.hpp"
+
+#include "deSharedPtr.hpp"
+#include "deUniquePtr.hpp"
+
+#include <string>
+
+namespace vkt
+{
+namespace subgroups
+{
+// A struct to represent input data to a shader
+struct SSBOData
+{
+       SSBOData() :
+               initializeType  (InitializeNone),
+               format                  (vk::VK_FORMAT_UNDEFINED),
+               numElements             (0),
+               isImage                 (false),
+               binding                 (0u),
+               stages                  ((vk::VkShaderStageFlagBits)0u)
+       {}
+
+       enum InputDataInitializeType
+       {
+               InitializeNone = 0,
+               InitializeNonZero,
+               InitializeZero,
+       } initializeType;
+
+       vk::VkFormat                            format;
+       vk::VkDeviceSize                        numElements;
+       bool                                            isImage;
+       deUint32                                        binding;
+       vk::VkShaderStageFlagBits       stages;
+};
+
+// NOTE(review): presumably returns a GLSL snippet that emulates ballot
+// through shared memory - confirm in the matching .cpp.
+std::string getSharedMemoryBallotHelper();
+
+// Subgroup size for the device behind context; makeComputeTest() forwards
+// the value to its checkResult callback.
+deUint32 getSubgroupSize(Context& context);
+
+// Upper bound on the subgroup size; used to size the compute result buffer
+// and the data[] arrays declared in the test shaders.
+vk::VkDeviceSize maxSupportedSubgroupSize();
+
+// Name lookups. NOTE(review): judging by the signatures these map a stage
+// mask / feature bit to a printable name - confirm in the .cpp.
+std::string getShaderStageName(vk::VkShaderStageFlags stage);
+
+std::string getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit);
+
+void addNoSubgroupShader (vk::SourceCollections& programCollection);
+
+std::string getVertShaderForStage(vk::VkShaderStageFlags stage);//TODO
+
+// Capability queries. NOTE(review): judging by the names these report what
+// the device behind context (or a given stage/format) supports; the
+// definitions live in the .cpp.
+bool isSubgroupSupported(Context& context);
+
+bool areSubgroupOperationsSupportedForStage(
+       Context& context, vk::VkShaderStageFlags stage);
+
+bool areSubgroupOperationsRequiredForStage(vk::VkShaderStageFlags stage);
+
+bool isSubgroupFeatureSupportedForDevice(Context& context, vk::VkSubgroupFeatureFlagBits bit);
+
+bool isFragmentSSBOSupportedForDevice(Context& context);
+
+bool isVertexSSBOSupportedForDevice(Context& context);
+
+bool isDoubleSupportedForDevice(Context& context);
+
+bool isDoubleFormat(vk::VkFormat format);
+
+// Spelling of format as a GLSL type; the vote tests splice the returned
+// string into shader source as a type name and constructor.
+std::string getFormatNameForGLSL(vk::VkFormat format);
+
+// Two overloads: one takes a GLSL template with ShaderBuildOptions, the other
+// a SPIR-V assembly template with SpirVAsmBuildOptions.
+void addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options, vk::GlslSourceCollection& collection);
+void addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection);
+
+// Per-stage shader helpers operating on programCollection. The vote tests
+// call these for the stages that are not the one under test.
+// NOTE(review): presumably each adds a default/pass-through shader for its
+// stage - confirm in the matching .cpp.
+void setVertexShaderFrameBuffer (vk::SourceCollections& programCollection);
+
+void setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection);
+
+void setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection);
+
+void setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection);
+
+// Reference checks shared by the tests; the vote tests call both with
+// ref = 0x1F. NOTE(review): datas[0] is presumably the result data (the test
+// runners always push it first) compared against ref - confirm in the .cpp.
+bool check(std::vector<const void*> datas,
+       deUint32 width, deUint32 ref);
+
+bool checkCompute(std::vector<const void*> datas,
+       const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+       deUint32 ref);
+
+// Test runners. Each takes the result format, an array of extraDataCount
+// (or inputsCount) SSBOData descriptions and a checkResult callback that
+// returns true when the data passed in datas is as expected, and returns the
+// resulting test status.
+
+// shaderStage defaults to VK_SHADER_STAGE_ALL_GRAPHICS.
+tcu::TestStatus makeTessellationEvaluationFrameBufferTest(Context& context, vk::VkFormat format,
+       SSBOData* extraData, deUint32 extraDataCount,
+       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       const vk::VkShaderStageFlags shaderStage = vk::VK_SHADER_STAGE_ALL_GRAPHICS);
+
+tcu::TestStatus makeGeometryFrameBufferTest(Context& context, vk::VkFormat format, SSBOData* extraData,
+       deUint32 extraDataCount,
+       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize));
+
+tcu::TestStatus allStages(Context& context, vk::VkFormat format,
+       SSBOData* extraData, deUint32 extraDataCount,
+       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       const vk::VkShaderStageFlags shaderStage);
+
+tcu::TestStatus makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
+       SSBOData* extraData, deUint32 extraDataCount,
+       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize));
+
+// Unlike the other framebuffer runners, the fragment callback also receives
+// the height of the rendered image.
+tcu::TestStatus makeFragmentFrameBufferTest(Context& context, vk::VkFormat format,
+       SSBOData* extraData, deUint32 extraDataCount,
+       bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
+                                                                        deUint32 height, deUint32 subgroupSize));
+
+// The compute callback receives the workgroup counts and the local size that
+// was dispatched instead of an image width.
+tcu::TestStatus makeComputeTest(
+       Context& context, vk::VkFormat format, SSBOData* inputs,
+       deUint32 inputsCount,
+       bool (*checkResult)(std::vector<const void*> datas,
+               const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+               deUint32 subgroupSize));
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSTESTSUTILS_HPP
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsVoteTests.cpp b/external/openglcts/modules/common/subgroups/glcSubgroupsVoteTests.cpp
new file mode 100755 (executable)
index 0000000..f201dd2
--- /dev/null
@@ -0,0 +1,801 @@
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsVoteTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Subgroup vote operations covered by these tests. getOpTypeName() maps each
+// operation to the GLSL built-in named in the trailing comment.
+enum OpType
+{
+       OPTYPE_ALL = 0,         // subgroupAll
+       OPTYPE_ANY,                     // subgroupAny
+       OPTYPE_ALLEQUAL,        // subgroupAllEqual
+       OPTYPE_LAST                     // not an operation; getOpTypeName() treats it as unsupported
+};
+
+// Result check for the vertex-pipeline stages: defers to subgroups::check()
+// with 0x1F as the reference value. The trailing (subgroup size) argument of
+// the callback signature is not used.
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+                                                                         deUint32 width, deUint32)
+{
+       const deUint32 expectedResult = 0x1F;
+
+       return vkt::subgroups::check(datas, width, expectedResult);
+}
+
+static bool checkFragmentPipelineStages(std::vector<const void*> datas,
+                                                                         deUint32 width, deUint32 height, deUint32)
+{
+       const deUint32* data =
+               reinterpret_cast<const deUint32*>(datas[0]);
+       for (deUint32 x = 0u; x < width; ++x)
+       {
+               for (deUint32 y = 0u; y < height; ++y)
+               {
+                       const deUint32 ndx = (x * height + y);
+                       deUint32 val = data[ndx] & 0x1F;
+
+                       if (data[ndx] & 0x40) //Helper fragment shader invocation was executed
+                       {
+                               if(val != 0x1F)
+                                       return false;
+                       }
+                       else //Helper fragment shader invocation was not executed yet
+                       {
+                               if (val != 0x1E)
+                                       return false;
+                       }
+               }
+       }
+       return true;
+}
+
+static bool checkCompute(std::vector<const void*> datas,
+                                                const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                                                deUint32)
+{
+       return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 0x1F);
+}
+
+std::string getOpTypeName(int opType)
+{
+       switch (opType)
+       {
+               default:
+                       DE_FATAL("Unsupported op type");
+                       return "";
+               case OPTYPE_ALL:
+                       return "subgroupAll";
+               case OPTYPE_ANY:
+                       return "subgroupAny";
+               case OPTYPE_ALLEQUAL:
+                       return "subgroupAllEqual";
+       }
+}
+
+// Parameters of one vote test case.
+struct CaseDefinition
+{
+       // One of the OpType values; selects the built-in exercised by the shader.
+       int                                     opType;
+       // Shader stage under test; compared against single VK_SHADER_STAGE_*_BIT values.
+       VkShaderStageFlags      shaderStage;
+       // Data format; spelled into the shader through getFormatNameForGLSL().
+       VkFormat                        format;
+};
+
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       const vk::ShaderBuildOptions buildOptions       (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       const bool formatIsBoolean =
+               VK_FORMAT_R8_USCALED == caseDef.format || VK_FORMAT_R8G8_USCALED == caseDef.format || VK_FORMAT_R8G8B8_USCALED == caseDef.format || VK_FORMAT_R8G8B8A8_USCALED == caseDef.format;
+
+       if (VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
+               subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+       if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
+       {
+               const string vertex     = "#version 450\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  vec2 uv = vec2(float(gl_VertexIndex & 1), float((gl_VertexIndex >> 1) & 1));\n"
+                       "  gl_Position = vec4(uv * 4.0f -2.0f, 0.0f, 1.0f);\n"
+                       "  gl_PointSize = 1.0f;\n"
+                       "}\n";
+               programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+       else if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+               subgroups::setVertexShaderFrameBuffer(programCollection);
+
+       const string source =
+               (OPTYPE_ALL == caseDef.opType) ?
+                       "  result = " + getOpTypeName(caseDef.opType) +
+                       "(true) ? 0x1 : 0;\n"
+                       "  result |= " + getOpTypeName(caseDef.opType) +
+                       "(false) ? 0 : 0x1A;\n"
+                       "  result |= 0x4;\n"
+               : (OPTYPE_ANY == caseDef.opType) ?
+                               "  result = " + getOpTypeName(caseDef.opType) +
+                               "(true) ? 0x1 : 0;\n"
+                               "  result |= " + getOpTypeName(caseDef.opType) +
+                               "(false) ? 0 : 0x1A;\n"
+                               "  result |= 0x4;\n"
+               : (OPTYPE_ALLEQUAL == caseDef.opType) ?
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " valueEqual = " + subgroups::getFormatNameForGLSL(caseDef.format) + "(1.25 * float(data[gl_SubgroupInvocationID]) + 5.0);\n" +
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " valueNoEqual = " + subgroups::getFormatNameForGLSL(caseDef.format) + (formatIsBoolean ? "(subgroupElect())\n;" : "(12.0 * float(data[gl_SubgroupInvocationID]) + gl_SubgroupInvocationID);\n") +
+                               "  result = " + getOpTypeName(caseDef.opType) + "("
+                               + subgroups::getFormatNameForGLSL(caseDef.format) + "(1)) ? 0x1 : 0;\n"
+                               "  result |= " + getOpTypeName(caseDef.opType) +
+                               "(gl_SubgroupInvocationID) ? 0 : 0x2;\n"
+                               "  result |= " + getOpTypeName(caseDef.opType) +
+                               "(data[0]) ? 0x4 : 0;\n"
+                               "  result |= " + getOpTypeName(caseDef.opType) +
+                               "(valueEqual) ? 0x8 : 0x0;\n"
+                               "  result |= " + getOpTypeName(caseDef.opType) +
+                               "(valueNoEqual) ? 0x0 : 0x10;\n"
+                               "  if (subgroupElect()) result |= 0x2 | 0x10;\n"
+               : "";
+
+       if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream vertexSrc;
+               vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_vote: enable\n"
+                       << "layout(location = 0) out vec4 out_color;\n"
+                       << "layout(location = 0) in highp vec4 in_position;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uint result;\n"
+                       << source
+                       << "  out_color.r = float(result);\n"
+                       << "  gl_Position = in_position;\n"
+                       << "  gl_PointSize = 1.0f;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("vert") << glu::VertexSource(vertexSrc.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+       else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream geometry;
+
+               geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_vote: enable\n"
+                       << "layout(points) in;\n"
+                       << "layout(points, max_vertices = 1) out;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uint result;\n"
+                       << source
+                       << "  out_color = float(result);\n"
+                       << "  gl_Position = gl_in[0].gl_Position;\n"
+                       << "  EmitVertex();\n"
+                       << "  EndPrimitive();\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("geometry")
+                       << glu::GeometrySource(geometry.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream controlSource;
+               controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_vote: enable\n"
+                       << "layout(vertices = 2) out;\n"
+                       << "layout(location = 0) out float out_color[];\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uint result;\n"
+                       << "  if (gl_InvocationID == 0)\n"
+                       <<"  {\n"
+                       << "    gl_TessLevelOuter[0] = 1.0f;\n"
+                       << "    gl_TessLevelOuter[1] = 1.0f;\n"
+                       << "  }\n"
+                       << source
+                       << "  out_color[gl_InvocationID] = float(result);"
+                       << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                       << "}\n";
+
+               programCollection.glslSources.add("tesc")
+                       << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+               subgroups::setTesEvalShaderFrameBuffer(programCollection);
+       }
+       else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+       {
+               std::ostringstream evaluationSource;
+               evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                       << "#extension GL_KHR_shader_subgroup_vote: enable\n"
+                       << "#extension GL_EXT_tessellation_shader : require\n"
+                       << "layout(isolines, equal_spacing, ccw ) in;\n"
+                       << "layout(location = 0) out float out_color;\n"
+                       << "layout(set = 0, binding = 0) uniform Buffer1\n"
+                       << "{\n"
+                       << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+                       << "};\n"
+                       << "\n"
+                       << "void main (void)\n"
+                       << "{\n"
+                       << "  uint result;\n"
+                       << "  highp uint offset = gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5);\n"
+                       << source
+                       << "  out_color = float(result);\n"
+                       << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                       << "}\n";
+
+               subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+               programCollection.glslSources.add("tese")
+                               << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+       }
+       else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
+       {
+               const string sourceFragment =
+               (OPTYPE_ALL == caseDef.opType) ?
+                       "  result |= " + getOpTypeName(caseDef.opType) +
+                       "(!gl_HelperInvocation) ? 0x0 : 0x1;\n"
+                       "  result |= " + getOpTypeName(caseDef.opType) +
+                       "(false) ? 0 : 0x1A;\n"
+                       "  result |= 0x4;\n"
+               : (OPTYPE_ANY == caseDef.opType) ?
+                               "  result |= " + getOpTypeName(caseDef.opType) +
+                               "(gl_HelperInvocation) ? 0x1 : 0x0;\n"
+                               "  result |= " + getOpTypeName(caseDef.opType) +
+                               "(false) ? 0 : 0x1A;\n"
+                               "  result |= 0x4;\n"
+               : (OPTYPE_ALLEQUAL == caseDef.opType) ?
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " valueEqual = " + subgroups::getFormatNameForGLSL(caseDef.format) + "(1.25 * float(data[gl_SubgroupInvocationID]) + 5.0);\n" +
+                               "  " + subgroups::getFormatNameForGLSL(caseDef.format) + " valueNoEqual = " + subgroups::getFormatNameForGLSL(caseDef.format) + (formatIsBoolean ? "(subgroupElect());\n" : "(12.0 * float(data[gl_SubgroupInvocationID]) + int(gl_FragCoord.x*gl_SubgroupInvocationID));\n") +
+                               "  result |= " + getOpTypeName(caseDef.opType) + "("
+                               + subgroups::getFormatNameForGLSL(caseDef.format) + "(1)) ? 0x10 : 0;\n"
+                               "  result |= " + getOpTypeName(caseDef.opType) +
+                               "(gl_SubgroupInvocationID) ? 0 : 0x2;\n"
+                               "  result |= " + getOpTypeName(caseDef.opType) +
+                               "(data[0]) ? 0x4 : 0;\n"
+                               "  result |= " + getOpTypeName(caseDef.opType) +
+                               "(valueEqual) ? 0x8 : 0x0;\n"
+                               "  result |= " + getOpTypeName(caseDef.opType) +
+                               "(gl_HelperInvocation) ? 0x0 : 0x1;\n"
+                               "  if (subgroupElect()) result |= 0x2 | 0x10;\n"
+               : "";
+
+               std::ostringstream fragmentSource;
+               fragmentSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+               << "#extension GL_KHR_shader_subgroup_vote: enable\n"
+               << "layout(location = 0) out uint out_color;\n"
+               << "layout(set = 0, binding = 0) uniform Buffer1\n"
+               << "{\n"
+               << "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+               << "};\n"
+               << ""
+               << "void main()\n"
+               << "{\n"
+               << "  uint result = 0u;\n"
+               << "  if (dFdx(gl_SubgroupInvocationID * gl_FragCoord.x * gl_FragCoord.y) - dFdy(gl_SubgroupInvocationID * gl_FragCoord.x * gl_FragCoord.y) > 0.0f)\n"
+               << "  {\n"
+               << "    result |= 0x20;\n" // to be sure that compiler doesn't remove dFdx and dFdy executions
+               << "  }\n"
+               << "  bool helper = subgroupAny(gl_HelperInvocation);\n"
+               << "  if (helper)\n"
+               << "  {\n"
+               << "    result |= 0x40;\n"
+               << "  }\n"
+               << sourceFragment
+               << "  out_color = result;\n"
+               << "}\n";
+
+               programCollection.glslSources.add("fragment")
+                       << glu::FragmentSource(fragmentSource.str())<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+       }
+       else
+       {
+               DE_FATAL("Unsupported shader stage");
+       }
+}
+
+// Registers the GLSL sources for the SSBO-based vote cases.
+//
+// - VK_SHADER_STAGE_COMPUTE_BIT: adds a single "comp" shader.
+// - any other stage value: adds "vert", "tesc", "tese" and "fragment" shaders, the
+//   geometry shader(s) generated from a ${TOPOLOGY} template, and finally calls
+//   subgroups::addNoSubgroupShader().
+//
+// Each shader ORs a set of flag bits into its result depending on the outcome of the
+// vote operation selected by caseDef.opType. All sources are built with SPIR-V 1.3
+// build options.
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+       const bool formatIsBoolean =
+               (VK_FORMAT_R8_USCALED == caseDef.format) ||
+               (VK_FORMAT_R8G8_USCALED == caseDef.format) ||
+               (VK_FORMAT_R8G8B8_USCALED == caseDef.format) ||
+               (VK_FORMAT_R8G8B8A8_USCALED == caseDef.format);
+
+       // Only query the operation name for the operations that actually emit code below.
+       const bool opEmitsCode =
+               (OPTYPE_ALL == caseDef.opType) || (OPTYPE_ANY == caseDef.opType) || (OPTYPE_ALLEQUAL == caseDef.opType);
+       const string opName = opEmitsCode ? getOpTypeName(caseDef.opType) : string();
+
+       const string formatString = subgroups::getFormatNameForGLSL(caseDef.format);
+       const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+
+       // First two lines of every generated shader.
+       const string preamble =
+               "#version 450\n"
+               "#extension GL_KHR_shader_subgroup_vote: enable\n";
+
+       // Pieces of the subgroupAllEqual() snippets that are identical for every stage.
+       const string valueEqualDecl =
+               "  " + formatString + " valueEqual = " + formatString + "(1.25 * float(data[gl_SubgroupInvocationID]) + 5.0);\n";
+       const string valueNoEqualHead =
+               "  " + formatString + " valueNoEqual = " + formatString;
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               // Per-operation statements placed at the end of main().
+               string voteCode;
+
+               switch (caseDef.opType)
+               {
+                       case OPTYPE_ALL:
+                               voteCode =
+                                       "  result[offset] = " + opName + "(true) ? 0x1 : 0;\n"
+                                       "  result[offset] |= " + opName + "(false) ? 0 : 0x1A;\n"
+                                       "  result[offset] |= " + opName + "(data[gl_SubgroupInvocationID] > 0) ? 0x4 : 0;\n";
+                               break;
+
+                       case OPTYPE_ANY:
+                               voteCode =
+                                       "  result[offset] = " + opName + "(true) ? 0x1 : 0;\n"
+                                       "  result[offset] |= " + opName + "(false) ? 0 : 0x1A;\n"
+                                       "  result[offset] |= " + opName + "(data[gl_SubgroupInvocationID] == data[0]) ? 0x4 : 0;\n";
+                               break;
+
+                       case OPTYPE_ALLEQUAL:
+                               voteCode =
+                                       valueEqualDecl +
+                                       valueNoEqualHead + (formatIsBoolean ? "(subgroupElect());\n" : "(12.0 * float(data[gl_SubgroupInvocationID]) + offset);\n") +
+                                       "  result[offset] = " + opName + "(" + formatString + "(1)) ? 0x1 : 0x0;\n"
+                                       "  result[offset] |= " + opName + "(gl_SubgroupInvocationID) ? 0x0 : 0x2;\n"
+                                       "  result[offset] |= " + opName + "(data[0]) ? 0x4 : 0x0;\n"
+                                       "  result[offset] |= " + opName + "(valueEqual) ? 0x8 : 0x0;\n"
+                                       "  result[offset] |= " + opName + "(valueNoEqual) ? 0x0 : 0x10;\n"
+                                       "  if (subgroupElect()) result[offset] |= 0x2 | 0x10;\n";
+                               break;
+
+                       default:
+                               // Other values add no statements.
+                               break;
+               }
+
+               const string computeSource =
+                       preamble +
+                       "layout (local_size_x_id = 0, local_size_y_id = 1, "
+                       "local_size_z_id = 2) in;\n"
+                       "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                       "{\n"
+                       "  uint result[];\n"
+                       "};\n"
+                       "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
+                       "{\n"
+                       "  " + formatString + " data[];\n"
+                       "};\n"
+                       "\n"
+                       "void main (void)\n"
+                       "{\n"
+                       "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+                       "  highp uint offset = globalSize.x * ((globalSize.y * "
+                       "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+                       "gl_GlobalInvocationID.x;\n"
+                       + voteCode +
+                       "}\n";
+
+               programCollection.glslSources.add("comp")
+                       << glu::ComputeSource(computeSource) << buildOptions;
+               return;
+       }
+
+       // Statements for the stages writing to result[offset] (vertex, tessellation, geometry).
+       string source;
+       // Statements for the fragment stage, which writes to its "result" colour output.
+       string sourceFragment;
+
+       switch (caseDef.opType)
+       {
+               case OPTYPE_ALL:
+               case OPTYPE_ANY:
+                       // Both operations use the same statements; only the op name differs.
+                       source =
+                               "  result[offset] = " + opName + "(true) ? 0x1 : 0;\n"
+                               "  result[offset] |= " + opName + "(false) ? 0 : 0x1A;\n"
+                               "  result[offset] |= 0x4;\n";
+                       sourceFragment =
+                               "  result = " + opName + "(true) ? 0x1 : 0;\n"
+                               "  result |= " + opName + "(false) ? 0 : 0x1A;\n"
+                               "  result |= 0x4;\n";
+                       break;
+
+               case OPTYPE_ALLEQUAL:
+                       source =
+                               valueEqualDecl +
+                               valueNoEqualHead + (formatIsBoolean ? "(subgroupElect());\n" : "(12.0 * float(data[gl_SubgroupInvocationID]) + gl_SubgroupInvocationID);\n") +
+                               "  result[offset] = " + opName + "(" + formatString + "(1)) ? 0x1 : 0;\n"
+                               "  result[offset] |= " + opName + "(gl_SubgroupInvocationID) ? 0 : 0x2;\n"
+                               "  result[offset] |= " + opName + "(data[0]) ? 0x4 : 0;\n"
+                               "  result[offset] |= " + opName + "(valueEqual) ? 0x8 : 0x0;\n"
+                               "  result[offset] |= " + opName + "(valueNoEqual) ? 0x0 : 0x10;\n"
+                               "  if (subgroupElect()) result[offset] |= 0x2 | 0x10;\n";
+                       sourceFragment =
+                               valueEqualDecl +
+                               valueNoEqualHead + (formatIsBoolean ? "(subgroupElect());\n" : "(12.0 * float(data[gl_SubgroupInvocationID]) + int(gl_FragCoord.x*gl_SubgroupInvocationID));\n") +
+                               "  result = " + opName + "(" + formatString + "(1)) ? 0x1 : 0;\n"
+                               "  result |= " + opName + "(gl_SubgroupInvocationID) ? 0 : 0x2;\n"
+                               "  result |= " + opName + "(data[0]) ? 0x4 : 0;\n"
+                               "  result |= " + opName + "(valueEqual) ? 0x8 : 0x0;\n"
+                               "  result |= " + opName + "(valueNoEqual) ? 0x0 : 0x10;\n"
+                               "  if (subgroupElect()) result |= 0x2 | 0x10;\n";
+                       break;
+
+               default:
+                       // Other values leave both snippets empty.
+                       break;
+       }
+
+       // Body of the Buffer1 result block and the complete read-only Buffer2 input block
+       // (binding 4) that are repeated in the graphics-stage shaders.
+       const string resultBlockBody =
+               "{\n"
+               "  uint result[];\n"
+               "};\n";
+       const string inputBufferDecl =
+               "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+               "{\n"
+               "  " + formatString + " data[];\n"
+               "};\n";
+
+       // Vertex stage: results go to binding 0, indexed by gl_VertexIndex.
+       const string vertexSource =
+               preamble +
+               "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+               + resultBlockBody + inputBufferDecl +
+               "\n"
+               "void main (void)\n"
+               "{\n"
+               "  highp uint offset = gl_VertexIndex;\n"
+               + source +
+               "  float pixelSize = 2.0f/1024.0f;\n"
+               "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+               "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+               "  gl_PointSize = 1.0f;\n"
+               "}\n";
+       programCollection.glslSources.add("vert")
+               << glu::VertexSource(vertexSource) << buildOptions;
+
+       // Tessellation control stage: results go to binding 1, indexed by gl_PrimitiveID.
+       const string tessControlSource =
+               preamble +
+               "layout(vertices=1) out;\n"
+               "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+               + resultBlockBody + inputBufferDecl +
+               "\n"
+               "void main (void)\n"
+               "{\n"
+               "  highp uint offset = gl_PrimitiveID;\n"
+               + source +
+               "  if (gl_InvocationID == 0)\n"
+               "  {\n"
+               "    gl_TessLevelOuter[0] = 1.0f;\n"
+               "    gl_TessLevelOuter[1] = 1.0f;\n"
+               "  }\n"
+               "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+               "}\n";
+       programCollection.glslSources.add("tesc")
+               << glu::TessellationControlSource(tessControlSource) << buildOptions;
+
+       // Tessellation evaluation stage: results go to binding 2, two slots per primitive.
+       const string tessEvalSource =
+               preamble +
+               "layout(isolines) in;\n"
+               "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+               + resultBlockBody + inputBufferDecl +
+               "\n"
+               "void main (void)\n"
+               "{\n"
+               "  highp uint offset = gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5);\n"
+               + source +
+               "  float pixelSize = 2.0f/1024.0f;\n"
+               "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+               "}\n";
+       programCollection.glslSources.add("tese")
+               << glu::TessellationEvaluationSource(tessEvalSource) << buildOptions;
+
+       // Geometry stage: results go to binding 3; ${TOPOLOGY} is left for
+       // subgroups::addGeometryShadersFromTemplate() to handle.
+       const string geometryTemplate =
+               preamble +
+               "layout(${TOPOLOGY}) in;\n"
+               "layout(points, max_vertices = 1) out;\n"
+               "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+               + resultBlockBody + inputBufferDecl +
+               "\n"
+               "void main (void)\n"
+               "{\n"
+               "  highp uint offset = gl_PrimitiveIDIn;\n"
+               + source +
+               "  gl_Position = gl_in[0].gl_Position;\n"
+               "  EmitVertex();\n"
+               "  EndPrimitive();\n"
+               "}\n";
+       subgroups::addGeometryShadersFromTemplate(geometryTemplate, buildOptions, programCollection.glslSources);
+
+       // Fragment stage: the result is written to the colour output at location 0.
+       const string fragmentSource =
+               preamble +
+               "layout(location = 0) out uint result;\n"
+               + inputBufferDecl +
+               "void main (void)\n"
+               "{\n"
+               + sourceFragment +
+               "}\n";
+       programCollection.glslSources.add("fragment")
+               << glu::FragmentSource(fragmentSource) << buildOptions;
+
+       subgroups::addNoSubgroupShader(programCollection);
+}
+
+// Throws NotSupportedError unless subgroups, the vote subgroup feature and, for formats
+// that subgroups::isDoubleFormat() reports as double, double support are all available.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+       const bool subgroupsAvailable = subgroups::isSubgroupSupported(context);
+       if (!subgroupsAvailable)
+               TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+
+       const bool voteAvailable = subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_VOTE_BIT);
+       if (!voteAvailable)
+               TCU_THROW(NotSupportedError, "Device does not support subgroup vote operations");
+
+       // Device double support is only queried when the case format needs it.
+       if (subgroups::isDoubleFormat(caseDef.format))
+       {
+               if (!subgroups::isDoubleSupportedForDevice(context))
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+       }
+}
+
+// Runs a framebuffer-based vote case for the single stage named by caseDef.shaderStage.
+//
+// Fails when the stage is required to support subgroup operations but does not; throws
+// NotSupportedError when an optional stage lacks support; throws InternalError for a
+// stage value that has no framebuffer test.
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+       const bool stageSupported = subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage);
+
+       if (!stageSupported)
+       {
+               if (!subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+                       TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+
+               return tcu::TestStatus::fail(
+                       "Shader stage " +
+                       subgroups::getShaderStageName(caseDef.shaderStage) +
+                       " is required to support subgroup operations!");
+       }
+
+       // subgroupAllEqual cases use zero-initialised input, the other operations non-zero.
+       const bool wantsZeroInput = (OPTYPE_ALLEQUAL == caseDef.opType);
+
+       subgroups::SSBOData inputData;
+       inputData.format = caseDef.format;
+       inputData.numElements = subgroups::maxSupportedSubgroupSize();
+       inputData.initializeType = wantsZeroInput ? subgroups::SSBOData::InitializeZero : subgroups::SSBOData::InitializeNonZero;
+
+       switch (caseDef.shaderStage)
+       {
+               case VK_SHADER_STAGE_VERTEX_BIT:
+                       return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+
+               case VK_SHADER_STAGE_GEOMETRY_BIT:
+                       return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+
+               // Both tessellation stages share one helper; the last argument selects the stage.
+               case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+                       return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+
+               case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+                       return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+
+               case VK_SHADER_STAGE_FRAGMENT_BIT:
+                       return subgroups::makeFragmentFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkFragmentPipelineStages);
+
+               default:
+                       TCU_THROW(InternalError, "Unhandled shader stage");
+       }
+}
+
+
+// Runs an SSBO-based vote case.
+//
+// Compute cases fail outright when the compute stage lacks subgroup support. For any
+// other stage value the requested stages are masked with the stages the device reports
+// for subgroup operations; without vertex-stage SSBO support only the fragment stage is
+// kept, and NotSupportedError is thrown when nothing usable remains.
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+       // subgroupAllEqual cases use zero-initialised input, the other operations non-zero.
+       const subgroups::SSBOData::InputDataInitializeType initType =
+               (OPTYPE_ALLEQUAL == caseDef.opType) ? subgroups::SSBOData::InitializeZero : subgroups::SSBOData::InitializeNonZero;
+
+       if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+       {
+               const bool computeSupported = subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage);
+
+               if (!computeSupported)
+               {
+                       return tcu::TestStatus::fail(
+                               "Shader stage " +
+                               subgroups::getShaderStageName(caseDef.shaderStage) +
+                               " is required to support subgroup operations!");
+               }
+
+               subgroups::SSBOData computeInput;
+               computeInput.format = caseDef.format;
+               computeInput.numElements = subgroups::maxSupportedSubgroupSize();
+               computeInput.initializeType = initType;
+
+               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &computeInput, 1, checkCompute);
+       }
+
+       // Query the stages for which the device supports subgroup operations.
+       VkPhysicalDeviceSubgroupProperties subgroupProperties;
+       subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+       subgroupProperties.pNext = DE_NULL;
+
+       VkPhysicalDeviceProperties2 properties;
+       properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+       properties.pNext = &subgroupProperties;
+
+       context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+       VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProperties.supportedStages);
+
+       if (stages != VK_SHADER_STAGE_FRAGMENT_BIT && !subgroups::isVertexSSBOSupportedForDevice(context))
+       {
+               const bool fragmentRequested = (stages & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
+
+               if (!fragmentRequested)
+                       TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+
+               // Fall back to the fragment stage only.
+               stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+       }
+
+       if (stages == (VkShaderStageFlagBits)0u)
+               TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+       subgroups::SSBOData graphicsInput;
+       graphicsInput.format = caseDef.format;
+       graphicsInput.numElements = subgroups::maxSupportedSubgroupSize();
+       graphicsInput.initializeType = initType;
+       graphicsInput.binding = 4u;
+       graphicsInput.stages = stages;
+
+       return subgroups::allStages(context, VK_FORMAT_R32_UINT, &graphicsInput, 1, checkVertexPipelineStages, stages);
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+// Builds the "vote" test group with four children:
+//   graphics    - SSBO-based cases run with VK_SHADER_STAGE_ALL_GRAPHICS
+//   compute     - SSBO-based cases run in a compute shader
+//   framebuffer - framebuffer-based cases, one per vertex/tessellation/geometry stage
+//   frag_helper - framebuffer-based cases for the fragment stage
+//
+// subgroupAllEqual cases are generated for every listed format; the other operations are
+// generated for VK_FORMAT_R32_UINT only. Ownership of the returned group passes to the
+// caller.
+tcu::TestCaseGroup* createSubgroupsVoteTests(tcu::TestContext& testCtx)
+{
+       // Descriptions name the vote category, matching the parent "vote" group below.
+       de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+               testCtx, "graphics", "Subgroup vote category tests: graphics"));
+       de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+               testCtx, "compute", "Subgroup vote category tests: compute"));
+       de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+               testCtx, "framebuffer", "Subgroup vote category tests: framebuffer"));
+
+       de::MovePtr<tcu::TestCaseGroup> fragHelperGroup(new tcu::TestCaseGroup(
+               testCtx, "frag_helper", "Subgroup vote category tests: fragment helper invocation"));
+
+       // Stages that get their own framebuffer case (fragment is handled by frag_helper).
+       const VkShaderStageFlags stages[] =
+       {
+               VK_SHADER_STAGE_VERTEX_BIT,
+               VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+               VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+               VK_SHADER_STAGE_GEOMETRY_BIT,
+       };
+
+       const VkFormat formats[] =
+       {
+               VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
+               VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
+               VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
+               VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
+               VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
+               VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
+               VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
+               VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED,
+               VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
+       };
+
+       for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
+       {
+               const VkFormat format = formats[formatIndex];
+
+               for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
+               {
+                       // Skip the typed tests for all but subgroupAllEqual()
+                       if ((VK_FORMAT_R32_UINT != format) && (OPTYPE_ALLEQUAL != opTypeIndex))
+                       {
+                               continue;
+                       }
+
+                       // Case names are "<lower-case op>_<GLSL format name>[_<stage name>]".
+                       const std::string op = de::toLower(getOpTypeName(opTypeIndex));
+
+                       {
+                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format};
+                               addFunctionCaseWithPrograms(computeGroup.get(),
+                                                                                       op + "_" + subgroups::getFormatNameForGLSL(format),
+                                                                                       "", supportedCheck, initPrograms, test, caseDef);
+                       }
+
+                       {
+                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format};
+                               addFunctionCaseWithPrograms(graphicGroup.get(),
+                                                                                       op + "_" + subgroups::getFormatNameForGLSL(format),
+                                                                                       "", supportedCheck, initPrograms, test, caseDef);
+                       }
+
+                       for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
+                       {
+                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
+                               addFunctionCaseWithPrograms(framebufferGroup.get(),
+                                                       op + "_" +
+                                                       subgroups::getFormatNameForGLSL(format)
+                                                       + "_" + getShaderStageName(caseDef.shaderStage), "",
+                                                       supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+                       }
+
+                       // Fragment-stage framebuffer case goes into its own group.
+                       const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_FRAGMENT_BIT, format};
+                       addFunctionCaseWithPrograms(fragHelperGroup.get(),
+                                               op + "_" +
+                                               subgroups::getFormatNameForGLSL(format)
+                                               + "_" + getShaderStageName(caseDef.shaderStage), "",
+                                               supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+               }
+       }
+
+       de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+               testCtx, "vote", "Subgroup vote category tests"));
+
+       group->addChild(graphicGroup.release());
+       group->addChild(computeGroup.release());
+       group->addChild(framebufferGroup.release());
+       group->addChild(fragHelperGroup.release());
+
+       return group.release();
+}
+
+} // subgroups
+} // vkt
diff --git a/external/openglcts/modules/common/subgroups/glcSubgroupsVoteTests.hpp b/external/openglcts/modules/common/subgroups/glcSubgroupsVoteTests.hpp
new file mode 100644 (file)
index 0000000..57b795a
--- /dev/null
@@ -0,0 +1,40 @@
+#ifndef _GLCSUBGROUPSVOTETESTS_HPP
+#define _GLCSUBGROUPSVOTETESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups vote tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+//! Creates the "vote" subgroup test group; the caller takes ownership of the result.
+tcu::TestCaseGroup* createSubgroupsVoteTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _GLCSUBGROUPSVOTETESTS_HPP