add_definitions(-DGLCTS_SUPPORT_WGL=1)
endif ()
+add_subdirectory(subgroups)
+
set(GLCTS_COMMON_SRCS
glcAggressiveShaderOptimizationsTests.cpp
glcAggressiveShaderOptimizationsTests.hpp
glutil
tcutil
eglutil
+ deqp-gl-subgroups
)
# Add glslang
--- /dev/null
+# Build rules for the deqp-gl-subgroups static library.
+
+# Add the parent directory to the include search path for targets defined here.
+include_directories(..)
+
+# Sources that make up the library. Only glcSubgroupsTests.hpp is active;
+# every other entry below is commented out and therefore not part of the build.
+set(DEQP_GL_SUBGROUPS_SRCS
+# glcSubgroupsTests.cpp
+ glcSubgroupsTests.hpp
+# glcSubgroupsBuiltinVarTests.cpp
+# glcSubgroupsBuiltinVarTests.hpp
+# glcSubgroupsBuiltinMaskVarTests.cpp
+# glcSubgroupsBuiltinMaskVarTests.hpp
+# glcSubgroupsBasicTests.cpp
+# glcSubgroupsBasicTests.hpp
+# glcSubgroupsVoteTests.cpp
+# glcSubgroupsVoteTests.hpp
+# glcSubgroupsBallotTests.cpp
+# glcSubgroupsBallotTests.hpp
+# glcSubgroupsBallotBroadcastTests.cpp
+# glcSubgroupsBallotBroadcastTests.hpp
+# glcSubgroupsBallotOtherTests.cpp
+# glcSubgroupsBallotOtherTests.hpp
+# glcSubgroupsArithmeticTests.cpp
+# glcSubgroupsArithmeticTests.hpp
+# glcSubgroupsClusteredTests.cpp
+# glcSubgroupsClusteredTests.hpp
+# glcSubgroupsPartitionedTests.cpp
+# glcSubgroupsPartitionedTests.hpp
+# glcSubgroupsShuffleTests.cpp
+# glcSubgroupsShuffleTests.hpp
+# glcSubgroupsQuadTests.cpp
+# glcSubgroupsQuadTests.hpp
+# glcSubgroupsShapeTests.cpp
+# glcSubgroupsShapeTests.hpp
+# glcSubgroupsTestsUtils.cpp
+# glcSubgroupsTestsUtils.hpp
+ )
+
+# Libraries that deqp-gl-subgroups is linked against.
+set(DEQP_GL_SUBGROUPS_LIBS
+ glutil
+ tcutil
+ vkutil
+ )
+
+# PCH is defined outside this file and receives the NAME of the source-list
+# variable plus the path to pch.cpp.
+# NOTE(review): presumably it sets up precompiled-header usage for the listed
+# sources - confirm against the macro definition.
+PCH(DEQP_GL_SUBGROUPS_SRCS ../../pch.cpp)
+
+# Static library target and its link dependencies.
+add_library(deqp-gl-subgroups STATIC ${DEQP_GL_SUBGROUPS_SRCS})
+target_link_libraries(deqp-gl-subgroups ${DEQP_GL_SUBGROUPS_LIBS})
--- /dev/null
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsArithmeticTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Subgroup arithmetic operations covered by these tests. Each value maps to
+// the GLSL built-in named by getOpTypeName(). The values are contiguous from
+// 0 so that OPTYPE_LAST can be used as an iteration bound.
+enum OpType
+{
+ // Plain reductions: the reference is folded over the whole subgroup
+ // (indices 0 .. gl_SubgroupSize).
+ OPTYPE_ADD = 0,
+ OPTYPE_MUL,
+ OPTYPE_MIN,
+ OPTYPE_MAX,
+ OPTYPE_AND,
+ OPTYPE_OR,
+ OPTYPE_XOR,
+ // Inclusive scans: the reference is folded over indices
+ // 0 .. gl_SubgroupInvocationID (current invocation included).
+ OPTYPE_INCLUSIVE_ADD,
+ OPTYPE_INCLUSIVE_MUL,
+ OPTYPE_INCLUSIVE_MIN,
+ OPTYPE_INCLUSIVE_MAX,
+ OPTYPE_INCLUSIVE_AND,
+ OPTYPE_INCLUSIVE_OR,
+ OPTYPE_INCLUSIVE_XOR,
+ // Exclusive scans: the reference is folded over indices
+ // 0 .. gl_SubgroupInvocationID - 1 (current invocation excluded).
+ OPTYPE_EXCLUSIVE_ADD,
+ OPTYPE_EXCLUSIVE_MUL,
+ OPTYPE_EXCLUSIVE_MIN,
+ OPTYPE_EXCLUSIVE_MAX,
+ OPTYPE_EXCLUSIVE_AND,
+ OPTYPE_EXCLUSIVE_OR,
+ OPTYPE_EXCLUSIVE_XOR,
+ // Number of operations; not a valid operation itself.
+ OPTYPE_LAST
+};
+
+// Result-check callback used by the graphics and framebuffer tests.
+// Forwards to vkt::subgroups::check() with 0x3, which is the value the
+// generated shaders store in tempResult when both of their checks (bits 0x1
+// and 0x2) succeed. The third, unnamed argument is ignored.
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+	deUint32 width, deUint32)
+{
+	const bool resultsOk = vkt::subgroups::check(datas, width, 0x3);
+	return resultsOk;
+}
+
+// Result-check callback used by the compute tests.
+// Forwards to vkt::subgroups::checkCompute() with 0x3, which is the value the
+// generated compute shader stores in tempResult when both of its checks (bits
+// 0x1 and 0x2) succeed. The fourth, unnamed argument is ignored.
+static bool checkCompute(std::vector<const void*> datas,
+	const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+	deUint32)
+{
+	const bool resultsOk = vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 0x3);
+	return resultsOk;
+}
+
+// Returns the name of the GLSL subgroup built-in that implements opType
+// (an OpType value). For an unknown value DE_FATAL is raised and an empty
+// string is returned.
+std::string getOpTypeName(int opType)
+{
+	switch (opType)
+	{
+		// Reductions.
+		case OPTYPE_ADD:			return "subgroupAdd";
+		case OPTYPE_MUL:			return "subgroupMul";
+		case OPTYPE_MIN:			return "subgroupMin";
+		case OPTYPE_MAX:			return "subgroupMax";
+		case OPTYPE_AND:			return "subgroupAnd";
+		case OPTYPE_OR:				return "subgroupOr";
+		case OPTYPE_XOR:			return "subgroupXor";
+
+		// Inclusive scans.
+		case OPTYPE_INCLUSIVE_ADD:	return "subgroupInclusiveAdd";
+		case OPTYPE_INCLUSIVE_MUL:	return "subgroupInclusiveMul";
+		case OPTYPE_INCLUSIVE_MIN:	return "subgroupInclusiveMin";
+		case OPTYPE_INCLUSIVE_MAX:	return "subgroupInclusiveMax";
+		case OPTYPE_INCLUSIVE_AND:	return "subgroupInclusiveAnd";
+		case OPTYPE_INCLUSIVE_OR:	return "subgroupInclusiveOr";
+		case OPTYPE_INCLUSIVE_XOR:	return "subgroupInclusiveXor";
+
+		// Exclusive scans.
+		case OPTYPE_EXCLUSIVE_ADD:	return "subgroupExclusiveAdd";
+		case OPTYPE_EXCLUSIVE_MUL:	return "subgroupExclusiveMul";
+		case OPTYPE_EXCLUSIVE_MIN:	return "subgroupExclusiveMin";
+		case OPTYPE_EXCLUSIVE_MAX:	return "subgroupExclusiveMax";
+		case OPTYPE_EXCLUSIVE_AND:	return "subgroupExclusiveAnd";
+		case OPTYPE_EXCLUSIVE_OR:	return "subgroupExclusiveOr";
+		case OPTYPE_EXCLUSIVE_XOR:	return "subgroupExclusiveXor";
+
+		default:
+			DE_FATAL("Unsupported op type");
+			return "";
+	}
+}
+
+// Builds "bvecN(lhs.x OP rhs.x, lhs.y OP rhs.y, ...)" for the first
+// numComponents components, where OP is the given logical operator token.
+static std::string makeBoolVectorExpression(const std::string& lhs, const std::string& rhs, const std::string& logicalOp, int numComponents)
+{
+	static const char	componentNames[]	= "xyzw";
+	std::string			expr				= "bvec";
+
+	expr += static_cast<char>('0' + numComponents);
+	expr += "(";
+
+	for (int componentNdx = 0; componentNdx < numComponents; ++componentNdx)
+	{
+		if (componentNdx != 0)
+			expr += ", ";
+
+		expr += lhs + "." + componentNames[componentNdx] + " " + logicalOp + " " + rhs + "." + componentNames[componentNdx];
+	}
+
+	expr += ")";
+	return expr;
+}
+
+// Builds the min/max step for the reference loop; funcName is "min" or "max".
+// For floating-point formats the expression picks the other operand when one
+// operand is NaN; every other format uses a plain funcName(lhs, rhs) call.
+static std::string makeMinMaxExpression(const std::string& funcName, vk::VkFormat format, const std::string& lhs, const std::string& rhs)
+{
+	const std::string plainCall = funcName + "(" + lhs + ", " + rhs + ")";
+
+	switch (format)
+	{
+		case VK_FORMAT_R32_SFLOAT:
+		case VK_FORMAT_R64_SFLOAT:
+			return "(isnan(" + lhs + ") ? " + rhs + " : (isnan(" + rhs + ") ? " + lhs + " : " + plainCall + "))";
+
+		case VK_FORMAT_R32G32_SFLOAT:
+		case VK_FORMAT_R32G32B32_SFLOAT:
+		case VK_FORMAT_R32G32B32A32_SFLOAT:
+		case VK_FORMAT_R64G64_SFLOAT:
+		case VK_FORMAT_R64G64B64_SFLOAT:
+		case VK_FORMAT_R64G64B64A64_SFLOAT:
+			return "mix(mix(" + plainCall + ", " + lhs + ", isnan(" + rhs + ")), " + rhs + ", isnan(" + lhs + "))";
+
+		default:
+			return plainCall;
+	}
+}
+
+// Builds the and/or/xor step for the reference loop. The R8*_USCALED formats
+// (used for bool data) get the logical operator, applied per component for
+// vectors; every other format gets the bitwise operator.
+static std::string makeLogicExpression(const std::string& bitwiseOp, const std::string& logicalOp, vk::VkFormat format, const std::string& lhs, const std::string& rhs)
+{
+	switch (format)
+	{
+		case VK_FORMAT_R8_USCALED:			return lhs + " " + logicalOp + " " + rhs;
+		case VK_FORMAT_R8G8_USCALED:		return makeBoolVectorExpression(lhs, rhs, logicalOp, 2);
+		case VK_FORMAT_R8G8B8_USCALED:		return makeBoolVectorExpression(lhs, rhs, logicalOp, 3);
+		case VK_FORMAT_R8G8B8A8_USCALED:	return makeBoolVectorExpression(lhs, rhs, logicalOp, 4);
+		default:							return lhs + " " + bitwiseOp + " " + rhs;
+	}
+}
+
+// Returns the GLSL expression that combines lhs and rhs with the operation
+// behind opType (reduction, inclusive and exclusive variants share the same
+// expression). For an unknown opType DE_FATAL is raised and an empty string
+// is returned.
+std::string getOpTypeOperation(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
+{
+	switch (opType)
+	{
+		case OPTYPE_ADD:
+		case OPTYPE_INCLUSIVE_ADD:
+		case OPTYPE_EXCLUSIVE_ADD:
+			return lhs + " + " + rhs;
+
+		case OPTYPE_MUL:
+		case OPTYPE_INCLUSIVE_MUL:
+		case OPTYPE_EXCLUSIVE_MUL:
+			return lhs + " * " + rhs;
+
+		case OPTYPE_MIN:
+		case OPTYPE_INCLUSIVE_MIN:
+		case OPTYPE_EXCLUSIVE_MIN:
+			return makeMinMaxExpression("min", format, lhs, rhs);
+
+		case OPTYPE_MAX:
+		case OPTYPE_INCLUSIVE_MAX:
+		case OPTYPE_EXCLUSIVE_MAX:
+			return makeMinMaxExpression("max", format, lhs, rhs);
+
+		case OPTYPE_AND:
+		case OPTYPE_INCLUSIVE_AND:
+		case OPTYPE_EXCLUSIVE_AND:
+			return makeLogicExpression("&", "&&", format, lhs, rhs);
+
+		case OPTYPE_OR:
+		case OPTYPE_INCLUSIVE_OR:
+		case OPTYPE_EXCLUSIVE_OR:
+			return makeLogicExpression("|", "||", format, lhs, rhs);
+
+		case OPTYPE_XOR:
+		case OPTYPE_INCLUSIVE_XOR:
+		case OPTYPE_EXCLUSIVE_XOR:
+			return makeLogicExpression("^", "^^", format, lhs, rhs);
+
+		default:
+			DE_FATAL("Unsupported op type");
+			return "";
+	}
+}
+
+// Returns a GLSL constructor expression for the identity element of opType on
+// the given format, i.e. the value the reference accumulator starts from:
+//   add / or / xor : 0
+//   mul            : 1
+//   and            : ~0
+//   min            : +infinity bit pattern (float), 0x7fffffff (int), 0xffffffffu (uint)
+//   max            : -infinity bit pattern (float), 0x80000000 (int), 0 (uint)
+// Min/max on the R8*_USCALED (bool) formats and unknown op types raise
+// DE_FATAL and return an empty string; unknown formats raise DE_FATAL.
+std::string getIdentity(int opType, vk::VkFormat format)
+{
+	enum NumericKind
+	{
+		NUMERIC_KIND_NONE = 0,	// bool formats, or an unhandled format
+		NUMERIC_KIND_SINT,
+		NUMERIC_KIND_UINT,
+		NUMERIC_KIND_FLOAT
+	};
+
+	NumericKind kind = NUMERIC_KIND_NONE;
+
+	switch (format)
+	{
+		case VK_FORMAT_R32_SINT:
+		case VK_FORMAT_R32G32_SINT:
+		case VK_FORMAT_R32G32B32_SINT:
+		case VK_FORMAT_R32G32B32A32_SINT:
+			kind = NUMERIC_KIND_SINT;
+			break;
+
+		case VK_FORMAT_R32_UINT:
+		case VK_FORMAT_R32G32_UINT:
+		case VK_FORMAT_R32G32B32_UINT:
+		case VK_FORMAT_R32G32B32A32_UINT:
+			kind = NUMERIC_KIND_UINT;
+			break;
+
+		case VK_FORMAT_R32_SFLOAT:
+		case VK_FORMAT_R32G32_SFLOAT:
+		case VK_FORMAT_R32G32B32_SFLOAT:
+		case VK_FORMAT_R32G32B32A32_SFLOAT:
+		case VK_FORMAT_R64_SFLOAT:
+		case VK_FORMAT_R64G64_SFLOAT:
+		case VK_FORMAT_R64G64B64_SFLOAT:
+		case VK_FORMAT_R64G64B64A64_SFLOAT:
+			kind = NUMERIC_KIND_FLOAT;
+			break;
+
+		case VK_FORMAT_R8_USCALED:
+		case VK_FORMAT_R8G8_USCALED:
+		case VK_FORMAT_R8G8B8_USCALED:
+		case VK_FORMAT_R8G8B8A8_USCALED:
+			// Bool types have no numeric kind.
+			break;
+
+		default:
+			DE_FATAL("Unhandled format!");
+			break;
+	}
+
+	switch (opType)
+	{
+		case OPTYPE_ADD:
+		case OPTYPE_INCLUSIVE_ADD:
+		case OPTYPE_EXCLUSIVE_ADD:
+		case OPTYPE_OR:
+		case OPTYPE_INCLUSIVE_OR:
+		case OPTYPE_EXCLUSIVE_OR:
+		case OPTYPE_XOR:
+		case OPTYPE_INCLUSIVE_XOR:
+		case OPTYPE_EXCLUSIVE_XOR:
+			return subgroups::getFormatNameForGLSL(format) + "(0)";
+
+		case OPTYPE_MUL:
+		case OPTYPE_INCLUSIVE_MUL:
+		case OPTYPE_EXCLUSIVE_MUL:
+			return subgroups::getFormatNameForGLSL(format) + "(1)";
+
+		case OPTYPE_AND:
+		case OPTYPE_INCLUSIVE_AND:
+		case OPTYPE_EXCLUSIVE_AND:
+			return subgroups::getFormatNameForGLSL(format) + "(~0)";
+
+		case OPTYPE_MIN:
+		case OPTYPE_INCLUSIVE_MIN:
+		case OPTYPE_EXCLUSIVE_MIN:
+			// Identity of min is the largest representable value.
+			switch (kind)
+			{
+				case NUMERIC_KIND_FLOAT:	return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0x7f800000))";
+				case NUMERIC_KIND_SINT:		return subgroups::getFormatNameForGLSL(format) + "(0x7fffffff)";
+				case NUMERIC_KIND_UINT:		return subgroups::getFormatNameForGLSL(format) + "(0xffffffffu)";
+				default:
+					DE_FATAL("Unhandled case");
+					return "";
+			}
+
+		case OPTYPE_MAX:
+		case OPTYPE_INCLUSIVE_MAX:
+		case OPTYPE_EXCLUSIVE_MAX:
+			// Identity of max is the smallest representable value.
+			switch (kind)
+			{
+				case NUMERIC_KIND_FLOAT:	return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0xff800000))";
+				case NUMERIC_KIND_SINT:		return subgroups::getFormatNameForGLSL(format) + "(0x80000000)";
+				case NUMERIC_KIND_UINT:		return subgroups::getFormatNameForGLSL(format) + "(0)";
+				default:
+					DE_FATAL("Unhandled case");
+					return "";
+			}
+
+		default:
+			DE_FATAL("Unsupported op type");
+			return "";
+	}
+}
+
+// Returns a GLSL boolean expression that compares lhs with rhs for the given
+// format. Integer and bool data are compared exactly. Floating-point data is
+// compared exactly for the min/max operations and with an absolute tolerance
+// of 0.00001 for all other operations.
+std::string getCompare(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
+{
+	std::string	formatName	= subgroups::getFormatNameForGLSL(format);
+	bool		isMinMaxOp	= false;
+
+	switch (opType)
+	{
+		case OPTYPE_MIN:
+		case OPTYPE_INCLUSIVE_MIN:
+		case OPTYPE_EXCLUSIVE_MIN:
+		case OPTYPE_MAX:
+		case OPTYPE_INCLUSIVE_MAX:
+		case OPTYPE_EXCLUSIVE_MAX:
+			isMinMaxOp = true;
+			break;
+
+		default:
+			break;
+	}
+
+	const std::string exactScalar = "(" + lhs + " == " + rhs + ")";
+	const std::string exactVector = "all(equal(" + lhs + ", " + rhs + "))";
+
+	switch (format)
+	{
+		case VK_FORMAT_R8_USCALED:
+		case VK_FORMAT_R32_UINT:
+		case VK_FORMAT_R32_SINT:
+			return exactScalar;
+
+		case VK_FORMAT_R32_SFLOAT:
+		case VK_FORMAT_R64_SFLOAT:
+			if (isMinMaxOp)
+				return exactScalar;
+			return "(abs(" + lhs + " - " + rhs + ") < 0.00001)";
+
+		case VK_FORMAT_R32G32_SFLOAT:
+		case VK_FORMAT_R32G32B32_SFLOAT:
+		case VK_FORMAT_R32G32B32A32_SFLOAT:
+		case VK_FORMAT_R64G64_SFLOAT:
+		case VK_FORMAT_R64G64B64_SFLOAT:
+		case VK_FORMAT_R64G64B64A64_SFLOAT:
+			if (isMinMaxOp)
+				return exactVector;
+			return "all(lessThan(abs(" + lhs + " - " + rhs + "), " + formatName + "(0.00001)))";
+
+		default:
+			// Remaining (vector integer / bool) formats.
+			return exactVector;
+	}
+}
+
+// Parameters of a single test case.
+struct CaseDefinition
+{
+ // Operation under test; holds one of the OpType values.
+ int opType;
+ // Shader stage(s) the case runs in; compared against VK_SHADER_STAGE_* bits.
+ VkShaderStageFlags shaderStage;
+ // Format of the input data; selects the GLSL data type used in the shaders.
+ VkFormat format;
+};
+
+// Registers the GLSL sources for the framebuffer variant of a test case.
+//
+// The stage under test (vertex, geometry, tessellation control or tessellation
+// evaluation, selected by caseDef.shaderStage) reads its input from a uniform
+// block and writes the check result to the out_color output. The remaining
+// stages are added through the subgroups::set*ShaderFrameBuffer() helpers.
+// Any other stage raises DE_FATAL.
+//
+// The generated test body:
+//   1. folds data[index] over the invocations in [start, end) whose bit is set
+//      in the ballot mask, starting from the identity of the operation;
+//   2. compares that reference with the subgroup built-in and sets bit 0x1 of
+//      tempResult on a match;
+//   3. repeats the check inside a branch taken only by odd invocations with a
+//      fresh ballot and sets bit 0x2 on a match; even invocations set bit 0x2
+//      unconditionally.
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
+{
+	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+	std::string						indexVars;
+	std::ostringstream				bdy;
+
+	subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+	if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+		subgroups::setVertexShaderFrameBuffer(programCollection);
+
+	// Index range the reference loop iterates over: the whole subgroup for
+	// reductions, up to and including this invocation for inclusive scans, up
+	// to but excluding it for exclusive scans.
+	switch (caseDef.opType)
+	{
+		default:
+			indexVars = " uint start = 0, end = gl_SubgroupSize;\n";
+			break;
+		case OPTYPE_INCLUSIVE_ADD:
+		case OPTYPE_INCLUSIVE_MUL:
+		case OPTYPE_INCLUSIVE_MIN:
+		case OPTYPE_INCLUSIVE_MAX:
+		case OPTYPE_INCLUSIVE_AND:
+		case OPTYPE_INCLUSIVE_OR:
+		case OPTYPE_INCLUSIVE_XOR:
+			indexVars = " uint start = 0, end = gl_SubgroupInvocationID + 1;\n";
+			break;
+		case OPTYPE_EXCLUSIVE_ADD:
+		case OPTYPE_EXCLUSIVE_MUL:
+		case OPTYPE_EXCLUSIVE_MIN:
+		case OPTYPE_EXCLUSIVE_MAX:
+		case OPTYPE_EXCLUSIVE_AND:
+		case OPTYPE_EXCLUSIVE_OR:
+		case OPTYPE_EXCLUSIVE_XOR:
+			indexVars = " uint start = 0, end = gl_SubgroupInvocationID;\n";
+			break;
+	}
+
+	// Test body shared by all stages; expects a "uvec4 mask" variable and a
+	// "data" array to be declared by the enclosing shader.
+	bdy << indexVars
+		<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " ref = "
+		<< getIdentity(caseDef.opType, caseDef.format) << ";\n"
+		<< " uint tempResult = 0;\n"
+		<< " for (uint index = start; index < end; index++)\n"
+		<< " {\n"
+		<< " if (subgroupBallotBitExtract(mask, index))\n"
+		<< " {\n"
+		<< " ref = " << getOpTypeOperation(caseDef.opType, caseDef.format, "ref", "data[index]") << ";\n"
+		<< " }\n"
+		<< " }\n"
+		<< " tempResult = " << getCompare(caseDef.opType, caseDef.format, "ref",
+											getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID])") << " ? 0x1 : 0;\n"
+		<< " if (1 == (gl_SubgroupInvocationID % 2))\n"
+		<< " {\n"
+		<< " mask = subgroupBallot(true);\n"
+		<< " ref = " << getIdentity(caseDef.opType, caseDef.format) << ";\n"
+		<< " for (uint index = start; index < end; index++)\n"
+		<< " {\n"
+		<< " if (subgroupBallotBitExtract(mask, index))\n"
+		<< " {\n"
+		<< " ref = " << getOpTypeOperation(caseDef.opType, caseDef.format, "ref", "data[index]") << ";\n"
+		<< " }\n"
+		<< " }\n"
+		<< " tempResult |= " << getCompare(caseDef.opType, caseDef.format, "ref",
+											getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID])") << " ? 0x2 : 0;\n"
+		<< " }\n"
+		<< " else\n"
+		<< " {\n"
+		<< " tempResult |= 0x2;\n"
+		<< " }\n";
+
+	if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+	{
+		std::ostringstream vertexSrc;
+		vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+			<< "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+			<< "layout(location = 0) in highp vec4 in_position;\n"
+			<< "layout(location = 0) out float out_color;\n"
+			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
+			<< "{\n"
+			<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+			<< "};\n"
+			<< "\n"
+			<< "void main (void)\n"
+			<< "{\n"
+			<< " uvec4 mask = subgroupBallot(true);\n"
+			<< bdy.str()
+			<< " out_color = float(tempResult);\n"
+			<< " gl_Position = in_position;\n"
+			<< " gl_PointSize = 1.0f;\n"
+			<< "}\n";
+		programCollection.glslSources.add("vert")
+			<< glu::VertexSource(vertexSrc.str()) << buildOptions;
+	}
+	else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+	{
+		std::ostringstream geometry;
+
+		geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+			<< "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+			<< "layout(points) in;\n"
+			<< "layout(points, max_vertices = 1) out;\n"
+			<< "layout(location = 0) out float out_color;\n"
+			<< "layout(set = 0, binding = 0) uniform Buffer\n"
+			<< "{\n"
+			<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+			<< "};\n"
+			<< "\n"
+			<< "void main (void)\n"
+			<< "{\n"
+			<< " uvec4 mask = subgroupBallot(true);\n"
+			<< bdy.str()
+			<< " out_color = float(tempResult);\n"
+			<< " gl_Position = gl_in[0].gl_Position;\n"
+			<< " EmitVertex();\n"
+			<< " EndPrimitive();\n"
+			<< "}\n";
+
+		programCollection.glslSources.add("geometry")
+			<< glu::GeometrySource(geometry.str()) << buildOptions;
+	}
+	else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+	{
+		std::ostringstream controlSource;
+		controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+			<< "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+			<< "layout(vertices = 2) out;\n"
+			<< "layout(location = 0) out float out_color[];\n"
+			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
+			<< "{\n"
+			<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+			<< "};\n"
+			<< "\n"
+			<< "void main (void)\n"
+			<< "{\n"
+			<< " if (gl_InvocationID == 0)\n"
+			<<" {\n"
+			<< " gl_TessLevelOuter[0] = 1.0f;\n"
+			<< " gl_TessLevelOuter[1] = 1.0f;\n"
+			<< " }\n"
+			<< " uvec4 mask = subgroupBallot(true);\n"
+			<< bdy.str()
+			// Terminate the line so each generated statement sits on its own
+			// source line, like every other line emitted here.
+			<< " out_color[gl_InvocationID] = float(tempResult);\n"
+			<< " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+			<< "}\n";
+
+		programCollection.glslSources.add("tesc")
+			<< glu::TessellationControlSource(controlSource.str()) << buildOptions;
+		subgroups::setTesEvalShaderFrameBuffer(programCollection);
+	}
+	else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+	{
+		std::ostringstream evaluationSource;
+		evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+			<< "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+			<< "layout(isolines, equal_spacing, ccw ) in;\n"
+			<< "layout(location = 0) out float out_color;\n"
+			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
+			<< "{\n"
+			<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+			<< "};\n"
+			<< "\n"
+			<< "void main (void)\n"
+			<< "{\n"
+			<< " uvec4 mask = subgroupBallot(true);\n"
+			<< bdy.str()
+			<< " out_color = float(tempResult);\n"
+			<< " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+			<< "}\n";
+
+		subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+		programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+	}
+	else
+	{
+		DE_FATAL("Unsupported shader stage");
+	}
+}
+
+// Registers the GLSL sources for the SSBO-based variant of a test case.
+//
+// For VK_SHADER_STAGE_COMPUTE_BIT a single "comp" shader is added that writes
+// one result per global invocation. For any other stage value the vertex,
+// tessellation control, tessellation evaluation, geometry and fragment
+// shaders are all added; each one runs the same test body, reads its input
+// from the buffer at binding 4 and writes tempResult to its own result
+// buffer (or, for the fragment shader, to its colour output).
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+	const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+
+	// Index range of the reference loop: whole subgroup for reductions,
+	// [0, id] for inclusive scans, [0, id) for exclusive scans.
+	std::string indexVars;
+	switch (caseDef.opType)
+	{
+		case OPTYPE_INCLUSIVE_ADD:
+		case OPTYPE_INCLUSIVE_MUL:
+		case OPTYPE_INCLUSIVE_MIN:
+		case OPTYPE_INCLUSIVE_MAX:
+		case OPTYPE_INCLUSIVE_AND:
+		case OPTYPE_INCLUSIVE_OR:
+		case OPTYPE_INCLUSIVE_XOR:
+			indexVars = " uint start = 0, end = gl_SubgroupInvocationID + 1;\n";
+			break;
+		case OPTYPE_EXCLUSIVE_ADD:
+		case OPTYPE_EXCLUSIVE_MUL:
+		case OPTYPE_EXCLUSIVE_MIN:
+		case OPTYPE_EXCLUSIVE_MAX:
+		case OPTYPE_EXCLUSIVE_AND:
+		case OPTYPE_EXCLUSIVE_OR:
+		case OPTYPE_EXCLUSIVE_XOR:
+			indexVars = " uint start = 0, end = gl_SubgroupInvocationID;\n";
+			break;
+		default:
+			indexVars = " uint start = 0, end = gl_SubgroupSize;\n";
+			break;
+	}
+
+	// Building blocks of the generated GLSL.
+	const string dataType		= subgroups::getFormatNameForGLSL(caseDef.format);
+	const string identity		= getIdentity(caseDef.opType, caseDef.format);
+	const string foldStep		= getOpTypeOperation(caseDef.opType, caseDef.format, "ref", "data[index]");
+	const string compareExpr	= getCompare(caseDef.opType, caseDef.format, "ref", getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID])");
+
+	// Test body shared by every stage: bit 0x1 records the check in uniform
+	// control flow, bit 0x2 the check repeated by odd invocations only.
+	const string testBody =
+		indexVars +
+		" " + dataType + " ref = "
+		+ identity + ";\n"
+		" uint tempResult = 0;\n"
+		" for (uint index = start; index < end; index++)\n"
+		" {\n"
+		" if (subgroupBallotBitExtract(mask, index))\n"
+		" {\n"
+		" ref = " + foldStep + ";\n"
+		" }\n"
+		" }\n"
+		" tempResult = " + compareExpr + " ? 0x1 : 0;\n"
+		" if (1 == (gl_SubgroupInvocationID % 2))\n"
+		" {\n"
+		" mask = subgroupBallot(true);\n"
+		" ref = " + identity + ";\n"
+		" for (uint index = start; index < end; index++)\n"
+		" {\n"
+		" if (subgroupBallotBitExtract(mask, index))\n"
+		" {\n"
+		" ref = " + foldStep + ";\n"
+		" }\n"
+		" }\n"
+		" tempResult |= " + compareExpr + " ? 0x2 : 0;\n"
+		" }\n"
+		" else\n"
+		" {\n"
+		" tempResult |= 0x2;\n"
+		" }\n";
+
+	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+	{
+		std::ostringstream computeSrc;
+
+		computeSrc << "#version 450\n"
+			<< "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+			<< "layout (local_size_x_id = 0, local_size_y_id = 1, "
+			"local_size_z_id = 2) in;\n"
+			<< "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+			<< "{\n"
+			<< " uint result[];\n"
+			<< "};\n"
+			<< "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
+			<< "{\n"
+			<< " " << dataType << " data[];\n"
+			<< "};\n"
+			<< "\n"
+			<< "void main (void)\n"
+			<< "{\n"
+			<< " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+			<< " highp uint offset = globalSize.x * ((globalSize.y * "
+			"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+			"gl_GlobalInvocationID.x;\n"
+			<< " uvec4 mask = subgroupBallot(true);\n"
+			<< testBody
+			<< " result[offset] = tempResult;\n"
+			<< "}\n";
+
+		programCollection.glslSources.add("comp")
+			<< glu::ComputeSource(computeSrc.str()) << buildOptions;
+		return;
+	}
+
+	// Graphics pipeline: every stage gets its own copy of the test body.
+
+	// Vertex shader: one result per vertex, written to binding 0.
+	{
+		const std::string vertexSrc =
+			"#version 450\n"
+			"#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+			"#extension GL_KHR_shader_subgroup_ballot: enable\n"
+			"layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+			"{\n"
+			" uint result[];\n"
+			"};\n"
+			"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+			"{\n"
+			" " + dataType + " data[];\n"
+			"};\n"
+			"\n"
+			"void main (void)\n"
+			"{\n"
+			" uvec4 mask = subgroupBallot(true);\n"
+			+ testBody +
+			" result[gl_VertexIndex] = tempResult;\n"
+			" float pixelSize = 2.0f/1024.0f;\n"
+			" float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+			" gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+			" gl_PointSize = 1.0f;\n"
+			"}\n";
+
+		programCollection.glslSources.add("vert")
+			<< glu::VertexSource(vertexSrc) << buildOptions;
+	}
+
+	// Tessellation control shader: one result per primitive, written to binding 1.
+	{
+		const std::string tessCtrlSrc =
+			"#version 450\n"
+			"#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+			"#extension GL_KHR_shader_subgroup_ballot: enable\n"
+			"layout(vertices=1) out;\n"
+			"layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+			"{\n"
+			" uint result[];\n"
+			"};\n"
+			"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+			"{\n"
+			" " + dataType + " data[];\n"
+			"};\n"
+			"\n"
+			"void main (void)\n"
+			"{\n"
+			" uvec4 mask = subgroupBallot(true);\n"
+			+ testBody +
+			" result[gl_PrimitiveID] = tempResult;\n"
+			" if (gl_InvocationID == 0)\n"
+			" {\n"
+			" gl_TessLevelOuter[0] = 1.0f;\n"
+			" gl_TessLevelOuter[1] = 1.0f;\n"
+			" }\n"
+			" gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+			"}\n";
+
+		programCollection.glslSources.add("tesc")
+			<< glu::TessellationControlSource(tessCtrlSrc) << buildOptions;
+	}
+
+	// Tessellation evaluation shader: two results per primitive, written to binding 2.
+	{
+		const std::string tessEvalSrc =
+			"#version 450\n"
+			"#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+			"#extension GL_KHR_shader_subgroup_ballot: enable\n"
+			"layout(isolines) in;\n"
+			"layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+			"{\n"
+			" uint result[];\n"
+			"};\n"
+			"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+			"{\n"
+			" " + dataType + " data[];\n"
+			"};\n"
+			"\n"
+			"void main (void)\n"
+			"{\n"
+			" uvec4 mask = subgroupBallot(true);\n"
+			+ testBody +
+			" result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
+			" float pixelSize = 2.0f/1024.0f;\n"
+			" gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+			"}\n";
+
+		programCollection.glslSources.add("tese")
+			<< glu::TessellationEvaluationSource(tessEvalSrc) << buildOptions;
+	}
+
+	// Geometry shader template: ${TOPOLOGY} is left for
+	// addGeometryShadersFromTemplate() to fill in; results go to binding 3.
+	{
+		const std::string geometryTemplate =
+			"#version 450\n"
+			"#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+			"#extension GL_KHR_shader_subgroup_ballot: enable\n"
+			"layout(${TOPOLOGY}) in;\n"
+			"layout(points, max_vertices = 1) out;\n"
+			"layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+			"{\n"
+			" uint result[];\n"
+			"};\n"
+			"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+			"{\n"
+			" " + dataType + " data[];\n"
+			"};\n"
+			"\n"
+			"void main (void)\n"
+			"{\n"
+			" uvec4 mask = subgroupBallot(true);\n"
+			+ testBody +
+			" result[gl_PrimitiveIDIn] = tempResult;\n"
+			" gl_Position = gl_in[0].gl_Position;\n"
+			" EmitVertex();\n"
+			" EndPrimitive();\n"
+			"}\n";
+
+		subgroups::addGeometryShadersFromTemplate(geometryTemplate, buildOptions,
+												  programCollection.glslSources);
+	}
+
+	// Fragment shader: the result is written to the colour output.
+	{
+		const std::string fragmentSrc =
+			"#version 450\n"
+			"#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+			"#extension GL_KHR_shader_subgroup_ballot: enable\n"
+			"layout(location = 0) out uint result;\n"
+			"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+			"{\n"
+			" " + dataType + " data[];\n"
+			"};\n"
+			"void main (void)\n"
+			"{\n"
+			" uvec4 mask = subgroupBallot(true);\n"
+			+ testBody +
+			" result = tempResult;\n"
+			"}\n";
+
+		programCollection.glslSources.add("fragment")
+			<< glu::FragmentSource(fragmentSrc)<< buildOptions;
+	}
+
+	subgroups::addNoSubgroupShader(programCollection);
+}
+
+// Throws NotSupportedError when the device cannot run the given case at all:
+// no subgroup support, no subgroup arithmetic feature, or a double-precision
+// format on a device without double support. The checks run in that order.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+	if (!subgroups::isSubgroupSupported(context))
+	{
+		TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+	}
+
+	const bool hasArithmeticFeature = subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_ARITHMETIC_BIT);
+	if (!hasArithmeticFeature)
+	{
+		TCU_THROW(NotSupportedError, "Device does not support subgroup arithmetic operations");
+	}
+
+	// Device double support is only queried for double formats.
+	if (subgroups::isDoubleFormat(caseDef.format))
+	{
+		if (!subgroups::isDoubleSupportedForDevice(context))
+		{
+			TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+		}
+	}
+}
+
+// Runs the framebuffer variant of a case for a single graphics stage.
+//
+// If subgroup operations are not supported for the stage, the test fails when
+// areSubgroupOperationsRequiredForStage() reports the stage as required and
+// throws NotSupportedError otherwise. An unexpected stage throws
+// InternalError.
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+	const bool stageSupported = subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage);
+
+	if (!stageSupported)
+	{
+		if (!subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+		{
+			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+		}
+
+		return tcu::TestStatus::fail(
+				   "Shader stage " +
+				   subgroups::getShaderStageName(caseDef.shaderStage) +
+				   " is required to support subgroup operations!");
+	}
+
+	// One non-zero initialised input element per possible subgroup invocation.
+	subgroups::SSBOData input;
+	input.format			= caseDef.format;
+	input.numElements		= subgroups::maxSupportedSubgroupSize();
+	input.initializeType	= subgroups::SSBOData::InitializeNonZero;
+
+	switch (caseDef.shaderStage)
+	{
+		case VK_SHADER_STAGE_VERTEX_BIT:
+			return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &input, 1, checkVertexPipelineStages);
+
+		case VK_SHADER_STAGE_GEOMETRY_BIT:
+			return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &input, 1, checkVertexPipelineStages);
+
+		// Both tessellation stages go through the same helper, which takes
+		// the stage under test as its last argument.
+		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+			return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &input, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+
+		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+			return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &input, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+
+		default:
+			TCU_THROW(InternalError, "Unhandled shader stage");
+	}
+}
+
+// Checks whether subgroup operations can be used in caseDef.shaderStage.
+// Returns true when they are supported. When they are not, returns false if
+// areSubgroupOperationsRequiredForStage() reports the stage as required, and
+// throws NotSupportedError otherwise.
+bool checkShaderStages (Context& context, const CaseDefinition& caseDef)
+{
+	if (subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+		return true;
+
+	if (subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+		return false;
+
+	TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+}
+
+// Runs the SSBO-based variant of a case.
+//
+// Compute: fails if the stage is required to support subgroup operations but
+// does not, otherwise runs makeComputeTest().
+//
+// Graphics: the requested stages are masked with the stages the device
+// reports in VkPhysicalDeviceSubgroupProperties::supportedStages. If
+// isVertexSSBOSupportedForDevice() is false, the run is reduced to the
+// fragment stage when that is among the remaining stages and is reported as
+// not supported otherwise. An empty stage set is also reported as not
+// supported.
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+	{
+		if (!checkShaderStages(context, caseDef))
+		{
+			return tcu::TestStatus::fail(
+					   "Shader stage " +
+					   subgroups::getShaderStageName(caseDef.shaderStage) +
+					   " is required to support subgroup operations!");
+		}
+
+		subgroups::SSBOData computeInput;
+		computeInput.format			= caseDef.format;
+		computeInput.numElements	= subgroups::maxSupportedSubgroupSize();
+		computeInput.initializeType	= subgroups::SSBOData::InitializeNonZero;
+
+		return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &computeInput, 1, checkCompute);
+	}
+
+	// Query which stages support subgroup operations.
+	VkPhysicalDeviceSubgroupProperties subgroupProps;
+	subgroupProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+	subgroupProps.pNext = DE_NULL;
+
+	VkPhysicalDeviceProperties2 deviceProps;
+	deviceProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+	deviceProps.pNext = &subgroupProps;
+
+	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &deviceProps);
+
+	VkShaderStageFlagBits activeStages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProps.supportedStages);
+
+	if (VK_SHADER_STAGE_FRAGMENT_BIT != activeStages && !subgroups::isVertexSSBOSupportedForDevice(context))
+	{
+		const bool hasFragmentStage = (activeStages & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
+
+		if (!hasFragmentStage)
+			TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+
+		activeStages = VK_SHADER_STAGE_FRAGMENT_BIT;
+	}
+
+	if ((VkShaderStageFlagBits)0u == activeStages)
+		TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+	// Input buffer shared by all graphics stages (binding 4 in the shaders).
+	subgroups::SSBOData graphicsInput;
+	graphicsInput.format			= caseDef.format;
+	graphicsInput.numElements		= subgroups::maxSupportedSubgroupSize();
+	graphicsInput.initializeType	= subgroups::SSBOData::InitializeNonZero;
+	graphicsInput.binding			= 4u;
+	graphicsInput.stages			= activeStages;
+
+	return subgroups::allStages(context, VK_FORMAT_R32_UINT, &graphicsInput,
+								1, checkVertexPipelineStages, activeStages);
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+// Builds the "arithmetic" test hierarchy. Three child groups are created
+// ("graphics", "compute" and "framebuffer") and each receives one case per
+// (format, operation) pair that passes the type filter below; the framebuffer
+// group additionally gets one case per individual shader stage.
+// The returned pointer has been released from its MovePtr, so the caller owns it.
+tcu::TestCaseGroup* createSubgroupsArithmeticTests(tcu::TestContext& testCtx)
+{
+	de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+		testCtx, "graphics", "Subgroup arithmetic category tests: graphics"));
+	de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+		testCtx, "compute", "Subgroup arithmetic category tests: compute"));
+	de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+		testCtx, "framebuffer", "Subgroup arithmetic category tests: framebuffer"));
+
+	// Stages that get a dedicated framebuffer-based case.
+	const VkShaderStageFlags framebufferStages[] =
+	{
+		VK_SHADER_STAGE_VERTEX_BIT,
+		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+		VK_SHADER_STAGE_GEOMETRY_BIT,
+	};
+
+	const VkFormat dataFormats[] =
+	{
+		VK_FORMAT_R32_SINT,
+		VK_FORMAT_R32G32_SINT,
+		VK_FORMAT_R32G32B32_SINT,
+		VK_FORMAT_R32G32B32A32_SINT,
+		VK_FORMAT_R32_UINT,
+		VK_FORMAT_R32G32_UINT,
+		VK_FORMAT_R32G32B32_UINT,
+		VK_FORMAT_R32G32B32A32_UINT,
+		VK_FORMAT_R32_SFLOAT,
+		VK_FORMAT_R32G32_SFLOAT,
+		VK_FORMAT_R32G32B32_SFLOAT,
+		VK_FORMAT_R32G32B32A32_SFLOAT,
+		VK_FORMAT_R64_SFLOAT,
+		VK_FORMAT_R64G64_SFLOAT,
+		VK_FORMAT_R64G64B64_SFLOAT,
+		VK_FORMAT_R64G64B64A64_SFLOAT,
+		VK_FORMAT_R8_USCALED,
+		VK_FORMAT_R8G8_USCALED,
+		VK_FORMAT_R8G8B8_USCALED,
+		VK_FORMAT_R8G8B8A8_USCALED,
+	};
+
+	for (int formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(dataFormats); ++formatNdx)
+	{
+		const VkFormat dataFormat = dataFormats[formatNdx];
+
+		// The classification depends on the format only, so it is computed once per format.
+		const bool floatData =	VK_FORMAT_R32_SFLOAT == dataFormat ||
+								VK_FORMAT_R32G32_SFLOAT == dataFormat ||
+								VK_FORMAT_R32G32B32_SFLOAT == dataFormat ||
+								VK_FORMAT_R32G32B32A32_SFLOAT == dataFormat ||
+								VK_FORMAT_R64_SFLOAT == dataFormat ||
+								VK_FORMAT_R64G64_SFLOAT == dataFormat ||
+								VK_FORMAT_R64G64B64_SFLOAT == dataFormat ||
+								VK_FORMAT_R64G64B64A64_SFLOAT == dataFormat;
+
+		// The R8 USCALED formats are the ones this file treats as boolean data.
+		const bool boolData =	VK_FORMAT_R8_USCALED == dataFormat ||
+								VK_FORMAT_R8G8_USCALED == dataFormat ||
+								VK_FORMAT_R8G8B8_USCALED == dataFormat ||
+								VK_FORMAT_R8G8B8A8_USCALED == dataFormat;
+
+		for (int opNdx = 0; opNdx < OPTYPE_LAST; ++opNdx)
+		{
+			const bool bitwiseOp =	OPTYPE_AND == opNdx ||
+									OPTYPE_INCLUSIVE_AND == opNdx ||
+									OPTYPE_EXCLUSIVE_AND == opNdx ||
+									OPTYPE_OR == opNdx ||
+									OPTYPE_INCLUSIVE_OR == opNdx ||
+									OPTYPE_EXCLUSIVE_OR == opNdx ||
+									OPTYPE_XOR == opNdx ||
+									OPTYPE_INCLUSIVE_XOR == opNdx ||
+									OPTYPE_EXCLUSIVE_XOR == opNdx;
+
+			// Floats are never combined with bitwise operations, and booleans
+			// are combined with nothing but bitwise operations.
+			if ((floatData && bitwiseOp) || (boolData && !bitwiseOp))
+				continue;
+
+			const std::string opName	= getOpTypeName(opNdx);
+			const std::string caseName	= de::toLower(opName) + "_" + subgroups::getFormatNameForGLSL(dataFormat);
+
+			{
+				const CaseDefinition caseDef = {opNdx, VK_SHADER_STAGE_COMPUTE_BIT, dataFormat};
+				addFunctionCaseWithPrograms(computeGroup.get(), caseName, "",
+											supportedCheck, initPrograms, test, caseDef);
+			}
+
+			{
+				const CaseDefinition caseDef = {opNdx, VK_SHADER_STAGE_ALL_GRAPHICS, dataFormat};
+				addFunctionCaseWithPrograms(graphicGroup.get(), caseName, "",
+											supportedCheck, initPrograms, test, caseDef);
+			}
+
+			for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(framebufferStages); ++stageNdx)
+			{
+				const CaseDefinition caseDef = {opNdx, framebufferStages[stageNdx], dataFormat};
+				addFunctionCaseWithPrograms(framebufferGroup.get(),
+											caseName + "_" + getShaderStageName(caseDef.shaderStage), "",
+											supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+			}
+		}
+	}
+
+	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+		testCtx, "arithmetic", "Subgroup arithmetic category tests"));
+
+	group->addChild(graphicGroup.release());
+	group->addChild(computeGroup.release());
+	group->addChild(framebufferGroup.release());
+
+	return group.release();
+}
+
+} // subgroups
+} // vkt
--- /dev/null
+#ifndef _VKTSUBGROUPSARITHMETICTESTS_HPP
+#define _VKTSUBGROUPSARITHMETICTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Creates the "arithmetic" subgroup test group and all of its children.
+// The implementation hands back a released pointer, so the caller takes ownership.
+tcu::TestCaseGroup* createSubgroupsArithmeticTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSARITHMETICTESTS_HPP
--- /dev/null
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsBallotBroadcastTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Subgroup broadcast operation exercised by a test case. The values are used
+// as loop indices when the cases are registered; OPTYPE_LAST is the exclusive
+// upper bound of that loop and does not name a real operation.
+enum OpType
+{
+ OPTYPE_BROADCAST = 0,
+ OPTYPE_BROADCAST_FIRST,
+ OPTYPE_LAST
+};
+
+// Result-check callback for the graphics and framebuffer variants. The third
+// parameter is unused; the comparison itself is delegated to subgroups::check.
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+	deUint32 width, deUint32)
+{
+	// Both generated shader bodies leave bits 0x1 and 0x2 set on success.
+	const deUint32 expectedResult = 3;
+
+	return vkt::subgroups::check(datas, width, expectedResult);
+}
+
+// Result-check callback for the compute variant. The last parameter is unused;
+// the comparison itself is delegated to subgroups::checkCompute.
+static bool checkCompute(std::vector<const void*> datas,
+	const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+	deUint32)
+{
+	// Both generated shader bodies leave bits 0x1 and 0x2 set on success.
+	const deUint32 expectedResult = 3;
+
+	return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, expectedResult);
+}
+
+// Maps an OpType value to the name of the GLSL built-in it exercises.
+// Any other value triggers DE_FATAL and yields an empty string.
+std::string getOpTypeName(int opType)
+{
+	if (OPTYPE_BROADCAST == opType)
+		return "subgroupBroadcast";
+
+	if (OPTYPE_BROADCAST_FIRST == opType)
+		return "subgroupBroadcastFirst";
+
+	DE_FATAL("Unsupported op type");
+	return "";
+}
+
+// Parameters of a single ballot-broadcast test case. Instances are created
+// with aggregate initialisation ({opType, shaderStage, format}), so the field
+// order must not change.
+struct CaseDefinition
+{
+ // One of the OpType values.
+ int opType;
+ // Stage mask under test: the compute bit, VK_SHADER_STAGE_ALL_GRAPHICS, or a single graphics stage bit.
+ VkShaderStageFlags shaderStage;
+ // Format describing the GLSL type of the broadcast data (see subgroups::getFormatNameForGLSL).
+ VkFormat format;
+};
+
+// Returns the GLSL statements, shared by every shader stage, that exercise the
+// operation selected by caseDef.opType. The snippet declares a local uint named
+// tempResult and leaves the verdict in it; a passing invocation ends with
+// tempResult == 0x3, which is the reference value used by the check callbacks
+// in this file. The snippet expects an array named data1 to be declared by the
+// surrounding shader.
+//
+// OPTYPE_BROADCAST: for every constant id below maxSupportedSubgroupSize() the
+// value broadcast from invocation id must equal data1[id] whenever id is inside
+// the subgroup and marked active in the initial ballot.
+//
+// Any other op type (OPTYPE_BROADCAST_FIRST in practice):
+//   bit 0x1 - with every invocation participating, subgroupBroadcastFirst must
+//             return the value owned by the lowest active invocation;
+//   bit 0x2 - the same check repeated with the first active invocation excluded.
+//             The excluded invocation does not take part and sets the bit itself.
+std::string getBodySource(CaseDefinition caseDef)
+{
+	std::ostringstream bdy;
+
+	bdy << " uvec4 mask = subgroupBallot(true);\n";
+	bdy << " uint tempResult = 0;\n";
+
+	if (OPTYPE_BROADCAST == caseDef.opType)
+	{
+		bdy << " tempResult = 0x3;\n";
+		// subgroupBroadcast is given a literal id, so the check is unrolled
+		// once per possible invocation index.
+		for (int i = 0; i < (int)subgroups::maxSupportedSubgroupSize(); i++)
+		{
+			bdy << " {\n"
+				<< " const uint id = "<< i << ";\n"
+				<< " " << subgroups::getFormatNameForGLSL(caseDef.format)
+				<< " op = subgroupBroadcast(data1[gl_SubgroupInvocationID], id);\n"
+				<< " if ((id < gl_SubgroupSize) && subgroupBallotBitExtract(mask, id))\n"
+				<< " {\n"
+				<< " if (op != data1[id])\n"
+				<< " {\n"
+				<< " tempResult = 0;\n"
+				<< " }\n"
+				<< " }\n"
+				<< " }\n";
+		}
+	}
+	else
+	{
+		bdy << " uint firstActive = 0;\n"
+			<< " for (uint i = 0; i < gl_SubgroupSize; i++)\n"
+			<< " {\n"
+			<< " if (subgroupBallotBitExtract(mask, i))\n"
+			<< " {\n"
+			<< " firstActive = i;\n"
+			<< " break;\n"
+			<< " }\n"
+			<< " }\n"
+			<< " tempResult |= (subgroupBroadcastFirst(data1[gl_SubgroupInvocationID]) == data1[firstActive]) ? 0x1 : 0;\n"
+			<< " // make the firstActive invocation inactive now\n"
+			// The branch must be taken by every invocation EXCEPT the first
+			// active one. The ballot is re-sampled inside the branch so that
+			// firstActive is recomputed from the invocations that actually
+			// execute the second subgroupBroadcastFirst. Testing for equality
+			// here would let only the first active invocation in, which then
+			// compares its own value with itself.
+			<< " if (firstActive != gl_SubgroupInvocationID)\n"
+			<< " {\n"
+			<< " mask = subgroupBallot(true);\n"
+			<< " for (uint i = 0; i < gl_SubgroupSize; i++)\n"
+			<< " {\n"
+			<< " if (subgroupBallotBitExtract(mask, i))\n"
+			<< " {\n"
+			<< " firstActive = i;\n"
+			<< " break;\n"
+			<< " }\n"
+			<< " }\n"
+			<< " tempResult |= (subgroupBroadcastFirst(data1[gl_SubgroupInvocationID]) == data1[firstActive]) ? 0x2 : 0;\n"
+			<< " }\n"
+			<< " else\n"
+			<< " {\n"
+			<< " // the firstActive invocation didn't partake in the second result so set it to true\n"
+			<< " tempResult |= 0x2;\n"
+			<< " }\n";
+	}
+	return bdy.str();
+}
+
+// Registers the GLSL sources for the framebuffer variant of a case, where a
+// single graphics stage (caseDef.shaderStage) runs the subgroup code and
+// reports its verdict through the out_color output instead of an SSBO.
+//
+// The shared fragment shader is always added; the shared vertex shader is added
+// whenever the stage under test is not the vertex stage. The stage under test
+// is then generated here, with the snippet from getBodySource() spliced into
+// main(). The input data is declared as a uniform block at set 0, binding 0
+// holding maxSupportedSubgroupSize() elements of the case's data type.
+// An unsupported stage value ends in DE_FATAL.
+void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+ // Every shader generated here is built with the same options (SPIR-V 1.3).
+ const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+
+ subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+ if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+ subgroups::setVertexShaderFrameBuffer(programCollection);
+
+ std::string bdyStr = getBodySource(caseDef);
+
+ if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+ {
+ std::ostringstream vertex;
+ vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ << "layout(location = 0) in highp vec4 in_position;\n"
+ << "layout(location = 0) out float out_color;\n"
+ << "layout(set = 0, binding = 0) uniform Buffer1\n"
+ << "{\n"
+ << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+ << "};\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << bdyStr
+ << " out_color = float(tempResult);\n"
+ << " gl_Position = in_position;\n"
+ << " gl_PointSize = 1.0f;\n"
+ << "}\n";
+ programCollection.glslSources.add("vert")
+ << glu::VertexSource(vertex.str()) << buildOptions;
+ }
+ else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+ {
+ std::ostringstream geometry;
+
+ geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ << "layout(points) in;\n"
+ << "layout(points, max_vertices = 1) out;\n"
+ << "layout(location = 0) out float out_color;\n"
+ << "layout(set = 0, binding = 0) uniform Buffer1\n"
+ << "{\n"
+ << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[" <<subgroups::maxSupportedSubgroupSize() << "];\n"
+ << "};\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << bdyStr
+ << " out_color = float(tempResult);\n"
+ << " gl_Position = gl_in[0].gl_Position;\n"
+ << " EmitVertex();\n"
+ << " EndPrimitive();\n"
+ << "}\n";
+
+ programCollection.glslSources.add("geometry")
+ << glu::GeometrySource(geometry.str()) << buildOptions;
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+ {
+ std::ostringstream controlSource;
+
+ controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ << "layout(vertices = 2) out;\n"
+ << "layout(location = 0) out float out_color[];\n"
+ << "layout(set = 0, binding = 0) uniform Buffer2\n"
+ << "{\n"
+ << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[" <<subgroups::maxSupportedSubgroupSize() << "];\n"
+ << "};\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " if (gl_InvocationID == 0)\n"
+ << " {\n"
+ << " gl_TessLevelOuter[0] = 1.0f;\n"
+ << " gl_TessLevelOuter[1] = 1.0f;\n"
+ << " }\n"
+ << bdyStr
+ << " out_color[gl_InvocationID ] = float(tempResult);\n"
+ << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ << "}\n";
+
+ programCollection.glslSources.add("tesc")
+ << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+ // The evaluation stage comes from the shared helper when the control stage is under test.
+ subgroups::setTesEvalShaderFrameBuffer(programCollection);
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+ {
+ std::ostringstream evaluationSource;
+ evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ << "layout(isolines, equal_spacing, ccw ) in;\n"
+ << "layout(location = 0) out float out_color;\n"
+ << "layout(set = 0, binding = 0) uniform Buffer1\n"
+ << "{\n"
+ << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[" <<subgroups::maxSupportedSubgroupSize() << "];\n"
+ << "};\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << bdyStr
+ << " out_color = float(tempResult);\n"
+ << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+ << "}\n";
+
+ // The control stage comes from the shared helper when the evaluation stage is under test.
+ subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+ programCollection.glslSources.add("tese")
+ << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+ }
+ else
+ {
+ DE_FATAL("Unsupported shader stage");
+ }
+}
+
+// Registers the GLSL sources for the SSBO-based variants of a case.
+//
+// Compute cases get a single "comp" shader that writes tempResult to
+// result[offset], where offset is the invocation's linear global index; the
+// input data lives in the buffer at binding 1.
+//
+// Every other case gets the full set of graphics shaders (vertex, tessellation
+// control, tessellation evaluation, geometry and fragment), each embedding the
+// snippet from getBodySource(). The vertex, tessellation and geometry shaders
+// write tempResult to their own result buffer (bindings 0 to 3 respectively),
+// the fragment shader writes it to its colour output, and all of them read the
+// input data from binding 4, the binding test() assigns to its input SSBO.
+// All shaders are built for SPIR-V 1.3.
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+ std::string bdyStr = getBodySource(caseDef);
+
+ if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+ {
+ std::ostringstream src;
+
+ // The local size is supplied through specialization constants 0, 1 and 2.
+ src << "#version 450\n"
+ << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+ "local_size_z_id = 2) in;\n"
+ << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+ << "{\n"
+ << " uint result[];\n"
+ << "};\n"
+ << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
+ << "{\n"
+ << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[];\n"
+ << "};\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+ << " highp uint offset = globalSize.x * ((globalSize.y * "
+ "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+ "gl_GlobalInvocationID.x;\n"
+ << bdyStr
+ << " result[offset] = tempResult;\n"
+ << "}\n";
+
+ programCollection.glslSources.add("comp")
+ << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+ else
+ {
+ // Vertex stage: one result slot per vertex, indexed by gl_VertexIndex.
+ const string vertex =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+ "{\n"
+ " uint result[];\n"
+ "};\n"
+ "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+ "{\n"
+ " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data1[];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ + bdyStr +
+ " result[gl_VertexIndex] = tempResult;\n"
+ " float pixelSize = 2.0f/1024.0f;\n"
+ " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+ " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+ " gl_PointSize = 1.0f;\n"
+ "}\n";
+
+ // Tessellation control stage: one result slot per patch, indexed by gl_PrimitiveID.
+ const string tesc =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(vertices=1) out;\n"
+ "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+ "{\n"
+ " uint result[];\n"
+ "};\n"
+ "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+ "{\n"
+ " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data1[];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ + bdyStr +
+ " result[gl_PrimitiveID] = tempResult;\n"
+ " if (gl_InvocationID == 0)\n"
+ " {\n"
+ " gl_TessLevelOuter[0] = 1.0f;\n"
+ " gl_TessLevelOuter[1] = 1.0f;\n"
+ " }\n"
+ " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ "}\n";
+
+ // Tessellation evaluation stage: two result slots per patch, selected by rounding gl_TessCoord.x.
+ const string tese =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(isolines) in;\n"
+ "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+ "{\n"
+ " uint result[];\n"
+ "};\n"
+ "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+ "{\n"
+ " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data1[];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ + bdyStr +
+ " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
+ " float pixelSize = 2.0f/1024.0f;\n"
+ " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+ "}\n";
+
+ // Geometry stage template: one result slot per input primitive.
+ // NOTE(review): ${TOPOLOGY} is presumably substituted by
+ // subgroups::addGeometryShadersFromTemplate - confirm against that helper.
+ const string geometry =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(${TOPOLOGY}) in;\n"
+ "layout(points, max_vertices = 1) out;\n"
+ "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+ "{\n"
+ " uint result[];\n"
+ "};\n"
+ "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+ "{\n"
+ " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data1[];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ + bdyStr +
+ " result[gl_PrimitiveIDIn] = tempResult;\n"
+ " gl_Position = gl_in[0].gl_Position;\n"
+ " EmitVertex();\n"
+ " EndPrimitive();\n"
+ "}\n";
+
+ // Fragment stage: the verdict goes to the colour attachment rather than to an SSBO.
+ const string fragment =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(location = 0) out uint result;\n"
+ "layout(set = 0, binding = 4, std430) readonly buffer Buffer1\n"
+ "{\n"
+ " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data1[];\n"
+ "};\n"
+ "void main (void)\n"
+ "{\n"
+ + bdyStr +
+ " result = tempResult;\n"
+ "}\n";
+
+ subgroups::addNoSubgroupShader(programCollection);
+
+ programCollection.glslSources.add("vert")
+ << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ programCollection.glslSources.add("tesc")
+ << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ programCollection.glslSources.add("tese")
+ << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
+ programCollection.glslSources);
+ programCollection.glslSources.add("fragment")
+ << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+}
+
+// Capability gate run before a case executes. Throws NotSupportedError when
+// the device lacks subgroup support, lacks the ballot feature, or when the
+// case uses a double-precision format the device cannot handle.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+	const bool subgroupsAvailable = subgroups::isSubgroupSupported(context);
+
+	if (!subgroupsAvailable)
+	{
+		TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+	}
+
+	const bool ballotAvailable = subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT);
+
+	if (!ballotAvailable)
+	{
+		TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
+	}
+
+	// The device is queried for double support only when the case needs it.
+	if (subgroups::isDoubleFormat(caseDef.format))
+	{
+		if (!subgroups::isDoubleSupportedForDevice(context))
+		{
+			TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+		}
+	}
+}
+
+// Runs the framebuffer variant of a case for the single graphics stage named
+// by caseDef.shaderStage. When the device offers no subgroup operations in that
+// stage, the case fails if the stage is one that is required to support them
+// and is reported as not supported otherwise. Any stage outside the four
+// handled below raises InternalError.
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+	const bool stageSupported = subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage);
+
+	if (!stageSupported)
+	{
+		if (!subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+		{
+			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+		}
+
+		return tcu::TestStatus::fail(
+				   "Shader stage " +
+				   subgroups::getShaderStageName(caseDef.shaderStage) +
+				   " is required to support subgroup operations!");
+	}
+
+	// Single input buffer holding one element per possible subgroup invocation.
+	subgroups::SSBOData inputData;
+	inputData.format			= caseDef.format;
+	inputData.numElements		= subgroups::maxSupportedSubgroupSize();
+	inputData.initializeType	= subgroups::SSBOData::InitializeNonZero;
+
+	switch (caseDef.shaderStage)
+	{
+		case VK_SHADER_STAGE_VERTEX_BIT:
+			return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+
+		case VK_SHADER_STAGE_GEOMETRY_BIT:
+			return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+
+		// Both tessellation stages share one helper; the last argument selects the stage.
+		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+			return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+
+		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+			return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+
+		default:
+			TCU_THROW(InternalError, "Unhandled shader stage");
+	}
+}
+
+
+// Runs the SSBO-based variant of a case.
+//
+// Graphics cases intersect the requested stage mask with the stages the device
+// reports in VkPhysicalDeviceSubgroupProperties::supportedStages. If the device
+// cannot write SSBOs from vertex-pipeline stages, the run is narrowed to the
+// fragment stage when that stage is part of the mask and is reported as not
+// supported otherwise. An empty mask is reported as not supported.
+//
+// Compute cases fail when the compute stage is required to support subgroup
+// operations but does not, and are reported as not supported when the stage is
+// merely optional.
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+	if (VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
+	{
+		VkPhysicalDeviceSubgroupProperties subgroupProperties;
+		subgroupProperties.sType	= VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+		subgroupProperties.pNext	= DE_NULL;
+
+		VkPhysicalDeviceProperties2 properties;
+		properties.sType	= VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+		properties.pNext	= &subgroupProperties;
+
+		context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+		VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProperties.supportedStages);
+
+		const bool fragmentOnly = (VK_SHADER_STAGE_FRAGMENT_BIT == stages);
+
+		if (!fragmentOnly && !subgroups::isVertexSSBOSupportedForDevice(context))
+		{
+			if (0 == (stages & VK_SHADER_STAGE_FRAGMENT_BIT))
+				TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+
+			stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+		}
+
+		if ((VkShaderStageFlagBits)0u == stages)
+			TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+		// The graphics shaders read their input from binding 4.
+		subgroups::SSBOData graphicsInput;
+		graphicsInput.format			= caseDef.format;
+		graphicsInput.numElements		= subgroups::maxSupportedSubgroupSize();
+		graphicsInput.initializeType	= subgroups::SSBOData::InitializeNonZero;
+		graphicsInput.binding			= 4u;
+		graphicsInput.stages			= stages;
+
+		return subgroups::allStages(context, VK_FORMAT_R32_UINT, &graphicsInput, 1, checkVertexPipelineStages, stages);
+	}
+
+	// Compute path.
+	const bool computeSupported = subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage);
+
+	if (!computeSupported)
+	{
+		if (!subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+		{
+			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+		}
+
+		return tcu::TestStatus::fail(
+				   "Shader stage " +
+				   subgroups::getShaderStageName(caseDef.shaderStage) +
+				   " is required to support subgroup operations!");
+	}
+
+	subgroups::SSBOData computeInput;
+	computeInput.format			= caseDef.format;
+	computeInput.numElements	= subgroups::maxSupportedSubgroupSize();
+	computeInput.initializeType	= subgroups::SSBOData::InitializeNonZero;
+
+	return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &computeInput, 1, checkCompute);
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+// Builds the "ballot_broadcast" test hierarchy. Three child groups are created
+// ("graphics", "compute" and "framebuffer") and each receives one case per
+// (format, operation) pair; the framebuffer group gets one case per individual
+// shader stage, named "<op>_<type>_<stage>".
+// The returned pointer has been released from its MovePtr, so the caller owns it.
+tcu::TestCaseGroup* createSubgroupsBallotBroadcastTests(tcu::TestContext& testCtx)
+{
+	de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+		testCtx, "graphics", "Subgroup ballot broadcast category tests: graphics"));
+	de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+		testCtx, "compute", "Subgroup ballot broadcast category tests: compute"));
+	de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+		testCtx, "framebuffer", "Subgroup ballot broadcast category tests: framebuffer"));
+
+	// Stages that get a dedicated framebuffer-based case.
+	const VkShaderStageFlags stages[] =
+	{
+		VK_SHADER_STAGE_VERTEX_BIT,
+		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+		VK_SHADER_STAGE_GEOMETRY_BIT,
+	};
+
+	const VkFormat formats[] =
+	{
+		VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
+		VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
+		VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
+		VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
+		VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
+		VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
+		VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
+		VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED,
+		VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
+	};
+
+	for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
+	{
+		const VkFormat format = formats[formatIndex];
+
+		for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
+		{
+			const std::string op	= de::toLower(getOpTypeName(opTypeIndex));
+			const std::string name	= op + "_" + subgroups::getFormatNameForGLSL(format);
+
+			{
+				const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format};
+				addFunctionCaseWithPrograms(computeGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
+			}
+
+			{
+				const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format};
+				addFunctionCaseWithPrograms(graphicGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
+			}
+
+			for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
+			{
+				const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
+				// The stage suffix is separated with '_' so the names follow the
+				// "<op>_<type>_<stage>" pattern of the arithmetic tests instead
+				// of fusing the type and stage together.
+				// NOTE(review): this renames the framebuffer cases; any case
+				// list that recorded the unseparated names must be regenerated.
+				addFunctionCaseWithPrograms(framebufferGroup.get(), name + "_" + getShaderStageName(caseDef.shaderStage), "",
+											supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+			}
+		}
+	}
+
+	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+		testCtx, "ballot_broadcast", "Subgroup ballot broadcast category tests"));
+
+	group->addChild(graphicGroup.release());
+	group->addChild(computeGroup.release());
+	group->addChild(framebufferGroup.release());
+	return group.release();
+}
+
+} // subgroups
+} // vkt
--- /dev/null
+#ifndef _VKTSUBGROUPSBALLOTBROADCASTTESTS_HPP
+#define _VKTSUBGROUPSBALLOTBROADCASTTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Creates the "ballot_broadcast" subgroup test group and all of its children.
+// The implementation hands back a released pointer, so the caller takes ownership.
+tcu::TestCaseGroup* createSubgroupsBallotBroadcastTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSBALLOTBROADCASTTESTS_HPP
--- /dev/null
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsBallotOtherTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Ballot helper built-in exercised by a test case; getOpTypeName() maps each
+// value to the corresponding GLSL function name. OPTYPE_LAST is a sentinel
+// that does not name a real operation.
+enum OpType
+{
+ OPTYPE_INVERSE_BALLOT = 0,
+ OPTYPE_BALLOT_BIT_EXTRACT,
+ OPTYPE_BALLOT_BIT_COUNT,
+ OPTYPE_BALLOT_INCLUSIVE_BIT_COUNT,
+ OPTYPE_BALLOT_EXCLUSIVE_BIT_COUNT,
+ OPTYPE_BALLOT_FIND_LSB,
+ OPTYPE_BALLOT_FIND_MSB,
+ OPTYPE_LAST
+};
+
+// Result-check callback for the graphics and framebuffer variants. The third
+// parameter is unused; the comparison itself is delegated to subgroups::check.
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+	deUint32 width, deUint32)
+{
+	// Reference value with the four low result bits (0x1, 0x2, 0x4, 0x8) set.
+	const deUint32 expectedResult = 0xf;
+
+	return vkt::subgroups::check(datas, width, expectedResult);
+}
+
+// Result-check callback for the compute variant. The last parameter is unused;
+// the comparison itself is delegated to subgroups::checkCompute.
+static bool checkCompute(std::vector<const void*> datas,
+	const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+	deUint32)
+{
+	// Reference value with the four low result bits (0x1, 0x2, 0x4, 0x8) set.
+	const deUint32 expectedResult = 0xf;
+
+	return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, expectedResult);
+}
+
+// Maps an OpType value to the name of the GLSL built-in it exercises.
+// Any other value triggers DE_FATAL and yields an empty string.
+std::string getOpTypeName(int opType)
+{
+	std::string opName;
+
+	switch (opType)
+	{
+		case OPTYPE_INVERSE_BALLOT:
+			opName = "subgroupInverseBallot";
+			break;
+		case OPTYPE_BALLOT_BIT_EXTRACT:
+			opName = "subgroupBallotBitExtract";
+			break;
+		case OPTYPE_BALLOT_BIT_COUNT:
+			opName = "subgroupBallotBitCount";
+			break;
+		case OPTYPE_BALLOT_INCLUSIVE_BIT_COUNT:
+			opName = "subgroupBallotInclusiveBitCount";
+			break;
+		case OPTYPE_BALLOT_EXCLUSIVE_BIT_COUNT:
+			opName = "subgroupBallotExclusiveBitCount";
+			break;
+		case OPTYPE_BALLOT_FIND_LSB:
+			opName = "subgroupBallotFindLSB";
+			break;
+		case OPTYPE_BALLOT_FIND_MSB:
+			opName = "subgroupBallotFindMSB";
+			break;
+		default:
+			// opName stays empty for unknown values.
+			DE_FATAL("Unsupported op type");
+			break;
+	}
+
+	return opName;
+}
+
+// Parameters of a single ballot-other test case.
+struct CaseDefinition
+{
+ // One of the OpType values; selects the generated shader body.
+ int opType;
+ // Shader stage mask the case targets.
+ VkShaderStageFlags shaderStage;
+};
+
+std::string getBodySource(CaseDefinition caseDef)
+{
+ std::ostringstream bdy;
+
+ bdy << " uvec4 allOnes = uvec4(0xFFFFFFFF);\n"
+ << " uvec4 allZeros = uvec4(0);\n"
+ << " uint tempResult = 0;\n"
+ << "#define MAKE_HIGH_BALLOT_RESULT(i) uvec4("
+ << "i >= 32 ? 0 : (0xFFFFFFFF << i), "
+ << "i >= 64 ? 0 : (0xFFFFFFFF << ((i < 32) ? 0 : (i - 32))), "
+ << "i >= 96 ? 0 : (0xFFFFFFFF << ((i < 64) ? 0 : (i - 64))), "
+ << " 0xFFFFFFFF << ((i < 96) ? 0 : (i - 96)))\n"
+ << "#define MAKE_SINGLE_BIT_BALLOT_RESULT(i) uvec4("
+ << "i >= 32 ? 0 : 0x1 << i, "
+ << "i < 32 || i >= 64 ? 0 : 0x1 << (i - 32), "
+ << "i < 64 || i >= 96 ? 0 : 0x1 << (i - 64), "
+ << "i < 96 ? 0 : 0x1 << (i - 96))\n";
+
+ switch (caseDef.opType)
+ {
+ default:
+ DE_FATAL("Unknown op type!");
+ break;
+ case OPTYPE_INVERSE_BALLOT:
+ bdy << " tempResult |= subgroupInverseBallot(allOnes) ? 0x1 : 0;\n"
+ << " tempResult |= subgroupInverseBallot(allZeros) ? 0 : 0x2;\n"
+ << " tempResult |= subgroupInverseBallot(subgroupBallot(true)) ? 0x4 : 0;\n"
+ << " tempResult |= 0x8;\n";
+ break;
+ case OPTYPE_BALLOT_BIT_EXTRACT:
+ bdy << " tempResult |= subgroupBallotBitExtract(allOnes, gl_SubgroupInvocationID) ? 0x1 : 0;\n"
+ << " tempResult |= subgroupBallotBitExtract(allZeros, gl_SubgroupInvocationID) ? 0 : 0x2;\n"
+ << " tempResult |= subgroupBallotBitExtract(subgroupBallot(true), gl_SubgroupInvocationID) ? 0x4 : 0;\n"
+ << " tempResult |= 0x8;\n"
+ << " for (uint i = 0; i < gl_SubgroupSize; i++)\n"
+ << " {\n"
+ << " if (!subgroupBallotBitExtract(allOnes, gl_SubgroupInvocationID))\n"
+ << " {\n"
+ << " tempResult &= ~0x8;\n"
+ << " }\n"
+ << " }\n";
+ break;
+ case OPTYPE_BALLOT_BIT_COUNT:
+ bdy << " tempResult |= gl_SubgroupSize == subgroupBallotBitCount(allOnes) ? 0x1 : 0;\n"
+ << " tempResult |= 0 == subgroupBallotBitCount(allZeros) ? 0x2 : 0;\n"
+ << " tempResult |= 0 < subgroupBallotBitCount(subgroupBallot(true)) ? 0x4 : 0;\n"
+ << " tempResult |= 0 == subgroupBallotBitCount(MAKE_HIGH_BALLOT_RESULT(gl_SubgroupSize)) ? 0x8 : 0;\n";
+ break;
+ case OPTYPE_BALLOT_INCLUSIVE_BIT_COUNT:
+ bdy << " uint inclusiveOffset = gl_SubgroupInvocationID + 1;\n"
+ << " tempResult |= inclusiveOffset == subgroupBallotInclusiveBitCount(allOnes) ? 0x1 : 0;\n"
+ << " tempResult |= 0 == subgroupBallotInclusiveBitCount(allZeros) ? 0x2 : 0;\n"
+ << " tempResult |= 0 < subgroupBallotInclusiveBitCount(subgroupBallot(true)) ? 0x4 : 0;\n"
+ << " tempResult |= 0x8;\n"
+ << " uvec4 inclusiveUndef = MAKE_HIGH_BALLOT_RESULT(inclusiveOffset);\n"
+ << " bool undefTerritory = false;\n"
+ << " for (uint i = 0; i <= 128; i++)\n"
+ << " {\n"
+ << " uvec4 iUndef = MAKE_HIGH_BALLOT_RESULT(i);\n"
+ << " if (iUndef == inclusiveUndef)"
+ << " {\n"
+ << " undefTerritory = true;\n"
+ << " }\n"
+ << " uint inclusiveBitCount = subgroupBallotInclusiveBitCount(iUndef);\n"
+ << " if (undefTerritory && (0 != inclusiveBitCount))\n"
+ << " {\n"
+ << " tempResult &= ~0x8;\n"
+ << " }\n"
+ << " else if (!undefTerritory && (0 == inclusiveBitCount))\n"
+ << " {\n"
+ << " tempResult &= ~0x8;\n"
+ << " }\n"
+ << " }\n";
+ break;
+ case OPTYPE_BALLOT_EXCLUSIVE_BIT_COUNT:
+ bdy << " uint exclusiveOffset = gl_SubgroupInvocationID;\n"
+ << " tempResult |= exclusiveOffset == subgroupBallotExclusiveBitCount(allOnes) ? 0x1 : 0;\n"
+ << " tempResult |= 0 == subgroupBallotExclusiveBitCount(allZeros) ? 0x2 : 0;\n"
+ << " tempResult |= 0x4;\n"
+ << " tempResult |= 0x8;\n"
+ << " uvec4 exclusiveUndef = MAKE_HIGH_BALLOT_RESULT(exclusiveOffset);\n"
+ << " bool undefTerritory = false;\n"
+ << " for (uint i = 0; i <= 128; i++)\n"
+ << " {\n"
+ << " uvec4 iUndef = MAKE_HIGH_BALLOT_RESULT(i);\n"
+ << " if (iUndef == exclusiveUndef)"
+ << " {\n"
+ << " undefTerritory = true;\n"
+ << " }\n"
+ << " uint exclusiveBitCount = subgroupBallotExclusiveBitCount(iUndef);\n"
+ << " if (undefTerritory && (0 != exclusiveBitCount))\n"
+ << " {\n"
+ << " tempResult &= ~0x4;\n"
+ << " }\n"
+ << " else if (!undefTerritory && (0 == exclusiveBitCount))\n"
+ << " {\n"
+ << " tempResult &= ~0x8;\n"
+ << " }\n"
+ << " }\n";
+ break;
+ case OPTYPE_BALLOT_FIND_LSB:
+ bdy << " tempResult |= 0 == subgroupBallotFindLSB(allOnes) ? 0x1 : 0;\n"
+ << " if (subgroupElect())\n"
+ << " {\n"
+ << " tempResult |= 0x2;\n"
+ << " }\n"
+ << " else\n"
+ << " {\n"
+ << " tempResult |= 0 < subgroupBallotFindLSB(subgroupBallot(true)) ? 0x2 : 0;\n"
+ << " }\n"
+ << " tempResult |= gl_SubgroupSize > subgroupBallotFindLSB(subgroupBallot(true)) ? 0x4 : 0;\n"
+ << " tempResult |= 0x8;\n"
+ << " for (uint i = 0; i < gl_SubgroupSize; i++)\n"
+ << " {\n"
+ << " if (i != subgroupBallotFindLSB(MAKE_HIGH_BALLOT_RESULT(i)))\n"
+ << " {\n"
+ << " tempResult &= ~0x8;\n"
+ << " }\n"
+ << " }\n";
+ break;
+ case OPTYPE_BALLOT_FIND_MSB:
+ bdy << " tempResult |= (gl_SubgroupSize - 1) == subgroupBallotFindMSB(allOnes) ? 0x1 : 0;\n"
+ << " if (subgroupElect())\n"
+ << " {\n"
+ << " tempResult |= 0x2;\n"
+ << " }\n"
+ << " else\n"
+ << " {\n"
+ << " tempResult |= 0 < subgroupBallotFindMSB(subgroupBallot(true)) ? 0x2 : 0;\n"
+ << " }\n"
+ << " tempResult |= gl_SubgroupSize > subgroupBallotFindMSB(subgroupBallot(true)) ? 0x4 : 0;\n"
+ << " tempResult |= 0x8;\n"
+ << " for (uint i = 0; i < gl_SubgroupSize; i++)\n"
+ << " {\n"
+ << " if (i != subgroupBallotFindMSB(MAKE_SINGLE_BIT_BALLOT_RESULT(i)))\n"
+ << " {\n"
+ << " tempResult &= ~0x8;\n"
+ << " }\n"
+ << " }\n";
+ break;
+ }
+ return bdy.str();
+}
+
+// Registers the shaders used by the framebuffer (no-SSBO) variant of a case.
+// The stage named by caseDef.shaderStage gets a shader built around the body
+// returned by getBodySource(); the remaining stages are filled in through the
+// subgroups::set*ShaderFrameBuffer helpers. Only the vertex, geometry,
+// tessellation control and tessellation evaluation stages are handled; any
+// other value ends in DE_FATAL.
+void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+	const vk::ShaderBuildOptions	spirv13Options	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+	const bool						isVertexCase	= (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage);
+
+	subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+	// The vertex case supplies its own vertex shader further down.
+	if (!isVertexCase)
+		subgroups::setVertexShaderFrameBuffer(programCollection);
+
+	const std::string	testBody	= getBodySource(caseDef);
+
+	// Version declaration and extension line shared by every stage below.
+	const std::string	preamble	= std::string(glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)) + "\n"
+		"#extension GL_KHR_shader_subgroup_ballot: enable\n";
+
+	switch (caseDef.shaderStage)
+	{
+		case VK_SHADER_STAGE_VERTEX_BIT:
+		{
+			const std::string vertexSrc = preamble +
+				"layout(location = 0) in highp vec4 in_position;\n"
+				"layout(location = 0) out float out_color;\n"
+				"\n"
+				"void main (void)\n"
+				"{\n"
+				+ testBody +
+				" out_color = float(tempResult);\n"
+				" gl_Position = in_position;\n"
+				" gl_PointSize = 1.0f;\n"
+				"}\n";
+
+			programCollection.glslSources.add("vert")
+				<< glu::VertexSource(vertexSrc) << spirv13Options;
+			break;
+		}
+
+		case VK_SHADER_STAGE_GEOMETRY_BIT:
+		{
+			const std::string geometrySrc = preamble +
+				"layout(points) in;\n"
+				"layout(points, max_vertices = 1) out;\n"
+				"layout(location = 0) out float out_color;\n"
+				"void main (void)\n"
+				"{\n"
+				+ testBody +
+				" out_color = float(tempResult);\n"
+				" gl_Position = gl_in[0].gl_Position;\n"
+				" EmitVertex();\n"
+				" EndPrimitive();\n"
+				"}\n";
+
+			programCollection.glslSources.add("geometry")
+				<< glu::GeometrySource(geometrySrc) << spirv13Options;
+			break;
+		}
+
+		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+		{
+			const std::string controlSrc = preamble +
+				"layout(vertices = 2) out;\n"
+				"layout(location = 0) out float out_color[];\n"
+				"\n"
+				"void main (void)\n"
+				"{\n"
+				" if (gl_InvocationID == 0)\n"
+				" {\n"
+				" gl_TessLevelOuter[0] = 1.0f;\n"
+				" gl_TessLevelOuter[1] = 1.0f;\n"
+				" }\n"
+				+ testBody +
+				" out_color[gl_InvocationID ] = float(tempResult);\n"
+				" gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+				"}\n";
+
+			programCollection.glslSources.add("tesc")
+				<< glu::TessellationControlSource(controlSrc) << spirv13Options;
+			// The evaluation stage comes from the shared helper.
+			subgroups::setTesEvalShaderFrameBuffer(programCollection);
+			break;
+		}
+
+		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+		{
+			const std::string evaluationSrc = preamble +
+				"layout(isolines, equal_spacing, ccw ) in;\n"
+				"layout(location = 0) out float out_color;\n"
+				"void main (void)\n"
+				"{\n"
+				+ testBody +
+				" out_color = float(tempResult);\n"
+				" gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+				"}\n";
+
+			// The control stage comes from the shared helper.
+			subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+			programCollection.glslSources.add("tese")
+				<< glu::TessellationEvaluationSource(evaluationSrc) << spirv13Options;
+			break;
+		}
+
+		default:
+			DE_FATAL("Unsupported shader stage");
+			break;
+	}
+}
+
+// Registers the SSBO-based programs for a case. A compute case gets a single
+// "comp" shader; every other case gets the full set of graphics shaders
+// ("vert", "tesc", "tese", the geometry variants expanded from a template, and
+// "fragment") plus whatever subgroups::addNoSubgroupShader() adds. Each shader
+// wraps the body returned by getBodySource() and stores tempResult.
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+	const vk::ShaderBuildOptions	spirv13Options	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+	const std::string				testBody		= getBodySource(caseDef);
+
+	// Version and extension lines shared by every shader built here.
+	const std::string				header			=
+		"#version 450\n"
+		"#extension GL_KHR_shader_subgroup_ballot: enable\n";
+
+	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+	{
+		const std::string computeSrc = header +
+			"layout (local_size_x_id = 0, local_size_y_id = 1, "
+			"local_size_z_id = 2) in;\n"
+			"layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+			"{\n"
+			" uint result[];\n"
+			"};\n"
+			"\n"
+			"void main (void)\n"
+			"{\n"
+			" uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+			" highp uint offset = globalSize.x * ((globalSize.y * "
+			"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+			"gl_GlobalInvocationID.x;\n"
+			+ testBody +
+			" result[offset] = tempResult;\n"
+			"}\n";
+
+		programCollection.glslSources.add("comp")
+			<< glu::ComputeSource(computeSrc) << spirv13Options;
+		return;
+	}
+
+	// Graphics case: each stage writes into its own binding (0..3); the
+	// fragment stage writes to a colour output instead.
+	const std::string vertexSrc = header +
+		"layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+		"{\n"
+		" uint result[];\n"
+		"};\n"
+		"\n"
+		"void main (void)\n"
+		"{\n"
+		+ testBody +
+		" result[gl_VertexIndex] = tempResult;\n"
+		" float pixelSize = 2.0f/1024.0f;\n"
+		" float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+		" gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+		" gl_PointSize = 1.0f;\n"
+		"}\n";
+
+	const std::string controlSrc = header +
+		"layout(vertices=1) out;\n"
+		"layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+		"{\n"
+		" uint result[];\n"
+		"};\n"
+		"\n"
+		"void main (void)\n"
+		"{\n"
+		+ testBody +
+		" result[gl_PrimitiveID] = tempResult;\n"
+		" if (gl_InvocationID == 0)\n"
+		" {\n"
+		" gl_TessLevelOuter[0] = 1.0f;\n"
+		" gl_TessLevelOuter[1] = 1.0f;\n"
+		" }\n"
+		" gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+		"}\n";
+
+	const std::string evaluationSrc = header +
+		"layout(isolines) in;\n"
+		"layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+		"{\n"
+		" uint result[];\n"
+		"};\n"
+		"\n"
+		"void main (void)\n"
+		"{\n"
+		+ testBody +
+		" result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
+		" float pixelSize = 2.0f/1024.0f;\n"
+		" gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+		"}\n";
+
+	// ${TOPOLOGY} is left in place for subgroups::addGeometryShadersFromTemplate().
+	const std::string geometryTemplate = header +
+		"layout(${TOPOLOGY}) in;\n"
+		"layout(points, max_vertices = 1) out;\n"
+		"layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+		"{\n"
+		" uint result[];\n"
+		"};\n"
+		"\n"
+		"void main (void)\n"
+		"{\n"
+		+ testBody +
+		" result[gl_PrimitiveIDIn] = tempResult;\n"
+		" gl_Position = gl_in[0].gl_Position;\n"
+		" EmitVertex();\n"
+		" EndPrimitive();\n"
+		"}\n";
+
+	const std::string fragmentSrc = header +
+		"layout(location = 0) out uint result;\n"
+		"void main (void)\n"
+		"{\n"
+		+ testBody +
+		" result = tempResult;\n"
+		"}\n";
+
+	subgroups::addNoSubgroupShader(programCollection);
+
+	programCollection.glslSources.add("vert")
+		<< glu::VertexSource(vertexSrc) << spirv13Options;
+	programCollection.glslSources.add("tesc")
+		<< glu::TessellationControlSource(controlSrc) << spirv13Options;
+	programCollection.glslSources.add("tese")
+		<< glu::TessellationEvaluationSource(evaluationSrc) << spirv13Options;
+	subgroups::addGeometryShadersFromTemplate(geometryTemplate, spirv13Options, programCollection.glslSources);
+	programCollection.glslSources.add("fragment")
+		<< glu::FragmentSource(fragmentSrc) << spirv13Options;
+}
+
+// Throws NotSupportedError unless the device supports subgroup operations and
+// reports VK_SUBGROUP_FEATURE_BALLOT_BIT. The case definition is not consulted.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+	DE_UNREF(caseDef);
+
+	const bool subgroupsAvailable = subgroups::isSubgroupSupported(context);
+
+	if (!subgroupsAvailable)
+	{
+		TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+	}
+
+	// Only queried once basic subgroup support has been established.
+	const bool ballotAvailable = subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT);
+
+	if (!ballotAvailable)
+	{
+		TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
+	}
+}
+
+// Runs the framebuffer (no-SSBO) variant of a case for a single shader stage.
+// An unsupported stage yields a failure when subgroup operations are required
+// for it and NotSupportedError otherwise. Tessellation control and evaluation
+// cases both go through the tessellation-evaluation framebuffer test.
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+	const bool stageSupported = subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage);
+
+	if (!stageSupported)
+	{
+		if (!subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+		{
+			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+		}
+
+		return tcu::TestStatus::fail(
+			"Shader stage " +
+			subgroups::getShaderStageName(caseDef.shaderStage) +
+			" is required to support subgroup operations!");
+	}
+
+	if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+	{
+		return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+	}
+
+	if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+	{
+		return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+	}
+
+	// Either tessellation bit selects the tessellation test.
+	const bool isTessellationCase =
+		0 != ((VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) & caseDef.shaderStage);
+
+	if (isTessellationCase)
+	{
+		return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+	}
+
+	TCU_THROW(InternalError, "Unhandled shader stage");
+}
+
+// Runs the SSBO-based variant of a case.
+//
+// Compute cases fail outright when the compute stage does not support subgroup
+// operations; otherwise they run through subgroups::makeComputeTest().
+//
+// Graphics cases intersect the requested stages with the stages the device
+// reports in VkPhysicalDeviceSubgroupProperties::supportedStages:
+//  - an empty intersection throws NotSupportedError;
+//  - without vertex-stage SSBO support the run is narrowed to the fragment
+//    stage, or NotSupportedError is thrown if the fragment stage is not
+//    among the remaining stages;
+//  - the remaining stages are handed to subgroups::allStages().
+tcu::TestStatus test (Context& context, const CaseDefinition caseDef)
+{
+	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+	{
+		if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+		{
+			return tcu::TestStatus::fail(
+				"Shader stage " +
+				subgroups::getShaderStageName(caseDef.shaderStage) +
+				" is required to support subgroup operations!");
+		}
+		return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkCompute);
+	}
+
+	// Chain the subgroup properties into the properties2 query.
+	VkPhysicalDeviceSubgroupProperties subgroupProperties;
+	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+	subgroupProperties.pNext = DE_NULL;
+
+	VkPhysicalDeviceProperties2 properties;
+	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+	properties.pNext = &subgroupProperties;
+
+	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+	VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProperties.supportedStages);
+
+	// Checked first so that a device with no usable graphics stage reports
+	// that reason rather than the vertex-SSBO one below.
+	if ((VkShaderStageFlagBits)0u == stages)
+		TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+	if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
+	{
+		if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
+			TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+		else
+			stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+	}
+
+	return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, stages);
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+// Builds the "ballot_other" group. For every op type in [0, OPTYPE_LAST) it
+// registers one compute case, one all-graphics case and one framebuffer case
+// per entry of the stage table below. The returned pointer is released from
+// its MovePtr, so the caller is expected to take ownership.
+tcu::TestCaseGroup* createSubgroupsBallotOtherTests(tcu::TestContext& testCtx)
+{
+	// Stages covered by the framebuffer (no-SSBO) variants, in registration order.
+	const VkShaderStageFlags framebufferStages[] =
+	{
+		VK_SHADER_STAGE_VERTEX_BIT,
+		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+		VK_SHADER_STAGE_GEOMETRY_BIT,
+	};
+	const int framebufferStageCount = DE_LENGTH_OF_ARRAY(framebufferStages);
+
+	de::MovePtr<tcu::TestCaseGroup> graphicsCases(new tcu::TestCaseGroup(
+		testCtx, "graphics", "Subgroup ballot other category tests: graphics"));
+	de::MovePtr<tcu::TestCaseGroup> computeCases(new tcu::TestCaseGroup(
+		testCtx, "compute", "Subgroup ballot other category tests: compute"));
+	de::MovePtr<tcu::TestCaseGroup> framebufferCases(new tcu::TestCaseGroup(
+		testCtx, "framebuffer", "Subgroup ballot other category tests: framebuffer"));
+
+	for (int opType = 0; opType < OPTYPE_LAST; ++opType)
+	{
+		const string opName = de::toLower(getOpTypeName(opType));
+
+		const CaseDefinition computeCase = {opType, VK_SHADER_STAGE_COMPUTE_BIT};
+		addFunctionCaseWithPrograms(computeCases.get(), opName, "", supportedCheck, initPrograms, test, computeCase);
+
+		const CaseDefinition graphicsCase = {opType, VK_SHADER_STAGE_ALL_GRAPHICS};
+		addFunctionCaseWithPrograms(graphicsCases.get(), opName, "", supportedCheck, initPrograms, test, graphicsCase);
+
+		for (int stageNdx = 0; stageNdx < framebufferStageCount; ++stageNdx)
+		{
+			const CaseDefinition	framebufferCase	= {opType, framebufferStages[stageNdx]};
+			const string			caseName		= opName + "_" + getShaderStageName(framebufferCase.shaderStage);
+
+			addFunctionCaseWithPrograms(framebufferCases.get(), caseName, "", supportedCheck, initFrameBufferPrograms, noSSBOtest, framebufferCase);
+		}
+	}
+
+	de::MovePtr<tcu::TestCaseGroup> ballotOther(new tcu::TestCaseGroup(
+		testCtx, "ballot_other", "Subgroup ballot other category tests"));
+
+	// Child order: graphics, compute, framebuffer.
+	ballotOther->addChild(graphicsCases.release());
+	ballotOther->addChild(computeCases.release());
+	ballotOther->addChild(framebufferCases.release());
+
+	return ballotOther.release();
+}
+
+} // subgroups
+} // vkt
--- /dev/null
+#ifndef _VKTSUBGROUPSBALLOTOTHERTESTS_HPP
+#define _VKTSUBGROUPSBALLOTOTHERTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Creates the "ballot_other" subgroup test group, which contains the
+// "graphics", "compute" and "framebuffer" child groups. The group is returned
+// as a raw pointer, so the caller is expected to take ownership of it.
+tcu::TestCaseGroup* createSubgroupsBallotOtherTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSBALLOTOTHERTESTS_HPP
--- /dev/null
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsBallotTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Result checker handed to the framebuffer and all-stages test runners.
+// Forwards to subgroups::check() with the reference value 0x7, i.e. the three
+// result bits (0x1, 0x2, 0x4) that the ballot shaders OR into tempResult.
+// The trailing unnamed argument is ignored.
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+ deUint32 width, deUint32)
+{
+	const deUint32 allChecksPassed = 0x7;
+
+	return vkt::subgroups::check(datas, width, allChecksPassed);
+}
+
+// Result checker handed to the compute test runner. Forwards to
+// subgroups::checkCompute() with the reference value 0x7, i.e. the three
+// result bits (0x1, 0x2, 0x4) that the ballot shaders OR into tempResult.
+// The trailing unnamed argument is ignored.
+static bool checkCompute(std::vector<const void*> datas,
+ const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+ deUint32)
+{
+	const deUint32 allChecksPassed = 0x7;
+
+	return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, allChecksPassed);
+}
+
+// Parameters of a single ballot test case.
+struct CaseDefinition
+{
+ // Shader stage the case targets; compared against VK_SHADER_STAGE_* bits
+ // when the programs for the case are generated.
+ VkShaderStageFlags shaderStage;
+};
+
+void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+ const vk::SpirVAsmBuildOptions buildOptionsSpr (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3);
+ std::ostringstream subgroupSizeStr;
+ subgroupSizeStr << subgroups::maxSupportedSubgroupSize();
+
+ subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+ if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+ subgroups::setVertexShaderFrameBuffer(programCollection);
+
+ if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+ {
+ /*
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(location = 0) in highp vec4 in_position;\n"
+ "layout(location = 0) out float out_color;\n"
+ "layout(set = 0, binding = 0) uniform Buffer1\n"
+ "{\n"
+ " uint data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " uint tempResult = 0;\n"
+ " tempResult |= !bool(uvec4(0) == subgroupBallot(true)) ? 0x1 : 0;\n"
+ " bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+ " tempResult |= !bool(uvec4(0) == subgroupBallot(bData)) ? 0x2 : 0;\n"
+ " tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n"
+ " out_color = float(tempResult);\n"
+ " gl_Position = in_position;\n"
+ " gl_PointSize = 1.0f;\n"
+ "}\n";
+ */
+ const string vertex =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 76\n"
+ "; Schema: 0\n"
+ "OpCapability Shader\n"
+ "OpCapability GroupNonUniform\n"
+ "OpCapability GroupNonUniformBallot\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Vertex %4 \"main\" %35 %62 %70 %72\n"
+ "OpDecorate %30 ArrayStride 16\n"
+ "OpMemberDecorate %31 0 Offset 0\n"
+ "OpDecorate %31 Block\n"
+ "OpDecorate %33 DescriptorSet 0\n"
+ "OpDecorate %33 Binding 0\n"
+ "OpDecorate %35 RelaxedPrecision\n"
+ "OpDecorate %35 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %36 RelaxedPrecision\n"
+ "OpDecorate %62 Location 0\n"
+ "OpMemberDecorate %68 0 BuiltIn Position\n"
+ "OpMemberDecorate %68 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %68 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %68 3 BuiltIn CullDistance\n"
+ "OpDecorate %68 Block\n"
+ "OpDecorate %72 Location 0\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 0\n"
+ "%7 = OpTypePointer Function %6\n"
+ "%9 = OpConstant %6 0\n"
+ "%10 = OpTypeVector %6 4\n"
+ "%11 = OpConstantComposite %10 %9 %9 %9 %9\n"
+ "%12 = OpTypeBool\n"
+ "%13 = OpConstantTrue %12\n"
+ "%14 = OpConstant %6 3\n"
+ "%16 = OpTypeVector %12 4\n"
+ "%20 = OpTypeInt 32 1\n"
+ "%21 = OpConstant %20 1\n"
+ "%22 = OpConstant %20 0\n"
+ "%27 = OpTypePointer Function %12\n"
+ "%29 = OpConstant %6 " + subgroupSizeStr.str() + "\n"
+ "%30 = OpTypeArray %6 %29\n"
+ "%31 = OpTypeStruct %30\n"
+ "%32 = OpTypePointer Uniform %31\n"
+ "%33 = OpVariable %32 Uniform\n"
+ "%34 = OpTypePointer Input %6\n"
+ "%35 = OpVariable %34 Input\n"
+ "%37 = OpTypePointer Uniform %6\n"
+ "%46 = OpConstant %20 2\n"
+ "%51 = OpConstantFalse %12\n"
+ "%55 = OpConstant %20 4\n"
+ "%60 = OpTypeFloat 32\n"
+ "%61 = OpTypePointer Output %60\n"
+ "%62 = OpVariable %61 Output\n"
+ "%65 = OpTypeVector %60 4\n"
+ "%66 = OpConstant %6 1\n"
+ "%67 = OpTypeArray %60 %66\n"
+ "%68 = OpTypeStruct %65 %60 %67 %67\n"
+ "%69 = OpTypePointer Output %68\n"
+ "%70 = OpVariable %69 Output\n"
+ "%71 = OpTypePointer Input %65\n"
+ "%72 = OpVariable %71 Input\n"
+ "%74 = OpTypePointer Output %65\n"
+ "%76 = OpConstant %60 1\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%8 = OpVariable %7 Function\n"
+ "%28 = OpVariable %27 Function\n"
+ "OpStore %8 %9\n"
+ "%15 = OpGroupNonUniformBallot %10 %14 %13\n"
+ "%17 = OpIEqual %16 %11 %15\n"
+ "%18 = OpAll %12 %17\n"
+ "%19 = OpLogicalNot %12 %18\n"
+ "%23 = OpSelect %20 %19 %21 %22\n"
+ "%24 = OpBitcast %6 %23\n"
+ "%25 = OpLoad %6 %8\n"
+ "%26 = OpBitwiseOr %6 %25 %24\n"
+ "OpStore %8 %26\n"
+ "%36 = OpLoad %6 %35\n"
+ "%38 = OpAccessChain %37 %33 %22 %36\n"
+ "%39 = OpLoad %6 %38\n"
+ "%40 = OpINotEqual %12 %39 %9\n"
+ "OpStore %28 %40\n"
+ "%41 = OpLoad %12 %28\n"
+ "%42 = OpGroupNonUniformBallot %10 %14 %41\n"
+ "%43 = OpIEqual %16 %11 %42\n"
+ "%44 = OpAll %12 %43\n"
+ "%45 = OpLogicalNot %12 %44\n"
+ "%47 = OpSelect %20 %45 %46 %22\n"
+ "%48 = OpBitcast %6 %47\n"
+ "%49 = OpLoad %6 %8\n"
+ "%50 = OpBitwiseOr %6 %49 %48\n"
+ "OpStore %8 %50\n"
+ "%52 = OpGroupNonUniformBallot %10 %14 %51\n"
+ "%53 = OpIEqual %16 %11 %52\n"
+ "%54 = OpAll %12 %53\n"
+ "%56 = OpSelect %20 %54 %55 %22\n"
+ "%57 = OpBitcast %6 %56\n"
+ "%58 = OpLoad %6 %8\n"
+ "%59 = OpBitwiseOr %6 %58 %57\n"
+ "OpStore %8 %59\n"
+ "%63 = OpLoad %6 %8\n"
+ "%64 = OpConvertUToF %60 %63\n"
+ "OpStore %62 %64\n"
+ "%73 = OpLoad %65 %72\n"
+ "%75 = OpAccessChain %74 %70 %22\n"
+ "OpStore %75 %73\n"
+ "%77 = OpAccessChain %61 %70 %21\n"
+ "OpStore %77 %76\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("vert") << vertex << buildOptionsSpr;
+ }
+ else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+ {
+ /*
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(points) in;\n"
+ "layout(points, max_vertices = 1) out;\n"
+ "layout(location = 0) out float out_color;\n"
+ "layout(set = 0, binding = 0) uniform Buffer1\n"
+ "{\n"
+ " uint data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " uint tempResult = 0;\n"
+ " tempResult |= !bool(uvec4(0) == subgroupBallot(true)) ? 0x1 : 0;\n"
+ " bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+ " tempResult |= !bool(uvec4(0) == subgroupBallot(bData)) ? 0x2 : 0;\n"
+ " tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n"
+ " out_color = float(tempResult);\n"
+ " gl_Position = gl_in[0].gl_Position;\n"
+ " EmitVertex();\n"
+ " EndPrimitive();\n"
+ "}\n";
+ */
+ const string geometry =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 80\n"
+ "; Schema: 0\n"
+ "OpCapability Geometry\n"
+ "OpCapability GroupNonUniform\n"
+ "OpCapability GroupNonUniformBallot\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Geometry %4 \"main\" %35 %62 %70 %74\n"
+ "OpExecutionMode %4 InputPoints\n"
+ "OpExecutionMode %4 Invocations 1\n"
+ "OpExecutionMode %4 OutputPoints\n"
+ "OpExecutionMode %4 OutputVertices 1\n"
+ "OpDecorate %30 ArrayStride 16\n"
+ "OpMemberDecorate %31 0 Offset 0\n"
+ "OpDecorate %31 Block\n"
+ "OpDecorate %33 DescriptorSet 0\n"
+ "OpDecorate %33 Binding 0\n"
+ "OpDecorate %35 RelaxedPrecision\n"
+ "OpDecorate %35 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %36 RelaxedPrecision\n"
+ "OpDecorate %62 Location 0\n"
+ "OpMemberDecorate %68 0 BuiltIn Position\n"
+ "OpMemberDecorate %68 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %68 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %68 3 BuiltIn CullDistance\n"
+ "OpDecorate %68 Block\n"
+ "OpMemberDecorate %71 0 BuiltIn Position\n"
+ "OpMemberDecorate %71 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %71 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %71 3 BuiltIn CullDistance\n"
+ "OpDecorate %71 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 0\n"
+ "%7 = OpTypePointer Function %6\n"
+ "%9 = OpConstant %6 0\n"
+ "%10 = OpTypeVector %6 4\n"
+ "%11 = OpConstantComposite %10 %9 %9 %9 %9\n"
+ "%12 = OpTypeBool\n"
+ "%13 = OpConstantTrue %12\n"
+ "%14 = OpConstant %6 3\n"
+ "%16 = OpTypeVector %12 4\n"
+ "%20 = OpTypeInt 32 1\n"
+ "%21 = OpConstant %20 1\n"
+ "%22 = OpConstant %20 0\n"
+ "%27 = OpTypePointer Function %12\n"
+ "%29 = OpConstant %6 " + subgroupSizeStr.str() + "\n"
+ "%30 = OpTypeArray %6 %29\n"
+ "%31 = OpTypeStruct %30\n"
+ "%32 = OpTypePointer Uniform %31\n"
+ "%33 = OpVariable %32 Uniform\n"
+ "%34 = OpTypePointer Input %6\n"
+ "%35 = OpVariable %34 Input\n"
+ "%37 = OpTypePointer Uniform %6\n"
+ "%46 = OpConstant %20 2\n"
+ "%51 = OpConstantFalse %12\n"
+ "%55 = OpConstant %20 4\n"
+ "%60 = OpTypeFloat 32\n"
+ "%61 = OpTypePointer Output %60\n"
+ "%62 = OpVariable %61 Output\n"
+ "%65 = OpTypeVector %60 4\n"
+ "%66 = OpConstant %6 1\n"
+ "%67 = OpTypeArray %60 %66\n"
+ "%68 = OpTypeStruct %65 %60 %67 %67\n"
+ "%69 = OpTypePointer Output %68\n"
+ "%70 = OpVariable %69 Output\n"
+ "%71 = OpTypeStruct %65 %60 %67 %67\n"
+ "%72 = OpTypeArray %71 %66\n"
+ "%73 = OpTypePointer Input %72\n"
+ "%74 = OpVariable %73 Input\n"
+ "%75 = OpTypePointer Input %65\n"
+ "%78 = OpTypePointer Output %65\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%8 = OpVariable %7 Function\n"
+ "%28 = OpVariable %27 Function\n"
+ "OpStore %8 %9\n"
+ "%15 = OpGroupNonUniformBallot %10 %14 %13\n"
+ "%17 = OpIEqual %16 %11 %15\n"
+ "%18 = OpAll %12 %17\n"
+ "%19 = OpLogicalNot %12 %18\n"
+ "%23 = OpSelect %20 %19 %21 %22\n"
+ "%24 = OpBitcast %6 %23\n"
+ "%25 = OpLoad %6 %8\n"
+ "%26 = OpBitwiseOr %6 %25 %24\n"
+ "OpStore %8 %26\n"
+ "%36 = OpLoad %6 %35\n"
+ "%38 = OpAccessChain %37 %33 %22 %36\n"
+ "%39 = OpLoad %6 %38\n"
+ "%40 = OpINotEqual %12 %39 %9\n"
+ "OpStore %28 %40\n"
+ "%41 = OpLoad %12 %28\n"
+ "%42 = OpGroupNonUniformBallot %10 %14 %41\n"
+ "%43 = OpIEqual %16 %11 %42\n"
+ "%44 = OpAll %12 %43\n"
+ "%45 = OpLogicalNot %12 %44\n"
+ "%47 = OpSelect %20 %45 %46 %22\n"
+ "%48 = OpBitcast %6 %47\n"
+ "%49 = OpLoad %6 %8\n"
+ "%50 = OpBitwiseOr %6 %49 %48\n"
+ "OpStore %8 %50\n"
+ "%52 = OpGroupNonUniformBallot %10 %14 %51\n"
+ "%53 = OpIEqual %16 %11 %52\n"
+ "%54 = OpAll %12 %53\n"
+ "%56 = OpSelect %20 %54 %55 %22\n"
+ "%57 = OpBitcast %6 %56\n"
+ "%58 = OpLoad %6 %8\n"
+ "%59 = OpBitwiseOr %6 %58 %57\n"
+ "OpStore %8 %59\n"
+ "%63 = OpLoad %6 %8\n"
+ "%64 = OpConvertUToF %60 %63\n"
+ "OpStore %62 %64\n"
+ "%76 = OpAccessChain %75 %74 %22 %22\n"
+ "%77 = OpLoad %65 %76\n"
+ "%79 = OpAccessChain %78 %70 %22\n"
+ "OpStore %79 %77\n"
+ "OpEmitVertex\n"
+ "OpEndPrimitive\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("geometry") << geometry << buildOptionsSpr;
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+ {
+ /*
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(vertices = 2) out;\n"
+ "layout(location = 0) out float out_color[];\n"
+ "layout(set = 0, binding = 0) uniform Buffer1\n"
+ "{\n"
+ " uint data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " if (gl_InvocationID == 0)\n"
+ " {\n"
+ " gl_TessLevelOuter[0] = 1.0f;\n"
+ " gl_TessLevelOuter[1] = 1.0f;\n"
+ " }\n"
+ " uint tempResult = 0;\n"
+ " tempResult |= !bool(uvec4(0) == subgroupBallot(true)) ? 0x1 : 0;\n"
+ " bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+ " tempResult |= !bool(uvec4(0) == subgroupBallot(bData)) ? 0x2 : 0;\n"
+ " tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n"
+ " out_color[gl_InvocationID] = float(tempResult);\n"
+ " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ "}\n";
+ */
+ const string controlSource =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 102\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "OpCapability GroupNonUniform\n"
+ "OpCapability GroupNonUniformBallot\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %50 %78 %89 %95\n"
+ "OpExecutionMode %4 OutputVertices 2\n"
+ "OpDecorate %8 BuiltIn InvocationId\n"
+ "OpDecorate %20 Patch\n"
+ "OpDecorate %20 BuiltIn TessLevelOuter\n"
+ "OpDecorate %45 ArrayStride 16\n"
+ "OpMemberDecorate %46 0 Offset 0\n"
+ "OpDecorate %46 Block\n"
+ "OpDecorate %48 DescriptorSet 0\n"
+ "OpDecorate %48 Binding 0\n"
+ "OpDecorate %50 RelaxedPrecision\n"
+ "OpDecorate %50 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %51 RelaxedPrecision\n"
+ "OpDecorate %78 Location 0\n"
+ "OpMemberDecorate %86 0 BuiltIn Position\n"
+ "OpMemberDecorate %86 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %86 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %86 3 BuiltIn CullDistance\n"
+ "OpDecorate %86 Block\n"
+ "OpMemberDecorate %91 0 BuiltIn Position\n"
+ "OpMemberDecorate %91 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %91 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %91 3 BuiltIn CullDistance\n"
+ "OpDecorate %91 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 1\n"
+ "%7 = OpTypePointer Input %6\n"
+ "%8 = OpVariable %7 Input\n"
+ "%10 = OpConstant %6 0\n"
+ "%11 = OpTypeBool\n"
+ "%15 = OpTypeFloat 32\n"
+ "%16 = OpTypeInt 32 0\n"
+ "%17 = OpConstant %16 4\n"
+ "%18 = OpTypeArray %15 %17\n"
+ "%19 = OpTypePointer Output %18\n"
+ "%20 = OpVariable %19 Output\n"
+ "%21 = OpConstant %15 1\n"
+ "%22 = OpTypePointer Output %15\n"
+ "%24 = OpConstant %6 1\n"
+ "%26 = OpTypePointer Function %16\n"
+ "%28 = OpConstant %16 0\n"
+ "%29 = OpTypeVector %16 4\n"
+ "%30 = OpConstantComposite %29 %28 %28 %28 %28\n"
+ "%31 = OpConstantTrue %11\n"
+ "%32 = OpConstant %16 3\n"
+ "%34 = OpTypeVector %11 4\n"
+ "%42 = OpTypePointer Function %11\n"
+ "%44 = OpConstant %16 " + subgroupSizeStr.str() + "\n"
+ "%45 = OpTypeArray %16 %44\n"
+ "%46 = OpTypeStruct %45\n"
+ "%47 = OpTypePointer Uniform %46\n"
+ "%48 = OpVariable %47 Uniform\n"
+ "%49 = OpTypePointer Input %16\n"
+ "%50 = OpVariable %49 Input\n"
+ "%52 = OpTypePointer Uniform %16\n"
+ "%61 = OpConstant %6 2\n"
+ "%66 = OpConstantFalse %11\n"
+ "%70 = OpConstant %6 4\n"
+ "%75 = OpConstant %16 2\n"
+ "%76 = OpTypeArray %15 %75\n"
+ "%77 = OpTypePointer Output %76\n"
+ "%78 = OpVariable %77 Output\n"
+ "%83 = OpTypeVector %15 4\n"
+ "%84 = OpConstant %16 1\n"
+ "%85 = OpTypeArray %15 %84\n"
+ "%86 = OpTypeStruct %83 %15 %85 %85\n"
+ "%87 = OpTypeArray %86 %75\n"
+ "%88 = OpTypePointer Output %87\n"
+ "%89 = OpVariable %88 Output\n"
+ "%91 = OpTypeStruct %83 %15 %85 %85\n"
+ "%92 = OpConstant %16 32\n"
+ "%93 = OpTypeArray %91 %92\n"
+ "%94 = OpTypePointer Input %93\n"
+ "%95 = OpVariable %94 Input\n"
+ "%97 = OpTypePointer Input %83\n"
+ "%100 = OpTypePointer Output %83\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%27 = OpVariable %26 Function\n"
+ "%43 = OpVariable %42 Function\n"
+ "%9 = OpLoad %6 %8\n"
+ "%12 = OpIEqual %11 %9 %10\n"
+ "OpSelectionMerge %14 None\n"
+ "OpBranchConditional %12 %13 %14\n"
+ "%13 = OpLabel\n"
+ "%23 = OpAccessChain %22 %20 %10\n"
+ "OpStore %23 %21\n"
+ "%25 = OpAccessChain %22 %20 %24\n"
+ "OpStore %25 %21\n"
+ "OpBranch %14\n"
+ "%14 = OpLabel\n"
+ "OpStore %27 %28\n"
+ "%33 = OpGroupNonUniformBallot %29 %32 %31\n"
+ "%35 = OpIEqual %34 %30 %33\n"
+ "%36 = OpAll %11 %35\n"
+ "%37 = OpLogicalNot %11 %36\n"
+ "%38 = OpSelect %6 %37 %24 %10\n"
+ "%39 = OpBitcast %16 %38\n"
+ "%40 = OpLoad %16 %27\n"
+ "%41 = OpBitwiseOr %16 %40 %39\n"
+ "OpStore %27 %41\n"
+ "%51 = OpLoad %16 %50\n"
+ "%53 = OpAccessChain %52 %48 %10 %51\n"
+ "%54 = OpLoad %16 %53\n"
+ "%55 = OpINotEqual %11 %54 %28\n"
+ "OpStore %43 %55\n"
+ "%56 = OpLoad %11 %43\n"
+ "%57 = OpGroupNonUniformBallot %29 %32 %56\n"
+ "%58 = OpIEqual %34 %30 %57\n"
+ "%59 = OpAll %11 %58\n"
+ "%60 = OpLogicalNot %11 %59\n"
+ "%62 = OpSelect %6 %60 %61 %10\n"
+ "%63 = OpBitcast %16 %62\n"
+ "%64 = OpLoad %16 %27\n"
+ "%65 = OpBitwiseOr %16 %64 %63\n"
+ "OpStore %27 %65\n"
+ "%67 = OpGroupNonUniformBallot %29 %32 %66\n"
+ "%68 = OpIEqual %34 %30 %67\n"
+ "%69 = OpAll %11 %68\n"
+ "%71 = OpSelect %6 %69 %70 %10\n"
+ "%72 = OpBitcast %16 %71\n"
+ "%73 = OpLoad %16 %27\n"
+ "%74 = OpBitwiseOr %16 %73 %72\n"
+ "OpStore %27 %74\n"
+ "%79 = OpLoad %6 %8\n"
+ "%80 = OpLoad %16 %27\n"
+ "%81 = OpConvertUToF %15 %80\n"
+ "%82 = OpAccessChain %22 %78 %79\n"
+ "OpStore %82 %81\n"
+ "%90 = OpLoad %6 %8\n"
+ "%96 = OpLoad %6 %8\n"
+ "%98 = OpAccessChain %97 %95 %96 %10\n"
+ "%99 = OpLoad %83 %98\n"
+ "%101 = OpAccessChain %100 %89 %90 %10\n"
+ "OpStore %101 %99\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+
+ programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
+ subgroups::setTesEvalShaderFrameBuffer(programCollection);
+
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+ {
+ /*
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(isolines, equal_spacing, ccw ) in;\n"
+ "layout(location = 0) out float out_color;\n"
+ "layout(set = 0, binding = 0) uniform Buffer1\n"
+ "{\n"
+ " uint data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " uint tempResult = 0;\n"
+ " tempResult |= !bool(uvec4(0) == subgroupBallot(true)) ? 0x1 : 0;\n"
+ " bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+ " tempResult |= !bool(uvec4(0) == subgroupBallot(bData)) ? 0x2 : 0;\n"
+ " tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n"
+ " out_color = float(tempResult);\n"
+ " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+ "}\n";
+ */
+ const string evaluationSource =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 91\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "OpCapability GroupNonUniform\n"
+ "OpCapability GroupNonUniformBallot\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationEvaluation %4 \"main\" %35 %62 %70 %75 %83\n"
+ "OpExecutionMode %4 Isolines\n"
+ "OpExecutionMode %4 SpacingEqual\n"
+ "OpExecutionMode %4 VertexOrderCcw\n"
+ "OpDecorate %30 ArrayStride 16\n"
+ "OpMemberDecorate %31 0 Offset 0\n"
+ "OpDecorate %31 Block\n"
+ "OpDecorate %33 DescriptorSet 0\n"
+ "OpDecorate %33 Binding 0\n"
+ "OpDecorate %35 RelaxedPrecision\n"
+ "OpDecorate %35 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %36 RelaxedPrecision\n"
+ "OpDecorate %62 Location 0\n"
+ "OpMemberDecorate %68 0 BuiltIn Position\n"
+ "OpMemberDecorate %68 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %68 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %68 3 BuiltIn CullDistance\n"
+ "OpDecorate %68 Block\n"
+ "OpMemberDecorate %71 0 BuiltIn Position\n"
+ "OpMemberDecorate %71 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %71 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %71 3 BuiltIn CullDistance\n"
+ "OpDecorate %71 Block\n"
+ "OpDecorate %83 BuiltIn TessCoord\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 0\n"
+ "%7 = OpTypePointer Function %6\n"
+ "%9 = OpConstant %6 0\n"
+ "%10 = OpTypeVector %6 4\n"
+ "%11 = OpConstantComposite %10 %9 %9 %9 %9\n"
+ "%12 = OpTypeBool\n"
+ "%13 = OpConstantTrue %12\n"
+ "%14 = OpConstant %6 3\n"
+ "%16 = OpTypeVector %12 4\n"
+ "%20 = OpTypeInt 32 1\n"
+ "%21 = OpConstant %20 1\n"
+ "%22 = OpConstant %20 0\n"
+ "%27 = OpTypePointer Function %12\n"
+ "%29 = OpConstant %6 " + subgroupSizeStr.str() + "\n"
+ "%30 = OpTypeArray %6 %29\n"
+ "%31 = OpTypeStruct %30\n"
+ "%32 = OpTypePointer Uniform %31\n"
+ "%33 = OpVariable %32 Uniform\n"
+ "%34 = OpTypePointer Input %6\n"
+ "%35 = OpVariable %34 Input\n"
+ "%37 = OpTypePointer Uniform %6\n"
+ "%46 = OpConstant %20 2\n"
+ "%51 = OpConstantFalse %12\n"
+ "%55 = OpConstant %20 4\n"
+ "%60 = OpTypeFloat 32\n"
+ "%61 = OpTypePointer Output %60\n"
+ "%62 = OpVariable %61 Output\n"
+ "%65 = OpTypeVector %60 4\n"
+ "%66 = OpConstant %6 1\n"
+ "%67 = OpTypeArray %60 %66\n"
+ "%68 = OpTypeStruct %65 %60 %67 %67\n"
+ "%69 = OpTypePointer Output %68\n"
+ "%70 = OpVariable %69 Output\n"
+ "%71 = OpTypeStruct %65 %60 %67 %67\n"
+ "%72 = OpConstant %6 32\n"
+ "%73 = OpTypeArray %71 %72\n"
+ "%74 = OpTypePointer Input %73\n"
+ "%75 = OpVariable %74 Input\n"
+ "%76 = OpTypePointer Input %65\n"
+ "%81 = OpTypeVector %60 3\n"
+ "%82 = OpTypePointer Input %81\n"
+ "%83 = OpVariable %82 Input\n"
+ "%84 = OpTypePointer Input %60\n"
+ "%89 = OpTypePointer Output %65\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%8 = OpVariable %7 Function\n"
+ "%28 = OpVariable %27 Function\n"
+ "OpStore %8 %9\n"
+ "%15 = OpGroupNonUniformBallot %10 %14 %13\n"
+ "%17 = OpIEqual %16 %11 %15\n"
+ "%18 = OpAll %12 %17\n"
+ "%19 = OpLogicalNot %12 %18\n"
+ "%23 = OpSelect %20 %19 %21 %22\n"
+ "%24 = OpBitcast %6 %23\n"
+ "%25 = OpLoad %6 %8\n"
+ "%26 = OpBitwiseOr %6 %25 %24\n"
+ "OpStore %8 %26\n"
+ "%36 = OpLoad %6 %35\n"
+ "%38 = OpAccessChain %37 %33 %22 %36\n"
+ "%39 = OpLoad %6 %38\n"
+ "%40 = OpINotEqual %12 %39 %9\n"
+ "OpStore %28 %40\n"
+ "%41 = OpLoad %12 %28\n"
+ "%42 = OpGroupNonUniformBallot %10 %14 %41\n"
+ "%43 = OpIEqual %16 %11 %42\n"
+ "%44 = OpAll %12 %43\n"
+ "%45 = OpLogicalNot %12 %44\n"
+ "%47 = OpSelect %20 %45 %46 %22\n"
+ "%48 = OpBitcast %6 %47\n"
+ "%49 = OpLoad %6 %8\n"
+ "%50 = OpBitwiseOr %6 %49 %48\n"
+ "OpStore %8 %50\n"
+ "%52 = OpGroupNonUniformBallot %10 %14 %51\n"
+ "%53 = OpIEqual %16 %11 %52\n"
+ "%54 = OpAll %12 %53\n"
+ "%56 = OpSelect %20 %54 %55 %22\n"
+ "%57 = OpBitcast %6 %56\n"
+ "%58 = OpLoad %6 %8\n"
+ "%59 = OpBitwiseOr %6 %58 %57\n"
+ "OpStore %8 %59\n"
+ "%63 = OpLoad %6 %8\n"
+ "%64 = OpConvertUToF %60 %63\n"
+ "OpStore %62 %64\n"
+ "%77 = OpAccessChain %76 %75 %22 %22\n"
+ "%78 = OpLoad %65 %77\n"
+ "%79 = OpAccessChain %76 %75 %21 %22\n"
+ "%80 = OpLoad %65 %79\n"
+ "%85 = OpAccessChain %84 %83 %9\n"
+ "%86 = OpLoad %60 %85\n"
+ "%87 = OpCompositeConstruct %65 %86 %86 %86 %86\n"
+ "%88 = OpExtInst %65 %1 FMix %78 %80 %87\n"
+ "%90 = OpAccessChain %89 %70 %22\n"
+ "OpStore %90 %88\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+ programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
+ }
+ else
+ {
+ DE_FATAL("Unsupported shader stage");
+ }
+}
+
+
+// Registers the GLSL programs used by the SSBO-backed ballot cases.
+//
+// When caseDef.shaderStage is VK_SHADER_STAGE_COMPUTE_BIT a single "comp"
+// program is added; it compares subgroupBallot() with the sharedMemoryBallot()
+// helper text returned by subgroups::getSharedMemoryBallotHelper(). For any
+// other stage value the whole graphics set is added: "vert", "tesc", "tese",
+// the geometry variants expanded from a ${TOPOLOGY} template, and "fragment".
+// Every program is built with the same options (SPIR-V 1.3).
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+    // One set of build options shared by every program registered below.
+    const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+
+    if (caseDef.shaderStage == VK_SHADER_STAGE_COMPUTE_BIT)
+    {
+        std::ostringstream computeShader;
+
+        computeShader
+            << "#version 450\n"
+            << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+            << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
+            << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+            << "{\n"
+            << " uint result[];\n"
+            << "};\n"
+            << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
+            << "{\n"
+            << " uint data[];\n"
+            << "};\n"
+            << "\n"
+            << subgroups::getSharedMemoryBallotHelper()
+            << "void main (void)\n"
+            << "{\n"
+            << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+            << " highp uint offset = globalSize.x * ((globalSize.y * gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + gl_GlobalInvocationID.x;\n"
+            << " uint tempResult = 0;\n"
+            << " tempResult |= sharedMemoryBallot(true) == subgroupBallot(true) ? 0x1 : 0;\n"
+            << " bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+            << " tempResult |= sharedMemoryBallot(bData) == subgroupBallot(bData) ? 0x2 : 0;\n"
+            << " tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n"
+            << " result[offset] = tempResult;\n"
+            << "}\n";
+
+        programCollection.glslSources.add("comp") << glu::ComputeSource(computeShader.str()) << buildOptions;
+        return;
+    }
+
+    // The three ballot checks are textually identical in every graphics stage:
+    // bit 0x1 - subgroupBallot(true) is non-zero,
+    // bit 0x2 - subgroupBallot(bData) is non-zero,
+    // bit 0x4 - subgroupBallot(false) is all zero.
+    const string ballotChecks =
+        " uint tempResult = 0;\n"
+        " tempResult |= !bool(uvec4(0) == subgroupBallot(true)) ? 0x1 : 0;\n"
+        " bool bData = data[gl_SubgroupInvocationID] != 0;\n"
+        " tempResult |= !bool(uvec4(0) == subgroupBallot(bData)) ? 0x2 : 0;\n"
+        " tempResult |= uvec4(0) == subgroupBallot(false) ? 0x4 : 0;\n";
+
+    const string vertexSource =
+        string(
+        "#version 450\n"
+        "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+        "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+        "{\n"
+        " uint result[];\n"
+        "};\n"
+        "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+        "{\n"
+        " uint data[];\n"
+        "};\n"
+        "\n"
+        "void main (void)\n"
+        "{\n") +
+        ballotChecks +
+        " result[gl_VertexIndex] = tempResult;\n"
+        " float pixelSize = 2.0f/1024.0f;\n"
+        " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+        " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+        " gl_PointSize = 1.0f;\n"
+        "}\n";
+
+    const string tessControlSource =
+        string(
+        "#version 450\n"
+        "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+        "layout(vertices=1) out;\n"
+        "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+        "{\n"
+        " uint result[];\n"
+        "};\n"
+        "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+        "{\n"
+        " uint data[];\n"
+        "};\n"
+        "\n"
+        "void main (void)\n"
+        "{\n") +
+        ballotChecks +
+        " result[gl_PrimitiveID] = tempResult;\n"
+        " if (gl_InvocationID == 0)\n"
+        " {\n"
+        " gl_TessLevelOuter[0] = 1.0f;\n"
+        " gl_TessLevelOuter[1] = 1.0f;\n"
+        " }\n"
+        " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+        "}\n";
+
+    const string tessEvalSource =
+        string(
+        "#version 450\n"
+        "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+        "layout(isolines) in;\n"
+        "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+        "{\n"
+        " uint result[];\n"
+        "};\n"
+        "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+        "{\n"
+        " uint data[];\n"
+        "};\n"
+        "\n"
+        "void main (void)\n"
+        "{\n") +
+        ballotChecks +
+        " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
+        " float pixelSize = 2.0f/1024.0f;\n"
+        " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+        "}\n";
+
+    // ${TOPOLOGY} is left in place; it is substituted by addGeometryShadersFromTemplate().
+    const string geometryTemplate =
+        string(
+        "#version 450\n"
+        "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+        "layout(${TOPOLOGY}) in;\n"
+        "layout(points, max_vertices = 1) out;\n"
+        "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+        "{\n"
+        " uint result[];\n"
+        "};\n"
+        "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+        "{\n"
+        " uint data[];\n"
+        "};\n"
+        "\n"
+        "void main (void)\n"
+        "{\n") +
+        ballotChecks +
+        " result[gl_PrimitiveIDIn] = tempResult;\n"
+        " gl_Position = gl_in[0].gl_Position;\n"
+        " EmitVertex();\n"
+        " EndPrimitive();\n"
+        "}\n";
+
+    const string fragmentSource =
+        string(
+        "#version 450\n"
+        "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+        "layout(location = 0) out uint result;\n"
+        "layout(set = 0, binding = 4, std430) readonly buffer Buffer1\n"
+        "{\n"
+        " uint data[];\n"
+        "};\n"
+        "void main (void)\n"
+        "{\n") +
+        ballotChecks +
+        " result = tempResult;\n"
+        "}\n";
+
+    subgroups::addNoSubgroupShader(programCollection);
+
+    programCollection.glslSources.add("vert") << glu::VertexSource(vertexSource) << buildOptions;
+    programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tessControlSource) << buildOptions;
+    programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tessEvalSource) << buildOptions;
+    subgroups::addGeometryShadersFromTemplate(geometryTemplate, buildOptions, programCollection.glslSources);
+    programCollection.glslSources.add("fragment") << glu::FragmentSource(fragmentSource) << buildOptions;
+}
+
+// Pre-flight check run before a ballot case: throws NotSupportedError when the
+// context reports no subgroup support at all, or when the device lacks the
+// VK_SUBGROUP_FEATURE_BALLOT_BIT feature. The case definition is not consulted.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+    DE_UNREF(caseDef);
+
+    const bool subgroupsAvailable = subgroups::isSubgroupSupported(context);
+    if (!subgroupsAvailable)
+    {
+        TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+    }
+
+    const bool ballotAvailable = subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT);
+    if (!ballotAvailable)
+        TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
+}
+
+// Runs one framebuffer-based ballot case for a single shader stage.
+//
+// If subgroup operations are unavailable for the stage, the case fails when
+// the stage is one that is required to support them and is reported as not
+// supported otherwise. The shader input is one R32_UINT buffer with
+// maxSupportedSubgroupSize() non-zero-initialised elements. Both tessellation
+// stages go through makeTessellationEvaluationFrameBufferTest(), which is told
+// which of the two stages is under test. Any other stage is an internal error.
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+    const bool stageSupported = subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage);
+
+    if (!stageSupported)
+    {
+        if (!subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+            TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+
+        return tcu::TestStatus::fail(
+            "Shader stage " +
+            subgroups::getShaderStageName(caseDef.shaderStage) +
+            " is required to support subgroup operations!");
+    }
+
+    subgroups::SSBOData ballotInput[1];
+    ballotInput[0].format = VK_FORMAT_R32_UINT;
+    ballotInput[0].numElements = subgroups::maxSupportedSubgroupSize();
+    ballotInput[0].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+    if (caseDef.shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
+        return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, ballotInput, 1, checkVertexPipelineStages);
+
+    if (caseDef.shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
+        return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, ballotInput, 1, checkVertexPipelineStages);
+
+    if (caseDef.shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
+        return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, ballotInput, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+
+    if (caseDef.shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
+        return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, ballotInput, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+
+    TCU_THROW(InternalError, "Unhandled shader stage");
+}
+
+// Runs one SSBO-based ballot case.
+//
+// Compute: fails outright if the stage lacks subgroup support, otherwise runs
+// makeComputeTest() with one non-zero-initialised R32_UINT input buffer.
+//
+// Graphics: the requested stage mask is intersected with the stages the device
+// reports in VkPhysicalDeviceSubgroupProperties::supportedStages. When the
+// device cannot write SSBOs from vertex-pipeline stages, the run is narrowed
+// to the fragment stage if it is part of the mask, and reported as not
+// supported otherwise. An empty mask is also reported as not supported.
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+    if (caseDef.shaderStage == VK_SHADER_STAGE_COMPUTE_BIT)
+    {
+        if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+        {
+            return tcu::TestStatus::fail(
+                "Shader stage " +
+                subgroups::getShaderStageName(caseDef.shaderStage) +
+                " is required to support subgroup operations!");
+        }
+
+        subgroups::SSBOData computeInput[1];
+        computeInput[0].format = VK_FORMAT_R32_UINT;
+        computeInput[0].numElements = subgroups::maxSupportedSubgroupSize();
+        computeInput[0].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+        return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, computeInput, 1, checkCompute);
+    }
+
+    // Query the subgroup properties by chaining them onto the properties2 struct.
+    VkPhysicalDeviceSubgroupProperties subgroupProps;
+    subgroupProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+    subgroupProps.pNext = DE_NULL;
+
+    VkPhysicalDeviceProperties2 deviceProps;
+    deviceProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+    deviceProps.pNext = &subgroupProps;
+
+    context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &deviceProps);
+
+    VkShaderStageFlagBits activeStages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProps.supportedStages);
+
+    if (activeStages != VK_SHADER_STAGE_FRAGMENT_BIT && !subgroups::isVertexSSBOSupportedForDevice(context))
+    {
+        // Without vertex-pipeline SSBO writes only the fragment stage can be exercised.
+        if ((activeStages & VK_SHADER_STAGE_FRAGMENT_BIT) != 0)
+            activeStages = VK_SHADER_STAGE_FRAGMENT_BIT;
+        else
+            TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+    }
+
+    if (activeStages == (VkShaderStageFlagBits)0u)
+        TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+    subgroups::SSBOData graphicsInput;
+    graphicsInput.format = VK_FORMAT_R32_UINT;
+    graphicsInput.numElements = subgroups::maxSupportedSubgroupSize();
+    graphicsInput.initializeType = subgroups::SSBOData::InitializeNonZero;
+    graphicsInput.binding = 4u;
+    graphicsInput.stages = activeStages;
+
+    return subgroups::allStages(context, VK_FORMAT_R32_UINT, &graphicsInput, 1, checkVertexPipelineStages, activeStages);
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+// Builds the "ballot" test group. It has three children, added in this order:
+//  - "graphics":    one case covering VK_SHADER_STAGE_ALL_GRAPHICS,
+//  - "compute":     one case for the compute stage,
+//  - "framebuffer": one case per stage listed in framebufferStages.
+// The returned pointer has been released from its MovePtr.
+tcu::TestCaseGroup* createSubgroupsBallotTests(tcu::TestContext& testCtx)
+{
+    // Stages exercised by the framebuffer variants, in registration order.
+    const VkShaderStageFlags framebufferStages[] =
+    {
+        VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+        VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+        VK_SHADER_STAGE_GEOMETRY_BIT,
+        VK_SHADER_STAGE_VERTEX_BIT
+    };
+
+    de::MovePtr<tcu::TestCaseGroup> graphicsCases(new tcu::TestCaseGroup(
+        testCtx, "graphics", "Subgroup ballot category tests: graphics"));
+    de::MovePtr<tcu::TestCaseGroup> computeCases(new tcu::TestCaseGroup(
+        testCtx, "compute", "Subgroup ballot category tests: compute"));
+    de::MovePtr<tcu::TestCaseGroup> framebufferCases(new tcu::TestCaseGroup(
+        testCtx, "framebuffer", "Subgroup ballot category tests: framebuffer"));
+
+    {
+        const CaseDefinition computeCase = {VK_SHADER_STAGE_COMPUTE_BIT};
+        addFunctionCaseWithPrograms(computeCases.get(), getShaderStageName(computeCase.shaderStage), "", supportedCheck, initPrograms, test, computeCase);
+    }
+
+    {
+        const CaseDefinition allGraphicsCase = {VK_SHADER_STAGE_ALL_GRAPHICS};
+        addFunctionCaseWithPrograms(graphicsCases.get(), "graphic", "", supportedCheck, initPrograms, test, allGraphicsCase);
+    }
+
+    for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(framebufferStages); ++ndx)
+    {
+        const CaseDefinition stageCase = {framebufferStages[ndx]};
+        addFunctionCaseWithPrograms(framebufferCases.get(), getShaderStageName(stageCase.shaderStage), "",
+            supportedCheck, initFrameBufferPrograms, noSSBOtest, stageCase);
+    }
+
+    de::MovePtr<tcu::TestCaseGroup> ballotGroup(new tcu::TestCaseGroup(
+        testCtx, "ballot", "Subgroup ballot category tests"));
+
+    ballotGroup->addChild(graphicsCases.release());
+    ballotGroup->addChild(computeCases.release());
+    ballotGroup->addChild(framebufferCases.release());
+
+    return ballotGroup.release();
+}
+
+} // subgroups
+} // vkt
--- /dev/null
+#ifndef _VKTSUBGROUPSBALLOTTESTS_HPP
+#define _VKTSUBGROUPSBALLOTTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups ballot tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Creates the "ballot" subgroup test group, populated with its "graphics",
+// "compute" and "framebuffer" child groups, for the given test context.
+// The group is returned as a raw pointer released from a de::MovePtr, so the
+// caller is presumably expected to take ownership (e.g. via addChild()).
+tcu::TestCaseGroup* createSubgroupsBallotTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSBALLOTTESTS_HPP
--- /dev/null
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups basic tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsBasicTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Marker a shader writes for an invocation where subgroupElect() returned true;
+// the result checkers count occurrences of this value.
+static const deUint32 ELECTED_VALUE = 42u;
+// Marker a shader writes for an invocation where subgroupElect() returned false.
+// Any result that is neither marker is treated as garbage by the checkers.
+static const deUint32 UNELECTED_VALUE = 13u;
+// Size limit used for shader buffers in this file.
+// NOTE(review): presumably in bytes, given the VkDeviceSize type - confirm at the use sites.
+static const vk::VkDeviceSize SHADER_BUFFER_SIZE = 4096ull; // min(maxUniformBufferRange, maxImageDimension1D)
+
+// Validates the framebuffer produced by the fragment-stage barrier case.
+//
+// datas[0] is read as four floats per pixel, addressed as (x * height + y) * 4.
+// For each pixel, component 0 must equal component 1 when component 2 is
+// exactly 1.0f, and must equal component 3 otherwise. Returns false at the
+// first pixel that violates this, true if none does. The last argument is unused.
+static bool checkFragmentSubgroupBarriersNoSSBO(std::vector<const void*> datas,
+        deUint32 width, deUint32 height, deUint32)
+{
+    const float* const pixels = reinterpret_cast<const float*>(datas[0]);
+
+    for (deUint32 col = 0u; col < width; ++col)
+    for (deUint32 row = 0u; row < height; ++row)
+    {
+        const deUint32 base = (col * height + row) * 4u;
+        const bool flagSet = (1.0f == pixels[base +2]);
+
+        if (flagSet)
+        {
+            if (pixels[base] != pixels[base +1])
+                return false;
+        }
+        else
+        {
+            if (pixels[base] != pixels[base +3])
+                return false;
+        }
+    }
+
+    return true;
+}
+
+// Validates the framebuffer produced by a subgroupElect() case that has no SSBO.
+//
+// datas[0] is read as two floats per element: the first, converted to an
+// unsigned integer, must be ELECTED_VALUE or UNELECTED_VALUE (anything else
+// fails immediately); the second is summed over all elements. The check passes
+// when that sum equals the number of ELECTED_VALUE elements seen.
+static bool checkVertexPipelineStagesSubgroupElectNoSSBO(std::vector<const void*> datas,
+        deUint32 width, deUint32)
+{
+    const float* const pixels = reinterpret_cast<const float*>(datas[0]);
+    float electedCount = 0.0f;
+    float subgroupCount = 0.0f;
+
+    for (deUint32 i = 0; i < width; ++i)
+    {
+        const deUint32 marker = static_cast<deUint32>(pixels[i * 2]);
+        subgroupCount += pixels[i * 2 + 1];
+
+        if (ELECTED_VALUE == marker)
+        {
+            electedCount += 1.0f;
+        }
+        else if (UNELECTED_VALUE != marker)
+        {
+            // Neither marker: some garbage value was found.
+            return false;
+        }
+    }
+
+    return subgroupCount == electedCount;
+}
+
+// Validates the result buffer of an SSBO-based subgroupElect() case.
+//
+// datas[0] holds one deUint32 per element; each must be ELECTED_VALUE or
+// UNELECTED_VALUE, otherwise the check fails immediately. datas[1] holds a
+// single deUint32 counter, which must equal the number of ELECTED_VALUE
+// elements found.
+static bool checkVertexPipelineStagesSubgroupElect(std::vector<const void*> datas,
+        deUint32 width, deUint32)
+{
+    const deUint32* const results = reinterpret_cast<const deUint32*>(datas[0]);
+    deUint32 electedCount = 0;
+
+    for (deUint32 i = 0; i < width; ++i)
+    {
+        const deUint32 marker = results[i];
+
+        if (ELECTED_VALUE == marker)
+        {
+            ++electedCount;
+        }
+        else if (UNELECTED_VALUE != marker)
+        {
+            // Neither marker: some garbage value was found.
+            return false;
+        }
+    }
+
+    // An atomically incremented counter recorded how many subgroups the shader used.
+    const deUint32 subgroupCount = *reinterpret_cast<const deUint32*>(datas[1]);
+
+    return subgroupCount == electedCount;
+}
+
+// Validates the result buffer of an SSBO-based subgroup barrier case.
+//
+// Every one of the `width` deUint32 values in datas[0] must equal the single
+// deUint32 reference stored in datas[3] (the SSBO the shaders used to generate
+// their unique value).
+static bool checkVertexPipelineStagesSubgroupBarriers(std::vector<const void*> datas,
+        deUint32 width, deUint32)
+{
+    const deUint32* const results = reinterpret_cast<const deUint32*>(datas[0]);
+    const deUint32 expected = *reinterpret_cast<const deUint32*>(datas[3]);
+
+    // Advance past every matching element; stopping early means a mismatch.
+    deUint32 idx = 0;
+    while (idx < width && results[idx] == expected)
+        ++idx;
+
+    return idx == width;
+}
+
+// Validates the framebuffer produced by a vertex-pipeline barrier case that has no SSBO.
+//
+// datas[0] is read as four floats per element. For each element, component 0
+// must equal component 1 when component 2 is exactly 1.0f, and must equal
+// component 3 otherwise.
+static bool checkVertexPipelineStagesSubgroupBarriersNoSSBO(std::vector<const void*> datas,
+        deUint32 width, deUint32)
+{
+    const float* const pixels = reinterpret_cast<const float*>(datas[0]);
+
+    for (deUint32 i = 0u; i < width; ++i)
+    {
+        const deUint32 base = i*4u;
+        const bool flagSet = (1.0f == pixels[base +2]);
+
+        if (flagSet)
+        {
+            if (pixels[base] != pixels[base +1])
+                return false;
+        }
+        else
+        {
+            if (pixels[base] != pixels[base +3])
+                return false;
+        }
+    }
+
+    return true;
+}
+
+// Validates the framebuffer produced by the tessellation-evaluation barrier
+// case that has no SSBO.
+//
+// datas[0] is read as four floats per element. Only elements whose component 2
+// is exactly 0.0f are checked: for those, component 0 must equal component 3.
+static bool checkTessellationEvaluationSubgroupBarriersNoSSBO(std::vector<const void*> datas,
+        deUint32 width, deUint32)
+{
+    const float* const pixels = reinterpret_cast<const float*>(datas[0]);
+
+    for (deUint32 i = 0u; i < width; ++i)
+    {
+        const deUint32 base = i*4u;
+
+        // Elements with a non-zero flag are not validated here.
+        if (0.0f != pixels[base +2])
+            continue;
+
+        if (pixels[base] != pixels[base +3])
+            return false;
+    }
+
+    return true;
+}
+
+// Compute-stage checker for the subgroupElect() case: delegates to
+// vkt::subgroups::checkCompute() with a reference value of 1.
+static bool checkComputeSubgroupElect(std::vector<const void*> datas,
+        const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+        deUint32)
+{
+    const bool passed = vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
+    return passed;
+}
+
+// Compute-stage checker for the barrier cases: delegates to
+// vkt::subgroups::checkCompute(), using the single deUint32 stored in datas[2]
+// (the SSBO the shader used to generate its unique value) as the reference.
+static bool checkComputeSubgroupBarriers(std::vector<const void*> datas,
+        const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+        deUint32)
+{
+    const deUint32* const refPtr = reinterpret_cast<const deUint32*>(datas[2]);
+    const deUint32 expected = *refPtr;
+
+    return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, expected);
+}
+
+// Subgroup operation exercised by a test case. Each value except OPTYPE_LAST
+// maps to the GLSL built-in of the same name via getOpTypeName().
+enum OpType
+{
+ // subgroupElect()
+ OPTYPE_ELECT = 0,
+ // subgroupBarrier()
+ OPTYPE_SUBGROUP_BARRIER,
+ // subgroupMemoryBarrier()
+ OPTYPE_SUBGROUP_MEMORY_BARRIER,
+ // subgroupMemoryBarrierBuffer()
+ OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER,
+ // subgroupMemoryBarrierShared()
+ OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED,
+ // subgroupMemoryBarrierImage()
+ OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE,
+ // Not an operation; getOpTypeName() has no name for it.
+ // NOTE(review): presumably used as the count / loop bound - confirm at the use sites.
+ OPTYPE_LAST
+};
+
+// Returns the GLSL built-in name for an OpType value. Any value without a
+// name (including OPTYPE_LAST) triggers DE_FATAL and yields an empty string.
+std::string getOpTypeName(int opType)
+{
+    std::string opName;
+
+    switch (opType)
+    {
+        case OPTYPE_ELECT:
+            opName = "subgroupElect";
+            break;
+        case OPTYPE_SUBGROUP_BARRIER:
+            opName = "subgroupBarrier";
+            break;
+        case OPTYPE_SUBGROUP_MEMORY_BARRIER:
+            opName = "subgroupMemoryBarrier";
+            break;
+        case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
+            opName = "subgroupMemoryBarrierBuffer";
+            break;
+        case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED:
+            opName = "subgroupMemoryBarrierShared";
+            break;
+        case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
+            opName = "subgroupMemoryBarrierImage";
+            break;
+        default:
+            DE_FATAL("Unsupported op type");
+            opName = "";
+            break;
+    }
+
+    return opName;
+}
+
+// Parameters that identify one test case in this file.
+struct CaseDefinition
+{
+ // Operation under test; compared against the OpType enumerators (e.g. OPTYPE_ELECT).
+ int opType;
+ // Shader stage under test; compared against individual VK_SHADER_STAGE_*_BIT values.
+ VkShaderStageFlags shaderStage;
+};
+
+void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+ const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ const vk::SpirVAsmBuildOptions buildOptionsSpr (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3);
+
+ if(VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
+ {
+ /*
+ "layout(location = 0) in vec4 in_color;\n"
+ "layout(location = 0) out vec4 out_color;\n"
+ "void main()\n"
+ {\n"
+ " out_color = in_color;\n"
+ "}\n";
+ */
+ const string fragment =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 13\n"
+ "; Schema: 0\n"
+ "OpCapability Shader\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Fragment %4 \"main\" %9 %11\n"
+ "OpExecutionMode %4 OriginUpperLeft\n"
+ "OpDecorate %9 Location 0\n"
+ "OpDecorate %11 Location 0\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeFloat 32\n"
+ "%7 = OpTypeVector %6 4\n"
+ "%8 = OpTypePointer Output %7\n"
+ "%9 = OpVariable %8 Output\n"
+ "%10 = OpTypePointer Input %7\n"
+ "%11 = OpVariable %10 Input\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%12 = OpLoad %7 %11\n"
+ "OpStore %9 %12\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("fragment") << fragment;
+ }
+ if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
+ {
+ /*
+ "#version 450\n"
+ "void main (void)\n"
+ "{\n"
+ " vec2 uv = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2);\n"
+ " gl_Position = vec4(uv * 2.0f + -1.0f, 0.0f, 1.0f);\n"
+ " gl_PointSize = 1.0f;\n"
+ "}\n";
+ */
+ const string vertex =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 44\n"
+ "; Schema: 0\n"
+ "OpCapability Shader\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Vertex %4 \"main\" %12 %29\n"
+ "OpDecorate %12 BuiltIn VertexIndex\n"
+ "OpMemberDecorate %27 0 BuiltIn Position\n"
+ "OpMemberDecorate %27 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %27 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %27 3 BuiltIn CullDistance\n"
+ "OpDecorate %27 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeFloat 32\n"
+ "%7 = OpTypeVector %6 2\n"
+ "%8 = OpTypePointer Function %7\n"
+ "%10 = OpTypeInt 32 1\n"
+ "%11 = OpTypePointer Input %10\n"
+ "%12 = OpVariable %11 Input\n"
+ "%14 = OpConstant %10 1\n"
+ "%16 = OpConstant %10 2\n"
+ "%23 = OpTypeVector %6 4\n"
+ "%24 = OpTypeInt 32 0\n"
+ "%25 = OpConstant %24 1\n"
+ "%26 = OpTypeArray %6 %25\n"
+ "%27 = OpTypeStruct %23 %6 %26 %26\n"
+ "%28 = OpTypePointer Output %27\n"
+ "%29 = OpVariable %28 Output\n"
+ "%30 = OpConstant %10 0\n"
+ "%32 = OpConstant %6 2\n"
+ "%34 = OpConstant %6 -1\n"
+ "%37 = OpConstant %6 0\n"
+ "%38 = OpConstant %6 1\n"
+ "%42 = OpTypePointer Output %23\n"
+ "%44 = OpTypePointer Output %6\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%9 = OpVariable %8 Function\n"
+ "%13 = OpLoad %10 %12\n"
+ "%15 = OpShiftLeftLogical %10 %13 %14\n"
+ "%17 = OpBitwiseAnd %10 %15 %16\n"
+ "%18 = OpConvertSToF %6 %17\n"
+ "%19 = OpLoad %10 %12\n"
+ "%20 = OpBitwiseAnd %10 %19 %16\n"
+ "%21 = OpConvertSToF %6 %20\n"
+ "%22 = OpCompositeConstruct %7 %18 %21\n"
+ "OpStore %9 %22\n"
+ "%31 = OpLoad %7 %9\n"
+ "%33 = OpVectorTimesScalar %7 %31 %32\n"
+ "%35 = OpCompositeConstruct %7 %34 %34\n"
+ "%36 = OpFAdd %7 %33 %35\n"
+ "%39 = OpCompositeExtract %6 %36 0\n"
+ "%40 = OpCompositeExtract %6 %36 1\n"
+ "%41 = OpCompositeConstruct %23 %39 %40 %37 %38\n"
+ "%43 = OpAccessChain %42 %29 %30\n"
+ "OpStore %43 %41\n"
+ "%45 = OpAccessChain %44 %29 %14\n"
+ "OpStore %45 %38\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("vert") << vertex;
+ }
+ else if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+ subgroups::setVertexShaderFrameBuffer(programCollection);
+
+ if (OPTYPE_ELECT == caseDef.opType)
+ {
+ std::ostringstream electedValue ;
+ std::ostringstream unelectedValue;
+ electedValue << ELECTED_VALUE;
+ unelectedValue << UNELECTED_VALUE;
+
+ if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+ {
+ /*
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "layout(location = 0) out vec4 out_color;\n"
+ "layout(location = 0) in highp vec4 in_position;\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " if (subgroupElect())\n"
+ " {\n"
+ " out_color.r = " << ELECTED_VALUE << ";\n"
+ " out_color.g = 1.0f;\n"
+ " }\n"
+ " else\n"
+ " {\n"
+ " out_color.r = " << UNELECTED_VALUE << ";\n"
+ " out_color.g = 0.0f;\n"
+ " }\n"
+ " gl_Position = in_position;\n"
+ " gl_PointSize = 1.0f;\n"
+ "}\n";
+ */
+ const string vertex =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 38\n"
+ "; Schema: 0\n"
+ "OpCapability Shader\n"
+ "OpCapability GroupNonUniform\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Vertex %4 \"main\" %15 %31 %35\n"
+ "OpDecorate %15 Location 0\n"
+ "OpMemberDecorate %29 0 BuiltIn Position\n"
+ "OpMemberDecorate %29 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
+ "OpDecorate %29 Block\n"
+ "OpDecorate %35 Location 0\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeBool\n"
+ "%7 = OpTypeInt 32 0\n"
+ "%8 = OpConstant %7 3\n"
+ "%12 = OpTypeFloat 32\n"
+ "%13 = OpTypeVector %12 4\n"
+ "%14 = OpTypePointer Output %13\n"
+ "%15 = OpVariable %14 Output\n"
+ "%16 = OpConstant %12 " + electedValue.str() + "\n"
+ "%17 = OpConstant %7 0\n"
+ "%18 = OpTypePointer Output %12\n"
+ "%20 = OpConstant %12 1\n"
+ "%21 = OpConstant %7 1\n"
+ "%24 = OpConstant %12 " + unelectedValue.str() + "\n"
+ "%26 = OpConstant %12 0\n"
+ "%28 = OpTypeArray %12 %21\n"
+ "%29 = OpTypeStruct %13 %12 %28 %28\n"
+ "%30 = OpTypePointer Output %29\n"
+ "%31 = OpVariable %30 Output\n"
+ "%32 = OpTypeInt 32 1\n"
+ "%33 = OpConstant %32 0\n"
+ "%34 = OpTypePointer Input %13\n"
+ "%35 = OpVariable %34 Input\n"
+ "%38 = OpConstant %32 1\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%9 = OpGroupNonUniformElect %6 %8\n"
+ "OpSelectionMerge %11 None\n"
+ "OpBranchConditional %9 %10 %23\n"
+ "%10 = OpLabel\n"
+ "%19 = OpAccessChain %18 %15 %17\n"
+ "OpStore %19 %16\n"
+ "%22 = OpAccessChain %18 %15 %21\n"
+ "OpStore %22 %20\n"
+ "OpBranch %11\n"
+ "%23 = OpLabel\n"
+ "%25 = OpAccessChain %18 %15 %17\n"
+ "OpStore %25 %24\n"
+ "%27 = OpAccessChain %18 %15 %21\n"
+ "OpStore %27 %26\n"
+ "OpBranch %11\n"
+ "%11 = OpLabel\n"
+ "%36 = OpLoad %13 %35\n"
+ "%37 = OpAccessChain %14 %31 %33\n"
+ "OpStore %37 %36\n"
+ "%39 = OpAccessChain %18 %31 %38\n"
+ "OpStore %39 %20\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("vert") << vertex << buildOptionsSpr;
+ }
+ else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+ {
+ /*
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "layout(points) in;\n"
+ "layout(points, max_vertices = 1) out;\n"
+ "layout(location = 0) out vec4 out_color;\n"
+ "void main (void)\n"
+ "{\n"
+ " if (subgroupElect())\n"
+ " {\n"
+ " out_color.r = " << ELECTED_VALUE << ";\n"
+ " out_color.g = 1.0f;\n"
+ " }\n"
+ " else\n"
+ " {\n"
+ " out_color.r = " << UNELECTED_VALUE << ";\n"
+ " out_color.g = 0.0f;\n"
+ " }\n"
+ " gl_Position = gl_in[0].gl_Position;\n"
+ " EmitVertex();\n"
+ " EndPrimitive();\n"
+ "}\n";
+ */
+ const string geometry =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 42\n"
+ "; Schema: 0\n"
+ "OpCapability Geometry\n"
+ "OpCapability GroupNonUniform\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Geometry %4 \"main\" %15 %31 %37\n"
+ "OpExecutionMode %4 InputPoints\n"
+ "OpExecutionMode %4 Invocations 1\n"
+ "OpExecutionMode %4 OutputPoints\n"
+ "OpExecutionMode %4 OutputVertices 1\n"
+ "OpDecorate %15 Location 0\n"
+ "OpMemberDecorate %29 0 BuiltIn Position\n"
+ "OpMemberDecorate %29 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
+ "OpDecorate %29 Block\n"
+ "OpMemberDecorate %34 0 BuiltIn Position\n"
+ "OpMemberDecorate %34 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
+ "OpDecorate %34 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeBool\n"
+ "%7 = OpTypeInt 32 0\n"
+ "%8 = OpConstant %7 3\n"
+ "%12 = OpTypeFloat 32\n"
+ "%13 = OpTypeVector %12 4\n"
+ "%14 = OpTypePointer Output %13\n"
+ "%15 = OpVariable %14 Output\n"
+ "%16 = OpConstant %12 " + electedValue.str() + "\n"
+ "%17 = OpConstant %7 0\n"
+ "%18 = OpTypePointer Output %12\n"
+ "%20 = OpConstant %12 1\n"
+ "%21 = OpConstant %7 1\n"
+ "%24 = OpConstant %12 " + unelectedValue.str() + "\n"
+ "%26 = OpConstant %12 0\n"
+ "%28 = OpTypeArray %12 %21\n"
+ "%29 = OpTypeStruct %13 %12 %28 %28\n"
+ "%30 = OpTypePointer Output %29\n"
+ "%31 = OpVariable %30 Output\n"
+ "%32 = OpTypeInt 32 1\n"
+ "%33 = OpConstant %32 0\n"
+ "%34 = OpTypeStruct %13 %12 %28 %28\n"
+ "%35 = OpTypeArray %34 %21\n"
+ "%36 = OpTypePointer Input %35\n"
+ "%37 = OpVariable %36 Input\n"
+ "%38 = OpTypePointer Input %13\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%9 = OpGroupNonUniformElect %6 %8\n"
+ "OpSelectionMerge %11 None\n"
+ "OpBranchConditional %9 %10 %23\n"
+ "%10 = OpLabel\n"
+ "%19 = OpAccessChain %18 %15 %17\n"
+ "OpStore %19 %16\n"
+ "%22 = OpAccessChain %18 %15 %21\n"
+ "OpStore %22 %20\n"
+ "OpBranch %11\n"
+ "%23 = OpLabel\n"
+ "%25 = OpAccessChain %18 %15 %17\n"
+ "OpStore %25 %24\n"
+ "%27 = OpAccessChain %18 %15 %21\n"
+ "OpStore %27 %26\n"
+ "OpBranch %11\n"
+ "%11 = OpLabel\n"
+ "%39 = OpAccessChain %38 %37 %33 %33\n"
+ "%40 = OpLoad %13 %39\n"
+ "%41 = OpAccessChain %14 %31 %33\n"
+ "OpStore %41 %40\n"
+ "OpEmitVertex\n"
+ "OpEndPrimitive\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("geometry") << geometry << buildOptionsSpr;
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+ {
+ /*
+ << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ << "#extension GL_EXT_tessellation_shader : require\n"
+ << "layout(vertices = 2) out;\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " if (gl_InvocationID == 0)\n"
+ <<" {\n"
+ << " gl_TessLevelOuter[0] = 1.0f;\n"
+ << " gl_TessLevelOuter[1] = 1.0f;\n"
+ << " }\n"
+ << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ << "}\n";
+ */
+ const string controlSource =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 46\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
+ "OpExecutionMode %4 OutputVertices 2\n"
+ "OpDecorate %8 BuiltIn InvocationId\n"
+ "OpDecorate %20 Patch\n"
+ "OpDecorate %20 BuiltIn TessLevelOuter\n"
+ "OpMemberDecorate %29 0 BuiltIn Position\n"
+ "OpMemberDecorate %29 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
+ "OpDecorate %29 Block\n"
+ "OpMemberDecorate %35 0 BuiltIn Position\n"
+ "OpMemberDecorate %35 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
+ "OpDecorate %35 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 1\n"
+ "%7 = OpTypePointer Input %6\n"
+ "%8 = OpVariable %7 Input\n"
+ "%10 = OpConstant %6 0\n"
+ "%11 = OpTypeBool\n"
+ "%15 = OpTypeFloat 32\n"
+ "%16 = OpTypeInt 32 0\n"
+ "%17 = OpConstant %16 4\n"
+ "%18 = OpTypeArray %15 %17\n"
+ "%19 = OpTypePointer Output %18\n"
+ "%20 = OpVariable %19 Output\n"
+ "%21 = OpConstant %15 1\n"
+ "%22 = OpTypePointer Output %15\n"
+ "%24 = OpConstant %6 1\n"
+ "%26 = OpTypeVector %15 4\n"
+ "%27 = OpConstant %16 1\n"
+ "%28 = OpTypeArray %15 %27\n"
+ "%29 = OpTypeStruct %26 %15 %28 %28\n"
+ "%30 = OpConstant %16 2\n"
+ "%31 = OpTypeArray %29 %30\n"
+ "%32 = OpTypePointer Output %31\n"
+ "%33 = OpVariable %32 Output\n"
+ "%35 = OpTypeStruct %26 %15 %28 %28\n"
+ "%36 = OpConstant %16 32\n"
+ "%37 = OpTypeArray %35 %36\n"
+ "%38 = OpTypePointer Input %37\n"
+ "%39 = OpVariable %38 Input\n"
+ "%41 = OpTypePointer Input %26\n"
+ "%44 = OpTypePointer Output %26\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%9 = OpLoad %6 %8\n"
+ "%12 = OpIEqual %11 %9 %10\n"
+ "OpSelectionMerge %14 None\n"
+ "OpBranchConditional %12 %13 %14\n"
+ "%13 = OpLabel\n"
+ "%23 = OpAccessChain %22 %20 %10\n"
+ "OpStore %23 %21\n"
+ "%25 = OpAccessChain %22 %20 %24\n"
+ "OpStore %25 %21\n"
+ "OpBranch %14\n"
+ "%14 = OpLabel\n"
+ "%34 = OpLoad %6 %8\n"
+ "%40 = OpLoad %6 %8\n"
+ "%42 = OpAccessChain %41 %39 %40 %10\n"
+ "%43 = OpLoad %26 %42\n"
+ "%45 = OpAccessChain %44 %33 %34 %10\n"
+ "OpStore %45 %43\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
+
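+ /*
+ GLSL reference for the SPIR-V assembly in evaluationSource below.
+ NOTE(review): the assembly emits the elected out_color.r as the literal 71 instead of
+ deriving it from the constants; presumably 71 == 2 * ELECTED_VALUE - UNELECTED_VALUE.
+ Confirm, and update the literal if either constant changes.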
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "#extension GL_EXT_tessellation_shader : require\n"
+ "layout(isolines, equal_spacing, ccw ) in;\n"
+ "layout(location = 0) out vec4 out_color;\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " if (subgroupElect())\n"
+ " {\n"
+ " out_color.r = " << 2 * ELECTED_VALUE - UNELECTED_VALUE << ";\n"
+ " out_color.g = 2.0f;\n"
+ " }\n"
+ " else\n"
+ " {\n"
+ " out_color.r = " << UNELECTED_VALUE << ";\n"
+ " out_color.g = 0.0f;\n"
+ " }\n"
+ " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+ "}\n";
+ */
+
+ const string evaluationSource =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 54\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "OpCapability GroupNonUniform\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationEvaluation %4 \"main\" %15 %31 %38 %47\n"
+ "OpExecutionMode %4 Isolines\n"
+ "OpExecutionMode %4 SpacingEqual\n"
+ "OpExecutionMode %4 VertexOrderCcw\n"
+ "OpDecorate %15 Location 0\n"
+ "OpMemberDecorate %29 0 BuiltIn Position\n"
+ "OpMemberDecorate %29 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
+ "OpDecorate %29 Block\n"
+ "OpMemberDecorate %34 0 BuiltIn Position\n"
+ "OpMemberDecorate %34 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
+ "OpDecorate %34 Block\n"
+ "OpDecorate %47 BuiltIn TessCoord\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeBool\n"
+ "%7 = OpTypeInt 32 0\n"
+ "%8 = OpConstant %7 3\n"
+ "%12 = OpTypeFloat 32\n"
+ "%13 = OpTypeVector %12 4\n"
+ "%14 = OpTypePointer Output %13\n"
+ "%15 = OpVariable %14 Output\n"
+ "%16 = OpConstant %12 71\n"//electedValue
+ "%17 = OpConstant %7 0\n"
+ "%18 = OpTypePointer Output %12\n"
+ "%20 = OpConstant %12 2\n"
+ "%21 = OpConstant %7 1\n"
+ "%24 = OpConstant %12 " + unelectedValue.str() + "\n"
+ "%26 = OpConstant %12 0\n"
+ "%28 = OpTypeArray %12 %21\n"
+ "%29 = OpTypeStruct %13 %12 %28 %28\n"
+ "%30 = OpTypePointer Output %29\n"
+ "%31 = OpVariable %30 Output\n"
+ "%32 = OpTypeInt 32 1\n"
+ "%33 = OpConstant %32 0\n"
+ "%34 = OpTypeStruct %13 %12 %28 %28\n"
+ "%35 = OpConstant %7 32\n"
+ "%36 = OpTypeArray %34 %35\n"
+ "%37 = OpTypePointer Input %36\n"
+ "%38 = OpVariable %37 Input\n"
+ "%39 = OpTypePointer Input %13\n"
+ "%42 = OpConstant %32 1\n"
+ "%45 = OpTypeVector %12 3\n"
+ "%46 = OpTypePointer Input %45\n"
+ "%47 = OpVariable %46 Input\n"
+ "%48 = OpTypePointer Input %12\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%9 = OpGroupNonUniformElect %6 %8\n"
+ "OpSelectionMerge %11 None\n"
+ "OpBranchConditional %9 %10 %23\n"
+ "%10 = OpLabel\n"
+ "%19 = OpAccessChain %18 %15 %17\n"
+ "OpStore %19 %16\n"
+ "%22 = OpAccessChain %18 %15 %21\n"
+ "OpStore %22 %20\n"
+ "OpBranch %11\n"
+ "%23 = OpLabel\n"
+ "%25 = OpAccessChain %18 %15 %17\n"
+ "OpStore %25 %24\n"
+ "%27 = OpAccessChain %18 %15 %21\n"
+ "OpStore %27 %26\n"
+ "OpBranch %11\n"
+ "%11 = OpLabel\n"
+ "%40 = OpAccessChain %39 %38 %33 %33\n"
+ "%41 = OpLoad %13 %40\n"
+ "%43 = OpAccessChain %39 %38 %42 %33\n"
+ "%44 = OpLoad %13 %43\n"
+ "%49 = OpAccessChain %48 %47 %17\n"
+ "%50 = OpLoad %12 %49\n"
+ "%51 = OpCompositeConstruct %13 %50 %50 %50 %50\n"
+ "%52 = OpExtInst %13 %1 FMix %41 %44 %51\n"
+ "%53 = OpAccessChain %14 %31 %33\n"
+ "OpStore %53 %52\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+
+ programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+ {
+ /*
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "#extension GL_EXT_tessellation_shader : require\n"
+ "layout(vertices = 2) out;\n"
+ "layout(location = 0) out vec4 out_color[];\n"
+ "void main (void)\n"
+ "{\n"
+ " if (gl_InvocationID == 0)\n"
+ " {\n"
+ " gl_TessLevelOuter[0] = 1.0f;\n"
+ " gl_TessLevelOuter[1] = 1.0f;\n"
+ " }\n"
+ " if (subgroupElect())\n"
+ " {\n"
+ " out_color[gl_InvocationID].r = " << ELECTED_VALUE << ";\n"
+ " out_color[gl_InvocationID].g = 1.0f;\n"
+ " }\n"
+ " else\n"
+ " {\n"
+ " out_color[gl_InvocationID].r = " << UNELECTED_VALUE << ";\n"
+ " out_color[gl_InvocationID].g = 0.0f;\n"
+ " }\n"
+ " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ "}\n";
+ */
+ const string controlSource =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 66\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "OpCapability GroupNonUniform\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %34 %53 %59\n"
+ "OpExecutionMode %4 OutputVertices 2\n"
+ "OpDecorate %8 BuiltIn InvocationId\n"
+ "OpDecorate %20 Patch\n"
+ "OpDecorate %20 BuiltIn TessLevelOuter\n"
+ "OpDecorate %34 Location 0\n"
+ "OpMemberDecorate %50 0 BuiltIn Position\n"
+ "OpMemberDecorate %50 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %50 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %50 3 BuiltIn CullDistance\n"
+ "OpDecorate %50 Block\n"
+ "OpMemberDecorate %55 0 BuiltIn Position\n"
+ "OpMemberDecorate %55 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %55 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %55 3 BuiltIn CullDistance\n"
+ "OpDecorate %55 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 1\n"
+ "%7 = OpTypePointer Input %6\n"
+ "%8 = OpVariable %7 Input\n"
+ "%10 = OpConstant %6 0\n"
+ "%11 = OpTypeBool\n"
+ "%15 = OpTypeFloat 32\n"
+ "%16 = OpTypeInt 32 0\n"
+ "%17 = OpConstant %16 4\n"
+ "%18 = OpTypeArray %15 %17\n"
+ "%19 = OpTypePointer Output %18\n"
+ "%20 = OpVariable %19 Output\n"
+ "%21 = OpConstant %15 1\n"
+ "%22 = OpTypePointer Output %15\n"
+ "%24 = OpConstant %6 1\n"
+ "%26 = OpConstant %16 3\n"
+ "%30 = OpTypeVector %15 4\n"
+ "%31 = OpConstant %16 2\n"
+ "%32 = OpTypeArray %30 %31\n"
+ "%33 = OpTypePointer Output %32\n"
+ "%34 = OpVariable %33 Output\n"
+ "%36 = OpConstant %15 " + electedValue.str() + "\n"
+ "%37 = OpConstant %16 0\n"
+ "%40 = OpConstant %16 1\n"
+ "%44 = OpConstant %15 " + unelectedValue.str() + "\n"
+ "%47 = OpConstant %15 0\n"
+ "%49 = OpTypeArray %15 %40\n"
+ "%50 = OpTypeStruct %30 %15 %49 %49\n"
+ "%51 = OpTypeArray %50 %31\n"
+ "%52 = OpTypePointer Output %51\n"
+ "%53 = OpVariable %52 Output\n"
+ "%55 = OpTypeStruct %30 %15 %49 %49\n"
+ "%56 = OpConstant %16 32\n"
+ "%57 = OpTypeArray %55 %56\n"
+ "%58 = OpTypePointer Input %57\n"
+ "%59 = OpVariable %58 Input\n"
+ "%61 = OpTypePointer Input %30\n"
+ "%64 = OpTypePointer Output %30\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%9 = OpLoad %6 %8\n"
+ "%12 = OpIEqual %11 %9 %10\n"
+ "OpSelectionMerge %14 None\n"
+ "OpBranchConditional %12 %13 %14\n"
+ "%13 = OpLabel\n"
+ "%23 = OpAccessChain %22 %20 %10\n"
+ "OpStore %23 %21\n"
+ "%25 = OpAccessChain %22 %20 %24\n"
+ "OpStore %25 %21\n"
+ "OpBranch %14\n"
+ "%14 = OpLabel\n"
+ "%27 = OpGroupNonUniformElect %11 %26\n"
+ "OpSelectionMerge %29 None\n"
+ "OpBranchConditional %27 %28 %42\n"
+ "%28 = OpLabel\n"
+ "%35 = OpLoad %6 %8\n"
+ "%38 = OpAccessChain %22 %34 %35 %37\n"
+ "OpStore %38 %36\n"
+ "%39 = OpLoad %6 %8\n"
+ "%41 = OpAccessChain %22 %34 %39 %40\n"
+ "OpStore %41 %21\n"
+ "OpBranch %29\n"
+ "%42 = OpLabel\n"
+ "%43 = OpLoad %6 %8\n"
+ "%45 = OpAccessChain %22 %34 %43 %37\n"
+ "OpStore %45 %44\n"
+ "%46 = OpLoad %6 %8\n"
+ "%48 = OpAccessChain %22 %34 %46 %40\n"
+ "OpStore %48 %47\n"
+ "OpBranch %29\n"
+ "%29 = OpLabel\n"
+ "%54 = OpLoad %6 %8\n"
+ "%60 = OpLoad %6 %8\n"
+ "%62 = OpAccessChain %61 %59 %60 %10\n"
+ "%63 = OpLoad %30 %62\n"
+ "%65 = OpAccessChain %64 %53 %54 %10\n"
+ "OpStore %65 %63\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
+
+ /*
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "#extension GL_EXT_tessellation_shader : require\n"
+ "layout(isolines, equal_spacing, ccw ) in;\n"
+ "layout(location = 0) in vec4 in_color[];\n"
+ "layout(location = 0) out vec4 out_color;\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+ " out_color = in_color[0];\n"
+ "}\n";
+ */
+
+ const string evaluationSource =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 44\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %38 %41\n"
+ "OpExecutionMode %4 Isolines\n"
+ "OpExecutionMode %4 SpacingEqual\n"
+ "OpExecutionMode %4 VertexOrderCcw\n"
+ "OpMemberDecorate %11 0 BuiltIn Position\n"
+ "OpMemberDecorate %11 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
+ "OpDecorate %11 Block\n"
+ "OpMemberDecorate %16 0 BuiltIn Position\n"
+ "OpMemberDecorate %16 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
+ "OpDecorate %16 Block\n"
+ "OpDecorate %29 BuiltIn TessCoord\n"
+ "OpDecorate %38 Location 0\n"
+ "OpDecorate %41 Location 0\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeFloat 32\n"
+ "%7 = OpTypeVector %6 4\n"
+ "%8 = OpTypeInt 32 0\n"
+ "%9 = OpConstant %8 1\n"
+ "%10 = OpTypeArray %6 %9\n"
+ "%11 = OpTypeStruct %7 %6 %10 %10\n"
+ "%12 = OpTypePointer Output %11\n"
+ "%13 = OpVariable %12 Output\n"
+ "%14 = OpTypeInt 32 1\n"
+ "%15 = OpConstant %14 0\n"
+ "%16 = OpTypeStruct %7 %6 %10 %10\n"
+ "%17 = OpConstant %8 32\n"
+ "%18 = OpTypeArray %16 %17\n"
+ "%19 = OpTypePointer Input %18\n"
+ "%20 = OpVariable %19 Input\n"
+ "%21 = OpTypePointer Input %7\n"
+ "%24 = OpConstant %14 1\n"
+ "%27 = OpTypeVector %6 3\n"
+ "%28 = OpTypePointer Input %27\n"
+ "%29 = OpVariable %28 Input\n"
+ "%30 = OpConstant %8 0\n"
+ "%31 = OpTypePointer Input %6\n"
+ "%36 = OpTypePointer Output %7\n"
+ "%38 = OpVariable %36 Output\n"
+ "%39 = OpTypeArray %7 %17\n"
+ "%40 = OpTypePointer Input %39\n"
+ "%41 = OpVariable %40 Input\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%22 = OpAccessChain %21 %20 %15 %15\n"
+ "%23 = OpLoad %7 %22\n"
+ "%25 = OpAccessChain %21 %20 %24 %15\n"
+ "%26 = OpLoad %7 %25\n"
+ "%32 = OpAccessChain %31 %29 %30\n"
+ "%33 = OpLoad %6 %32\n"
+ "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
+ "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
+ "%37 = OpAccessChain %36 %13 %15\n"
+ "OpStore %37 %35\n"
+ "%42 = OpAccessChain %21 %41 %15\n"
+ "%43 = OpLoad %7 %42\n"
+ "OpStore %38 %43\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
+ }
+ else
+ {
+ DE_FATAL("Unsupported shader stage");
+ }
+ }
+ else
+ {
+ std::ostringstream bdy;
+ string color = (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage) ? "out_color[gl_InvocationID].b = 1.0f;\n" : "out_color.b = 1.0f;\n";
+ switch (caseDef.opType)
+ {
+ default:
+ DE_FATAL("Unhandled op type!");
+ break;
+ case OPTYPE_SUBGROUP_BARRIER:
+ case OPTYPE_SUBGROUP_MEMORY_BARRIER:
+ case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
+ {
+ bdy << " tempResult2 = tempBuffer[id];\n"
+ << " if (subgroupElect())\n"
+ << " {\n"
+ << " tempResult = value;\n"
+ << " " << color
+ << " }\n"
+ << " else\n"
+ << " {\n"
+ << " tempResult = tempBuffer[id];\n"
+ << " }\n"
+ << " " << getOpTypeName(caseDef.opType) << "();\n";
+ break;
+ }
+ case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
+ bdy <<"tempResult2 = imageLoad(tempImage, ivec2(id, 0)).x;\n"
+ << " if (subgroupElect())\n"
+ << " {\n"
+ << " tempResult = value;\n"
+ << " " << color
+ << " }\n"
+ << " else\n"
+ << " {\n"
+ << " tempResult = imageLoad(tempImage, ivec2(id, 0)).x;\n"
+ << " }\n"
+ << " subgroupMemoryBarrierImage();\n";
+
+ break;
+ }
+
+ if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
+ {
+ std::ostringstream fragment;
+ fragment << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ << "layout(location = 0) out vec4 out_color;\n"
+ << "\n"
+ << "layout(set = 0, binding = 0) uniform Buffer1\n"
+ << "{\n"
+ << " uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
+ << "};\n"
+ << "\n"
+ << "layout(set = 0, binding = 1) uniform Buffer2\n"
+ << "{\n"
+ << " uint value;\n"
+ << "};\n"
+ << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
+ << "void main (void)\n"
+ << "{\n"
+ << " if (gl_HelperInvocation) return;\n"
+ << " uint id = 0;\n"
+ << " if (subgroupElect())\n"
+ << " {\n"
+ << " id = uint(gl_FragCoord.x);\n"
+ << " }\n"
+ << " id = subgroupBroadcastFirst(id);\n"
+ << " uint localId = id;\n"
+ << " uint tempResult = 0u;\n"
+ << " uint tempResult2 = 0u;\n"
+ << " out_color.b = 0.0f;\n"
+ << bdy.str()
+ << " out_color.r = float(tempResult);\n"
+ << " out_color.g = float(value);\n"
+ << " out_color.a = float(tempResult2);\n"
+ << "}\n";
+ programCollection.glslSources.add("fragment")
+ << glu::FragmentSource(fragment.str()) << buildOptions;
+ }
+ else if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+ {
+ std::ostringstream vertex;
+ vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ <<"\n"
+ << "layout(location = 0) out vec4 out_color;\n"
+ << "layout(location = 0) in highp vec4 in_position;\n"
+ << "\n"
+ << "layout(set = 0, binding = 0) uniform Buffer1\n"
+ << "{\n"
+ << " uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
+ << "};\n"
+ << "\n"
+ << "layout(set = 0, binding = 1) uniform Buffer2\n"
+ << "{\n"
+ << " uint value;\n"
+ << "};\n"
+ << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
+ << "void main (void)\n"
+ << "{\n"
+ << " uint id = 0;\n"
+ << " if (subgroupElect())\n"
+ << " {\n"
+ << " id = gl_VertexIndex;\n"
+ << " }\n"
+ << " id = subgroupBroadcastFirst(id);\n"
+ << " uint tempResult = 0u;\n"
+ << " uint tempResult2 = 0u;\n"
+ << " out_color.b = 0.0f;\n"
+ << bdy.str()
+ << " out_color.r = float(tempResult);\n"
+ << " out_color.g = float(value);\n"
+ << " out_color.a = float(tempResult2);\n"
+ << " gl_Position = in_position;\n"
+ << " gl_PointSize = 1.0f;\n"
+ << "}\n";
+ programCollection.glslSources.add("vert")
+ << glu::VertexSource(vertex.str()) << buildOptions;
+ }
+ else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+ {
+ std::ostringstream geometry;
+
+ geometry << "#version 450\n"
+ << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ << "layout(points) in;\n"
+ << "layout(points, max_vertices = 1) out;\n"
+ << "layout(location = 0) out vec4 out_color;\n"
+ << "layout(set = 0, binding = 0) uniform Buffer1\n"
+ << "{\n"
+ << " uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
+ << "};\n"
+ << "\n"
+ << "layout(set = 0, binding = 1) uniform Buffer2\n"
+ << "{\n"
+ << " uint value;\n"
+ << "};\n"
+ << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
+ << "void main (void)\n"
+ << "{\n"
+ << " uint id = 0;\n"
+ << " if (subgroupElect())\n"
+ << " {\n"
+ << " id = gl_InvocationID;\n"
+ << " }\n"
+ << " id = subgroupBroadcastFirst(id);\n"
+ << " uint tempResult = 0u;\n"
+ << " uint tempResult2 = 0u;\n"
+ << " out_color.b = 0.0f;\n"
+ << bdy.str()
+ << " out_color.r = float(tempResult);\n"
+ << " out_color.g = float(value);\n"
+ << " out_color.a = float(tempResult2);\n"
+ << " gl_Position = gl_in[0].gl_Position;\n"
+ << " EmitVertex();\n"
+ << " EndPrimitive();\n"
+ << "}\n";
+
+ programCollection.glslSources.add("geometry")
+ << glu::GeometrySource(geometry.str()) << buildOptions;
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+ {
+ std::ostringstream controlSource;
+ std::ostringstream evaluationSource;
+
+ controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << "#extension GL_EXT_tessellation_shader : require\n"
+ << "layout(vertices = 2) out;\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " if (gl_InvocationID == 0)\n"
+ <<" {\n"
+ << " gl_TessLevelOuter[0] = 1.0f;\n"
+ << " gl_TessLevelOuter[1] = 1.0f;\n"
+ << " }\n"
+ << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ << "}\n";
+
+ evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ << "#extension GL_EXT_tessellation_shader : require\n"
+ << "layout(isolines, equal_spacing, ccw ) in;\n"
+ << "layout(location = 0) out vec4 out_color;\n"
+ << "layout(set = 0, binding = 0) uniform Buffer1\n"
+ << "{\n"
+ << " uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
+ << "};\n"
+ << "\n"
+ << "layout(set = 0, binding = 1) uniform Buffer2\n"
+ << "{\n"
+ << " uint value;\n"
+ << "};\n"
+ << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
+ << "void main (void)\n"
+ << "{\n"
+ << " uint id = 0;\n"
+ << " if (subgroupElect())\n"
+ << " {\n"
+ << " id = gl_PrimitiveID;\n"
+ << " }\n"
+ << " id = subgroupBroadcastFirst(id);\n"
+ << " uint tempResult = 0u;\n"
+ << " uint tempResult2 = 0u;\n"
+ << " out_color.b = 0.0f;\n"
+ << bdy.str()
+ << " out_color.r = float(tempResult);\n"
+ << " out_color.g = float(value);\n"
+ << " out_color.a = float(tempResult2);\n"
+ << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+ << "}\n";
+
+ programCollection.glslSources.add("tesc")
+ << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+ programCollection.glslSources.add("tese")
+ << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+ {
+ std::ostringstream controlSource;
+ std::ostringstream evaluationSource;
+
+ controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ << "#extension GL_EXT_tessellation_shader : require\n"
+ << "layout(vertices = 2) out;\n"
+ << "layout(location = 0) out vec4 out_color[];\n"
+ << "layout(set = 0, binding = 0) uniform Buffer1\n"
+ << "{\n"
+ << " uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
+ << "};\n"
+ << "\n"
+ << "layout(set = 0, binding = 1) uniform Buffer2\n"
+ << "{\n"
+ << " uint value;\n"
+ << "};\n"
+ << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
+ << "void main (void)\n"
+ << "{\n"
+ << " uint id = 0;\n"
+ << " if (gl_InvocationID == 0)\n"
+ <<" {\n"
+ << " gl_TessLevelOuter[0] = 1.0f;\n"
+ << " gl_TessLevelOuter[1] = 1.0f;\n"
+ << " }\n"
+ << " if (subgroupElect())\n"
+ << " {\n"
+ << " id = gl_InvocationID;\n"
+ << " }\n"
+ << " id = subgroupBroadcastFirst(id);\n"
+ << " uint tempResult = 0u;\n"
+ << " uint tempResult2 = 0u;\n"
+ << " out_color[gl_InvocationID].b = 0.0f;\n"
+ << bdy.str()
+ << " out_color[gl_InvocationID].r = float(tempResult);\n"
+ << " out_color[gl_InvocationID].g = float(value);\n"
+ << " out_color[gl_InvocationID].a = float(tempResult2);\n"
+ << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ << "}\n";
+
+ evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ << "#extension GL_EXT_tessellation_shader : require\n"
+ << "layout(isolines, equal_spacing, ccw ) in;\n"
+ << "layout(location = 0) in vec4 in_color[];\n"
+ << "layout(location = 0) out vec4 out_color;\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+ << " out_color = in_color[0];\n"
+ << "}\n";
+
+ programCollection.glslSources.add("tesc")
+ << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+ programCollection.glslSources.add("tese")
+ << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+ }
+ else
+ {
+ DE_FATAL("Unsupported shader stage");
+ }
+ }
+}
+
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+ if (OPTYPE_ELECT == caseDef.opType)
+ {
+ if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+ {
+ std::ostringstream src;
+
+ src << "#version 450\n"
+ << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+ "local_size_z_id = 2) in;\n"
+ << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+ << "{\n"
+ << " uint result[];\n"
+ << "};\n"
+ << "\n"
+ << subgroups::getSharedMemoryBallotHelper()
+ << "void main (void)\n"
+ << "{\n"
+ << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+ << " highp uint offset = globalSize.x * ((globalSize.y * "
+ "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+ "gl_GlobalInvocationID.x;\n"
+ << " uint value = " << UNELECTED_VALUE << ";\n"
+ << " if (subgroupElect())\n"
+ << " {\n"
+ << " value = " << ELECTED_VALUE << ";\n"
+ << " }\n"
+ << " uvec4 bits = bitCount(sharedMemoryBallot(value == " << ELECTED_VALUE << "));\n"
+ << " result[offset] = bits.x + bits.y + bits.z + bits.w;\n"
+ << "}\n";
+
+ programCollection.glslSources.add("comp")
+ << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+ else
+ {
+ {
+ std::ostringstream vertex;
+ vertex << "#version 450\n"
+ << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+ << "{\n"
+ << " uint result[];\n"
+ << "};\n"
+ << "layout(set = 0, binding = 4, std430) buffer Buffer2\n"
+ << "{\n"
+ << " uint numSubgroupsExecuted;\n"
+ << "};\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " if (subgroupElect())\n"
+ << " {\n"
+ << " result[gl_VertexIndex] = " << ELECTED_VALUE << ";\n"
+ << " atomicAdd(numSubgroupsExecuted, 1);\n"
+ << " }\n"
+ << " else\n"
+ << " {\n"
+ << " result[gl_VertexIndex] = " << UNELECTED_VALUE << ";\n"
+ << " }\n"
+ << " float pixelSize = 2.0f/1024.0f;\n"
+ << " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+ << " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+ << " gl_PointSize = 1.0f;\n"
+ << "}\n";
+ programCollection.glslSources.add("vert")
+ << glu::VertexSource(vertex.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+
+ {
+ std::ostringstream tesc;
+ tesc << "#version 450\n"
+ << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ << "layout(vertices=1) out;\n"
+ << "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+ << "{\n"
+ << " uint result[];\n"
+ << "};\n"
+ << "layout(set = 0, binding = 5, std430) buffer Buffer2\n"
+ << "{\n"
+ << " uint numSubgroupsExecuted;\n"
+ << "};\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " if (subgroupElect())\n"
+ << " {\n"
+ << " result[gl_PrimitiveID] = " << ELECTED_VALUE << ";\n"
+ << " atomicAdd(numSubgroupsExecuted, 1);\n"
+ << " }\n"
+ << " else\n"
+ << " {\n"
+ << " result[gl_PrimitiveID] = " << UNELECTED_VALUE << ";\n"
+ << " }\n"
+ << " if (gl_InvocationID == 0)\n"
+ << " {\n"
+ << " gl_TessLevelOuter[0] = 1.0f;\n"
+ << " gl_TessLevelOuter[1] = 1.0f;\n"
+ << " }\n"
+ << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ << "}\n";
+ programCollection.glslSources.add("tesc")
+ << glu::TessellationControlSource(tesc.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+
+ {
+ std::ostringstream tese;
+ tese << "#version 450\n"
+ << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ << "layout(isolines) in;\n"
+ << "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+ << "{\n"
+ << " uint result[];\n"
+ << "};\n"
+ << "layout(set = 0, binding = 6, std430) buffer Buffer2\n"
+ << "{\n"
+ << " uint numSubgroupsExecuted;\n"
+ << "};\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " if (subgroupElect())\n"
+ << " {\n"
+ << " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = " << ELECTED_VALUE << ";\n"
+ << " atomicAdd(numSubgroupsExecuted, 1);\n"
+ << " }\n"
+ << " else\n"
+ << " {\n"
+ << " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = " << UNELECTED_VALUE << ";\n"
+ << " }\n"
+ << " float pixelSize = 2.0f/1024.0f;\n"
+ << " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+ << "}\n";
+ programCollection.glslSources.add("tese")
+ << glu::TessellationEvaluationSource(tese.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+
+ {
+ std::ostringstream geometry;
+ geometry << "#version 450\n"
+ << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ << "layout(${TOPOLOGY}) in;\n"
+ << "layout(points, max_vertices = 1) out;\n"
+ << "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+ << "{\n"
+ << " uint result[];\n"
+ << "};\n"
+ << "layout(set = 0, binding = 7, std430) buffer Buffer2\n"
+ << "{\n"
+ << " uint numSubgroupsExecuted;\n"
+ << "};\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " if (subgroupElect())\n"
+ << " {\n"
+ << " result[gl_PrimitiveIDIn] = " << ELECTED_VALUE << ";\n"
+ << " atomicAdd(numSubgroupsExecuted, 1);\n"
+ << " }\n"
+ << " else\n"
+ << " {\n"
+ << " result[gl_PrimitiveIDIn] = " << UNELECTED_VALUE << ";\n"
+ << " }\n"
+ << " gl_Position = gl_in[0].gl_Position;\n"
+ << " EmitVertex();\n"
+ << " EndPrimitive();\n"
+ << "}\n";
+ subgroups::addGeometryShadersFromTemplate(geometry.str(), vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
+ programCollection.glslSources);
+ }
+
+ {
+ std::ostringstream fragment;
+ fragment << "#version 450\n"
+ << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ << "layout(location = 0) out uint data;\n"
+ << "layout(set = 0, binding = 8, std430) buffer Buffer\n"
+ << "{\n"
+ << " uint numSubgroupsExecuted;\n"
+ << "};\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " if (gl_HelperInvocation) return;\n"
+ << " if (subgroupElect())\n"
+ << " {\n"
+ << " data = " << ELECTED_VALUE << ";\n"
+ << " atomicAdd(numSubgroupsExecuted, 1);\n"
+ << " }\n"
+ << " else\n"
+ << " {\n"
+ << " data = " << UNELECTED_VALUE << ";\n"
+ << " }\n"
+ << "}\n";
+ programCollection.glslSources.add("fragment")
+ << glu::FragmentSource(fragment.str())<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+ subgroups::addNoSubgroupShader(programCollection);
+ }
+ }
+ else
+ {
+ std::ostringstream bdy;
+
+ switch (caseDef.opType)
+ {
+ default:
+ DE_FATAL("Unhandled op type!");
+ break;
+ case OPTYPE_SUBGROUP_BARRIER:
+ case OPTYPE_SUBGROUP_MEMORY_BARRIER:
+ case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
+ bdy << " if (subgroupElect())\n"
+ << " {\n"
+ << " tempBuffer[id] = value;\n"
+ << " }\n"
+ << " " << getOpTypeName(caseDef.opType) << "();\n"
+ << " tempResult = tempBuffer[id];\n";
+ break;
+ case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED:
+ bdy << " if (subgroupElect())\n"
+ << " {\n"
+ << " tempShared[localId] = value;\n"
+ << " }\n"
+ << " subgroupMemoryBarrierShared();\n"
+ << " tempResult = tempShared[localId];\n";
+ break;
+ case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
+ bdy << " if (subgroupElect())\n"
+ << " {\n"
+ << " imageStore(tempImage, ivec2(id, 0), ivec4(value));\n"
+ << " }\n"
+ << " subgroupMemoryBarrierImage();\n"
+ << " tempResult = imageLoad(tempImage, ivec2(id, 0)).x;\n";
+ break;
+ }
+
+ if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+ {
+ std::ostringstream src;
+
+ src << "#version 450\n"
+ << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+ "local_size_z_id = 2) in;\n"
+ << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+ << "{\n"
+ << " uint result[];\n"
+ << "};\n"
+ << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
+ << "{\n"
+ << " uint tempBuffer[];\n"
+ << "};\n"
+ << "layout(set = 0, binding = 2, std430) buffer Buffer3\n"
+ << "{\n"
+ << " uint value;\n"
+ << "};\n"
+ << "layout(set = 0, binding = 3, r32ui) uniform uimage2D tempImage;\n"
+ << "shared uint tempShared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+ << " highp uint offset = globalSize.x * ((globalSize.y * "
+ "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+ "gl_GlobalInvocationID.x;\n"
+ << " uint localId = gl_SubgroupID;\n"
+ << " uint id = globalSize.x * ((globalSize.y * "
+ "gl_WorkGroupID.z) + gl_WorkGroupID.y) + "
+ "gl_WorkGroupID.x + localId;\n"
+ << " uint tempResult = 0;\n"
+ << bdy.str()
+ << " result[offset] = tempResult;\n"
+ << "}\n";
+
+ programCollection.glslSources.add("comp")
+ << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+ else
+ {
+ {
+ const string vertex =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+ "{\n"
+ " uint result[];\n"
+ "};\n"
+ "layout(set = 0, binding = 4, std430) buffer Buffer2\n"
+ "{\n"
+ " uint tempBuffer[];\n"
+ "};\n"
+ "layout(set = 0, binding = 5, std430) buffer Buffer3\n"
+ "{\n"
+ " uint subgroupID;\n"
+ "};\n"
+ "layout(set = 0, binding = 6, std430) buffer Buffer4\n"
+ "{\n"
+ " uint value;\n"
+ "};\n"
+ "layout(set = 0, binding = 7, r32ui) uniform uimage2D tempImage;\n"
+ "void main (void)\n"
+ "{\n"
+ " uint id = 0;\n"
+ " if (subgroupElect())\n"
+ " {\n"
+ " id = atomicAdd(subgroupID, 1);\n"
+ " }\n"
+ " id = subgroupBroadcastFirst(id);\n"
+ " uint localId = id;\n"
+ " uint tempResult = 0;\n"
+ + bdy.str() +
+ " result[gl_VertexIndex] = tempResult;\n"
+ " float pixelSize = 2.0f/1024.0f;\n"
+ " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+ " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+ " gl_PointSize = 1.0f;\n"
+ "}\n";
+ programCollection.glslSources.add("vert")
+ << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+
+ {
+ const string tesc =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(vertices=1) out;\n"
+ "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+ "{\n"
+ " uint result[];\n"
+ "};\n"
+ "layout(set = 0, binding = 8, std430) buffer Buffer2\n"
+ "{\n"
+ " uint tempBuffer[];\n"
+ "};\n"
+ "layout(set = 0, binding = 9, std430) buffer Buffer3\n"
+ "{\n"
+ " uint subgroupID;\n"
+ "};\n"
+ "layout(set = 0, binding = 10, std430) buffer Buffer4\n"
+ "{\n"
+ " uint value;\n"
+ "};\n"
+ "layout(set = 0, binding = 11, r32ui) uniform uimage2D tempImage;\n"
+ "void main (void)\n"
+ "{\n"
+ " uint id = 0;\n"
+ " if (subgroupElect())\n"
+ " {\n"
+ " id = atomicAdd(subgroupID, 1);\n"
+ " }\n"
+ " id = subgroupBroadcastFirst(id);\n"
+ " uint localId = id;\n"
+ " uint tempResult = 0;\n"
+ + bdy.str() +
+ " result[gl_PrimitiveID] = tempResult;\n"
+ " if (gl_InvocationID == 0)\n"
+ " {\n"
+ " gl_TessLevelOuter[0] = 1.0f;\n"
+ " gl_TessLevelOuter[1] = 1.0f;\n"
+ " }\n"
+ " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ "}\n";
+ programCollection.glslSources.add("tesc")
+ << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+
+ {
+ const string tese =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(isolines) in;\n"
+ "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+ "{\n"
+ " uint result[];\n"
+ "};\n"
+ "layout(set = 0, binding = 12, std430) buffer Buffer2\n"
+ "{\n"
+ " uint tempBuffer[];\n"
+ "};\n"
+ "layout(set = 0, binding = 13, std430) buffer Buffer3\n"
+ "{\n"
+ " uint subgroupID;\n"
+ "};\n"
+ "layout(set = 0, binding = 14, std430) buffer Buffer4\n"
+ "{\n"
+ " uint value;\n"
+ "};\n"
+ "layout(set = 0, binding = 15, r32ui) uniform uimage2D tempImage;\n"
+ "void main (void)\n"
+ "{\n"
+ " uint id = 0;\n"
+ " if (subgroupElect())\n"
+ " {\n"
+ " id = atomicAdd(subgroupID, 1);\n"
+ " }\n"
+ " id = subgroupBroadcastFirst(id);\n"
+ " uint localId = id;\n"
+ " uint tempResult = 0;\n"
+ + bdy.str() +
+ " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
+ " float pixelSize = 2.0f/1024.0f;\n"" gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+ "}\n";
+ programCollection.glslSources.add("tese")
+ << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+
+ {
+ const string geometry =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(${TOPOLOGY}) in;\n"
+ "layout(points, max_vertices = 1) out;\n"
+ "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+ "{\n"
+ " uint result[];\n"
+ "};\n"
+ "layout(set = 0, binding = 16, std430) buffer Buffer2\n"
+ "{\n"
+ " uint tempBuffer[];\n"
+ "};\n"
+ "layout(set = 0, binding = 17, std430) buffer Buffer3\n"
+ "{\n"
+ " uint subgroupID;\n"
+ "};\n"
+ "layout(set = 0, binding = 18, std430) buffer Buffer4\n"
+ "{\n"
+ " uint value;\n"
+ "};\n"
+ "layout(set = 0, binding = 19, r32ui) uniform uimage2D tempImage;\n"
+ "void main (void)\n"
+ "{\n"
+ " uint id = 0;\n"
+ " if (subgroupElect())\n"
+ " {\n"
+ " id = atomicAdd(subgroupID, 1);\n"
+ " }\n"
+ " id = subgroupBroadcastFirst(id);\n"
+ " uint localId = id;\n"
+ " uint tempResult = 0;\n"
+ + bdy.str() +
+ " result[gl_PrimitiveIDIn] = tempResult;\n"
+ " gl_Position = gl_in[0].gl_Position;\n"
+ " EmitVertex();\n"
+ " EndPrimitive();\n"
+ "}\n";
+ subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
+ programCollection.glslSources);
+ }
+
+ {
+ const string fragment =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(location = 0) out uint result;\n"
+ "layout(set = 0, binding = 20, std430) buffer Buffer1\n"
+ "{\n"
+ " uint tempBuffer[];\n"
+ "};\n"
+ "layout(set = 0, binding = 21, std430) buffer Buffer2\n"
+ "{\n"
+ " uint subgroupID;\n"
+ "};\n"
+ "layout(set = 0, binding = 22, std430) buffer Buffer3\n"
+ "{\n"
+ " uint value;\n"
+ "};\n"
+ "layout(set = 0, binding = 23, r32ui) uniform uimage2D tempImage;\n"
+ "void main (void)\n"
+ "{\n"
+ " if (gl_HelperInvocation) return;\n"
+ " uint id = 0;\n"
+ " if (subgroupElect())\n"
+ " {\n"
+ " id = atomicAdd(subgroupID, 1);\n"
+ " }\n"
+ " id = subgroupBroadcastFirst(id);\n"
+ " uint localId = id;\n"
+ " uint tempResult = 0;\n"
+ + bdy.str() +
+ " result = tempResult;\n"
+ "}\n";
+ programCollection.glslSources.add("fragment")
+ << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+
+ subgroups::addNoSubgroupShader(programCollection);
+ }
+ }
+}
+
+// Support callback passed to every case registered by createSubgroupsBasicTests().
+// Throws NotSupportedError unless subgroups::isSubgroupSupported() accepts the
+// context; the case definition itself is deliberately ignored.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+	DE_UNREF(caseDef);
+
+	// Nothing to report when subgroups are available.
+	if (subgroups::isSubgroupSupported(context))
+		return;
+
+	TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+}
+
+// Runs one framebuffer-group case for the single graphics stage named by
+// caseDef.shaderStage.
+//
+// Outcome of the capability checks, in order:
+//  - stage lacks subgroup support: fail if the stage is required to support
+//    subgroup operations, NotSupportedError otherwise;
+//  - basic subgroup feature missing: fail;
+//  - any op other than elect on a non-compute stage also needs the ballot
+//    feature, otherwise NotSupportedError.
+//
+// Elect cases run without input buffers; every other op passes two buffers,
+// plus an image when the op is the image memory barrier.
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+	const bool isElect			= (OPTYPE_ELECT == caseDef.opType);
+	const bool isImageBarrier	= (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType);
+
+	if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+	{
+		if (!subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+
+		return tcu::TestStatus::fail(
+			"Shader stage " +
+			subgroups::getShaderStageName(caseDef.shaderStage) +
+			" is required to support subgroup operations!");
+	}
+
+	if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BASIC_BIT))
+	{
+		return tcu::TestStatus::fail(
+			"Subgroup feature " +
+			subgroups::getSubgroupFeatureName(VK_SUBGROUP_FEATURE_BASIC_BIT) +
+			" is a required capability!");
+	}
+
+	// The ballot feature is only queried for non-elect ops on non-compute stages.
+	const bool needsBallot = !isElect && (VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage);
+	if (needsBallot && !subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
+		TCU_THROW(NotSupportedError, "Subgroup basic operation non-compute stage test required that ballot operations are supported!");
+
+	// Input descriptions: two buffers always, a third (image) entry only for
+	// the image memory barrier op.
+	const deUint32						bufferCount	= isImageBarrier ? 3u : 2u;
+	std::vector<subgroups::SSBOData>	buffers		(bufferCount);
+
+	subgroups::SSBOData& firstBuffer = buffers[0];
+	firstBuffer.format			= VK_FORMAT_R32_UINT;
+	firstBuffer.numElements		= SHADER_BUFFER_SIZE/4ull;
+	firstBuffer.initializeType	= subgroups::SSBOData::InitializeNonZero;
+
+	subgroups::SSBOData& secondBuffer = buffers[1];
+	secondBuffer.format			= VK_FORMAT_R32_UINT;
+	secondBuffer.numElements	= 1ull;
+	secondBuffer.initializeType	= subgroups::SSBOData::InitializeNonZero;
+
+	if (isImageBarrier)
+	{
+		subgroups::SSBOData& imageData = buffers[2];
+		imageData.format			= VK_FORMAT_R32_UINT;
+		imageData.numElements		= SHADER_BUFFER_SIZE;
+		imageData.initializeType	= subgroups::SSBOData::InitializeNone;
+		imageData.isImage			= true;
+	}
+
+	// Fragment stage: always uses the barrier checker, whatever the op type.
+	if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
+		return subgroups::makeFragmentFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &buffers[0], bufferCount, checkFragmentSubgroupBarriersNoSSBO);
+
+	if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+	{
+		if (isElect)
+			return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO);
+
+		return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &buffers[0], bufferCount, checkVertexPipelineStagesSubgroupBarriersNoSSBO);
+	}
+
+	if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+	{
+		if (isElect)
+			return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO);
+
+		return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &buffers[0], bufferCount, checkVertexPipelineStagesSubgroupBarriersNoSSBO);
+	}
+
+	// Every remaining stage value goes through the tessellation helper.
+	if (isElect)
+		return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO, caseDef.shaderStage);
+
+	if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+		return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &buffers[0], bufferCount,
+																	 checkVertexPipelineStagesSubgroupBarriersNoSSBO, caseDef.shaderStage);
+
+	return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &buffers[0], bufferCount,
+																 checkTessellationEvaluationSubgroupBarriersNoSSBO, caseDef.shaderStage);
+}
+
+// Runs one case of the "compute" or "graphics" group (the SSBO-based variants).
+//
+// caseDef.shaderStage is either VK_SHADER_STAGE_COMPUTE_BIT or a mask of
+// graphics stages (createSubgroupsBasicTests() passes VK_SHADER_STAGE_ALL_GRAPHICS).
+// Returns the status produced by subgroups::makeComputeTest() or
+// subgroups::allStages(), a failure status when a mandatory capability is
+// missing, and throws NotSupportedError for optional capabilities.
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+ // The basic subgroup feature is treated as mandatory: its absence is a failure.
+ if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BASIC_BIT))
+ {
+ return tcu::TestStatus::fail(
+ "Subgroup feature " +
+ subgroups::getSubgroupFeatureName(VK_SUBGROUP_FEATURE_BASIC_BIT) +
+ " is a required capability!");
+ }
+
+ // The non-compute barrier shaders enable GL_KHR_shader_subgroup_ballot and
+ // call subgroupBroadcastFirst(), so those cases additionally need ballot support.
+ if (OPTYPE_ELECT != caseDef.opType && VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
+ {
+ if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
+ {
+ TCU_THROW(NotSupportedError, "Subgroup basic operation non-compute stage test required that ballot operations are supported!");
+ }
+ }
+
+ if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+ {
+ // Missing subgroup support in the compute stage is reported as a failure,
+ // not as NotSupported.
+ if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+ {
+ return tcu::TestStatus::fail("Shader stage " +
+ subgroups::getShaderStageName(caseDef.shaderStage) +
+ " is required to support subgroup operations!");
+ }
+
+ if (OPTYPE_ELECT == caseDef.opType)
+ {
+ // Elect needs no extra inputs.
+ return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkComputeSubgroupElect);
+ }
+ else
+ {
+ // Three extra inputs for the barrier shader. No explicit binding is set
+ // here; presumably they map, in order, to the compute shader's
+ // tempBuffer (binding 1), value (binding 2) and tempImage (binding 3)
+ // -- TODO confirm against makeComputeTest().
+ const deUint32 inputDatasCount = 3;
+ subgroups::SSBOData inputDatas[inputDatasCount];
+ inputDatas[0].format = VK_FORMAT_R32_UINT;
+ inputDatas[0].numElements = SHADER_BUFFER_SIZE;
+ inputDatas[0].initializeType = subgroups::SSBOData::InitializeNone;
+
+ inputDatas[1].format = VK_FORMAT_R32_UINT;
+ inputDatas[1].numElements = 1;
+ inputDatas[1].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+ inputDatas[2].format = VK_FORMAT_R32_UINT;
+ inputDatas[2].numElements = SHADER_BUFFER_SIZE;
+ inputDatas[2].initializeType = subgroups::SSBOData::InitializeNone;
+ inputDatas[2].isImage = true;
+
+ return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, checkComputeSubgroupBarriers);
+ }
+ }
+ else
+ {
+ if (!subgroups::isFragmentSSBOSupportedForDevice(context))
+ {
+ TCU_THROW(NotSupportedError, "Subgroup basic operation require that the fragment stage be able to write to SSBOs!");
+ }
+
+ // Query the device's subgroup properties by chaining the subgroup struct
+ // into VkPhysicalDeviceProperties2.
+ VkPhysicalDeviceSubgroupProperties subgroupProperties;
+ subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+ subgroupProperties.pNext = DE_NULL;
+
+ VkPhysicalDeviceProperties2 properties;
+ properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+ properties.pNext = &subgroupProperties;
+
+ context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+ // Restrict the requested stages to those the device reports subgroup support for.
+ VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProperties.supportedStages);
+
+ // Without vertex-stage SSBO writes only the fragment stage can be tested:
+ // fall back to it when it is in the mask, otherwise report NotSupported.
+ if ( VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
+ {
+ if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
+ TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+ else
+ stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+ }
+
+ if ((VkShaderStageFlagBits)0u == stages)
+ TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+ if (OPTYPE_ELECT == caseDef.opType)
+ {
+ // One zero-initialised single-uint buffer per stage. Bindings 4..8 match
+ // the numSubgroupsExecuted blocks declared by the vertex, tessellation
+ // control, tessellation evaluation, geometry and fragment elect shaders.
+ const deUint32 inputCount = 5u;
+ subgroups::SSBOData inputData[inputCount];
+
+ inputData[0].format = VK_FORMAT_R32_UINT;
+ inputData[0].numElements = 1;
+ inputData[0].initializeType = subgroups::SSBOData::InitializeZero;
+ inputData[0].binding = 4u;
+ inputData[0].stages = VK_SHADER_STAGE_VERTEX_BIT;
+
+ inputData[1].format = VK_FORMAT_R32_UINT;
+ inputData[1].numElements = 1;
+ inputData[1].initializeType = subgroups::SSBOData::InitializeZero;
+ inputData[1].binding = 5u;
+ inputData[1].stages = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
+
+ inputData[2].format = VK_FORMAT_R32_UINT;
+ inputData[2].numElements = 1;
+ inputData[2].initializeType = subgroups::SSBOData::InitializeZero;
+ inputData[2].binding = 6u;
+ inputData[2].stages = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
+
+ inputData[3].format = VK_FORMAT_R32_UINT;
+ inputData[3].numElements = 1;
+ inputData[3].initializeType = subgroups::SSBOData::InitializeZero;
+ inputData[3].binding = 7u;
+ inputData[3].stages = VK_SHADER_STAGE_GEOMETRY_BIT;
+
+ inputData[4].format = VK_FORMAT_R32_UINT;
+ inputData[4].numElements = 1;
+ inputData[4].initializeType = subgroups::SSBOData::InitializeZero;
+ inputData[4].binding = 8u;
+ inputData[4].stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+
+ return subgroups::allStages(context, VK_FORMAT_R32_UINT, inputData, inputCount, checkVertexPipelineStagesSubgroupElect, stages);
+ }
+ else
+ {
+ // Array order fixes the binding ranges below; it matches the bindings
+ // used by the barrier shaders (vertex 4-7, tessellation control 8-11,
+ // tessellation evaluation 12-15, geometry 16-19, fragment 20-23).
+ const VkShaderStageFlagBits stagesBits[] =
+ {
+ VK_SHADER_STAGE_VERTEX_BIT,
+ VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+ VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+ VK_SHADER_STAGE_GEOMETRY_BIT,
+ VK_SHADER_STAGE_FRAGMENT_BIT,
+ };
+
+ // Four resources per stage.
+ const deUint32 inputDatasCount = DE_LENGTH_OF_ARRAY(stagesBits) * 4u;
+ subgroups::SSBOData inputDatas[inputDatasCount];
+
+ for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(stagesBits); ++ndx)
+ {
+ const deUint32 index = ndx*4;
+ // tempBuffer[] in the shaders.
+ inputDatas[index].format = VK_FORMAT_R32_UINT;
+ inputDatas[index].numElements = SHADER_BUFFER_SIZE;
+ inputDatas[index].initializeType = subgroups::SSBOData::InitializeNonZero;
+ inputDatas[index].binding = index + 4u;
+ inputDatas[index].stages = stagesBits[ndx];
+
+ // subgroupID counter; starts at zero and is bumped with atomicAdd in the shaders.
+ inputDatas[index + 1].format = VK_FORMAT_R32_UINT;
+ inputDatas[index + 1].numElements = 1;
+ inputDatas[index + 1].initializeType = subgroups::SSBOData::InitializeZero;
+ inputDatas[index + 1].binding = index + 5u;
+ inputDatas[index + 1].stages = stagesBits[ndx];
+
+ // The single "value" uint read by the shaders.
+ inputDatas[index + 2].format = VK_FORMAT_R32_UINT;
+ inputDatas[index + 2].numElements = 1;
+ inputDatas[index + 2].initializeType = subgroups::SSBOData::InitializeNonZero;
+ inputDatas[index + 2].binding = index + 6u;
+ inputDatas[index + 2].stages = stagesBits[ndx];
+
+ // tempImage (r32ui image) in the shaders.
+ inputDatas[index + 3].format = VK_FORMAT_R32_UINT;
+ inputDatas[index + 3].numElements = SHADER_BUFFER_SIZE;
+ inputDatas[index + 3].initializeType = subgroups::SSBOData::InitializeNone;
+ inputDatas[index + 3].isImage = true;
+ inputDatas[index + 3].binding = index + 7u;
+ inputDatas[index + 3].stages = stagesBits[ndx];
+ }
+
+ return subgroups::allStages(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, checkVertexPipelineStagesSubgroupBarriers, stages);
+ }
+ }
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+// Builds the "basic" subgroup test group with three children:
+//  - "compute":     one case per op type;
+//  - "graphics":    one case per op type except the shared-memory barrier;
+//  - "framebuffer": one case per (op type, single graphics stage) pair, again
+//                   without the shared-memory barrier, and without the
+//                   fragment stage for the elect op.
+// The returned group is released from its de::MovePtr, so this function does
+// not keep ownership of it.
+tcu::TestCaseGroup* createSubgroupsBasicTests(tcu::TestContext& testCtx)
+{
+	de::MovePtr<tcu::TestCaseGroup> graphicsTests(new tcu::TestCaseGroup(
+		testCtx, "graphics", "Subgroup basic category tests: graphics"));
+	de::MovePtr<tcu::TestCaseGroup> computeTests(new tcu::TestCaseGroup(
+		testCtx, "compute", "Subgroup basic category tests: compute"));
+	de::MovePtr<tcu::TestCaseGroup> framebufferTests(new tcu::TestCaseGroup(
+		testCtx, "framebuffer", "Subgroup basic category tests: framebuffer"));
+
+	// The fragment stage comes first so that the elect op can skip it by
+	// starting at index 1.
+	const VkShaderStageFlags framebufferStages[] =
+	{
+		VK_SHADER_STAGE_FRAGMENT_BIT,
+		VK_SHADER_STAGE_VERTEX_BIT,
+		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+		VK_SHADER_STAGE_GEOMETRY_BIT,
+	};
+	const int framebufferStageCount = DE_LENGTH_OF_ARRAY(framebufferStages);
+
+	for (int opType = 0; opType < OPTYPE_LAST; ++opType)
+	{
+		const std::string caseName = de::toLower(getOpTypeName(opType));
+
+		const CaseDefinition computeCase = {opType, VK_SHADER_STAGE_COMPUTE_BIT};
+		addFunctionCaseWithPrograms(computeTests.get(), caseName, "",
+									supportedCheck, initPrograms, test, computeCase);
+
+		// Shared isn't available in non compute shaders.
+		if (OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED != opType)
+		{
+			const CaseDefinition graphicsCase = {opType, VK_SHADER_STAGE_ALL_GRAPHICS};
+			addFunctionCaseWithPrograms(graphicsTests.get(),
+										caseName, "",
+										supportedCheck, initPrograms, test, graphicsCase);
+
+			// Elect starts after the leading fragment entry; all other ops
+			// cover every listed stage.
+			const int firstStage = (OPTYPE_ELECT == opType) ? 1 : 0;
+
+			for (int stageNdx = firstStage; stageNdx < framebufferStageCount; ++stageNdx)
+			{
+				const CaseDefinition framebufferCase = {opType, framebufferStages[stageNdx]};
+				addFunctionCaseWithPrograms(framebufferTests.get(),
+											caseName + "_" + getShaderStageName(framebufferCase.shaderStage), "",
+											supportedCheck, initFrameBufferPrograms, noSSBOtest, framebufferCase);
+			}
+		}
+	}
+
+	de::MovePtr<tcu::TestCaseGroup> basicTests(new tcu::TestCaseGroup(
+		testCtx, "basic", "Subgroup basic category tests"));
+
+	basicTests->addChild(graphicsTests.release());
+	basicTests->addChild(computeTests.release());
+	basicTests->addChild(framebufferTests.release());
+
+	return basicTests.release();
+}
+
+} // subgroups
+} // vkt
--- /dev/null
+#ifndef _VKTSUBGROUPSBASICTESTS_HPP
+#define _VKTSUBGROUPSBASICTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Creates the "basic" subgroup test group, which contains the "graphics",
+// "compute" and "framebuffer" child groups. The group is allocated by the
+// implementation and handed back as a raw pointer released from its
+// de::MovePtr; presumably the caller attaches it to a parent group that
+// takes ownership -- TODO confirm at the call site.
+tcu::TestCaseGroup* createSubgroupsBasicTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSBASICTESTS_HPP
--- /dev/null
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsBuiltinMaskVarTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Result checker for the vertex-pipeline stages: forwards datas and width to
+// check() with a fixed third argument of 1. The unnamed trailing parameter is
+// ignored.
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+									  deUint32 width, deUint32)
+{
+	const bool passed = check(datas, width, 1);
+	return passed;
+}
+
+// Result checker for the compute stage: forwards the dispatch dimensions to
+// checkCompute() with a fixed last argument of 1. The unnamed trailing
+// parameter is ignored.
+static bool checkComputeStage(std::vector<const void*> datas,
+							  const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+							  deUint32)
+{
+	const bool passed = checkCompute(datas, numWorkgroups, localSize, 1);
+	return passed;
+}
+
+namespace
+{
+// Parameters of a single builtin-mask test case.
+// NOTE(review): keep the field order; presumably cases are brace-initialised
+// positionally as in the sibling basic-tests file -- confirm before reordering.
+struct CaseDefinition
+{
+ // GLSL name of the mask builtin under test; the helpers in this file
+ // recognise "gl_SubgroupEqMask", "gl_SubgroupGeMask", "gl_SubgroupGtMask",
+ // "gl_SubgroupLeMask" and "gl_SubgroupLtMask".
+ std::string varName;
+ // Shader stage of the case; compared against VK_SHADER_STAGE_* bits.
+ VkShaderStageFlags shaderStage;
+};
+}
+
+// Returns the single SPIR-V comparison instruction that corresponds to the
+// mask builtin named by caseDef.varName, or an empty string for an unknown
+// name. The result and operand ids are hard-coded: one set for the
+// tessellation control stage and another for every other stage.
+std::string subgroupComparison (const CaseDefinition& caseDef)
+{
+	static const struct
+	{
+		const char*	varName;
+		const char*	opcode;
+	} comparisons[] =
+	{
+		{ "gl_SubgroupEqMask",	"OpIEqual"				},
+		{ "gl_SubgroupGeMask",	"OpUGreaterThanEqual"	},
+		{ "gl_SubgroupGtMask",	"OpUGreaterThan"		},
+		{ "gl_SubgroupLeMask",	"OpULessThanEqual"		},
+		{ "gl_SubgroupLtMask",	"OpULessThan"			},
+	};
+	const int comparisonCount = static_cast<int>(sizeof(comparisons) / sizeof(comparisons[0]));
+
+	const bool			isTessControl	= (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage);
+	const std::string	resultId		= isTessControl ? "%56" : "%38";
+	const std::string	operands		= isTessControl ? " %11 %53 %55\n" : " %16 %35 %37\n";
+
+	for (int ndx = 0; ndx < comparisonCount; ++ndx)
+	{
+		if (caseDef.varName == comparisons[ndx].varName)
+			return resultId + " = " + comparisons[ndx].opcode + operands;
+	}
+
+	return "";
+}
+
+// Returns the SPIR-V OpDecorate line that marks a variable as the BuiltIn
+// matching caseDef.varName, or an empty string for an unknown name. The
+// decorated id is hard-coded: %40 for the tessellation control stage and %22
+// for every other stage.
+std::string varSubgroupMask (const CaseDefinition& caseDef)
+{
+	static const struct
+	{
+		const char*	varName;
+		const char*	builtIn;
+	} builtIns[] =
+	{
+		{ "gl_SubgroupEqMask",	"SubgroupEqMask"	},
+		{ "gl_SubgroupGeMask",	"SubgroupGeMask"	},
+		{ "gl_SubgroupGtMask",	"SubgroupGtMask"	},
+		{ "gl_SubgroupLeMask",	"SubgroupLeMask"	},
+		{ "gl_SubgroupLtMask",	"SubgroupLtMask"	},
+	};
+	const int builtInCount = static_cast<int>(sizeof(builtIns) / sizeof(builtIns[0]));
+
+	const bool			isTessControl	= (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage);
+	const std::string	targetId		= isTessControl ? "%40" : "%22";
+
+	for (int ndx = 0; ndx < builtInCount; ++ndx)
+	{
+		if (caseDef.varName == builtIns[ndx].varName)
+			return "OpDecorate " + targetId + " BuiltIn " + builtIns[ndx].builtIn + "\n";
+	}
+
+	return "";
+}
+
+// Builds the GLSL statements that validate the mask builtin named by
+// caseDef.varName. The generated code:
+//  1. walks every invocation index below gl_SubgroupSize and clears
+//     tempResult when the bit extracted from the mask disagrees with the
+//     relation expected for that builtin (==, >=, >, <= or <); for an
+//     unknown builtin name the loop body is left empty;
+//  2. counts the set bits of all four mask components by hand and clears
+//     tempResult when the count differs from subgroupBallotBitCount().
+std::string subgroupMask (const CaseDefinition& caseDef)
+{
+	static const struct
+	{
+		const char*	varName;
+		const char*	relation;
+	} relations[] =
+	{
+		{ "gl_SubgroupEqMask",	"=="	},
+		{ "gl_SubgroupGeMask",	">="	},
+		{ "gl_SubgroupGtMask",	">"		},
+		{ "gl_SubgroupLeMask",	"<="	},
+		{ "gl_SubgroupLtMask",	"<"		},
+	};
+	const int relationCount = static_cast<int>(sizeof(relations) / sizeof(relations[0]));
+
+	// Relation between the loop index and gl_SubgroupInvocationID; stays
+	// empty when the builtin name is not recognised.
+	std::string relation;
+	for (int ndx = 0; ndx < relationCount; ++ndx)
+	{
+		if (caseDef.varName == relations[ndx].varName)
+		{
+			relation = relations[ndx].relation;
+			break;
+		}
+	}
+
+	std::string glsl;
+
+	glsl += " uint tempResult = 0x1;\n";
+	glsl += " uint bit = 0x1;\n";
+	glsl += " uint bitCount = 0x0;\n";
+	glsl += " uvec4 mask = subgroupBallot(true);\n";
+	glsl += " const uvec4 var = " + caseDef.varName + ";\n";
+	glsl += " for (uint i = 0; i < gl_SubgroupSize; i++)\n";
+	glsl += " {\n";
+
+	if (!relation.empty())
+	{
+		glsl += " if ((i " + relation + " gl_SubgroupInvocationID) ^^ subgroupBallotBitExtract(var, i))\n";
+		glsl += " {\n";
+		glsl += " tempResult = 0;\n";
+		glsl += " }\n";
+	}
+
+	glsl += " }\n";
+	glsl += " for (uint i = 0; i < 32; i++)\n";
+	glsl += " {\n";
+
+	// One identical bit test per mask component.
+	static const char components[] = { 'x', 'y', 'z', 'w' };
+	for (int comp = 0; comp < static_cast<int>(sizeof(components)); ++comp)
+	{
+		glsl += " if ((var.";
+		glsl += components[comp];
+		glsl += " & bit) > 0)\n";
+		glsl += " {\n";
+		glsl += " bitCount++;\n";
+		glsl += " }\n";
+	}
+
+	glsl += " bit = bit<<1;\n";
+	glsl += " }\n";
+	glsl += " if (subgroupBallotBitCount(var) != bitCount)\n";
+	glsl += " {\n";
+	glsl += " tempResult = 0;\n";
+	glsl += " }\n";
+
+	return glsl;
+}
+
+void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef) // Adds to programCollection the SPIR-V assembly module for caseDef.shaderStage (vertex, tessellation evaluation, tessellation control or geometry) and calls the subgroups:: helpers for the remaining stages; any other stage ends in DE_FATAL.
+{
+ const vk::SpirVAsmBuildOptions buildOptionsSpr (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3); // streamed after every spirvAsmSources.add() below; requests SPIR-V 1.3
+ const string comparison = subgroupComparison(caseDef); // spliced into each module right after the two OpLoads at the top of the first loop body; the modules consume %38 (%56 in the tessellation control module) without defining it, so this text is expected to define that id
+ const string mask = varSubgroupMask(caseDef); // spliced in among the OpDecorate lines of each module
+
+ subgroups::setFragmentShaderFrameBuffer(programCollection); // NOTE(review): presumably registers the fragment shader used by every case - confirm in the subgroups helpers
+
+ if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage) // the vertex case adds its own "vert" source below, all other cases take it from the helper
+ subgroups::setVertexShaderFrameBuffer(programCollection);
+
+ if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+ {
+ /*
+ NOTE(review): disabled GLSL, apparently the source the assembly below was generated from - confirm before relying on it.
+ const string bdy = subgroupMask(caseDef);
+ const string vertex =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(location = 0) out float out_color;\n"
+ "layout(location = 0) in highp vec4 in_position;\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ + bdy +
+ " out_color = float(tempResult);\n"
+ " gl_Position = in_position;\n"
+ " gl_PointSize = 1.0f;\n"
+ "}\n";
+ programCollection.glslSources.add("vert")
+ << glu::VertexSource(vertex) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
+ */
+
+ const string vertex =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 123\n"
+ "; Schema: 0\n"
+ "OpCapability Shader\n"
+ "OpCapability GroupNonUniform\n"
+ "OpCapability GroupNonUniformBallot\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Vertex %4 \"main\" %22 %32 %36 %107 %114 %117\n"
+ + mask + // decoration for the mask variable (%22 is the uvec4 input loaded at the start of main)
+ "OpDecorate %32 RelaxedPrecision\n"
+ "OpDecorate %32 BuiltIn SubgroupSize\n"
+ "OpDecorate %33 RelaxedPrecision\n"
+ "OpDecorate %36 RelaxedPrecision\n"
+ "OpDecorate %36 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %37 RelaxedPrecision\n"
+ "OpDecorate %107 Location 0\n"
+ "OpMemberDecorate %112 0 BuiltIn Position\n"
+ "OpMemberDecorate %112 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %112 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %112 3 BuiltIn CullDistance\n"
+ "OpDecorate %112 Block\n"
+ "OpDecorate %117 Location 0\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 0\n"
+ "%7 = OpTypePointer Function %6\n"
+ "%9 = OpConstant %6 1\n"
+ "%12 = OpConstant %6 0\n"
+ "%13 = OpTypeVector %6 4\n"
+ "%14 = OpTypePointer Function %13\n"
+ "%16 = OpTypeBool\n"
+ "%17 = OpConstantTrue %16\n"
+ "%18 = OpConstant %6 3\n"
+ "%21 = OpTypePointer Input %13\n"
+ "%22 = OpVariable %21 Input\n"
+ "%31 = OpTypePointer Input %6\n"
+ "%32 = OpVariable %31 Input\n"
+ "%36 = OpVariable %31 Input\n"
+ "%46 = OpTypeInt 32 1\n"
+ "%47 = OpConstant %46 1\n"
+ "%56 = OpConstant %6 32\n"
+ "%76 = OpConstant %6 2\n"
+ "%105 = OpTypeFloat 32\n"
+ "%106 = OpTypePointer Output %105\n"
+ "%107 = OpVariable %106 Output\n"
+ "%110 = OpTypeVector %105 4\n"
+ "%111 = OpTypeArray %105 %9\n"
+ "%112 = OpTypeStruct %110 %105 %111 %111\n"
+ "%113 = OpTypePointer Output %112\n"
+ "%114 = OpVariable %113 Output\n"
+ "%115 = OpConstant %46 0\n"
+ "%116 = OpTypePointer Input %110\n"
+ "%117 = OpVariable %116 Input\n"
+ "%119 = OpTypePointer Output %110\n"
+ "%121 = OpConstant %105 1\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%8 = OpVariable %7 Function\n"
+ "%10 = OpVariable %7 Function\n"
+ "%11 = OpVariable %7 Function\n"
+ "%15 = OpVariable %14 Function\n"
+ "%20 = OpVariable %14 Function\n"
+ "%24 = OpVariable %7 Function\n"
+ "%49 = OpVariable %7 Function\n"
+ "OpStore %8 %9\n"
+ "OpStore %10 %9\n"
+ "OpStore %11 %12\n"
+ "%19 = OpGroupNonUniformBallot %13 %18 %17\n"
+ "OpStore %15 %19\n"
+ "%23 = OpLoad %13 %22\n"
+ "OpStore %20 %23\n"
+ "OpStore %24 %12\n"
+ "OpBranch %25\n"
+ "%25 = OpLabel\n"
+ "OpLoopMerge %27 %28 None\n"
+ "OpBranch %29\n"
+ "%29 = OpLabel\n"
+ "%30 = OpLoad %6 %24\n"
+ "%33 = OpLoad %6 %32\n"
+ "%34 = OpULessThan %16 %30 %33\n"
+ "OpBranchConditional %34 %26 %27\n"
+ "%26 = OpLabel\n"
+ "%35 = OpLoad %6 %24\n"
+ "%37 = OpLoad %6 %36\n"
+ + comparison + // expected to define %38, which the OpLogicalNotEqual below compares with the extracted ballot bit
+ "%39 = OpLoad %13 %20\n"
+ "%40 = OpLoad %6 %24\n"
+ "%41 = OpGroupNonUniformBallotBitExtract %16 %18 %39 %40\n"
+ "%42 = OpLogicalNotEqual %16 %38 %41\n"
+ "OpSelectionMerge %44 None\n"
+ "OpBranchConditional %42 %43 %44\n"
+ "%43 = OpLabel\n"
+ "OpStore %8 %12\n"
+ "OpBranch %44\n"
+ "%44 = OpLabel\n"
+ "OpBranch %28\n"
+ "%28 = OpLabel\n"
+ "%45 = OpLoad %6 %24\n"
+ "%48 = OpIAdd %6 %45 %47\n"
+ "OpStore %24 %48\n"
+ "OpBranch %25\n"
+ "%27 = OpLabel\n"
+ "OpStore %49 %12\n"
+ "OpBranch %50\n"
+ "%50 = OpLabel\n"
+ "OpLoopMerge %52 %53 None\n"
+ "OpBranch %54\n"
+ "%54 = OpLabel\n"
+ "%55 = OpLoad %6 %49\n"
+ "%57 = OpULessThan %16 %55 %56\n"
+ "OpBranchConditional %57 %51 %52\n"
+ "%51 = OpLabel\n"
+ "%58 = OpAccessChain %7 %20 %12\n"
+ "%59 = OpLoad %6 %58\n"
+ "%60 = OpLoad %6 %10\n"
+ "%61 = OpBitwiseAnd %6 %59 %60\n"
+ "%62 = OpUGreaterThan %16 %61 %12\n"
+ "OpSelectionMerge %64 None\n"
+ "OpBranchConditional %62 %63 %64\n"
+ "%63 = OpLabel\n"
+ "%65 = OpLoad %6 %11\n"
+ "%66 = OpIAdd %6 %65 %47\n"
+ "OpStore %11 %66\n"
+ "OpBranch %64\n"
+ "%64 = OpLabel\n"
+ "%67 = OpAccessChain %7 %20 %9\n"
+ "%68 = OpLoad %6 %67\n"
+ "%69 = OpLoad %6 %10\n"
+ "%70 = OpBitwiseAnd %6 %68 %69\n"
+ "%71 = OpUGreaterThan %16 %70 %12\n"
+ "OpSelectionMerge %73 None\n"
+ "OpBranchConditional %71 %72 %73\n"
+ "%72 = OpLabel\n"
+ "%74 = OpLoad %6 %11\n"
+ "%75 = OpIAdd %6 %74 %47\n"
+ "OpStore %11 %75\n"
+ "OpBranch %73\n"
+ "%73 = OpLabel\n"
+ "%77 = OpAccessChain %7 %20 %76\n"
+ "%78 = OpLoad %6 %77\n"
+ "%79 = OpLoad %6 %10\n"
+ "%80 = OpBitwiseAnd %6 %78 %79\n"
+ "%81 = OpUGreaterThan %16 %80 %12\n"
+ "OpSelectionMerge %83 None\n"
+ "OpBranchConditional %81 %82 %83\n"
+ "%82 = OpLabel\n"
+ "%84 = OpLoad %6 %11\n"
+ "%85 = OpIAdd %6 %84 %47\n"
+ "OpStore %11 %85\n"
+ "OpBranch %83\n"
+ "%83 = OpLabel\n"
+ "%86 = OpAccessChain %7 %20 %18\n"
+ "%87 = OpLoad %6 %86\n"
+ "%88 = OpLoad %6 %10\n"
+ "%89 = OpBitwiseAnd %6 %87 %88\n"
+ "%90 = OpUGreaterThan %16 %89 %12\n"
+ "OpSelectionMerge %92 None\n"
+ "OpBranchConditional %90 %91 %92\n"
+ "%91 = OpLabel\n"
+ "%93 = OpLoad %6 %11\n"
+ "%94 = OpIAdd %6 %93 %47\n"
+ "OpStore %11 %94\n"
+ "OpBranch %92\n"
+ "%92 = OpLabel\n"
+ "%95 = OpLoad %6 %10\n"
+ "%96 = OpShiftLeftLogical %6 %95 %47\n"
+ "OpStore %10 %96\n"
+ "OpBranch %53\n"
+ "%53 = OpLabel\n"
+ "%97 = OpLoad %6 %49\n"
+ "%98 = OpIAdd %6 %97 %47\n"
+ "OpStore %49 %98\n"
+ "OpBranch %50\n"
+ "%52 = OpLabel\n"
+ "%99 = OpLoad %13 %20\n"
+ "%100 = OpGroupNonUniformBallotBitCount %6 %18 Reduce %99\n"
+ "%101 = OpLoad %6 %11\n"
+ "%102 = OpINotEqual %16 %100 %101\n"
+ "OpSelectionMerge %104 None\n"
+ "OpBranchConditional %102 %103 %104\n"
+ "%103 = OpLabel\n"
+ "OpStore %8 %12\n"
+ "OpBranch %104\n"
+ "%104 = OpLabel\n"
+ "%108 = OpLoad %6 %8\n"
+ "%109 = OpConvertUToF %105 %108\n"
+ "OpStore %107 %109\n"
+ "%118 = OpLoad %110 %117\n"
+ "%120 = OpAccessChain %119 %114 %115\n"
+ "OpStore %120 %118\n"
+ "%122 = OpAccessChain %106 %114 %47\n"
+ "OpStore %122 %121\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("vert") << vertex << buildOptionsSpr;
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+ {
+ /*
+ NOTE(review): disabled GLSL, apparently the source the assembly below was generated from - confirm before relying on it.
+ const string bdy = subgroupMask(caseDef);
+ const string evaluationSource =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "#extension GL_EXT_tessellation_shader : require\n"
+ "layout(isolines, equal_spacing, ccw ) in;\n"
+ "layout(location = 0) out float out_color;\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ + bdy +
+ " out_color = float(tempResult);\n"
+ " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+ "}\n";
+ programCollection.glslSources.add("tese")
+ << glu::TessellationEvaluationSource(evaluationSource) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
+ */
+ const string evaluationSource =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 136\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "OpCapability GroupNonUniform\n"
+ "OpCapability GroupNonUniformBallot\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationEvaluation %4 \"main\" %22 %32 %36 %107 %114 %120 %128\n"
+ "OpExecutionMode %4 Isolines\n"
+ "OpExecutionMode %4 SpacingEqual\n"
+ "OpExecutionMode %4 VertexOrderCcw\n"
+ + mask + // decoration for the mask variable (%22 is the uvec4 input loaded at the start of main)
+ "OpDecorate %32 RelaxedPrecision\n"
+ "OpDecorate %32 BuiltIn SubgroupSize\n"
+ "OpDecorate %33 RelaxedPrecision\n"
+ "OpDecorate %36 RelaxedPrecision\n"
+ "OpDecorate %36 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %37 RelaxedPrecision\n"
+ "OpDecorate %107 Location 0\n"
+ "OpMemberDecorate %112 0 BuiltIn Position\n"
+ "OpMemberDecorate %112 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %112 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %112 3 BuiltIn CullDistance\n"
+ "OpDecorate %112 Block\n"
+ "OpMemberDecorate %116 0 BuiltIn Position\n"
+ "OpMemberDecorate %116 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %116 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %116 3 BuiltIn CullDistance\n"
+ "OpDecorate %116 Block\n"
+ "OpDecorate %128 BuiltIn TessCoord\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 0\n"
+ "%7 = OpTypePointer Function %6\n"
+ "%9 = OpConstant %6 1\n"
+ "%12 = OpConstant %6 0\n"
+ "%13 = OpTypeVector %6 4\n"
+ "%14 = OpTypePointer Function %13\n"
+ "%16 = OpTypeBool\n"
+ "%17 = OpConstantTrue %16\n"
+ "%18 = OpConstant %6 3\n"
+ "%21 = OpTypePointer Input %13\n"
+ "%22 = OpVariable %21 Input\n"
+ "%31 = OpTypePointer Input %6\n"
+ "%32 = OpVariable %31 Input\n"
+ "%36 = OpVariable %31 Input\n"
+ "%46 = OpTypeInt 32 1\n"
+ "%47 = OpConstant %46 1\n"
+ "%56 = OpConstant %6 32\n"
+ "%76 = OpConstant %6 2\n"
+ "%105 = OpTypeFloat 32\n"
+ "%106 = OpTypePointer Output %105\n"
+ "%107 = OpVariable %106 Output\n"
+ "%110 = OpTypeVector %105 4\n"
+ "%111 = OpTypeArray %105 %9\n"
+ "%112 = OpTypeStruct %110 %105 %111 %111\n"
+ "%113 = OpTypePointer Output %112\n"
+ "%114 = OpVariable %113 Output\n"
+ "%115 = OpConstant %46 0\n"
+ "%116 = OpTypeStruct %110 %105 %111 %111\n"
+ "%117 = OpConstant %6 32\n"
+ "%118 = OpTypeArray %116 %117\n"
+ "%119 = OpTypePointer Input %118\n"
+ "%120 = OpVariable %119 Input\n"
+ "%121 = OpTypePointer Input %110\n"
+ "%126 = OpTypeVector %105 3\n"
+ "%127 = OpTypePointer Input %126\n"
+ "%128 = OpVariable %127 Input\n"
+ "%129 = OpTypePointer Input %105\n"
+ "%134 = OpTypePointer Output %110\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%8 = OpVariable %7 Function\n"
+ "%10 = OpVariable %7 Function\n"
+ "%11 = OpVariable %7 Function\n"
+ "%15 = OpVariable %14 Function\n"
+ "%20 = OpVariable %14 Function\n"
+ "%24 = OpVariable %7 Function\n"
+ "%49 = OpVariable %7 Function\n"
+ "OpStore %8 %9\n"
+ "OpStore %10 %9\n"
+ "OpStore %11 %12\n"
+ "%19 = OpGroupNonUniformBallot %13 %18 %17\n"
+ "OpStore %15 %19\n"
+ "%23 = OpLoad %13 %22\n"
+ "OpStore %20 %23\n"
+ "OpStore %24 %12\n"
+ "OpBranch %25\n"
+ "%25 = OpLabel\n"
+ "OpLoopMerge %27 %28 None\n"
+ "OpBranch %29\n"
+ "%29 = OpLabel\n"
+ "%30 = OpLoad %6 %24\n"
+ "%33 = OpLoad %6 %32\n"
+ "%34 = OpULessThan %16 %30 %33\n"
+ "OpBranchConditional %34 %26 %27\n"
+ "%26 = OpLabel\n"
+ "%35 = OpLoad %6 %24\n"
+ "%37 = OpLoad %6 %36\n"
+ + comparison + // expected to define %38, which the OpLogicalNotEqual below compares with the extracted ballot bit
+ "%39 = OpLoad %13 %20\n"
+ "%40 = OpLoad %6 %24\n"
+ "%41 = OpGroupNonUniformBallotBitExtract %16 %18 %39 %40\n"
+ "%42 = OpLogicalNotEqual %16 %38 %41\n"
+ "OpSelectionMerge %44 None\n"
+ "OpBranchConditional %42 %43 %44\n"
+ "%43 = OpLabel\n"
+ "OpStore %8 %12\n"
+ "OpBranch %44\n"
+ "%44 = OpLabel\n"
+ "OpBranch %28\n"
+ "%28 = OpLabel\n"
+ "%45 = OpLoad %6 %24\n"
+ "%48 = OpIAdd %6 %45 %47\n"
+ "OpStore %24 %48\n"
+ "OpBranch %25\n"
+ "%27 = OpLabel\n"
+ "OpStore %49 %12\n"
+ "OpBranch %50\n"
+ "%50 = OpLabel\n"
+ "OpLoopMerge %52 %53 None\n"
+ "OpBranch %54\n"
+ "%54 = OpLabel\n"
+ "%55 = OpLoad %6 %49\n"
+ "%57 = OpULessThan %16 %55 %56\n"
+ "OpBranchConditional %57 %51 %52\n"
+ "%51 = OpLabel\n"
+ "%58 = OpAccessChain %7 %20 %12\n"
+ "%59 = OpLoad %6 %58\n"
+ "%60 = OpLoad %6 %10\n"
+ "%61 = OpBitwiseAnd %6 %59 %60\n"
+ "%62 = OpUGreaterThan %16 %61 %12\n"
+ "OpSelectionMerge %64 None\n"
+ "OpBranchConditional %62 %63 %64\n"
+ "%63 = OpLabel\n"
+ "%65 = OpLoad %6 %11\n"
+ "%66 = OpIAdd %6 %65 %47\n"
+ "OpStore %11 %66\n"
+ "OpBranch %64\n"
+ "%64 = OpLabel\n"
+ "%67 = OpAccessChain %7 %20 %9\n"
+ "%68 = OpLoad %6 %67\n"
+ "%69 = OpLoad %6 %10\n"
+ "%70 = OpBitwiseAnd %6 %68 %69\n"
+ "%71 = OpUGreaterThan %16 %70 %12\n"
+ "OpSelectionMerge %73 None\n"
+ "OpBranchConditional %71 %72 %73\n"
+ "%72 = OpLabel\n"
+ "%74 = OpLoad %6 %11\n"
+ "%75 = OpIAdd %6 %74 %47\n"
+ "OpStore %11 %75\n"
+ "OpBranch %73\n"
+ "%73 = OpLabel\n"
+ "%77 = OpAccessChain %7 %20 %76\n"
+ "%78 = OpLoad %6 %77\n"
+ "%79 = OpLoad %6 %10\n"
+ "%80 = OpBitwiseAnd %6 %78 %79\n"
+ "%81 = OpUGreaterThan %16 %80 %12\n"
+ "OpSelectionMerge %83 None\n"
+ "OpBranchConditional %81 %82 %83\n"
+ "%82 = OpLabel\n"
+ "%84 = OpLoad %6 %11\n"
+ "%85 = OpIAdd %6 %84 %47\n"
+ "OpStore %11 %85\n"
+ "OpBranch %83\n"
+ "%83 = OpLabel\n"
+ "%86 = OpAccessChain %7 %20 %18\n"
+ "%87 = OpLoad %6 %86\n"
+ "%88 = OpLoad %6 %10\n"
+ "%89 = OpBitwiseAnd %6 %87 %88\n"
+ "%90 = OpUGreaterThan %16 %89 %12\n"
+ "OpSelectionMerge %92 None\n"
+ "OpBranchConditional %90 %91 %92\n"
+ "%91 = OpLabel\n"
+ "%93 = OpLoad %6 %11\n"
+ "%94 = OpIAdd %6 %93 %47\n"
+ "OpStore %11 %94\n"
+ "OpBranch %92\n"
+ "%92 = OpLabel\n"
+ "%95 = OpLoad %6 %10\n"
+ "%96 = OpShiftLeftLogical %6 %95 %47\n"
+ "OpStore %10 %96\n"
+ "OpBranch %53\n"
+ "%53 = OpLabel\n"
+ "%97 = OpLoad %6 %49\n"
+ "%98 = OpIAdd %6 %97 %47\n"
+ "OpStore %49 %98\n"
+ "OpBranch %50\n"
+ "%52 = OpLabel\n"
+ "%99 = OpLoad %13 %20\n"
+ "%100 = OpGroupNonUniformBallotBitCount %6 %18 Reduce %99\n"
+ "%101 = OpLoad %6 %11\n"
+ "%102 = OpINotEqual %16 %100 %101\n"
+ "OpSelectionMerge %104 None\n"
+ "OpBranchConditional %102 %103 %104\n"
+ "%103 = OpLabel\n"
+ "OpStore %8 %12\n"
+ "OpBranch %104\n"
+ "%104 = OpLabel\n"
+ "%108 = OpLoad %6 %8\n"
+ "%109 = OpConvertUToF %105 %108\n"
+ "OpStore %107 %109\n"
+ "%122 = OpAccessChain %121 %120 %115 %115\n"
+ "%123 = OpLoad %110 %122\n"
+ "%124 = OpAccessChain %121 %120 %47 %115\n"
+ "%125 = OpLoad %110 %124\n"
+ "%130 = OpAccessChain %129 %128 %12\n"
+ "%131 = OpLoad %105 %130\n"
+ "%132 = OpCompositeConstruct %110 %131 %131 %131 %131\n"
+ "%133 = OpExtInst %110 %1 FMix %123 %125 %132\n"
+ "%135 = OpAccessChain %134 %114 %115\n"
+ "OpStore %135 %133\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
+ subgroups::setTesCtrlShaderFrameBuffer(programCollection); // only "tese" is added here, so the control stage is left to the helper (NOTE(review): presumably a pass-through shader - confirm)
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+ {
+ /*
+ NOTE(review): disabled GLSL, apparently the source the assembly below was generated from - confirm before relying on it.
+ const string bdy = subgroupMask(caseDef);
+ const string controlSource =
+ "#version 450\n"
+ "#extension GL_EXT_tessellation_shader : require\n"
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(vertices = 2) out;\n"
+ "layout(location = 0) out float out_color[];\n"
+ "void main (void)\n"
+ "{\n"
+ " if (gl_InvocationID == 0)\n"
+ " {\n"
+ " gl_TessLevelOuter[0] = 1.0f;\n"
+ " gl_TessLevelOuter[1] = 1.0f;\n"
+ " }\n"
+ + bdy +
+ " out_color[gl_InvocationID] = float(tempResult);\n"
+ " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ "}\n";
+ programCollection.glslSources.add("tesc")
+ << glu::TessellationControlSource(controlSource) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
+ */
+ const string controlSource =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 146\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "OpCapability GroupNonUniform\n"
+ "OpCapability GroupNonUniformBallot\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %40 %50 %54 %123 %133 %139\n"
+ "OpExecutionMode %4 OutputVertices 2\n"
+ "OpDecorate %8 BuiltIn InvocationId\n"
+ "OpDecorate %20 Patch\n"
+ "OpDecorate %20 BuiltIn TessLevelOuter\n"
+ + mask + // decoration for the mask variable; this module numbers its ids differently and the uvec4 input is %40 here
+ "OpDecorate %50 RelaxedPrecision\n"
+ "OpDecorate %50 BuiltIn SubgroupSize\n"
+ "OpDecorate %51 RelaxedPrecision\n"
+ "OpDecorate %54 RelaxedPrecision\n"
+ "OpDecorate %54 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %55 RelaxedPrecision\n"
+ "OpDecorate %123 Location 0\n"
+ "OpMemberDecorate %130 0 BuiltIn Position\n"
+ "OpMemberDecorate %130 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %130 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %130 3 BuiltIn CullDistance\n"
+ "OpDecorate %130 Block\n"
+ "OpMemberDecorate %135 0 BuiltIn Position\n"
+ "OpMemberDecorate %135 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %135 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %135 3 BuiltIn CullDistance\n"
+ "OpDecorate %135 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 1\n"
+ "%7 = OpTypePointer Input %6\n"
+ "%8 = OpVariable %7 Input\n"
+ "%10 = OpConstant %6 0\n"
+ "%11 = OpTypeBool\n"
+ "%15 = OpTypeFloat 32\n"
+ "%16 = OpTypeInt 32 0\n"
+ "%17 = OpConstant %16 4\n"
+ "%18 = OpTypeArray %15 %17\n"
+ "%19 = OpTypePointer Output %18\n"
+ "%20 = OpVariable %19 Output\n"
+ "%21 = OpConstant %15 1\n"
+ "%22 = OpTypePointer Output %15\n"
+ "%24 = OpConstant %6 1\n"
+ "%26 = OpTypePointer Function %16\n"
+ "%28 = OpConstant %16 1\n"
+ "%31 = OpConstant %16 0\n"
+ "%32 = OpTypeVector %16 4\n"
+ "%33 = OpTypePointer Function %32\n"
+ "%35 = OpConstantTrue %11\n"
+ "%36 = OpConstant %16 3\n"
+ "%39 = OpTypePointer Input %32\n"
+ "%40 = OpVariable %39 Input\n"
+ "%49 = OpTypePointer Input %16\n"
+ "%50 = OpVariable %49 Input\n"
+ "%54 = OpVariable %49 Input\n"
+ "%72 = OpConstant %16 32\n"
+ "%92 = OpConstant %16 2\n"
+ "%121 = OpTypeArray %15 %92\n"
+ "%122 = OpTypePointer Output %121\n"
+ "%123 = OpVariable %122 Output\n"
+ "%128 = OpTypeVector %15 4\n"
+ "%129 = OpTypeArray %15 %28\n"
+ "%130 = OpTypeStruct %128 %15 %129 %129\n"
+ "%131 = OpTypeArray %130 %92\n"
+ "%132 = OpTypePointer Output %131\n"
+ "%133 = OpVariable %132 Output\n"
+ "%135 = OpTypeStruct %128 %15 %129 %129\n"
+ "%136 = OpConstant %16 32\n"
+ "%137 = OpTypeArray %135 %136\n"
+ "%138 = OpTypePointer Input %137\n"
+ "%139 = OpVariable %138 Input\n"
+ "%141 = OpTypePointer Input %128\n"
+ "%144 = OpTypePointer Output %128\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%27 = OpVariable %26 Function\n"
+ "%29 = OpVariable %26 Function\n"
+ "%30 = OpVariable %26 Function\n"
+ "%34 = OpVariable %33 Function\n"
+ "%38 = OpVariable %33 Function\n"
+ "%42 = OpVariable %26 Function\n"
+ "%65 = OpVariable %26 Function\n"
+ "%9 = OpLoad %6 %8\n"
+ "%12 = OpIEqual %11 %9 %10\n"
+ "OpSelectionMerge %14 None\n"
+ "OpBranchConditional %12 %13 %14\n"
+ "%13 = OpLabel\n"
+ "%23 = OpAccessChain %22 %20 %10\n"
+ "OpStore %23 %21\n"
+ "%25 = OpAccessChain %22 %20 %24\n"
+ "OpStore %25 %21\n"
+ "OpBranch %14\n"
+ "%14 = OpLabel\n"
+ "OpStore %27 %28\n"
+ "OpStore %29 %28\n"
+ "OpStore %30 %31\n"
+ "%37 = OpGroupNonUniformBallot %32 %36 %35\n"
+ "OpStore %34 %37\n"
+ "%41 = OpLoad %32 %40\n"
+ "OpStore %38 %41\n"
+ "OpStore %42 %31\n"
+ "OpBranch %43\n"
+ "%43 = OpLabel\n"
+ "OpLoopMerge %45 %46 None\n"
+ "OpBranch %47\n"
+ "%47 = OpLabel\n"
+ "%48 = OpLoad %16 %42\n"
+ "%51 = OpLoad %16 %50\n"
+ "%52 = OpULessThan %11 %48 %51\n"
+ "OpBranchConditional %52 %44 %45\n"
+ "%44 = OpLabel\n"
+ "%53 = OpLoad %16 %42\n"
+ "%55 = OpLoad %16 %54\n"
+ + comparison + // expected to define %56 in this module, which the OpLogicalNotEqual below compares with the extracted ballot bit
+ "%57 = OpLoad %32 %38\n"
+ "%58 = OpLoad %16 %42\n"
+ "%59 = OpGroupNonUniformBallotBitExtract %11 %36 %57 %58\n"
+ "%60 = OpLogicalNotEqual %11 %56 %59\n"
+ "OpSelectionMerge %62 None\n"
+ "OpBranchConditional %60 %61 %62\n"
+ "%61 = OpLabel\n"
+ "OpStore %27 %31\n"
+ "OpBranch %62\n"
+ "%62 = OpLabel\n"
+ "OpBranch %46\n"
+ "%46 = OpLabel\n"
+ "%63 = OpLoad %16 %42\n"
+ "%64 = OpIAdd %16 %63 %24\n"
+ "OpStore %42 %64\n"
+ "OpBranch %43\n"
+ "%45 = OpLabel\n"
+ "OpStore %65 %31\n"
+ "OpBranch %66\n"
+ "%66 = OpLabel\n"
+ "OpLoopMerge %68 %69 None\n"
+ "OpBranch %70\n"
+ "%70 = OpLabel\n"
+ "%71 = OpLoad %16 %65\n"
+ "%73 = OpULessThan %11 %71 %72\n"
+ "OpBranchConditional %73 %67 %68\n"
+ "%67 = OpLabel\n"
+ "%74 = OpAccessChain %26 %38 %31\n"
+ "%75 = OpLoad %16 %74\n"
+ "%76 = OpLoad %16 %29\n"
+ "%77 = OpBitwiseAnd %16 %75 %76\n"
+ "%78 = OpUGreaterThan %11 %77 %31\n"
+ "OpSelectionMerge %80 None\n"
+ "OpBranchConditional %78 %79 %80\n"
+ "%79 = OpLabel\n"
+ "%81 = OpLoad %16 %30\n"
+ "%82 = OpIAdd %16 %81 %24\n"
+ "OpStore %30 %82\n"
+ "OpBranch %80\n"
+ "%80 = OpLabel\n"
+ "%83 = OpAccessChain %26 %38 %28\n"
+ "%84 = OpLoad %16 %83\n"
+ "%85 = OpLoad %16 %29\n"
+ "%86 = OpBitwiseAnd %16 %84 %85\n"
+ "%87 = OpUGreaterThan %11 %86 %31\n"
+ "OpSelectionMerge %89 None\n"
+ "OpBranchConditional %87 %88 %89\n"
+ "%88 = OpLabel\n"
+ "%90 = OpLoad %16 %30\n"
+ "%91 = OpIAdd %16 %90 %24\n"
+ "OpStore %30 %91\n"
+ "OpBranch %89\n"
+ "%89 = OpLabel\n"
+ "%93 = OpAccessChain %26 %38 %92\n"
+ "%94 = OpLoad %16 %93\n"
+ "%95 = OpLoad %16 %29\n"
+ "%96 = OpBitwiseAnd %16 %94 %95\n"
+ "%97 = OpUGreaterThan %11 %96 %31\n"
+ "OpSelectionMerge %99 None\n"
+ "OpBranchConditional %97 %98 %99\n"
+ "%98 = OpLabel\n"
+ "%100 = OpLoad %16 %30\n"
+ "%101 = OpIAdd %16 %100 %24\n"
+ "OpStore %30 %101\n"
+ "OpBranch %99\n"
+ "%99 = OpLabel\n"
+ "%102 = OpAccessChain %26 %38 %36\n"
+ "%103 = OpLoad %16 %102\n"
+ "%104 = OpLoad %16 %29\n"
+ "%105 = OpBitwiseAnd %16 %103 %104\n"
+ "%106 = OpUGreaterThan %11 %105 %31\n"
+ "OpSelectionMerge %108 None\n"
+ "OpBranchConditional %106 %107 %108\n"
+ "%107 = OpLabel\n"
+ "%109 = OpLoad %16 %30\n"
+ "%110 = OpIAdd %16 %109 %24\n"
+ "OpStore %30 %110\n"
+ "OpBranch %108\n"
+ "%108 = OpLabel\n"
+ "%111 = OpLoad %16 %29\n"
+ "%112 = OpShiftLeftLogical %16 %111 %24\n"
+ "OpStore %29 %112\n"
+ "OpBranch %69\n"
+ "%69 = OpLabel\n"
+ "%113 = OpLoad %16 %65\n"
+ "%114 = OpIAdd %16 %113 %24\n"
+ "OpStore %65 %114\n"
+ "OpBranch %66\n"
+ "%68 = OpLabel\n"
+ "%115 = OpLoad %32 %38\n"
+ "%116 = OpGroupNonUniformBallotBitCount %16 %36 Reduce %115\n"
+ "%117 = OpLoad %16 %30\n"
+ "%118 = OpINotEqual %11 %116 %117\n"
+ "OpSelectionMerge %120 None\n"
+ "OpBranchConditional %118 %119 %120\n"
+ "%119 = OpLabel\n"
+ "OpStore %27 %31\n"
+ "OpBranch %120\n"
+ "%120 = OpLabel\n"
+ "%124 = OpLoad %6 %8\n"
+ "%125 = OpLoad %16 %27\n"
+ "%126 = OpConvertUToF %15 %125\n"
+ "%127 = OpAccessChain %22 %123 %124\n"
+ "OpStore %127 %126\n"
+ "%134 = OpLoad %6 %8\n"
+ "%140 = OpLoad %6 %8\n"
+ "%142 = OpAccessChain %141 %139 %140 %10\n"
+ "%143 = OpLoad %128 %142\n"
+ "%145 = OpAccessChain %144 %133 %134 %10\n"
+ "OpStore %145 %143\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
+ subgroups::setTesEvalShaderFrameBuffer(programCollection); // only "tesc" is added here, so the evaluation stage is left to the helper (NOTE(review): presumably a pass-through shader - confirm)
+ }
+ else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+ {
+ /*
+ NOTE(review): disabled GLSL, apparently the source the assembly below was generated from - confirm before relying on it.
+ const string bdy = subgroupMask(caseDef);
+ const string geometry =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "layout(points) in;\n"
+ "layout(points, max_vertices = 1) out;\n"
+ "layout(location = 0) out float out_color;\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ + bdy +
+ " out_color = float(tempResult);\n"
+ " gl_Position = gl_in[0].gl_Position;\n"
+ " EmitVertex();\n"
+ " EndPrimitive();\n"
+ "}\n";
+ programCollection.glslSources.add("geometry")
+ << glu::GeometrySource(geometry) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
+ */
+
+ const string geometry =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 125\n"
+ "; Schema: 0\n"
+ "OpCapability Geometry\n"
+ "OpCapability GroupNonUniform\n"
+ "OpCapability GroupNonUniformBallot\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Geometry %4 \"main\" %22 %32 %36 %107 %114 %119\n"
+ "OpExecutionMode %4 InputPoints\n"
+ "OpExecutionMode %4 Invocations 1\n"
+ "OpExecutionMode %4 OutputPoints\n"
+ "OpExecutionMode %4 OutputVertices 1\n"
+ + mask + // decoration for the mask variable (%22 is the uvec4 input loaded at the start of main)
+ "OpDecorate %32 RelaxedPrecision\n"
+ "OpDecorate %32 BuiltIn SubgroupSize\n"
+ "OpDecorate %33 RelaxedPrecision\n"
+ "OpDecorate %36 RelaxedPrecision\n"
+ "OpDecorate %36 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %37 RelaxedPrecision\n"
+ "OpDecorate %107 Location 0\n"
+ "OpMemberDecorate %112 0 BuiltIn Position\n"
+ "OpMemberDecorate %112 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %112 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %112 3 BuiltIn CullDistance\n"
+ "OpDecorate %112 Block\n"
+ "OpMemberDecorate %116 0 BuiltIn Position\n"
+ "OpMemberDecorate %116 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %116 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %116 3 BuiltIn CullDistance\n"
+ "OpDecorate %116 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 0\n"
+ "%7 = OpTypePointer Function %6\n"
+ "%9 = OpConstant %6 1\n"
+ "%12 = OpConstant %6 0\n"
+ "%13 = OpTypeVector %6 4\n"
+ "%14 = OpTypePointer Function %13\n"
+ "%16 = OpTypeBool\n"
+ "%17 = OpConstantTrue %16\n"
+ "%18 = OpConstant %6 3\n"
+ "%21 = OpTypePointer Input %13\n"
+ "%22 = OpVariable %21 Input\n"
+ "%31 = OpTypePointer Input %6\n"
+ "%32 = OpVariable %31 Input\n"
+ "%36 = OpVariable %31 Input\n"
+ "%46 = OpTypeInt 32 1\n"
+ "%47 = OpConstant %46 1\n"
+ "%56 = OpConstant %6 32\n"
+ "%76 = OpConstant %6 2\n"
+ "%105 = OpTypeFloat 32\n"
+ "%106 = OpTypePointer Output %105\n"
+ "%107 = OpVariable %106 Output\n"
+ "%110 = OpTypeVector %105 4\n"
+ "%111 = OpTypeArray %105 %9\n"
+ "%112 = OpTypeStruct %110 %105 %111 %111\n"
+ "%113 = OpTypePointer Output %112\n"
+ "%114 = OpVariable %113 Output\n"
+ "%115 = OpConstant %46 0\n"
+ "%116 = OpTypeStruct %110 %105 %111 %111\n"
+ "%117 = OpTypeArray %116 %9\n"
+ "%118 = OpTypePointer Input %117\n"
+ "%119 = OpVariable %118 Input\n"
+ "%120 = OpTypePointer Input %110\n"
+ "%123 = OpTypePointer Output %110\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%8 = OpVariable %7 Function\n"
+ "%10 = OpVariable %7 Function\n"
+ "%11 = OpVariable %7 Function\n"
+ "%15 = OpVariable %14 Function\n"
+ "%20 = OpVariable %14 Function\n"
+ "%24 = OpVariable %7 Function\n"
+ "%49 = OpVariable %7 Function\n"
+ "OpStore %8 %9\n"
+ "OpStore %10 %9\n"
+ "OpStore %11 %12\n"
+ "%19 = OpGroupNonUniformBallot %13 %18 %17\n"
+ "OpStore %15 %19\n"
+ "%23 = OpLoad %13 %22\n"
+ "OpStore %20 %23\n"
+ "OpStore %24 %12\n"
+ "OpBranch %25\n"
+ "%25 = OpLabel\n"
+ "OpLoopMerge %27 %28 None\n"
+ "OpBranch %29\n"
+ "%29 = OpLabel\n"
+ "%30 = OpLoad %6 %24\n"
+ "%33 = OpLoad %6 %32\n"
+ "%34 = OpULessThan %16 %30 %33\n"
+ "OpBranchConditional %34 %26 %27\n"
+ "%26 = OpLabel\n"
+ "%35 = OpLoad %6 %24\n"
+ "%37 = OpLoad %6 %36\n"
+ + comparison + // expected to define %38, which the OpLogicalNotEqual below compares with the extracted ballot bit
+ "%39 = OpLoad %13 %20\n"
+ "%40 = OpLoad %6 %24\n"
+ "%41 = OpGroupNonUniformBallotBitExtract %16 %18 %39 %40\n"
+ "%42 = OpLogicalNotEqual %16 %38 %41\n"
+ "OpSelectionMerge %44 None\n"
+ "OpBranchConditional %42 %43 %44\n"
+ "%43 = OpLabel\n"
+ "OpStore %8 %12\n"
+ "OpBranch %44\n"
+ "%44 = OpLabel\n"
+ "OpBranch %28\n"
+ "%28 = OpLabel\n"
+ "%45 = OpLoad %6 %24\n"
+ "%48 = OpIAdd %6 %45 %47\n"
+ "OpStore %24 %48\n"
+ "OpBranch %25\n"
+ "%27 = OpLabel\n"
+ "OpStore %49 %12\n"
+ "OpBranch %50\n"
+ "%50 = OpLabel\n"
+ "OpLoopMerge %52 %53 None\n"
+ "OpBranch %54\n"
+ "%54 = OpLabel\n"
+ "%55 = OpLoad %6 %49\n"
+ "%57 = OpULessThan %16 %55 %56\n"
+ "OpBranchConditional %57 %51 %52\n"
+ "%51 = OpLabel\n"
+ "%58 = OpAccessChain %7 %20 %12\n"
+ "%59 = OpLoad %6 %58\n"
+ "%60 = OpLoad %6 %10\n"
+ "%61 = OpBitwiseAnd %6 %59 %60\n"
+ "%62 = OpUGreaterThan %16 %61 %12\n"
+ "OpSelectionMerge %64 None\n"
+ "OpBranchConditional %62 %63 %64\n"
+ "%63 = OpLabel\n"
+ "%65 = OpLoad %6 %11\n"
+ "%66 = OpIAdd %6 %65 %47\n"
+ "OpStore %11 %66\n"
+ "OpBranch %64\n"
+ "%64 = OpLabel\n"
+ "%67 = OpAccessChain %7 %20 %9\n"
+ "%68 = OpLoad %6 %67\n"
+ "%69 = OpLoad %6 %10\n"
+ "%70 = OpBitwiseAnd %6 %68 %69\n"
+ "%71 = OpUGreaterThan %16 %70 %12\n"
+ "OpSelectionMerge %73 None\n"
+ "OpBranchConditional %71 %72 %73\n"
+ "%72 = OpLabel\n"
+ "%74 = OpLoad %6 %11\n"
+ "%75 = OpIAdd %6 %74 %47\n"
+ "OpStore %11 %75\n"
+ "OpBranch %73\n"
+ "%73 = OpLabel\n"
+ "%77 = OpAccessChain %7 %20 %76\n"
+ "%78 = OpLoad %6 %77\n"
+ "%79 = OpLoad %6 %10\n"
+ "%80 = OpBitwiseAnd %6 %78 %79\n"
+ "%81 = OpUGreaterThan %16 %80 %12\n"
+ "OpSelectionMerge %83 None\n"
+ "OpBranchConditional %81 %82 %83\n"
+ "%82 = OpLabel\n"
+ "%84 = OpLoad %6 %11\n"
+ "%85 = OpIAdd %6 %84 %47\n"
+ "OpStore %11 %85\n"
+ "OpBranch %83\n"
+ "%83 = OpLabel\n"
+ "%86 = OpAccessChain %7 %20 %18\n"
+ "%87 = OpLoad %6 %86\n"
+ "%88 = OpLoad %6 %10\n"
+ "%89 = OpBitwiseAnd %6 %87 %88\n"
+ "%90 = OpUGreaterThan %16 %89 %12\n"
+ "OpSelectionMerge %92 None\n"
+ "OpBranchConditional %90 %91 %92\n"
+ "%91 = OpLabel\n"
+ "%93 = OpLoad %6 %11\n"
+ "%94 = OpIAdd %6 %93 %47\n"
+ "OpStore %11 %94\n"
+ "OpBranch %92\n"
+ "%92 = OpLabel\n"
+ "%95 = OpLoad %6 %10\n"
+ "%96 = OpShiftLeftLogical %6 %95 %47\n"
+ "OpStore %10 %96\n"
+ "OpBranch %53\n"
+ "%53 = OpLabel\n"
+ "%97 = OpLoad %6 %49\n"
+ "%98 = OpIAdd %6 %97 %47\n"
+ "OpStore %49 %98\n"
+ "OpBranch %50\n"
+ "%52 = OpLabel\n"
+ "%99 = OpLoad %13 %20\n"
+ "%100 = OpGroupNonUniformBallotBitCount %6 %18 Reduce %99\n"
+ "%101 = OpLoad %6 %11\n"
+ "%102 = OpINotEqual %16 %100 %101\n"
+ "OpSelectionMerge %104 None\n"
+ "OpBranchConditional %102 %103 %104\n"
+ "%103 = OpLabel\n"
+ "OpStore %8 %12\n"
+ "OpBranch %104\n"
+ "%104 = OpLabel\n"
+ "%108 = OpLoad %6 %8\n"
+ "%109 = OpConvertUToF %105 %108\n"
+ "OpStore %107 %109\n"
+ "%121 = OpAccessChain %120 %119 %115 %115\n"
+ "%122 = OpLoad %110 %121\n"
+ "%124 = OpAccessChain %123 %114 %115\n"
+ "OpStore %124 %122\n"
+ "OpEmitVertex\n"
+ "OpEndPrimitive\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("geometry") << geometry << buildOptionsSpr;
+ }
+ else
+ {
+ DE_FATAL("Unsupported shader stage"); // only the four stages handled above have an assembly module here
+ }
+}
+
+
+// Registers the GLSL sources used by the SSBO-based builtin-mask cases.
+//
+// The per-variable check snippet comes from subgroupMask(caseDef); every shader
+// below stores the `tempResult` value that the snippet is expected to define.
+// A compute case registers only "comp". Any other case registers "vert", "tesc",
+// "tese", the geometry variants produced by addGeometryShadersFromTemplate,
+// "fragment" and finally the shader added by addNoSubgroupShader.
+// All sources are built with SPIR-V 1.3 build options.
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+	const string					maskCheck		= subgroupMask(caseDef);
+	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+
+	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+	{
+		// One result word per global invocation, addressed by its linearised global ID.
+		const string computeSrc =
+			"#version 450\n"
+			"#extension GL_KHR_shader_subgroup_ballot: enable\n"
+			"layout (local_size_x_id = 0, local_size_y_id = 1, "
+			"local_size_z_id = 2) in;\n"
+			"layout(set = 0, binding = 0, std430) buffer Output\n"
+			"{\n"
+			" uint result[];\n"
+			"};\n"
+			"\n"
+			"void main (void)\n"
+			"{\n"
+			" uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+			" highp uint offset = globalSize.x * ((globalSize.y * "
+			"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+			"gl_GlobalInvocationID.x;\n"
+			+ maskCheck +
+			" result[offset] = tempResult;\n"
+			"}\n";
+
+		programCollection.glslSources.add("comp") << glu::ComputeSource(computeSrc) << buildOptions;
+		return;
+	}
+
+	// Pieces shared by the graphics-stage shaders.
+	const string preamble =
+		"#version 450\n"
+		"#extension GL_KHR_shader_subgroup_ballot: enable\n";
+	const string resultBlockAndMainOpen =
+		"{\n"
+		" uint result[];\n"
+		"};\n"
+		"\n"
+		"void main (void)\n"
+		"{\n";
+
+	// Vertex stage: SSBO binding 0, one point per vertex.
+	const string vertexSrc =
+		preamble +
+		"layout(set = 0, binding = 0, std430) buffer Output\n"
+		+ resultBlockAndMainOpen
+		+ maskCheck +
+		" result[gl_VertexIndex] = tempResult;\n"
+		" float pixelSize = 2.0f/1024.0f;\n"
+		" float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+		" gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+		" gl_PointSize = 1.0f;\n"
+		"}\n";
+	programCollection.glslSources.add("vert") << glu::VertexSource(vertexSrc) << buildOptions;
+
+	// Tessellation control stage: SSBO binding 1.
+	const string tessControlSrc =
+		preamble +
+		"layout(vertices=1) out;\n"
+		"layout(set = 0, binding = 1, std430) buffer Output\n"
+		+ resultBlockAndMainOpen
+		+ maskCheck +
+		" result[gl_PrimitiveID] = tempResult;\n"
+		" if (gl_InvocationID == 0)\n"
+		" {\n"
+		" gl_TessLevelOuter[0] = 1.0f;\n"
+		" gl_TessLevelOuter[1] = 1.0f;\n"
+		" }\n"
+		" gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+		"}\n";
+	programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tessControlSrc) << buildOptions;
+
+	// Tessellation evaluation stage: SSBO binding 2, two result slots per primitive.
+	const string tessEvalSrc =
+		preamble +
+		"layout(isolines) in;\n"
+		"layout(set = 0, binding = 2, std430) buffer Output\n"
+		+ resultBlockAndMainOpen
+		+ maskCheck +
+		" result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
+		" float pixelSize = 2.0f/1024.0f;\n"
+		" gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+		"}\n";
+	programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tessEvalSrc) << buildOptions;
+
+	// Geometry stage: SSBO binding 3; ${TOPOLOGY} is filled in by the template helper.
+	const string geometryTemplate =
+		preamble +
+		"layout(${TOPOLOGY}) in;\n"
+		"layout(points, max_vertices = 1) out;\n"
+		"layout(set = 0, binding = 3, std430) buffer Output\n"
+		+ resultBlockAndMainOpen
+		+ maskCheck +
+		" result[gl_PrimitiveIDIn] = tempResult;\n"
+		" gl_Position = gl_in[0].gl_Position;\n"
+		" EmitVertex();\n"
+		" EndPrimitive();\n"
+		"}\n";
+	subgroups::addGeometryShadersFromTemplate(geometryTemplate, buildOptions, programCollection.glslSources);
+
+	// Fragment stage: the result goes to colour attachment 0 instead of an SSBO.
+	const string fragmentSrc =
+		preamble +
+		"layout(location = 0) out uint result;\n"
+		"void main (void)\n"
+		"{\n"
+		+ maskCheck +
+		" result = tempResult;\n"
+		"}\n";
+	programCollection.glslSources.add("fragment") << glu::FragmentSource(fragmentSrc) << buildOptions;
+
+	subgroups::addNoSubgroupShader(programCollection);
+}
+
+// Support gate run for every case: throws NotSupportedError when
+// subgroups::isSubgroupSupported() reports no subgroup support for the context.
+// The case definition plays no part in the decision.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+	DE_UNREF(caseDef);
+
+	const bool subgroupsAvailable = subgroups::isSubgroupSupported(context);
+
+	if (!subgroupsAvailable)
+	{
+		TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+	}
+}
+
+// Runs one framebuffer-based (no SSBO) builtin-mask case.
+//
+// Fails when the stage is required to support subgroup operations but does not,
+// throws NotSupportedError when an optional stage or the ballot feature is
+// missing, and otherwise dispatches to the framebuffer test helper that matches
+// the stage under test (vertex, tessellation, or geometry as the fallback).
+tcu::TestStatus noSSBOtest(Context& context, const CaseDefinition caseDef)
+{
+	const bool stageHasSubgroupOps = areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage);
+
+	if (!stageHasSubgroupOps && areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+	{
+		return tcu::TestStatus::fail(
+			"Shader stage " + getShaderStageName(caseDef.shaderStage) +
+			" is required to support subgroup operations!");
+	}
+
+	if (!stageHasSubgroupOps)
+	{
+		TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+	}
+
+	if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
+	{
+		TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
+	}
+
+	// Both tessellation stages share the tessellation-evaluation framebuffer helper.
+	const VkShaderStageFlags tessellationStages = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
+
+	if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+	{
+		return makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+	}
+
+	if (tessellationStages & caseDef.shaderStage)
+	{
+		return makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+	}
+
+	return makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+}
+
+
+// Runs one SSBO-based builtin-mask case (compute or all-graphics).
+//
+// Throws NotSupportedError when the ballot feature is missing. A compute case
+// fails if the compute stage lacks subgroup operations, otherwise it runs
+// through makeComputeTest. A graphics case is restricted to the stages that
+// are both requested and listed in
+// VkPhysicalDeviceSubgroupProperties::supportedStages, and then runs through
+// subgroups::allStages.
+//
+// Without vertex-stage SSBO support the run falls back to the fragment stage
+// alone, or is reported as not supported when the fragment stage is not in the
+// set.
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+	if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
+	{
+		TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
+	}
+
+	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+	{
+		if (!areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+		{
+			return tcu::TestStatus::fail(
+				"Shader stage " + getShaderStageName(caseDef.shaderStage) +
+				" is required to support subgroup operations!");
+		}
+		return makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkComputeStage);
+	}
+	else
+	{
+		// Only sType/pNext are inputs; the remaining members are filled in by the query.
+		VkPhysicalDeviceSubgroupProperties subgroupProperties;
+		subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+		subgroupProperties.pNext = DE_NULL;
+
+		VkPhysicalDeviceProperties2 properties;
+		properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+		properties.pNext = &subgroupProperties;
+
+		context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+		VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProperties.supportedStages);
+
+		// Check for an empty stage set first. Otherwise a device without vertex
+		// SSBO support would blame SSBO writes when the real reason is that no
+		// requested stage supports subgroup operations.
+		if ((VkShaderStageFlagBits)0u == stages)
+			TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+		if ( VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
+		{
+			if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
+				TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+			else
+				stages = VK_SHADER_STAGE_FRAGMENT_BIT;	// fragment is the only stage that can still run
+		}
+
+		return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, stages);
+	}
+}
+
+// Builds the "builtin_mask_var" group with "graphics", "compute" and
+// "framebuffer" sub-groups. Each of the five mask builtins gets one
+// all-graphics case, one compute case and one framebuffer case per entry of
+// the stage table below. The returned pointer is released from its MovePtr,
+// so the caller takes ownership.
+tcu::TestCaseGroup* createSubgroupsBuiltinMaskVarTests(tcu::TestContext& testCtx)
+{
+	de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+		testCtx, "graphics", "Subgroup builtin mask category tests: graphics"));
+	de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+		testCtx, "compute", "Subgroup builtin mask category tests: compute"));
+	de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+		testCtx, "framebuffer", "Subgroup builtin mask category tests: framebuffer"));
+
+	// Builtin names without the "gl_" prefix; the lower-cased form names the test case.
+	const char* const maskBuiltins[] =
+	{
+		"SubgroupEqMask",
+		"SubgroupGeMask",
+		"SubgroupGtMask",
+		"SubgroupLeMask",
+		"SubgroupLtMask",
+	};
+
+	// Stages exercised by the framebuffer variants.
+	const VkShaderStageFlags framebufferStages[] =
+	{
+		VK_SHADER_STAGE_VERTEX_BIT,
+		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+		VK_SHADER_STAGE_GEOMETRY_BIT,
+	};
+
+	for (int builtinNdx = 0; builtinNdx < DE_LENGTH_OF_ARRAY(maskBuiltins); ++builtinNdx)
+	{
+		const std::string	builtinName	= maskBuiltins[builtinNdx];
+		const std::string	caseName	= de::toLower(builtinName);
+		const std::string	glslName	= "gl_" + builtinName;
+
+		const CaseDefinition graphicsCase = {glslName, VK_SHADER_STAGE_ALL_GRAPHICS};
+		addFunctionCaseWithPrograms(graphicGroup.get(), caseName, "",
+									supportedCheck, initPrograms, test, graphicsCase);
+
+		const CaseDefinition computeCase = {glslName, VK_SHADER_STAGE_COMPUTE_BIT};
+		addFunctionCaseWithPrograms(computeGroup.get(), caseName, "",
+									supportedCheck, initPrograms, test, computeCase);
+
+		for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(framebufferStages); ++stageNdx)
+		{
+			const CaseDefinition	framebufferCase	= {glslName, framebufferStages[stageNdx]};
+			const std::string		stageCaseName	= caseName + "_" + getShaderStageName(framebufferCase.shaderStage);
+
+			addFunctionCaseWithPrograms(framebufferGroup.get(), stageCaseName, "",
+										supportedCheck, initFrameBufferPrograms, noSSBOtest, framebufferCase);
+		}
+	}
+
+	de::MovePtr<tcu::TestCaseGroup> rootGroup(new tcu::TestCaseGroup(
+		testCtx, "builtin_mask_var", "Subgroup builtin mask variable tests"));
+
+	rootGroup->addChild(graphicGroup.release());
+	rootGroup->addChild(computeGroup.release());
+	rootGroup->addChild(framebufferGroup.release());
+
+	return rootGroup.release();
+}
+} // subgroups
+} // vkt
--- /dev/null
+#ifndef _VKTSUBGROUPSBUILTINMASKVARTESTS_HPP
+#define _VKTSUBGROUPSBUILTINMASKVARTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+/*!
+ * \brief Create the "builtin_mask_var" test group.
+ *
+ * The group holds three children ("graphics", "compute" and "framebuffer")
+ * covering the subgroup mask builtins.
+ *
+ * \param testCtx Test context handed to every tcu::TestCaseGroup that is created.
+ * \return Newly allocated group; the implementation releases it from its
+ *         de::MovePtr, so the caller takes ownership.
+ */
+tcu::TestCaseGroup* createSubgroupsBuiltinMaskVarTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSBUILTINMASKVARTESTS_HPP
--- /dev/null
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsBuiltinVarTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Verifies the subgroup size reported by every invocation of a vertex-pipeline
+// stage. datas[0] is read as 32-bit words, four per invocation; word 0 of each
+// of the `width` invocations must equal `subgroupSize`.
+bool checkVertexPipelineStagesSubgroupSize(std::vector<const void*> datas,
+	deUint32 width, deUint32 subgroupSize)
+{
+	const deUint32* const words = static_cast<const deUint32*>(datas[0]);
+
+	for (deUint32 invocationNdx = 0; invocationNdx < width; ++invocationNdx)
+	{
+		const deUint32 reportedSize = words[invocationNdx * 4];
+
+		if (reportedSize != subgroupSize)
+			return false;
+	}
+
+	return true;
+}
+
+// Verifies the subgroup invocation IDs reported by a vertex-pipeline stage.
+// datas[0] is read as 32-bit words, four per invocation; word 1 of each of the
+// `width` invocations must be below `subgroupSize`. The per-ID hit counts are
+// then summed and must add up to `width`.
+bool checkVertexPipelineStagesSubgroupInvocationID(std::vector<const void*> datas,
+	deUint32 width, deUint32 subgroupSize)
+{
+	const deUint32* const	words		= static_cast<const deUint32*>(datas[0]);
+	vector<deUint32>		hitsPerId	(subgroupSize, 0);
+
+	for (deUint32 invocationNdx = 0; invocationNdx < width; ++invocationNdx)
+	{
+		const deUint32 reportedId = words[(invocationNdx * 4) + 1];
+
+		if (reportedId >= subgroupSize)
+			return false;
+
+		hitsPerId[reportedId]++;
+	}
+
+	deUint32 hitsTotal = 0;
+	for (vector<deUint32>::const_iterator hit = hitsPerId.begin(); hit != hitsPerId.end(); ++hit)
+		hitsTotal += *hit;
+
+	return hitsTotal == width;
+}
+
+// Verifies the subgroup size reported by every compute invocation.
+// datas[0] is read as 32-bit words, four per invocation, indexed by the
+// linearised global invocation ID (x fastest, then y, then z); word 0 must
+// equal `subgroupSize` for every invocation of every workgroup.
+static bool checkComputeSubgroupSize(std::vector<const void*> datas,
+	const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+	deUint32 subgroupSize)
+{
+	const deUint32* const	words		= static_cast<const deUint32*>(datas[0]);
+	const deUint32			globalSizeX	= numWorkgroups[0] * localSize[0];
+	const deUint32			globalSizeY	= numWorkgroups[1] * localSize[1];
+
+	for (deUint32 groupX = 0; groupX < numWorkgroups[0]; ++groupX)
+	for (deUint32 groupY = 0; groupY < numWorkgroups[1]; ++groupY)
+	for (deUint32 groupZ = 0; groupZ < numWorkgroups[2]; ++groupZ)
+	for (deUint32 localX = 0; localX < localSize[0]; ++localX)
+	for (deUint32 localY = 0; localY < localSize[1]; ++localY)
+	for (deUint32 localZ = 0; localZ < localSize[2]; ++localZ)
+	{
+		const deUint32 globalX	= groupX * localSize[0] + localX;
+		const deUint32 globalY	= groupY * localSize[1] + localY;
+		const deUint32 globalZ	= groupZ * localSize[2] + localZ;
+		const deUint32 linear	= globalSizeX * ((globalSizeY * globalZ) + globalY) + globalX;
+
+		if (words[linear * 4] != subgroupSize)
+			return false;
+	}
+
+	return true;
+}
+
+// Verifies the subgroup invocation IDs reported by compute invocations.
+// datas[0] is read as 32-bit words, four per invocation, indexed by the
+// linearised global invocation ID; word 1 must be below `subgroupSize`.
+// Hits are counted per workgroup and their sum must equal the workgroup's
+// total local size.
+static bool checkComputeSubgroupInvocationID(std::vector<const void*> datas,
+	const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+	deUint32 subgroupSize)
+{
+	const deUint32* const	words			= static_cast<const deUint32*>(datas[0]);
+	const deUint32			globalSizeX		= numWorkgroups[0] * localSize[0];
+	const deUint32			globalSizeY		= numWorkgroups[1] * localSize[1];
+	const deUint32			invocationsPerGroup = localSize[0] * localSize[1] * localSize[2];
+
+	for (deUint32 groupX = 0; groupX < numWorkgroups[0]; ++groupX)
+	for (deUint32 groupY = 0; groupY < numWorkgroups[1]; ++groupY)
+	for (deUint32 groupZ = 0; groupZ < numWorkgroups[2]; ++groupZ)
+	{
+		// Fresh tally for each workgroup.
+		vector<deUint32> hitsPerId(subgroupSize, 0);
+
+		for (deUint32 localX = 0; localX < localSize[0]; ++localX)
+		for (deUint32 localY = 0; localY < localSize[1]; ++localY)
+		for (deUint32 localZ = 0; localZ < localSize[2]; ++localZ)
+		{
+			const deUint32 globalX	= groupX * localSize[0] + localX;
+			const deUint32 globalY	= groupY * localSize[1] + localY;
+			const deUint32 globalZ	= groupZ * localSize[2] + localZ;
+			const deUint32 linear	= globalSizeX * ((globalSizeY * globalZ) + globalY) + globalX;
+
+			const deUint32 reportedId = words[(linear * 4) + 1];
+
+			if (reportedId >= subgroupSize)
+				return false;
+
+			hitsPerId[reportedId]++;
+		}
+
+		deUint32 hitsTotal = 0;
+		for (vector<deUint32>::const_iterator hit = hitsPerId.begin(); hit != hitsPerId.end(); ++hit)
+			hitsTotal += *hit;
+
+		if (hitsTotal != invocationsPerGroup)
+			return false;
+	}
+
+	return true;
+}
+
+// Verifies the subgroup count reported by every compute invocation.
+// datas[0] is read as 32-bit words, four per invocation, indexed by the
+// linearised global invocation ID; word 2 must not exceed the workgroup's
+// total local size. The subgroup-size argument is unused.
+static bool checkComputeNumSubgroups (std::vector<const void*> datas,
+	const deUint32 numWorkgroups[3],
+	const deUint32 localSize[3],
+	deUint32)
+{
+	const deUint32* const	words			= static_cast<const deUint32*>(datas[0]);
+	const deUint32			globalSizeX		= numWorkgroups[0] * localSize[0];
+	const deUint32			globalSizeY		= numWorkgroups[1] * localSize[1];
+	const deUint32			invocationsPerGroup = localSize[0] * localSize[1] * localSize[2];
+
+	for (deUint32 groupX = 0; groupX < numWorkgroups[0]; ++groupX)
+	for (deUint32 groupY = 0; groupY < numWorkgroups[1]; ++groupY)
+	for (deUint32 groupZ = 0; groupZ < numWorkgroups[2]; ++groupZ)
+	for (deUint32 localX = 0; localX < localSize[0]; ++localX)
+	for (deUint32 localY = 0; localY < localSize[1]; ++localY)
+	for (deUint32 localZ = 0; localZ < localSize[2]; ++localZ)
+	{
+		const deUint32 globalX	= groupX * localSize[0] + localX;
+		const deUint32 globalY	= groupY * localSize[1] + localY;
+		const deUint32 globalZ	= groupZ * localSize[2] + localZ;
+		const deUint32 linear	= globalSizeX * ((globalSizeY * globalZ) + globalY) + globalX;
+
+		const deUint32 reportedCount = words[(linear * 4) + 2];
+
+		if (reportedCount > invocationsPerGroup)
+			return false;
+	}
+
+	return true;
+}
+
+// Verifies the subgroup ID reported by every compute invocation.
+// datas[0] is read as 32-bit words, four per invocation, indexed by the
+// linearised global invocation ID; word 3 (subgroup ID) must be below word 2
+// (subgroup count) of the same invocation. The subgroup-size argument is unused.
+static bool checkComputeSubgroupID (std::vector<const void*> datas,
+	const deUint32 numWorkgroups[3],
+	const deUint32 localSize[3],
+	deUint32)
+{
+	const deUint32* const	words		= static_cast<const deUint32*>(datas[0]);
+	const deUint32			globalSizeX	= numWorkgroups[0] * localSize[0];
+	const deUint32			globalSizeY	= numWorkgroups[1] * localSize[1];
+
+	for (deUint32 groupX = 0; groupX < numWorkgroups[0]; ++groupX)
+	for (deUint32 groupY = 0; groupY < numWorkgroups[1]; ++groupY)
+	for (deUint32 groupZ = 0; groupZ < numWorkgroups[2]; ++groupZ)
+	for (deUint32 localX = 0; localX < localSize[0]; ++localX)
+	for (deUint32 localY = 0; localY < localSize[1]; ++localY)
+	for (deUint32 localZ = 0; localZ < localSize[2]; ++localZ)
+	{
+		const deUint32 globalX	= groupX * localSize[0] + localX;
+		const deUint32 globalY	= groupY * localSize[1] + localY;
+		const deUint32 globalZ	= groupZ * localSize[2] + localZ;
+		const deUint32 linear	= globalSizeX * ((globalSizeY * globalZ) + globalY) + globalX;
+
+		const deUint32 reportedCount	= words[(linear * 4) + 2];
+		const deUint32 reportedId		= words[(linear * 4) + 3];
+
+		if (reportedId >= reportedCount)
+			return false;
+	}
+
+	return true;
+}
+
+namespace // unnamed namespace: CaseDefinition is local to this translation unit
+{
+struct CaseDefinition // parameters of a single test case, passed by value to the program-init and test functions
+{
+ std::string varName; // presumably the name of the builtin variable under test; NOTE(review): confirm the exact form (e.g. "gl_" prefix) in the case-creation code
+ VkShaderStageFlags shaderStage; // shader stage bit(s) the case targets; compared against VK_SHADER_STAGE_*_BIT values when programs are set up
+};
+}
+
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
+{
+ const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ const vk::SpirVAsmBuildOptions buildOptionsSpr (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3);
+
+ {
+ /*
+ "layout(location = 0) in vec4 in_color;\n"
+ "layout(location = 0) out uvec4 out_color;\n"
+ "void main()\n"
+ "{\n"
+ " out_color = uvec4(in_color);\n"
+ "}\n";
+ */
+ const string fragment =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 16\n"
+ "; Schema: 0\n"
+ "OpCapability Shader\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Fragment %4 \"main\" %9 %13\n"
+ "OpExecutionMode %4 OriginUpperLeft\n"
+ "OpDecorate %9 Location 0\n"
+ "OpDecorate %13 Location 0\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 0\n"
+ "%7 = OpTypeVector %6 4\n"
+ "%8 = OpTypePointer Output %7\n"
+ "%9 = OpVariable %8 Output\n"
+ "%10 = OpTypeFloat 32\n"
+ "%11 = OpTypeVector %10 4\n"
+ "%12 = OpTypePointer Input %11\n"
+ "%13 = OpVariable %12 Input\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%14 = OpLoad %11 %13\n"
+ "%15 = OpConvertFToU %7 %14\n"
+ "OpStore %9 %15\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("fragment") << fragment << buildOptionsSpr;
+ }
+
+ if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+ subgroups::setVertexShaderFrameBuffer(programCollection);
+
+ if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+ {
+ /*
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "layout(location = 0) out vec4 out_color;\n"
+ "layout(location = 0) in highp vec4 in_position;\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " out_color = vec4(gl_SubgroupSize, gl_SubgroupInvocationID, 1.0f, 1.0f);\n"
+ " gl_Position = in_position;\n"
+ " gl_PointSize = 1.0f;\n"
+ "}\n";
+ */
+ const string vertex =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 31\n"
+ "; Schema: 0\n"
+ "OpCapability Shader\n"
+ "OpCapability GroupNonUniform\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Vertex %4 \"main\" %9 %12 %15 %24 %28\n"
+ "OpDecorate %9 Location 0\n"
+ "OpDecorate %12 RelaxedPrecision\n"
+ "OpDecorate %12 BuiltIn SubgroupSize\n"
+ "OpDecorate %13 RelaxedPrecision\n"
+ "OpDecorate %15 RelaxedPrecision\n"
+ "OpDecorate %15 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %16 RelaxedPrecision\n"
+ "OpMemberDecorate %22 0 BuiltIn Position\n"
+ "OpMemberDecorate %22 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %22 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %22 3 BuiltIn CullDistance\n"
+ "OpDecorate %22 Block\n"
+ "OpDecorate %28 Location 0\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeFloat 32\n"
+ "%7 = OpTypeVector %6 4\n"
+ "%8 = OpTypePointer Output %7\n"
+ "%9 = OpVariable %8 Output\n"
+ "%10 = OpTypeInt 32 0\n"
+ "%11 = OpTypePointer Input %10\n"
+ "%12 = OpVariable %11 Input\n"
+ "%15 = OpVariable %11 Input\n"
+ "%18 = OpConstant %6 1\n"
+ "%20 = OpConstant %10 1\n"
+ "%21 = OpTypeArray %6 %20\n"
+ "%22 = OpTypeStruct %7 %6 %21 %21\n"
+ "%23 = OpTypePointer Output %22\n"
+ "%24 = OpVariable %23 Output\n"
+ "%25 = OpTypeInt 32 1\n"
+ "%26 = OpConstant %25 0\n"
+ "%27 = OpTypePointer Input %7\n"
+ "%28 = OpVariable %27 Input\n"
+ "%31 = OpConstant %25 1\n"
+ "%32 = OpTypePointer Output %6\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%13 = OpLoad %10 %12\n"
+ "%14 = OpConvertUToF %6 %13\n"
+ "%16 = OpLoad %10 %15\n"
+ "%17 = OpConvertUToF %6 %16\n"
+ "%19 = OpCompositeConstruct %7 %14 %17 %18 %18\n"
+ "OpStore %9 %19\n"
+ "%29 = OpLoad %7 %28\n"
+ "%30 = OpAccessChain %8 %24 %26\n"
+ "OpStore %30 %29\n"
+ "%33 = OpAccessChain %32 %24 %31\n"
+ "OpStore %33 %18\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("vert") << vertex << buildOptionsSpr;
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+ {
+ /*
+ "#extension GL_EXT_tessellation_shader : require\n"
+ "layout(vertices = 2) out;\n"
+ "layout(location = 0) out vec4 out_color[];\n"
+ "void main (void)\n"
+ "{\n"
+ " if (gl_InvocationID == 0)\n"
+ {\n"
+ " gl_TessLevelOuter[0] = 1.0f;\n"
+ " gl_TessLevelOuter[1] = 1.0f;\n"
+ " }\n"
+ " out_color[gl_InvocationID] = vec4(0.0f);\n"
+ " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ "}\n";
+ */
+ const string controlSource =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 53\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %30 %41 %47\n"
+ "OpExecutionMode %4 OutputVertices 2\n"
+ "OpDecorate %8 BuiltIn InvocationId\n"
+ "OpDecorate %20 Patch\n"
+ "OpDecorate %20 BuiltIn TessLevelOuter\n"
+ "OpDecorate %30 Location 0\n"
+ "OpMemberDecorate %38 0 BuiltIn Position\n"
+ "OpMemberDecorate %38 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %38 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %38 3 BuiltIn CullDistance\n"
+ "OpDecorate %38 Block\n"
+ "OpMemberDecorate %43 0 BuiltIn Position\n"
+ "OpMemberDecorate %43 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %43 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %43 3 BuiltIn CullDistance\n"
+ "OpDecorate %43 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 1\n"
+ "%7 = OpTypePointer Input %6\n"
+ "%8 = OpVariable %7 Input\n"
+ "%10 = OpConstant %6 0\n"
+ "%11 = OpTypeBool\n"
+ "%15 = OpTypeFloat 32\n"
+ "%16 = OpTypeInt 32 0\n"
+ "%17 = OpConstant %16 4\n"
+ "%18 = OpTypeArray %15 %17\n"
+ "%19 = OpTypePointer Output %18\n"
+ "%20 = OpVariable %19 Output\n"
+ "%21 = OpConstant %15 1\n"
+ "%22 = OpTypePointer Output %15\n"
+ "%24 = OpConstant %6 1\n"
+ "%26 = OpTypeVector %15 4\n"
+ "%27 = OpConstant %16 2\n"
+ "%28 = OpTypeArray %26 %27\n"
+ "%29 = OpTypePointer Output %28\n"
+ "%30 = OpVariable %29 Output\n"
+ "%32 = OpConstant %15 0\n"
+ "%33 = OpConstantComposite %26 %32 %32 %32 %32\n"
+ "%34 = OpTypePointer Output %26\n"
+ "%36 = OpConstant %16 1\n"
+ "%37 = OpTypeArray %15 %36\n"
+ "%38 = OpTypeStruct %26 %15 %37 %37\n"
+ "%39 = OpTypeArray %38 %27\n"
+ "%40 = OpTypePointer Output %39\n"
+ "%41 = OpVariable %40 Output\n"
+ "%43 = OpTypeStruct %26 %15 %37 %37\n"
+ "%44 = OpConstant %16 32\n"
+ "%45 = OpTypeArray %43 %44\n"
+ "%46 = OpTypePointer Input %45\n"
+ "%47 = OpVariable %46 Input\n"
+ "%49 = OpTypePointer Input %26\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%9 = OpLoad %6 %8\n"
+ "%12 = OpIEqual %11 %9 %10\n"
+ "OpSelectionMerge %14 None\n"
+ "OpBranchConditional %12 %13 %14\n"
+ "%13 = OpLabel\n"
+ "%23 = OpAccessChain %22 %20 %10\n"
+ "OpStore %23 %21\n"
+ "%25 = OpAccessChain %22 %20 %24\n"
+ "OpStore %25 %21\n"
+ "OpBranch %14\n"
+ "%14 = OpLabel\n"
+ "%31 = OpLoad %6 %8\n"
+ "%35 = OpAccessChain %34 %30 %31\n"
+ "OpStore %35 %33\n"
+ "%42 = OpLoad %6 %8\n"
+ "%48 = OpLoad %6 %8\n"
+ "%50 = OpAccessChain %49 %47 %48 %10\n"
+ "%51 = OpLoad %26 %50\n"
+ "%52 = OpAccessChain %34 %41 %42 %10\n"
+ "OpStore %52 %51\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
+
+ /*
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "#extension GL_EXT_tessellation_shader : require\n"
+ "layout(isolines, equal_spacing, ccw ) in;\n"
+ "layout(location = 0) in vec4 in_color[];\n"
+ "layout(location = 0) out vec4 out_color;\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+ " out_color = vec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0.0f, 0.0f);\n"
+ "}\n";
+ */
+ const string evaluationSource =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 51\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "OpCapability GroupNonUniform\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %38 %40 %43 %50\n"
+ "OpExecutionMode %4 Isolines\n"
+ "OpExecutionMode %4 SpacingEqual\n"
+ "OpExecutionMode %4 VertexOrderCcw\n"
+ "OpMemberDecorate %11 0 BuiltIn Position\n"
+ "OpMemberDecorate %11 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
+ "OpDecorate %11 Block\n"
+ "OpMemberDecorate %16 0 BuiltIn Position\n"
+ "OpMemberDecorate %16 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
+ "OpDecorate %16 Block\n"
+ "OpDecorate %29 BuiltIn TessCoord\n"
+ "OpDecorate %38 Location 0\n"
+ "OpDecorate %40 RelaxedPrecision\n"
+ "OpDecorate %40 BuiltIn SubgroupSize\n"
+ "OpDecorate %41 RelaxedPrecision\n"
+ "OpDecorate %43 RelaxedPrecision\n"
+ "OpDecorate %43 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %44 RelaxedPrecision\n"
+ "OpDecorate %50 Location 0\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeFloat 32\n"
+ "%7 = OpTypeVector %6 4\n"
+ "%8 = OpTypeInt 32 0\n"
+ "%9 = OpConstant %8 1\n"
+ "%10 = OpTypeArray %6 %9\n"
+ "%11 = OpTypeStruct %7 %6 %10 %10\n"
+ "%12 = OpTypePointer Output %11\n"
+ "%13 = OpVariable %12 Output\n"
+ "%14 = OpTypeInt 32 1\n"
+ "%15 = OpConstant %14 0\n"
+ "%16 = OpTypeStruct %7 %6 %10 %10\n"
+ "%17 = OpConstant %8 32\n"
+ "%18 = OpTypeArray %16 %17\n"
+ "%19 = OpTypePointer Input %18\n"
+ "%20 = OpVariable %19 Input\n"
+ "%21 = OpTypePointer Input %7\n"
+ "%24 = OpConstant %14 1\n"
+ "%27 = OpTypeVector %6 3\n"
+ "%28 = OpTypePointer Input %27\n"
+ "%29 = OpVariable %28 Input\n"
+ "%30 = OpConstant %8 0\n"
+ "%31 = OpTypePointer Input %6\n"
+ "%36 = OpTypePointer Output %7\n"
+ "%38 = OpVariable %36 Output\n"
+ "%39 = OpTypePointer Input %8\n"
+ "%40 = OpVariable %39 Input\n"
+ "%43 = OpVariable %39 Input\n"
+ "%46 = OpConstant %6 0\n"
+ "%48 = OpTypeArray %7 %17\n"
+ "%49 = OpTypePointer Input %48\n"
+ "%50 = OpVariable %49 Input\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%22 = OpAccessChain %21 %20 %15 %15\n"
+ "%23 = OpLoad %7 %22\n"
+ "%25 = OpAccessChain %21 %20 %24 %15\n"
+ "%26 = OpLoad %7 %25\n"
+ "%32 = OpAccessChain %31 %29 %30\n"
+ "%33 = OpLoad %6 %32\n"
+ "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
+ "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
+ "%37 = OpAccessChain %36 %13 %15\n"
+ "OpStore %37 %35\n"
+ "%41 = OpLoad %8 %40\n"
+ "%42 = OpConvertUToF %6 %41\n"
+ "%44 = OpLoad %8 %43\n"
+ "%45 = OpConvertUToF %6 %44\n"
+ "%47 = OpCompositeConstruct %7 %42 %45 %46 %46\n"
+ "OpStore %38 %47\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+
+ programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+ {
+ /*
+ "#extension GL_EXT_tessellation_shader : require\n"
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "layout(vertices = 2) out;\n"
+ "layout(location = 0) out vec4 out_color[];\n"
+ "void main (void)\n"
+ "{\n"
+ " if (gl_InvocationID == 0)\n"
+ {\n"
+ " gl_TessLevelOuter[0] = 1.0f;\n"
+ " gl_TessLevelOuter[1] = 1.0f;\n"
+ " }\n"
+ " out_color[gl_InvocationID] = vec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0, 0);\n"
+ " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ "}\n";
+ */
+ const string controlSource =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 60\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "OpCapability GroupNonUniform\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %30 %33 %36 %48 %54\n"
+ "OpExecutionMode %4 OutputVertices 2\n"
+ "OpDecorate %8 BuiltIn InvocationId\n"
+ "OpDecorate %20 Patch\n"
+ "OpDecorate %20 BuiltIn TessLevelOuter\n"
+ "OpDecorate %30 Location 0\n"
+ "OpDecorate %33 RelaxedPrecision\n"
+ "OpDecorate %33 BuiltIn SubgroupSize\n"
+ "OpDecorate %34 RelaxedPrecision\n"
+ "OpDecorate %36 RelaxedPrecision\n"
+ "OpDecorate %36 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %37 RelaxedPrecision\n"
+ "OpMemberDecorate %45 0 BuiltIn Position\n"
+ "OpMemberDecorate %45 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %45 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %45 3 BuiltIn CullDistance\n"
+ "OpDecorate %45 Block\n"
+ "OpMemberDecorate %50 0 BuiltIn Position\n"
+ "OpMemberDecorate %50 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %50 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %50 3 BuiltIn CullDistance\n"
+ "OpDecorate %50 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 1\n"
+ "%7 = OpTypePointer Input %6\n"
+ "%8 = OpVariable %7 Input\n"
+ "%10 = OpConstant %6 0\n"
+ "%11 = OpTypeBool\n"
+ "%15 = OpTypeFloat 32\n"
+ "%16 = OpTypeInt 32 0\n"
+ "%17 = OpConstant %16 4\n"
+ "%18 = OpTypeArray %15 %17\n"
+ "%19 = OpTypePointer Output %18\n"
+ "%20 = OpVariable %19 Output\n"
+ "%21 = OpConstant %15 1\n"
+ "%22 = OpTypePointer Output %15\n"
+ "%24 = OpConstant %6 1\n"
+ "%26 = OpTypeVector %15 4\n"
+ "%27 = OpConstant %16 2\n"
+ "%28 = OpTypeArray %26 %27\n"
+ "%29 = OpTypePointer Output %28\n"
+ "%30 = OpVariable %29 Output\n"
+ "%32 = OpTypePointer Input %16\n"
+ "%33 = OpVariable %32 Input\n"
+ "%36 = OpVariable %32 Input\n"
+ "%39 = OpConstant %15 0\n"
+ "%41 = OpTypePointer Output %26\n"
+ "%43 = OpConstant %16 1\n"
+ "%44 = OpTypeArray %15 %43\n"
+ "%45 = OpTypeStruct %26 %15 %44 %44\n"
+ "%46 = OpTypeArray %45 %27\n"
+ "%47 = OpTypePointer Output %46\n"
+ "%48 = OpVariable %47 Output\n"
+ "%50 = OpTypeStruct %26 %15 %44 %44\n"
+ "%51 = OpConstant %16 32\n"
+ "%52 = OpTypeArray %50 %51\n"
+ "%53 = OpTypePointer Input %52\n"
+ "%54 = OpVariable %53 Input\n"
+ "%56 = OpTypePointer Input %26\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%9 = OpLoad %6 %8\n"
+ "%12 = OpIEqual %11 %9 %10\n"
+ "OpSelectionMerge %14 None\n"
+ "OpBranchConditional %12 %13 %14\n"
+ "%13 = OpLabel\n"
+ "%23 = OpAccessChain %22 %20 %10\n"
+ "OpStore %23 %21\n"
+ "%25 = OpAccessChain %22 %20 %24\n"
+ "OpStore %25 %21\n"
+ "OpBranch %14\n"
+ "%14 = OpLabel\n"
+ "%31 = OpLoad %6 %8\n"
+ "%34 = OpLoad %16 %33\n"
+ "%35 = OpConvertUToF %15 %34\n"
+ "%37 = OpLoad %16 %36\n"
+ "%38 = OpConvertUToF %15 %37\n"
+ "%40 = OpCompositeConstruct %26 %35 %38 %39 %39\n"
+ "%42 = OpAccessChain %41 %30 %31\n"
+ "OpStore %42 %40\n"
+ "%49 = OpLoad %6 %8\n"
+ "%55 = OpLoad %6 %8\n"
+ "%57 = OpAccessChain %56 %54 %55 %10\n"
+ "%58 = OpLoad %26 %57\n"
+ "%59 = OpAccessChain %41 %48 %49 %10\n"
+ "OpStore %59 %58\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("tesc") << controlSource << buildOptionsSpr;
+
+ /*
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "#extension GL_EXT_tessellation_shader : require\n"
+ "layout(isolines, equal_spacing, ccw ) in;\n"
+ "layout(location = 0) in vec4 in_color[];\n"
+ "layout(location = 0) out vec4 out_color;\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+ " out_color = in_color[0];\n"
+ "}\n";
+ */
+ const string evaluationSource =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 44\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %38 %41\n"
+ "OpExecutionMode %4 Isolines\n"
+ "OpExecutionMode %4 SpacingEqual\n"
+ "OpExecutionMode %4 VertexOrderCcw\n"
+ "OpMemberDecorate %11 0 BuiltIn Position\n"
+ "OpMemberDecorate %11 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
+ "OpDecorate %11 Block\n"
+ "OpMemberDecorate %16 0 BuiltIn Position\n"
+ "OpMemberDecorate %16 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
+ "OpDecorate %16 Block\n"
+ "OpDecorate %29 BuiltIn TessCoord\n"
+ "OpDecorate %38 Location 0\n"
+ "OpDecorate %41 Location 0\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeFloat 32\n"
+ "%7 = OpTypeVector %6 4\n"
+ "%8 = OpTypeInt 32 0\n"
+ "%9 = OpConstant %8 1\n"
+ "%10 = OpTypeArray %6 %9\n"
+ "%11 = OpTypeStruct %7 %6 %10 %10\n"
+ "%12 = OpTypePointer Output %11\n"
+ "%13 = OpVariable %12 Output\n"
+ "%14 = OpTypeInt 32 1\n"
+ "%15 = OpConstant %14 0\n"
+ "%16 = OpTypeStruct %7 %6 %10 %10\n"
+ "%17 = OpConstant %8 32\n"
+ "%18 = OpTypeArray %16 %17\n"
+ "%19 = OpTypePointer Input %18\n"
+ "%20 = OpVariable %19 Input\n"
+ "%21 = OpTypePointer Input %7\n"
+ "%24 = OpConstant %14 1\n"
+ "%27 = OpTypeVector %6 3\n"
+ "%28 = OpTypePointer Input %27\n"
+ "%29 = OpVariable %28 Input\n"
+ "%30 = OpConstant %8 0\n"
+ "%31 = OpTypePointer Input %6\n"
+ "%36 = OpTypePointer Output %7\n"
+ "%38 = OpVariable %36 Output\n"
+ "%39 = OpTypeArray %7 %17\n"
+ "%40 = OpTypePointer Input %39\n"
+ "%41 = OpVariable %40 Input\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%22 = OpAccessChain %21 %20 %15 %15\n"
+ "%23 = OpLoad %7 %22\n"
+ "%25 = OpAccessChain %21 %20 %24 %15\n"
+ "%26 = OpLoad %7 %25\n"
+ "%32 = OpAccessChain %31 %29 %30\n"
+ "%33 = OpLoad %6 %32\n"
+ "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
+ "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
+ "%37 = OpAccessChain %36 %13 %15\n"
+ "OpStore %37 %35\n"
+ "%42 = OpAccessChain %21 %41 %15\n"
+ "%43 = OpLoad %7 %42\n"
+ "OpStore %38 %43\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("tese") << evaluationSource << buildOptionsSpr;
+ }
+ else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+ {
+ /*
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "layout(points) in;\n"
+ "layout(points, max_vertices = 1) out;\n"
+ "layout(location = 0) out vec4 out_color;\n"
+ "void main (void)\n"
+ "{\n"
+ " out_color = vec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0, 0);\n"
+ " gl_Position = gl_in[0].gl_Position;\n"
+ " EmitVertex();\n"
+ " EndPrimitive();\n"
+ "}\n";
+ */
+ const string geometry =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 35\n"
+ "; Schema: 0\n"
+ "OpCapability Geometry\n"
+ "OpCapability GroupNonUniform\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Geometry %4 \"main\" %9 %12 %15 %24 %30\n"
+ "OpExecutionMode %4 InputPoints\n"
+ "OpExecutionMode %4 Invocations 1\n"
+ "OpExecutionMode %4 OutputPoints\n"
+ "OpExecutionMode %4 OutputVertices 1\n"
+ "OpDecorate %9 Location 0\n"
+ "OpDecorate %12 RelaxedPrecision\n"
+ "OpDecorate %12 BuiltIn SubgroupSize\n"
+ "OpDecorate %13 RelaxedPrecision\n"
+ "OpDecorate %15 RelaxedPrecision\n"
+ "OpDecorate %15 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %16 RelaxedPrecision\n"
+ "OpMemberDecorate %22 0 BuiltIn Position\n"
+ "OpMemberDecorate %22 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %22 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %22 3 BuiltIn CullDistance\n"
+ "OpDecorate %22 Block\n"
+ "OpMemberDecorate %27 0 BuiltIn Position\n"
+ "OpMemberDecorate %27 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %27 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %27 3 BuiltIn CullDistance\n"
+ "OpDecorate %27 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeFloat 32\n"
+ "%7 = OpTypeVector %6 4\n"
+ "%8 = OpTypePointer Output %7\n"
+ "%9 = OpVariable %8 Output\n"
+ "%10 = OpTypeInt 32 0\n"
+ "%11 = OpTypePointer Input %10\n"
+ "%12 = OpVariable %11 Input\n"
+ "%15 = OpVariable %11 Input\n"
+ "%18 = OpConstant %6 0\n"
+ "%20 = OpConstant %10 1\n"
+ "%21 = OpTypeArray %6 %20\n"
+ "%22 = OpTypeStruct %7 %6 %21 %21\n"
+ "%23 = OpTypePointer Output %22\n"
+ "%24 = OpVariable %23 Output\n"
+ "%25 = OpTypeInt 32 1\n"
+ "%26 = OpConstant %25 0\n"
+ "%27 = OpTypeStruct %7 %6 %21 %21\n"
+ "%28 = OpTypeArray %27 %20\n"
+ "%29 = OpTypePointer Input %28\n"
+ "%30 = OpVariable %29 Input\n"
+ "%31 = OpTypePointer Input %7\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%13 = OpLoad %10 %12\n"
+ "%14 = OpConvertUToF %6 %13\n"
+ "%16 = OpLoad %10 %15\n"
+ "%17 = OpConvertUToF %6 %16\n"
+ "%19 = OpCompositeConstruct %7 %14 %17 %18 %18\n"
+ "OpStore %9 %19\n"
+ "%32 = OpAccessChain %31 %30 %26 %26\n"
+ "%33 = OpLoad %7 %32\n"
+ "%34 = OpAccessChain %8 %24 %26\n"
+ "OpStore %34 %33\n"
+ "OpEmitVertex\n"
+ "OpEndPrimitive\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("geometry") << geometry << buildOptionsSpr;
+ }
+ else
+ {
+ DE_FATAL("Unsupported shader stage");
+ }
+}
+
+// Registers the shader programs for the cases that are paired with test()
+// in createSubgroupsBuiltinVarTests().
+//
+// Compute stage: a single GLSL shader named "comp" is added. Every invocation
+// stores uvec4(gl_SubgroupSize, gl_SubgroupInvocationID, gl_NumSubgroups,
+// gl_SubgroupID) at its flattened global invocation index in an std430 buffer
+// at set 0, binding 0.
+//
+// Any other stage: SPIR-V 1.3 assembly is added for "vert", "tesc", "tese",
+// a geometry template and "fragment", followed by a call to
+// subgroups::addNoSubgroupShader(). The block comment in front of each
+// assembly string holds the GLSL the assembly corresponds to.
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+ if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+ {
+ std::ostringstream src;
+
+ // The work group size comes from specialization constant ids 0, 1 and 2.
+ src << "#version 450\n"
+ << "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+ "local_size_z_id = 2) in;\n"
+ << "layout(set = 0, binding = 0, std430) buffer Output\n"
+ << "{\n"
+ << " uvec4 result[];\n"
+ << "};\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+ << " highp uint offset = globalSize.x * ((globalSize.y * "
+ "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+ "gl_GlobalInvocationID.x;\n"
+ << " result[offset] = uvec4(gl_SubgroupSize, gl_SubgroupInvocationID, gl_NumSubgroups, gl_SubgroupID);\n"
+ << "}\n";
+
+ programCollection.glslSources.add("comp")
+ << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+ else
+ {
+ // Vertex stage: writes (SubgroupSize, SubgroupLocalInvocationId, 0, 0) to
+ // the buffer at binding 0, indexed by VertexIndex, and emits one point.
+ // NOTE(review): the "; Bound: 52" header comment below is lower than the
+ // highest id used in the module (%54); it is only a comment line in the
+ // assembly text - confirm the assembler recomputes the bound.
+ {
+ /*
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "layout(set = 0, binding = 0, std430) buffer Output\n"
+ "{\n"
+ " uvec4 result[];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " result[gl_VertexIndex] = uvec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0, 0);\n"
+ " float pixelSize = 2.0f/1024.0f;\n"
+ " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+ " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+ " gl_PointSize = 1.0f;\n"
+ "}\n";
+ */
+ const string vertex =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 1\n"
+ "; Bound: 52\n"
+ "; Schema: 0\n"
+ "OpCapability Shader\n"
+ "OpCapability GroupNonUniform\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Vertex %4 \"main\" %15 %18 %20 %41\n"
+ "OpDecorate %8 ArrayStride 16\n"
+ "OpMemberDecorate %9 0 Offset 0\n"
+ "OpDecorate %9 BufferBlock\n"
+ "OpDecorate %11 DescriptorSet 0\n"
+ "OpDecorate %11 Binding 0\n"
+ "OpDecorate %15 BuiltIn VertexIndex\n"
+ "OpDecorate %18 RelaxedPrecision\n"
+ "OpDecorate %18 BuiltIn SubgroupSize\n"
+ "OpDecorate %19 RelaxedPrecision\n"
+ "OpDecorate %20 RelaxedPrecision\n"
+ "OpDecorate %20 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %21 RelaxedPrecision\n"
+ "OpMemberDecorate %39 0 BuiltIn Position\n"
+ "OpMemberDecorate %39 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %39 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %39 3 BuiltIn CullDistance\n"
+ "OpDecorate %39 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 0\n"
+ "%7 = OpTypeVector %6 4\n"
+ "%8 = OpTypeRuntimeArray %7\n"
+ "%9 = OpTypeStruct %8\n"
+ "%10 = OpTypePointer Uniform %9\n"
+ "%11 = OpVariable %10 Uniform\n"
+ "%12 = OpTypeInt 32 1\n"
+ "%13 = OpConstant %12 0\n"
+ "%14 = OpTypePointer Input %12\n"
+ "%15 = OpVariable %14 Input\n"
+ "%17 = OpTypePointer Input %6\n"
+ "%18 = OpVariable %17 Input\n"
+ "%20 = OpVariable %17 Input\n"
+ "%22 = OpConstant %6 0\n"
+ "%24 = OpTypePointer Uniform %7\n"
+ "%26 = OpTypeFloat 32\n"
+ "%27 = OpTypePointer Function %26\n"
+ "%29 = OpConstant %26 0.00195313\n"
+ "%32 = OpConstant %26 2\n"
+ "%34 = OpConstant %26 1\n"
+ "%36 = OpTypeVector %26 4\n"
+ "%37 = OpConstant %6 1\n"
+ "%38 = OpTypeArray %26 %37\n"
+ "%39 = OpTypeStruct %36 %26 %38 %38\n"
+ "%40 = OpTypePointer Output %39\n"
+ "%41 = OpVariable %40 Output\n"
+ "%48 = OpConstant %26 0\n"
+ "%50 = OpTypePointer Output %36\n"
+ "%52 = OpConstant %12 1\n"
+ "%53 = OpTypePointer Output %26\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%28 = OpVariable %27 Function\n"
+ "%30 = OpVariable %27 Function\n"
+ "%16 = OpLoad %12 %15\n"
+ "%19 = OpLoad %6 %18\n"
+ "%21 = OpLoad %6 %20\n"
+ "%23 = OpCompositeConstruct %7 %19 %21 %22 %22\n"
+ "%25 = OpAccessChain %24 %11 %13 %16\n"
+ "OpStore %25 %23\n"
+ "OpStore %28 %29\n"
+ "%31 = OpLoad %26 %28\n"
+ "%33 = OpFDiv %26 %31 %32\n"
+ "%35 = OpFSub %26 %33 %34\n"
+ "OpStore %30 %35\n"
+ "%42 = OpLoad %12 %15\n"
+ "%43 = OpConvertSToF %26 %42\n"
+ "%44 = OpLoad %26 %28\n"
+ "%45 = OpFMul %26 %43 %44\n"
+ "%46 = OpLoad %26 %30\n"
+ "%47 = OpFAdd %26 %45 %46\n"
+ "%49 = OpCompositeConstruct %36 %47 %48 %48 %34\n"
+ "%51 = OpAccessChain %50 %41 %13\n"
+ "OpStore %51 %49\n"
+ "%54 = OpAccessChain %53 %41 %52\n"
+ "OpStore %54 %34\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("vert") << vertex << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3);
+ }
+
+ // Tessellation control stage: writes the subgroup built-ins to the buffer
+ // at binding 1, indexed by PrimitiveId, sets the first two outer tessellation
+ // levels to 1 from invocation 0, and passes the position through.
+ {
+ /*
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "layout(vertices=1) out;\n"
+ "layout(set = 0, binding = 1, std430) buffer Output\n"
+ "{\n"
+ " uvec4 result[];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " result[gl_PrimitiveID] = uvec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0, 0);\n"
+ " if (gl_InvocationID == 0)\n"
+ " {\n"
+ " gl_TessLevelOuter[0] = 1.0f;\n"
+ " gl_TessLevelOuter[1] = 1.0f;\n"
+ " }\n"
+ " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ "}\n";
+ */
+ const string tesc =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 1\n"
+ "; Bound: 61\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "OpCapability GroupNonUniform\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationControl %4 \"main\" %15 %18 %20 %26 %36 %48 %54\n"
+ "OpExecutionMode %4 OutputVertices 1\n"
+ "OpDecorate %8 ArrayStride 16\n"
+ "OpMemberDecorate %9 0 Offset 0\n"
+ "OpDecorate %9 BufferBlock\n"
+ "OpDecorate %11 DescriptorSet 0\n"
+ "OpDecorate %11 Binding 1\n"
+ "OpDecorate %15 BuiltIn PrimitiveId\n"
+ "OpDecorate %18 RelaxedPrecision\n"
+ "OpDecorate %18 BuiltIn SubgroupSize\n"
+ "OpDecorate %19 RelaxedPrecision\n"
+ "OpDecorate %20 RelaxedPrecision\n"
+ "OpDecorate %20 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %21 RelaxedPrecision\n"
+ "OpDecorate %26 BuiltIn InvocationId\n"
+ "OpDecorate %36 Patch\n"
+ "OpDecorate %36 BuiltIn TessLevelOuter\n"
+ "OpMemberDecorate %45 0 BuiltIn Position\n"
+ "OpMemberDecorate %45 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %45 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %45 3 BuiltIn CullDistance\n"
+ "OpDecorate %45 Block\n"
+ "OpMemberDecorate %50 0 BuiltIn Position\n"
+ "OpMemberDecorate %50 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %50 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %50 3 BuiltIn CullDistance\n"
+ "OpDecorate %50 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 0\n"
+ "%7 = OpTypeVector %6 4\n"
+ "%8 = OpTypeRuntimeArray %7\n"
+ "%9 = OpTypeStruct %8\n"
+ "%10 = OpTypePointer Uniform %9\n"
+ "%11 = OpVariable %10 Uniform\n"
+ "%12 = OpTypeInt 32 1\n"
+ "%13 = OpConstant %12 0\n"
+ "%14 = OpTypePointer Input %12\n"
+ "%15 = OpVariable %14 Input\n"
+ "%17 = OpTypePointer Input %6\n"
+ "%18 = OpVariable %17 Input\n"
+ "%20 = OpVariable %17 Input\n"
+ "%22 = OpConstant %6 0\n"
+ "%24 = OpTypePointer Uniform %7\n"
+ "%26 = OpVariable %14 Input\n"
+ "%28 = OpTypeBool\n"
+ "%32 = OpTypeFloat 32\n"
+ "%33 = OpConstant %6 4\n"
+ "%34 = OpTypeArray %32 %33\n"
+ "%35 = OpTypePointer Output %34\n"
+ "%36 = OpVariable %35 Output\n"
+ "%37 = OpConstant %32 1\n"
+ "%38 = OpTypePointer Output %32\n"
+ "%40 = OpConstant %12 1\n"
+ "%42 = OpTypeVector %32 4\n"
+ "%43 = OpConstant %6 1\n"
+ "%44 = OpTypeArray %32 %43\n"
+ "%45 = OpTypeStruct %42 %32 %44 %44\n"
+ "%46 = OpTypeArray %45 %43\n"
+ "%47 = OpTypePointer Output %46\n"
+ "%48 = OpVariable %47 Output\n"
+ "%50 = OpTypeStruct %42 %32 %44 %44\n"
+ "%51 = OpConstant %6 32\n"
+ "%52 = OpTypeArray %50 %51\n"
+ "%53 = OpTypePointer Input %52\n"
+ "%54 = OpVariable %53 Input\n"
+ "%56 = OpTypePointer Input %42\n"
+ "%59 = OpTypePointer Output %42\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%16 = OpLoad %12 %15\n"
+ "%19 = OpLoad %6 %18\n"
+ "%21 = OpLoad %6 %20\n"
+ "%23 = OpCompositeConstruct %7 %19 %21 %22 %22\n"
+ "%25 = OpAccessChain %24 %11 %13 %16\n"
+ "OpStore %25 %23\n"
+ "%27 = OpLoad %12 %26\n"
+ "%29 = OpIEqual %28 %27 %13\n"
+ "OpSelectionMerge %31 None\n"
+ "OpBranchConditional %29 %30 %31\n"
+ "%30 = OpLabel\n"
+ "%39 = OpAccessChain %38 %36 %13\n"
+ "OpStore %39 %37\n"
+ "%41 = OpAccessChain %38 %36 %40\n"
+ "OpStore %41 %37\n"
+ "OpBranch %31\n"
+ "%31 = OpLabel\n"
+ "%49 = OpLoad %12 %26\n"
+ "%55 = OpLoad %12 %26\n"
+ "%57 = OpAccessChain %56 %54 %55 %13\n"
+ "%58 = OpLoad %42 %57\n"
+ "%60 = OpAccessChain %59 %48 %49 %13\n"
+ "OpStore %60 %58\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("tesc") << tesc << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3);
+ }
+
+ // Tessellation evaluation stage (isolines): writes the subgroup built-ins to
+ // the buffer at binding 2, indexed by PrimitiveId * 2 + uint(TessCoord.x + 0.5).
+ {
+ /*
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "layout(isolines) in;\n"
+ "layout(set = 0, binding = 2, std430) buffer Output\n"
+ "{\n"
+ " uvec4 result[];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = uvec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0, 0);\n"
+ " float pixelSize = 2.0f/1024.0f;\n"
+ " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+ "}\n";
+ */
+ const string tese =
+ "; SPIR - V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 67\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "OpCapability GroupNonUniform\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationEvaluation %4 \"main\" %15 %23 %33 %35 %48 %53\n"
+ "OpExecutionMode %4 Isolines\n"
+ "OpExecutionMode %4 SpacingEqual\n"
+ "OpExecutionMode %4 VertexOrderCcw\n"
+ "OpDecorate %8 ArrayStride 16\n"
+ "OpMemberDecorate %9 0 Offset 0\n"
+ "OpDecorate %9 BufferBlock\n"
+ "OpDecorate %11 DescriptorSet 0\n"
+ "OpDecorate %11 Binding 2\n"
+ "OpDecorate %15 BuiltIn PrimitiveId\n"
+ "OpDecorate %23 BuiltIn TessCoord\n"
+ "OpDecorate %33 RelaxedPrecision\n"
+ "OpDecorate %33 BuiltIn SubgroupSize\n"
+ "OpDecorate %34 RelaxedPrecision\n"
+ "OpDecorate %35 RelaxedPrecision\n"
+ "OpDecorate %35 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %36 RelaxedPrecision\n"
+ "OpMemberDecorate %46 0 BuiltIn Position\n"
+ "OpMemberDecorate %46 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %46 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %46 3 BuiltIn CullDistance\n"
+ "OpDecorate %46 Block\n"
+ "OpMemberDecorate %49 0 BuiltIn Position\n"
+ "OpMemberDecorate %49 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %49 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %49 3 BuiltIn CullDistance\n"
+ "OpDecorate %49 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 0\n"
+ "%7 = OpTypeVector %6 4\n"
+ "%8 = OpTypeRuntimeArray %7\n"
+ "%9 = OpTypeStruct %8\n"
+ "%10 = OpTypePointer Uniform %9\n"
+ "%11 = OpVariable %10 Uniform\n"
+ "%12 = OpTypeInt 32 1\n"
+ "%13 = OpConstant %12 0\n"
+ "%14 = OpTypePointer Input %12\n"
+ "%15 = OpVariable %14 Input\n"
+ "%17 = OpConstant %12 2\n"
+ "%20 = OpTypeFloat 32\n"
+ "%21 = OpTypeVector %20 3\n"
+ "%22 = OpTypePointer Input %21\n"
+ "%23 = OpVariable %22 Input\n"
+ "%24 = OpConstant %6 0\n"
+ "%25 = OpTypePointer Input %20\n"
+ "%28 = OpConstant %20 0.5\n"
+ "%32 = OpTypePointer Input %6\n"
+ "%33 = OpVariable %32 Input\n"
+ "%35 = OpVariable %32 Input\n"
+ "%38 = OpTypePointer Uniform %7\n"
+ "%40 = OpTypePointer Function %20\n"
+ "%42 = OpConstant %20 0.00195313\n"
+ "%43 = OpTypeVector %20 4\n"
+ "%44 = OpConstant %6 1\n"
+ "%45 = OpTypeArray %20 %44\n"
+ "%46 = OpTypeStruct %43 %20 %45 %45\n"
+ "%47 = OpTypePointer Output %46\n"
+ "%48 = OpVariable %47 Output\n"
+ "%49 = OpTypeStruct %43 %20 %45 %45\n"
+ "%50 = OpConstant %6 32\n"
+ "%51 = OpTypeArray %49 %50\n"
+ "%52 = OpTypePointer Input %51\n"
+ "%53 = OpVariable %52 Input\n"
+ "%54 = OpTypePointer Input %43\n"
+ "%61 = OpConstant %20 2\n"
+ "%65 = OpTypePointer Output %43\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%41 = OpVariable %40 Function\n"
+ "%16 = OpLoad %12 %15\n"
+ "%18 = OpIMul %12 %16 %17\n"
+ "%19 = OpBitcast %6 %18\n"
+ "%26 = OpAccessChain %25 %23 %24\n"
+ "%27 = OpLoad %20 %26\n"
+ "%29 = OpFAdd %20 %27 %28\n"
+ "%30 = OpConvertFToU %6 %29\n"
+ "%31 = OpIAdd %6 %19 %30\n"
+ "%34 = OpLoad %6 %33\n"
+ "%36 = OpLoad %6 %35\n"
+ "%37 = OpCompositeConstruct %7 %34 %36 %24 %24\n"
+ "%39 = OpAccessChain %38 %11 %13 %31\n"
+ "OpStore %39 %37\n"
+ "OpStore %41 %42\n"
+ "%55 = OpAccessChain %54 %53 %13 %13\n"
+ "%56 = OpLoad %43 %55\n"
+ "%57 = OpAccessChain %25 %23 %24\n"
+ "%58 = OpLoad %20 %57\n"
+ "%59 = OpLoad %20 %41\n"
+ "%60 = OpFMul %20 %58 %59\n"
+ "%62 = OpFDiv %20 %60 %61\n"
+ "%63 = OpCompositeConstruct %43 %62 %62 %62 %62\n"
+ "%64 = OpFAdd %43 %56 %63\n"
+ "%66 = OpAccessChain %65 %48 %13\n"
+ "OpStore %66 %64\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("tese") << tese << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3);
+ }
+
+ // Geometry stage: writes the subgroup built-ins to the buffer at binding 3,
+ // indexed by PrimitiveId, and emits one point. The string is a template:
+ // the ${TOPOLOGY} execution mode is left as a placeholder and the template
+ // is handed to addGeometryShadersFromTemplate() instead of being added directly.
+ {
+ /*
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "// Note: ${TOPOLOGY} variable is substituted manually at SPIR-V ASM level"
+ "layout(${TOPOLOGY}) in;\n"
+ "layout(points, max_vertices = 1) out;\n"
+ "layout(set = 0, binding = 3, std430) buffer Output\n"
+ "{\n"
+ " uvec4 result[];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " result[gl_PrimitiveIDIn] = uvec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0, 0);\n"
+ " gl_Position = gl_in[0].gl_Position;\n"
+ " EmitVertex();\n"
+ " EndPrimitive();\n"
+ "}\n";
+ */
+ const string geometry =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 1\n"
+ "; Bound: 42\n"
+ "; Schema: 0\n"
+ "OpCapability Geometry\n"
+ "OpCapability GroupNonUniform\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Geometry %4 \"main\" %15 %18 %20 %32 %36\n"
+ "OpExecutionMode %4 ${TOPOLOGY}\n"
+ "OpExecutionMode %4 Invocations 1\n"
+ "OpExecutionMode %4 OutputPoints\n"
+ "OpExecutionMode %4 OutputVertices 1\n"
+ "OpDecorate %8 ArrayStride 16\n"
+ "OpMemberDecorate %9 0 Offset 0\n"
+ "OpDecorate %9 BufferBlock\n"
+ "OpDecorate %11 DescriptorSet 0\n"
+ "OpDecorate %11 Binding 3\n"
+ "OpDecorate %15 BuiltIn PrimitiveId\n"
+ "OpDecorate %18 RelaxedPrecision\n"
+ "OpDecorate %18 BuiltIn SubgroupSize\n"
+ "OpDecorate %19 RelaxedPrecision\n"
+ "OpDecorate %20 RelaxedPrecision\n"
+ "OpDecorate %20 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %21 RelaxedPrecision\n"
+ "OpMemberDecorate %30 0 BuiltIn Position\n"
+ "OpMemberDecorate %30 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %30 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %30 3 BuiltIn CullDistance\n"
+ "OpDecorate %30 Block\n"
+ "OpMemberDecorate %33 0 BuiltIn Position\n"
+ "OpMemberDecorate %33 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %33 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %33 3 BuiltIn CullDistance\n"
+ "OpDecorate %33 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 0\n"
+ "%7 = OpTypeVector %6 4\n"
+ "%8 = OpTypeRuntimeArray %7\n"
+ "%9 = OpTypeStruct %8\n"
+ "%10 = OpTypePointer Uniform %9\n"
+ "%11 = OpVariable %10 Uniform\n"
+ "%12 = OpTypeInt 32 1\n"
+ "%13 = OpConstant %12 0\n"
+ "%14 = OpTypePointer Input %12\n"
+ "%15 = OpVariable %14 Input\n"
+ "%17 = OpTypePointer Input %6\n"
+ "%18 = OpVariable %17 Input\n"
+ "%20 = OpVariable %17 Input\n"
+ "%22 = OpConstant %6 0\n"
+ "%24 = OpTypePointer Uniform %7\n"
+ "%26 = OpTypeFloat 32\n"
+ "%27 = OpTypeVector %26 4\n"
+ "%28 = OpConstant %6 1\n"
+ "%29 = OpTypeArray %26 %28\n"
+ "%30 = OpTypeStruct %27 %26 %29 %29\n"
+ "%31 = OpTypePointer Output %30\n"
+ "%32 = OpVariable %31 Output\n"
+ "%33 = OpTypeStruct %27 %26 %29 %29\n"
+ "%34 = OpTypeArray %33 %28\n"
+ "%35 = OpTypePointer Input %34\n"
+ "%36 = OpVariable %35 Input\n"
+ "%37 = OpTypePointer Input %27\n"
+ "%40 = OpTypePointer Output %27\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%16 = OpLoad %12 %15\n"
+ "%19 = OpLoad %6 %18\n"
+ "%21 = OpLoad %6 %20\n"
+ "%23 = OpCompositeConstruct %7 %19 %21 %22 %22\n"
+ "%25 = OpAccessChain %24 %11 %13 %16\n"
+ "OpStore %25 %23\n"
+ "%38 = OpAccessChain %37 %36 %13 %13\n"
+ "%39 = OpLoad %27 %38\n"
+ "%41 = OpAccessChain %40 %32 %13\n"
+ "OpStore %41 %39\n"
+ "OpEmitVertex\n"
+ "OpEndPrimitive\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ addGeometryShadersFromTemplate(geometry, SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3), programCollection.spirvAsmSources);
+ }
+
+ // Fragment stage: no buffer is used; the subgroup built-ins are written to
+ // the uvec4 colour output at location 0.
+ {
+ /*
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "layout(location = 0) out uvec4 data;\n"
+ "void main (void)\n"
+ "{\n"
+ " data = uvec4(gl_SubgroupSize, gl_SubgroupInvocationID, 0, 0);\n"
+ "}\n";
+ */
+ const string fragment =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 1\n"
+ "; Bound: 17\n"
+ "; Schema: 0\n"
+ "OpCapability Shader\n"
+ "OpCapability GroupNonUniform\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Fragment %4 \"main\" %9 %11 %13\n"
+ "OpExecutionMode %4 OriginUpperLeft\n"
+ "OpDecorate %9 Location 0\n"
+ "OpDecorate %11 RelaxedPrecision\n"
+ "OpDecorate %11 Flat\n"
+ "OpDecorate %11 BuiltIn SubgroupSize\n"
+ "OpDecorate %12 RelaxedPrecision\n"
+ "OpDecorate %13 RelaxedPrecision\n"
+ "OpDecorate %13 Flat\n"
+ "OpDecorate %13 BuiltIn SubgroupLocalInvocationId\n"
+ "OpDecorate %14 RelaxedPrecision\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 0\n"
+ "%7 = OpTypeVector %6 4\n"
+ "%8 = OpTypePointer Output %7\n"
+ "%9 = OpVariable %8 Output\n"
+ "%10 = OpTypePointer Input %6\n"
+ "%11 = OpVariable %10 Input\n"
+ "%13 = OpVariable %10 Input\n"
+ "%15 = OpConstant %6 0\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%12 = OpLoad %6 %11\n"
+ "%14 = OpLoad %6 %13\n"
+ "%16 = OpCompositeConstruct %7 %12 %14 %15 %15\n"
+ "OpStore %9 %16\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+
+ programCollection.spirvAsmSources.add("fragment") << fragment << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3);
+ }
+
+ // Helper defined outside this chunk; presumably registers the shaders that
+ // contain no subgroup operations - TODO confirm against its definition.
+ subgroups::addNoSubgroupShader(programCollection);
+ }
+}
+
+// Precondition shared by every builtin-variable case: the case is reported as
+// NotSupported unless subgroups::isSubgroupSupported() accepts the context.
+// The case definition plays no part in the decision.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+	DE_UNREF(caseDef);
+
+	const bool subgroupsAvailable = subgroups::isSubgroupSupported(context);
+
+	if (subgroupsAvailable)
+		return;
+
+	TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+}
+
+// Framebuffer-based variant of the builtin-variable test (no SSBO writes).
+//
+// Fails when the stage is required to support subgroup operations but does
+// not, and throws NotSupportedError when support is optional and absent.
+// Otherwise dispatches on the shader stage (vertex, tessellation, geometry)
+// and on the variable name (gl_SubgroupSize or gl_SubgroupInvocationID) to the
+// matching make*FrameBufferTest() helper. An unknown stage raises
+// InternalError; an unknown variable name yields a failing status.
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+	if (!areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+	{
+		if (!areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+		{
+			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+		}
+
+		return tcu::TestStatus::fail(
+			"Shader stage " + getShaderStageName(caseDef.shaderStage) +
+			" is required to support subgroup operations!");
+	}
+
+	const bool	wantsSubgroupSize	= ("gl_SubgroupSize" == caseDef.varName);
+	const bool	wantsInvocationID	= ("gl_SubgroupInvocationID" == caseDef.varName);
+
+	if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+	{
+		if (wantsSubgroupSize)
+			return makeVertexFrameBufferTest(
+				context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupSize);
+
+		if (wantsInvocationID)
+			return makeVertexFrameBufferTest(
+				context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupInvocationID);
+	}
+	else if ((VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) & caseDef.shaderStage )
+	{
+		if (wantsSubgroupSize)
+			return makeTessellationEvaluationFrameBufferTest(
+				context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupSize);
+
+		if (wantsInvocationID)
+			return makeTessellationEvaluationFrameBufferTest(
+				context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupInvocationID);
+	}
+	else if (VK_SHADER_STAGE_GEOMETRY_BIT & caseDef.shaderStage )
+	{
+		if (wantsSubgroupSize)
+			return makeGeometryFrameBufferTest(
+				context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupSize);
+
+		if (wantsInvocationID)
+			return makeGeometryFrameBufferTest(
+				context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupInvocationID);
+	}
+	else
+	{
+		TCU_THROW(InternalError, "Unhandled shader stage");
+	}
+
+	// Only reached for a handled stage whose variable name matched neither check.
+	return tcu::TestStatus::fail(
+		caseDef.varName + " failed (unhandled error checking case " +
+		caseDef.varName + ")!");
+}
+
+
+
+// SSBO-based variant of the builtin-variable test.
+//
+// Graphics cases: queries VkPhysicalDeviceSubgroupProperties, masks the
+// requested stages with the supported ones, falls back to fragment-only when
+// vertex-stage SSBO writes are unavailable (or reports NotSupported when the
+// fragment stage is not among them), then runs subgroups::allStages() with the
+// checker that matches the variable name.
+//
+// Compute cases: fail outright when the stage lacks subgroup support, otherwise
+// run makeComputeTest() with the checker that matches the variable name.
+//
+// An unrecognised variable name yields a failing status in both paths.
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+	if (VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
+	{
+		VkPhysicalDeviceSubgroupProperties subgroupProperties;
+		subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+		subgroupProperties.pNext = DE_NULL;
+
+		// Chain the subgroup properties behind the generic properties query.
+		VkPhysicalDeviceProperties2 properties;
+		properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+		properties.pNext = &subgroupProperties;
+
+		context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+		VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProperties.supportedStages);
+
+		if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
+		{
+			if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) != 0)
+			{
+				stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+			}
+			else
+			{
+				TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+			}
+		}
+
+		if ((VkShaderStageFlagBits)0u == stages)
+		{
+			TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+		}
+
+		if ("gl_SubgroupSize" == caseDef.varName)
+			return subgroups::allStages(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupSize, stages);
+
+		if ("gl_SubgroupInvocationID" == caseDef.varName)
+			return subgroups::allStages(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupInvocationID, stages);
+
+		return tcu::TestStatus::fail(
+			caseDef.varName + " failed (unhandled error checking case " +
+			caseDef.varName + ")!");
+	}
+
+	// Compute path.
+	if (!areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+	{
+		return tcu::TestStatus::fail(
+			"Shader stage " + getShaderStageName(caseDef.shaderStage) +
+			" is required to support subgroup operations!");
+	}
+
+	if ("gl_SubgroupSize" == caseDef.varName)
+		return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkComputeSubgroupSize);
+
+	if ("gl_SubgroupInvocationID" == caseDef.varName)
+		return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkComputeSubgroupInvocationID);
+
+	if ("gl_NumSubgroups" == caseDef.varName)
+		return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkComputeNumSubgroups);
+
+	if ("gl_SubgroupID" == caseDef.varName)
+		return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkComputeSubgroupID);
+
+	return tcu::TestStatus::fail(
+		caseDef.varName + " failed (unhandled error checking case " +
+		caseDef.varName + ")!");
+}
+
+// Builds the "builtin_var" test group and hands ownership to the caller.
+//
+// The group has three children, added in this order:
+//   graphics    - SubgroupSize / SubgroupInvocationID over all graphics stages
+//   compute     - the same two variables plus NumSubgroups / SubgroupID
+//   framebuffer - SubgroupSize / SubgroupInvocationID per single stage, using
+//                 initFrameBufferPrograms and noSSBOtest
+tcu::TestCaseGroup* createSubgroupsBuiltinVarTests(tcu::TestContext& testCtx)
+{
+	de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+		testCtx, "graphics", "Subgroup builtin variable tests: graphics"));
+	de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+		testCtx, "compute", "Subgroup builtin variable tests: compute"));
+	de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+		testCtx, "framebuffer", "Subgroup builtin variable tests: framebuffer"));
+
+	// Variables available in every shader stage.
+	const char* const all_stages_vars[] =
+	{
+		"SubgroupSize",
+		"SubgroupInvocationID"
+	};
+
+	// Variables that only exist in compute shaders.
+	const char* const compute_only_vars[] =
+	{
+		"NumSubgroups",
+		"SubgroupID"
+	};
+
+	// Single stages exercised by the framebuffer variant.
+	const VkShaderStageFlags stages[] =
+	{
+		VK_SHADER_STAGE_VERTEX_BIT,
+		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+		VK_SHADER_STAGE_GEOMETRY_BIT,
+	};
+
+	const int numAllStageVars		= DE_LENGTH_OF_ARRAY(all_stages_vars);
+	const int numComputeOnlyVars	= DE_LENGTH_OF_ARRAY(compute_only_vars);
+	const int numStages				= DE_LENGTH_OF_ARRAY(stages);
+
+	for (int varNdx = 0; varNdx < numAllStageVars; ++varNdx)
+	{
+		const std::string	var			= all_stages_vars[varNdx];
+		const std::string	varLower	= de::toLower(var);
+		const std::string	glslName	= "gl_" + var;
+
+		const CaseDefinition graphicsCase = {glslName, VK_SHADER_STAGE_ALL_GRAPHICS};
+		addFunctionCaseWithPrograms(graphicGroup.get(),
+									varLower, "",
+									supportedCheck, initPrograms, test, graphicsCase);
+
+		const CaseDefinition computeCase = {glslName, VK_SHADER_STAGE_COMPUTE_BIT};
+		addFunctionCaseWithPrograms(computeGroup.get(),
+									varLower + "_" + getShaderStageName(computeCase.shaderStage), "",
+									supportedCheck, initPrograms, test, computeCase);
+
+		for (int stageNdx = 0; stageNdx < numStages; ++stageNdx)
+		{
+			const CaseDefinition framebufferCase = {glslName, stages[stageNdx]};
+			addFunctionCaseWithPrograms(framebufferGroup.get(),
+										varLower + "_" + getShaderStageName(framebufferCase.shaderStage), "",
+										supportedCheck, initFrameBufferPrograms, noSSBOtest, framebufferCase);
+		}
+	}
+
+	for (int varNdx = 0; varNdx < numComputeOnlyVars; ++varNdx)
+	{
+		const std::string		var			= compute_only_vars[varNdx];
+		const CaseDefinition	computeCase	= {"gl_" + var, VK_SHADER_STAGE_COMPUTE_BIT};
+
+		addFunctionCaseWithPrograms(computeGroup.get(), de::toLower(var), "",
+									supportedCheck, initPrograms, test, computeCase);
+	}
+
+	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+		testCtx, "builtin_var", "Subgroup builtin variable tests"));
+
+	group->addChild(graphicGroup.release());
+	group->addChild(computeGroup.release());
+	group->addChild(framebufferGroup.release());
+
+	return group.release();
+}
+
+} // subgroups
+} // vkt
--- /dev/null
+#ifndef _VKTSUBGROUPSBUILTINVARTESTS_HPP
+#define _VKTSUBGROUPSBUILTINVARTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Builds the "builtin_var" test group. Its children are a graphics group plus
+// the "compute" and "framebuffer" groups of subgroup builtin variable tests.
+// The group is released from its owning pointer before it is returned, so the
+// caller receives a raw pointer and is responsible for the group's lifetime.
+tcu::TestCaseGroup* createSubgroupsBuiltinVarTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSBUILTINVARTESTS_HPP
--- /dev/null
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsClusteredTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Clustered subgroup operations covered by this file. The enumerators are
+// consecutive starting at zero: the test-creation loop iterates an int index
+// over [0, OPTYPE_CLUSTERED_LAST) and hands that index to the helpers below.
+enum OpType
+{
+ OPTYPE_CLUSTERED_ADD = 0,
+ OPTYPE_CLUSTERED_MUL,
+ OPTYPE_CLUSTERED_MIN,
+ OPTYPE_CLUSTERED_MAX,
+ OPTYPE_CLUSTERED_AND,
+ OPTYPE_CLUSTERED_OR,
+ OPTYPE_CLUSTERED_XOR,
+ OPTYPE_CLUSTERED_LAST // loop bound only; not an operation (no name is defined for it)
+};
+
+// Result checker handed to the graphics (allStages) and framebuffer test
+// runners. It forwards to subgroups::check() with a fixed third argument of 1
+// and ignores its own unnamed deUint32 parameter.
+// NOTE(review): 1 is presumably the expected per-invocation value, matching
+// the shaders' "tempResult ? 1 : 0" output -- confirm against check().
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+ deUint32 width, deUint32)
+{
+ return vkt::subgroups::check(datas, width, 1);
+}
+
+// Result checker handed to the compute test runner. It forwards to
+// subgroups::checkCompute() with a fixed last argument of 1 and ignores its
+// own unnamed deUint32 parameter.
+// NOTE(review): 1 is presumably the expected per-invocation value, matching
+// the compute shader's "tempResult ? 1 : 0" output -- confirm against the
+// helper.
+static bool checkCompute(std::vector<const void*> datas,
+ const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+ deUint32)
+{
+ return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
+}
+
+// Returns the name of the GLSL clustered builtin that corresponds to an
+// OpType value. Any value outside the defined operations (including
+// OPTYPE_CLUSTERED_LAST) raises DE_FATAL and yields an empty string.
+std::string getOpTypeName(int opType)
+{
+	// Indexed by OpType; the order must follow the enum declaration.
+	static const char* const builtinNames[OPTYPE_CLUSTERED_LAST] =
+	{
+		"subgroupClusteredAdd",	// OPTYPE_CLUSTERED_ADD
+		"subgroupClusteredMul",	// OPTYPE_CLUSTERED_MUL
+		"subgroupClusteredMin",	// OPTYPE_CLUSTERED_MIN
+		"subgroupClusteredMax",	// OPTYPE_CLUSTERED_MAX
+		"subgroupClusteredAnd",	// OPTYPE_CLUSTERED_AND
+		"subgroupClusteredOr",	// OPTYPE_CLUSTERED_OR
+		"subgroupClusteredXor"	// OPTYPE_CLUSTERED_XOR
+	};
+
+	const bool isKnownOp = (opType >= 0) && (opType < OPTYPE_CLUSTERED_LAST);
+
+	if (!isKnownOp)
+	{
+		DE_FATAL("Unsupported op type");
+		return "";
+	}
+
+	return builtinNames[opType];
+}
+
+// Builds the GLSL for a min()/max() reduction step. Float formats get a form
+// in which a NaN operand is replaced by the other operand (ternary for
+// scalars, mix()/isnan() for vectors); every other format gets the plain
+// "fn(lhs, rhs)" call.
+static std::string buildMinMaxOperation(const std::string& fn, vk::VkFormat format,
+	const std::string& lhs, const std::string& rhs)
+{
+	const std::string plainCall = fn + "(" + lhs + ", " + rhs + ")";
+
+	switch (format)
+	{
+		case VK_FORMAT_R32_SFLOAT:
+		case VK_FORMAT_R64_SFLOAT:
+			return "(isnan(" + lhs + ") ? " + rhs + " : (isnan(" + rhs + ") ? " + lhs + " : " + plainCall + "))";
+
+		case VK_FORMAT_R32G32_SFLOAT:
+		case VK_FORMAT_R32G32B32_SFLOAT:
+		case VK_FORMAT_R32G32B32A32_SFLOAT:
+		case VK_FORMAT_R64G64_SFLOAT:
+		case VK_FORMAT_R64G64B64_SFLOAT:
+		case VK_FORMAT_R64G64B64A64_SFLOAT:
+			return "mix(mix(" + plainCall + ", " + lhs + ", isnan(" + rhs + ")), " + rhs + ", isnan(" + lhs + "))";
+
+		default:
+			return plainCall;
+	}
+}
+
+// Builds the GLSL for an and/or/xor reduction step. The R8*_USCALED formats
+// (the bool types of these tests) use the doubled, logical spelling of the
+// operator, applied per component inside a bvecN constructor for vectors;
+// every other format uses the bitwise operator directly.
+static std::string buildBitwiseOperation(const std::string& bitwiseOp, vk::VkFormat format,
+	const std::string& lhs, const std::string& rhs)
+{
+	int boolComponents = 0;
+
+	switch (format)
+	{
+		case VK_FORMAT_R8_USCALED:			boolComponents = 1;	break;
+		case VK_FORMAT_R8G8_USCALED:		boolComponents = 2;	break;
+		case VK_FORMAT_R8G8B8_USCALED:		boolComponents = 3;	break;
+		case VK_FORMAT_R8G8B8A8_USCALED:	boolComponents = 4;	break;
+		default:
+			return lhs + " " + bitwiseOp + " " + rhs;
+	}
+
+	// "&" -> "&&", "|" -> "||", "^" -> "^^"
+	const std::string logicalOp = bitwiseOp + bitwiseOp;
+
+	if (boolComponents == 1)
+		return lhs + " " + logicalOp + " " + rhs;
+
+	static const char* const swizzles[] = { ".x", ".y", ".z", ".w" };
+
+	std::string expression = std::string("bvec") + static_cast<char>('0' + boolComponents) + "(";
+
+	for (int component = 0; component < boolComponents; ++component)
+	{
+		if (component != 0)
+			expression += ", ";
+
+		expression += lhs + swizzles[component] + " " + logicalOp + " " + rhs + swizzles[component];
+	}
+
+	expression += ")";
+
+	return expression;
+}
+
+// Returns the GLSL expression that combines lhs and rhs with the operation
+// selected by opType, specialised for the given data format. An unknown
+// opType raises DE_FATAL and yields an empty string.
+std::string getOpTypeOperation(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
+{
+	switch (opType)
+	{
+		case OPTYPE_CLUSTERED_ADD:
+			return lhs + " + " + rhs;
+		case OPTYPE_CLUSTERED_MUL:
+			return lhs + " * " + rhs;
+		case OPTYPE_CLUSTERED_MIN:
+			return buildMinMaxOperation("min", format, lhs, rhs);
+		case OPTYPE_CLUSTERED_MAX:
+			return buildMinMaxOperation("max", format, lhs, rhs);
+		case OPTYPE_CLUSTERED_AND:
+			return buildBitwiseOperation("&", format, lhs, rhs);
+		case OPTYPE_CLUSTERED_OR:
+			return buildBitwiseOperation("|", format, lhs, rhs);
+		case OPTYPE_CLUSTERED_XOR:
+			return buildBitwiseOperation("^", format, lhs, rhs);
+		default:
+			DE_FATAL("Unsupported op type");
+			return "";
+	}
+}
+
+// Returns a GLSL constructor expression "<type>(<value>)" holding the identity
+// element of the given operation for the given format, i.e. the starting value
+// of the reference accumulator in the generated shader.
+//
+// An unhandled format raises DE_FATAL and is then treated like a bool format
+// (neither signed, unsigned nor float). Min/max on such a format, or an unknown
+// opType, raises DE_FATAL and yields an empty string.
+std::string getIdentity(int opType, vk::VkFormat format)
+{
+	enum NumericKind
+	{
+		KIND_NONE = 0,	// bool formats, or an unhandled format
+		KIND_SIGNED,
+		KIND_UNSIGNED,
+		KIND_FLOAT
+	};
+
+	NumericKind kind = KIND_NONE;
+
+	switch (format)
+	{
+		case VK_FORMAT_R32_SINT:
+		case VK_FORMAT_R32G32_SINT:
+		case VK_FORMAT_R32G32B32_SINT:
+		case VK_FORMAT_R32G32B32A32_SINT:
+			kind = KIND_SIGNED;
+			break;
+
+		case VK_FORMAT_R32_UINT:
+		case VK_FORMAT_R32G32_UINT:
+		case VK_FORMAT_R32G32B32_UINT:
+		case VK_FORMAT_R32G32B32A32_UINT:
+			kind = KIND_UNSIGNED;
+			break;
+
+		case VK_FORMAT_R32_SFLOAT:
+		case VK_FORMAT_R32G32_SFLOAT:
+		case VK_FORMAT_R32G32B32_SFLOAT:
+		case VK_FORMAT_R32G32B32A32_SFLOAT:
+		case VK_FORMAT_R64_SFLOAT:
+		case VK_FORMAT_R64G64_SFLOAT:
+		case VK_FORMAT_R64G64B64_SFLOAT:
+		case VK_FORMAT_R64G64B64A64_SFLOAT:
+			kind = KIND_FLOAT;
+			break;
+
+		case VK_FORMAT_R8_USCALED:
+		case VK_FORMAT_R8G8_USCALED:
+		case VK_FORMAT_R8G8B8_USCALED:
+		case VK_FORMAT_R8G8B8A8_USCALED:
+			// Bool types carry no numeric kind.
+			break;
+
+		default:
+			DE_FATAL("Unhandled format!");
+			break;
+	}
+
+	// Text placed between the parentheses of the type constructor; stays
+	// null when min/max has no identity for this kind of format.
+	const char* constructorArg = DE_NULL;
+
+	switch (opType)
+	{
+		case OPTYPE_CLUSTERED_ADD:
+		case OPTYPE_CLUSTERED_OR:
+		case OPTYPE_CLUSTERED_XOR:
+			constructorArg = "0";
+			break;
+
+		case OPTYPE_CLUSTERED_MUL:
+			constructorArg = "1";
+			break;
+
+		case OPTYPE_CLUSTERED_AND:
+			constructorArg = "~0";
+			break;
+
+		case OPTYPE_CLUSTERED_MIN:
+			// Largest representable value: +infinity / INT_MAX / UINT_MAX.
+			if (kind == KIND_FLOAT)
+				constructorArg = "intBitsToFloat(0x7f800000)";
+			else if (kind == KIND_SIGNED)
+				constructorArg = "0x7fffffff";
+			else if (kind == KIND_UNSIGNED)
+				constructorArg = "0xffffffffu";
+			break;
+
+		case OPTYPE_CLUSTERED_MAX:
+			// Smallest representable value: -infinity / INT_MIN / 0.
+			if (kind == KIND_FLOAT)
+				constructorArg = "intBitsToFloat(0xff800000)";
+			else if (kind == KIND_SIGNED)
+				constructorArg = "0x80000000";
+			else if (kind == KIND_UNSIGNED)
+				constructorArg = "0";
+			break;
+
+		default:
+			DE_FATAL("Unsupported op type");
+			return "";
+	}
+
+	if (constructorArg == DE_NULL)
+	{
+		DE_FATAL("Unhandled case");
+		return "";
+	}
+
+	return subgroups::getFormatNameForGLSL(format) + "(" + constructorArg + ")";
+}
+
+// Returns a GLSL boolean expression that is true when lhs and rhs match.
+//  - scalar bool/int/uint formats: exact "==" comparison
+//  - float formats with min/max: exact comparison (scalar "==", vector
+//    all(equal()))
+//  - float formats with any other op: absolute difference below 0.00001
+//  - everything else (integer and bool vectors): all(equal())
+std::string getCompare(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
+{
+	const std::string	typeName		= subgroups::getFormatNameForGLSL(format);
+	const bool			isMinOrMax		= (opType == OPTYPE_CLUSTERED_MIN) || (opType == OPTYPE_CLUSTERED_MAX);
+	const std::string	scalarEqual		= "(" + lhs + " == " + rhs + ")";
+	const std::string	vectorEqual		= "all(equal(" + lhs + ", " + rhs + "))";
+	const std::string	absDifference	= "abs(" + lhs + " - " + rhs + ")";
+
+	switch (format)
+	{
+		case VK_FORMAT_R8_USCALED:
+		case VK_FORMAT_R32_UINT:
+		case VK_FORMAT_R32_SINT:
+			return scalarEqual;
+
+		case VK_FORMAT_R32_SFLOAT:
+		case VK_FORMAT_R64_SFLOAT:
+			if (isMinOrMax)
+				return scalarEqual;
+			return "(" + absDifference + " < 0.00001)";
+
+		case VK_FORMAT_R32G32_SFLOAT:
+		case VK_FORMAT_R32G32B32_SFLOAT:
+		case VK_FORMAT_R32G32B32A32_SFLOAT:
+		case VK_FORMAT_R64G64_SFLOAT:
+		case VK_FORMAT_R64G64B64_SFLOAT:
+		case VK_FORMAT_R64G64B64A64_SFLOAT:
+			if (isMinOrMax)
+				return vectorEqual;
+			return "all(lessThan(" + absDifference + ", " + typeName + "(0.00001)))";
+
+		default:
+			return vectorEqual;
+	}
+}
+
+// Parameters of one clustered test case. Instances are brace-initialized in
+// member order {opType, shaderStage, format} by the test-creation code, so the
+// member order must not change.
+struct CaseDefinition
+{
+ int opType; // OpType value selecting the clustered builtin under test
+ VkShaderStageFlags shaderStage; // compute bit, all-graphics mask, or a single graphics stage bit
+ VkFormat format; // element type of the data[] array read by the shaders
+};
+
+// Generates the GLSL statements shared by every shader stage of a clustered
+// test. The snippet declares "bool tempResult" and, for each power-of-two
+// cluster size up to subgroups::maxSupportedSubgroupSize(), emits a block
+// that:
+//   1. evaluates the clustered builtin on data[gl_SubgroupInvocationID],
+//   2. recomputes a reference per cluster from the invocations set in "mask",
+//   3. clears tempResult if the invocation's own cluster disagrees.
+// The surrounding shader must declare "data" and "uvec4 mask" beforehand.
+std::string getBodySource(CaseDefinition caseDef)
+{
+	std::ostringstream body;
+
+	body << " bool tempResult = true;\n";
+
+	for (deUint32 clusterSize = 1; clusterSize <= subgroups::maxSupportedSubgroupSize(); clusterSize *= 2)
+	{
+		const std::string typeName		= subgroups::getFormatNameForGLSL(caseDef.format);
+		const std::string builtinCall	= getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID], clusterSize);\n";
+		const std::string identity		= getIdentity(caseDef.opType, caseDef.format);
+		const std::string accumulate	= getOpTypeOperation(caseDef.opType, caseDef.format, "ref", "data[index]");
+		const std::string resultsMatch	= getCompare(caseDef.opType, caseDef.format, "ref", "op");
+
+		// Scope for this cluster size; skipped at run time when the cluster
+		// is larger than the actual subgroup.
+		body << " {\n";
+		body << " const uint clusterSize = " << clusterSize << ";\n";
+		body << " if (clusterSize <= gl_SubgroupSize)\n";
+		body << " {\n";
+		body << " " << typeName << " op = " << builtinCall;
+
+		// Walk every cluster of the subgroup and rebuild its reference value.
+		body << " for (uint clusterOffset = 0; clusterOffset < gl_SubgroupSize; clusterOffset += clusterSize)\n";
+		body << " {\n";
+		body << " " << typeName << " ref = " << identity << ";\n";
+		body << " for (uint index = clusterOffset; index < (clusterOffset + clusterSize); index++)\n";
+		body << " {\n";
+		body << " if (subgroupBallotBitExtract(mask, index))\n";
+		body << " {\n";
+		body << " ref = " << accumulate << ";\n";
+		body << " }\n";
+		body << " }\n";
+
+		// Only the cluster containing this invocation is compared.
+		body << " if ((clusterOffset <= gl_SubgroupInvocationID) && (gl_SubgroupInvocationID < (clusterOffset + clusterSize)))\n";
+		body << " {\n";
+		body << " if (!" << resultsMatch << ")\n";
+		body << " {\n";
+		body << " tempResult = false;\n";
+		body << " }\n";
+		body << " }\n";
+		body << " }\n";
+		body << " }\n";
+		body << " }\n";
+	}
+
+	return body.str();
+}
+
+// Registers the GLSL programs for the framebuffer variant of a clustered test,
+// in which the stage under test reads its input from a uniform block and
+// reports its verdict through the out_color output.
+//
+// The helper fragment shader is always added, and the helper vertex shader is
+// added unless the vertex stage itself is under test. The tessellation cases
+// also add the helper shader for the other tessellation stage. Any stage other
+// than vertex, geometry, tessellation control or tessellation evaluation
+// raises DE_FATAL.
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
+{
+	const vk::ShaderBuildOptions	spirv13Options	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+	const bool						vertexUnderTest	= (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage);
+
+	subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+	if (!vertexUnderTest)
+		subgroups::setVertexShaderFrameBuffer(programCollection);
+
+	const std::string testBody = getBodySource(caseDef);
+
+	// Version and extension lines common to all four stages.
+	std::ostringstream preamble;
+	preamble << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
+		<< "#extension GL_KHR_shader_subgroup_clustered: enable\n"
+		<< "#extension GL_KHR_shader_subgroup_ballot: enable\n";
+
+	// Uniform block that supplies the input data to the stage under test.
+	std::ostringstream inputBlock;
+	inputBlock << "layout(set = 0, binding = 0) uniform Buffer1\n"
+		<< "{\n"
+		<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+		<< "};\n";
+
+	// Ballot mask declaration followed by the generated checks.
+	const std::string ballotAndChecks = " uvec4 mask = subgroupBallot(true);\n" + testBody;
+
+	switch (caseDef.shaderStage)
+	{
+		case VK_SHADER_STAGE_VERTEX_BIT:
+		{
+			std::ostringstream source;
+
+			source << preamble.str()
+				<< "layout(location = 0) in highp vec4 in_position;\n"
+				<< "layout(location = 0) out float out_color;\n"
+				<< inputBlock.str()
+				<< "\n"
+				<< "void main (void)\n"
+				<< "{\n"
+				<< ballotAndChecks
+				<< " out_color = float(tempResult ? 1 : 0);\n"
+				<< " gl_Position = in_position;\n"
+				<< " gl_PointSize = 1.0f;\n"
+				<< "}\n";
+
+			programCollection.glslSources.add("vert")
+				<< glu::VertexSource(source.str()) << spirv13Options;
+			break;
+		}
+
+		case VK_SHADER_STAGE_GEOMETRY_BIT:
+		{
+			std::ostringstream source;
+
+			source << preamble.str()
+				<< "layout(points) in;\n"
+				<< "layout(points, max_vertices = 1) out;\n"
+				<< "layout(location = 0) out float out_color;\n"
+				<< inputBlock.str()
+				<< "\n"
+				<< "void main (void)\n"
+				<< "{\n"
+				<< ballotAndChecks
+				<< " out_color = tempResult ? 1.0 : 0.0;\n"
+				<< " gl_Position = gl_in[0].gl_Position;\n"
+				<< " EmitVertex();\n"
+				<< " EndPrimitive();\n"
+				<< "}\n";
+
+			programCollection.glslSources.add("geometry")
+				<< glu::GeometrySource(source.str()) << spirv13Options;
+			break;
+		}
+
+		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+		{
+			std::ostringstream source;
+
+			source << preamble.str()
+				<< "layout(vertices = 2) out;\n"
+				<< "layout(location = 0) out float out_color[];\n"
+				<< inputBlock.str()
+				<< "\n"
+				<< "void main (void)\n"
+				<< "{\n"
+				<< " if (gl_InvocationID == 0)\n"
+				<< " {\n"
+				<< " gl_TessLevelOuter[0] = 1.0f;\n"
+				<< " gl_TessLevelOuter[1] = 1.0f;\n"
+				<< " }\n"
+				<< ballotAndChecks
+				<< " out_color[gl_InvocationID] = tempResult ? 1.0 : 0.0;\n"
+				<< " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+				<< "}\n";
+
+			programCollection.glslSources.add("tesc")
+				<< glu::TessellationControlSource(source.str()) << spirv13Options;
+			subgroups::setTesEvalShaderFrameBuffer(programCollection);
+			break;
+		}
+
+		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+		{
+			std::ostringstream source;
+
+			source << preamble.str()
+				<< "layout(isolines, equal_spacing, ccw ) in;\n"
+				<< "layout(location = 0) out float out_color;\n"
+				<< inputBlock.str()
+				<< "\n"
+				<< "void main (void)\n"
+				<< "{\n"
+				<< ballotAndChecks
+				<< " out_color = tempResult ? 1.0 : 0.0;\n"
+				<< " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+				<< "}\n";
+
+			subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+			programCollection.glslSources.add("tese")
+				<< glu::TessellationEvaluationSource(source.str()) << spirv13Options;
+			break;
+		}
+
+		default:
+			DE_FATAL("Unsupported shader stage");
+			break;
+	}
+}
+
+// Registers the GLSL programs for the SSBO-based variants of a clustered test.
+//
+// Compute case: a single "comp" shader that writes one result word per global
+// invocation into Buffer1 (binding 0) and reads its input from Buffer2
+// (binding 1).
+//
+// Any other stage value: one shader for each of vertex, tessellation control,
+// tessellation evaluation, geometry (expanded from a ${TOPOLOGY} template) and
+// fragment. Each non-fragment stage writes into its own result buffer (bindings
+// 0 to 3), the fragment stage writes a colour output, and all of them read the
+// input from Buffer2 at binding 4. The no-subgroup helper shaders are added
+// last.
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+	const vk::ShaderBuildOptions	spirv13Options	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+	const std::string				testBody		= getBodySource(caseDef);
+	const std::string				typeName		= subgroups::getFormatNameForGLSL(caseDef.format);
+
+	// Version and extension lines shared by every shader below.
+	const std::string glslHeader =
+		"#version 450\n"
+		"#extension GL_KHR_shader_subgroup_clustered: enable\n"
+		"#extension GL_KHR_shader_subgroup_ballot: enable\n";
+
+	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+	{
+		const std::string compute = glslHeader +
+			"layout (local_size_x_id = 0, local_size_y_id = 1, "
+			"local_size_z_id = 2) in;\n"
+			"layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+			"{\n"
+			" uint result[];\n"
+			"};\n"
+			"layout(set = 0, binding = 1, std430) buffer Buffer2\n"
+			"{\n"
+			" " + typeName + " data[];\n"
+			"};\n"
+			"\n"
+			"void main (void)\n"
+			"{\n"
+			" uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+			" highp uint offset = globalSize.x * ((globalSize.y * "
+			"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+			"gl_GlobalInvocationID.x;\n"
+			" uvec4 mask = subgroupBallot(true);\n"
+			+ testBody +
+			" result[offset] = tempResult ? 1 : 0;\n"
+			"}\n";
+
+		programCollection.glslSources.add("comp")
+			<< glu::ComputeSource(compute) << spirv13Options;
+		return;
+	}
+
+	// Read-only input buffer declaration shared by all graphics stages.
+	const std::string inputBuffer =
+		"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+		"{\n"
+		" " + typeName + " data[];\n"
+		"};\n";
+
+	// Start of main(): ballot mask followed by the generated checks.
+	const std::string mainProlog =
+		"void main (void)\n"
+		"{\n"
+		" uvec4 mask = subgroupBallot(true);\n"
+		+ testBody;
+
+	const std::string vertex = glslHeader +
+		"layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+		"{\n"
+		" uint result[];\n"
+		"};\n"
+		+ inputBuffer +
+		"\n"
+		+ mainProlog +
+		" result[gl_VertexIndex] = tempResult ? 1 : 0;\n"
+		" float pixelSize = 2.0f/1024.0f;\n"
+		" float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+		" gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+		"}\n";
+
+	programCollection.glslSources.add("vert")
+		<< glu::VertexSource(vertex) << spirv13Options;
+
+	const std::string tessControl = glslHeader +
+		"layout(vertices=1) out;\n"
+		"layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+		"{\n"
+		" uint result[];\n"
+		"};\n"
+		+ inputBuffer +
+		"\n"
+		+ mainProlog +
+		" result[gl_PrimitiveID] = tempResult ? 1 : 0;\n"
+		" if (gl_InvocationID == 0)\n"
+		" {\n"
+		" gl_TessLevelOuter[0] = 1.0f;\n"
+		" gl_TessLevelOuter[1] = 1.0f;\n"
+		" }\n"
+		" gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+		"}\n";
+
+	programCollection.glslSources.add("tesc")
+		<< glu::TessellationControlSource(tessControl) << spirv13Options;
+
+	const std::string tessEvaluation = glslHeader +
+		"layout(isolines) in;\n"
+		"layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+		"{\n"
+		" uint result[];\n"
+		"};\n"
+		+ inputBuffer +
+		"\n"
+		+ mainProlog +
+		" result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult ? 1 : 0;\n"
+		" float pixelSize = 2.0f/1024.0f;\n"
+		" gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+		"}\n";
+
+	programCollection.glslSources.add("tese")
+		<< glu::TessellationEvaluationSource(tessEvaluation) << spirv13Options;
+
+	// ${TOPOLOGY} is left in place; it is substituted by
+	// addGeometryShadersFromTemplate().
+	const std::string geometryTemplate = glslHeader +
+		"layout(${TOPOLOGY}) in;\n"
+		"layout(points, max_vertices = 1) out;\n"
+		"layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+		"{\n"
+		" uint result[];\n"
+		"};\n"
+		+ inputBuffer +
+		"\n"
+		+ mainProlog +
+		" result[gl_PrimitiveIDIn] = tempResult ? 1 : 0;\n"
+		" gl_Position = gl_in[0].gl_Position;\n"
+		" EmitVertex();\n"
+		" EndPrimitive();\n"
+		"}\n";
+
+	subgroups::addGeometryShadersFromTemplate(geometryTemplate, spirv13Options, programCollection.glslSources);
+
+	// The fragment stage reports through a colour output instead of a
+	// buffer, and has no blank line before main().
+	const std::string fragment = glslHeader +
+		"layout(location = 0) out uint result;\n"
+		+ inputBuffer
+		+ mainProlog +
+		" result = tempResult ? 1 : 0;\n"
+		"}\n";
+
+	programCollection.glslSources.add("fragment")
+		<< glu::FragmentSource(fragment) << spirv13Options;
+
+	subgroups::addNoSubgroupShader(programCollection);
+}
+
+// Throws NotSupportedError when the device cannot run the case at all. The
+// checks run in this order: subgroup support, the clustered feature bit, then
+// double support (only if the case uses a double format).
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+	const bool hasSubgroups = subgroups::isSubgroupSupported(context);
+
+	if (!hasSubgroups)
+		TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+
+	const bool hasClustered = subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_CLUSTERED_BIT);
+
+	if (!hasClustered)
+		TCU_THROW(NotSupportedError, "Device does not support subgroup clustered operations");
+
+	if (subgroups::isDoubleFormat(caseDef.format))
+	{
+		if (!subgroups::isDoubleSupportedForDevice(context))
+			TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+	}
+}
+
+// Runs the framebuffer variant of a clustered test for a single graphics
+// stage.
+//
+// If the device lacks subgroup operations in that stage, the case fails when
+// the stage is required to support them and throws NotSupportedError
+// otherwise. The input holds maxSupportedSubgroupSize() non-zero-initialized
+// elements of the case format. Both tessellation stages go through
+// makeTessellationEvaluationFrameBufferTest(), which receives the stage under
+// test as its last argument. Any other stage throws InternalError.
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+	const bool stageHasSubgroupOps = subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage);
+
+	if (!stageHasSubgroupOps)
+	{
+		if (!subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+
+		return tcu::TestStatus::fail(
+			"Shader stage " +
+			subgroups::getShaderStageName(caseDef.shaderStage) +
+			" is required to support subgroup operations!");
+	}
+
+	subgroups::SSBOData inputData;
+	inputData.format			= caseDef.format;
+	inputData.numElements		= subgroups::maxSupportedSubgroupSize();
+	inputData.initializeType	= subgroups::SSBOData::InitializeNonZero;
+
+	switch (caseDef.shaderStage)
+	{
+		case VK_SHADER_STAGE_VERTEX_BIT:
+			return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+
+		case VK_SHADER_STAGE_GEOMETRY_BIT:
+			return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+
+		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+			return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+
+		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+			return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+
+		default:
+			TCU_THROW(InternalError, "Unhandled shader stage");
+	}
+}
+
+// Runs the SSBO-based variant of a clustered test.
+//
+// Compute case: fails if the compute stage lacks subgroup operations,
+// otherwise runs makeComputeTest().
+//
+// Graphics case: the requested stages are intersected with the stages the
+// device reports in VkPhysicalDeviceSubgroupProperties. If the device has no
+// vertex-stage SSBO support and the result is not already fragment-only, the
+// test narrows to the fragment stage, or throws NotSupportedError when
+// fragment is not among the stages. An empty stage set also throws
+// NotSupportedError. The input buffer is bound at binding 4 for the remaining
+// stages.
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+	{
+		const bool computeHasSubgroupOps = subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage);
+
+		if (!computeHasSubgroupOps)
+		{
+			return tcu::TestStatus::fail(
+				"Shader stage " +
+				subgroups::getShaderStageName(caseDef.shaderStage) +
+				" is required to support subgroup operations!");
+		}
+
+		subgroups::SSBOData computeInput;
+		computeInput.format			= caseDef.format;
+		computeInput.numElements	= subgroups::maxSupportedSubgroupSize();
+		computeInput.initializeType	= subgroups::SSBOData::InitializeNonZero;
+
+		return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &computeInput, 1, checkCompute);
+	}
+
+	// Query which stages support subgroup operations; the subgroup
+	// properties struct is chained behind the generic properties query.
+	VkPhysicalDeviceSubgroupProperties subgroupProperties;
+	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+	subgroupProperties.pNext = DE_NULL;
+
+	VkPhysicalDeviceProperties2 properties;
+	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+	properties.pNext = &subgroupProperties;
+
+	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+	VkShaderStageFlagBits activeStages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProperties.supportedStages);
+
+	const bool fragmentOnly = (VK_SHADER_STAGE_FRAGMENT_BIT == activeStages);
+
+	if (!fragmentOnly && !subgroups::isVertexSSBOSupportedForDevice(context))
+	{
+		const bool fragmentAvailable = (activeStages & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
+
+		if (!fragmentAvailable)
+			TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+
+		// Drop the stages that would need SSBO writes.
+		activeStages = VK_SHADER_STAGE_FRAGMENT_BIT;
+	}
+
+	if ((VkShaderStageFlagBits)0u == activeStages)
+		TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+	subgroups::SSBOData graphicsInput;
+	graphicsInput.format			= caseDef.format;
+	graphicsInput.numElements		= subgroups::maxSupportedSubgroupSize();
+	graphicsInput.initializeType	= subgroups::SSBOData::InitializeNonZero;
+	graphicsInput.binding			= 4u;
+	graphicsInput.stages			= activeStages;
+
+	return subgroups::allStages(context, VK_FORMAT_R32_UINT, &graphicsInput, 1, checkVertexPipelineStages, activeStages);
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+// Builds the "clustered" test group with three children:
+//   graphics    - one case per (operation, format), run across all graphics
+//                 stages
+//   compute     - one case per (operation, format), run in compute
+//   framebuffer - one case per (operation, format, stage) for the vertex,
+//                 tessellation evaluation, tessellation control and geometry
+//                 stages
+// Float formats skip the and/or/xor operations, and bool (R8*_USCALED) formats
+// run only those. The group is released from its owning pointer before it is
+// returned.
+tcu::TestCaseGroup* createSubgroupsClusteredTests(tcu::TestContext& testCtx)
+{
+	de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+		testCtx, "graphics", "Subgroup clustered category tests: graphics"));
+	de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+		testCtx, "compute", "Subgroup clustered category tests: compute"));
+	de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+		testCtx, "framebuffer", "Subgroup clustered category tests: framebuffer"));
+
+	const VkShaderStageFlags framebufferStages[] =
+	{
+		VK_SHADER_STAGE_VERTEX_BIT,
+		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+		VK_SHADER_STAGE_GEOMETRY_BIT
+	};
+
+	const VkFormat dataFormats[] =
+	{
+		VK_FORMAT_R32_SINT,
+		VK_FORMAT_R32G32_SINT,
+		VK_FORMAT_R32G32B32_SINT,
+		VK_FORMAT_R32G32B32A32_SINT,
+		VK_FORMAT_R32_UINT,
+		VK_FORMAT_R32G32_UINT,
+		VK_FORMAT_R32G32B32_UINT,
+		VK_FORMAT_R32G32B32A32_UINT,
+		VK_FORMAT_R32_SFLOAT,
+		VK_FORMAT_R32G32_SFLOAT,
+		VK_FORMAT_R32G32B32_SFLOAT,
+		VK_FORMAT_R32G32B32A32_SFLOAT,
+		VK_FORMAT_R64_SFLOAT,
+		VK_FORMAT_R64G64_SFLOAT,
+		VK_FORMAT_R64G64B64_SFLOAT,
+		VK_FORMAT_R64G64B64A64_SFLOAT,
+		VK_FORMAT_R8_USCALED,
+		VK_FORMAT_R8G8_USCALED,
+		VK_FORMAT_R8G8B8_USCALED,
+		VK_FORMAT_R8G8B8A8_USCALED,
+	};
+
+	for (int formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(dataFormats); ++formatNdx)
+	{
+		const VkFormat format = dataFormats[formatNdx];
+
+		// Classify the format once; the result is the same for every op.
+		bool formatIsFloat	= false;
+		bool formatIsBool	= false;
+
+		switch (format)
+		{
+			case VK_FORMAT_R32_SFLOAT:
+			case VK_FORMAT_R32G32_SFLOAT:
+			case VK_FORMAT_R32G32B32_SFLOAT:
+			case VK_FORMAT_R32G32B32A32_SFLOAT:
+			case VK_FORMAT_R64_SFLOAT:
+			case VK_FORMAT_R64G64_SFLOAT:
+			case VK_FORMAT_R64G64B64_SFLOAT:
+			case VK_FORMAT_R64G64B64A64_SFLOAT:
+				formatIsFloat = true;
+				break;
+
+			case VK_FORMAT_R8_USCALED:
+			case VK_FORMAT_R8G8_USCALED:
+			case VK_FORMAT_R8G8B8_USCALED:
+			case VK_FORMAT_R8G8B8A8_USCALED:
+				formatIsBool = true;
+				break;
+
+			default:
+				break;
+		}
+
+		for (int opNdx = 0; opNdx < OPTYPE_CLUSTERED_LAST; ++opNdx)
+		{
+			const bool opIsBitwise =	(opNdx == OPTYPE_CLUSTERED_AND) ||
+										(opNdx == OPTYPE_CLUSTERED_OR) ||
+										(opNdx == OPTYPE_CLUSTERED_XOR);
+
+			// Floats have no bitwise category, and bools are only tested
+			// with the bitwise category.
+			if ((formatIsFloat && opIsBitwise) || (formatIsBool && !opIsBitwise))
+				continue;
+
+			const std::string caseName = de::toLower(getOpTypeName(opNdx))
+				+ "_" + subgroups::getFormatNameForGLSL(format);
+
+			const CaseDefinition computeCase = {opNdx, VK_SHADER_STAGE_COMPUTE_BIT, format};
+			addFunctionCaseWithPrograms(computeGroup.get(), caseName, "",
+				supportedCheck, initPrograms, test, computeCase);
+
+			const CaseDefinition graphicsCase = {opNdx, VK_SHADER_STAGE_ALL_GRAPHICS, format};
+			addFunctionCaseWithPrograms(graphicGroup.get(), caseName, "",
+				supportedCheck, initPrograms, test, graphicsCase);
+
+			for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(framebufferStages); ++stageNdx)
+			{
+				const CaseDefinition framebufferCase = {opNdx, framebufferStages[stageNdx], format};
+				const std::string stageCaseName = caseName + "_" + getShaderStageName(framebufferCase.shaderStage);
+
+				addFunctionCaseWithPrograms(framebufferGroup.get(), stageCaseName, "",
+					supportedCheck, initFrameBufferPrograms, noSSBOtest, framebufferCase);
+			}
+		}
+	}
+
+	de::MovePtr<tcu::TestCaseGroup> clusteredGroup(new tcu::TestCaseGroup(
+		testCtx, "clustered", "Subgroup clustered category tests"));
+
+	clusteredGroup->addChild(graphicGroup.release());
+	clusteredGroup->addChild(computeGroup.release());
+	clusteredGroup->addChild(framebufferGroup.release());
+
+	return clusteredGroup.release();
+}
+
+} // subgroups
+} // vkt
--- /dev/null
+#ifndef _VKTSUBGROUPSCLUSTEREDTESTS_HPP
+#define _VKTSUBGROUPSCLUSTEREDTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Builds the "clustered" test group with its "graphics", "compute" and
+// "framebuffer" child groups of subgroup clustered-operation tests. The group
+// is released from its owning pointer before it is returned, so the caller
+// receives a raw pointer and is responsible for the group's lifetime.
+tcu::TestCaseGroup* createSubgroupsClusteredTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSCLUSTEREDTESTS_HPP
--- /dev/null
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ * Copyright (c) 2018 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsPartitionedTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Subgroup operations exercised by these tests, laid out as three runs of
+// seven (plain, inclusive, exclusive), each in the order
+// ADD, MUL, MIN, MAX, AND, OR, XOR.
+//
+// The layout is relied upon elsewhere in this file:
+//  - getTestString() detects exclusive operations with the range check
+//    [OPTYPE_EXCLUSIVE_ADD, OPTYPE_EXCLUSIVE_XOR];
+//  - the test factory iterates every value in [0, OPTYPE_LAST).
+// Keep the enumerators contiguous and grouped when adding new ones.
+enum OpType
+{
+ OPTYPE_ADD = 0,
+ OPTYPE_MUL,
+ OPTYPE_MIN,
+ OPTYPE_MAX,
+ OPTYPE_AND,
+ OPTYPE_OR,
+ OPTYPE_XOR,
+ OPTYPE_INCLUSIVE_ADD,
+ OPTYPE_INCLUSIVE_MUL,
+ OPTYPE_INCLUSIVE_MIN,
+ OPTYPE_INCLUSIVE_MAX,
+ OPTYPE_INCLUSIVE_AND,
+ OPTYPE_INCLUSIVE_OR,
+ OPTYPE_INCLUSIVE_XOR,
+ OPTYPE_EXCLUSIVE_ADD,
+ OPTYPE_EXCLUSIVE_MUL,
+ OPTYPE_EXCLUSIVE_MIN,
+ OPTYPE_EXCLUSIVE_MAX,
+ OPTYPE_EXCLUSIVE_AND,
+ OPTYPE_EXCLUSIVE_OR,
+ OPTYPE_EXCLUSIVE_XOR,
+ OPTYPE_LAST // number of operations; not an operation itself
+};
+
+// Result checker for the graphics-pipeline variants.
+// Treats datas[0] as an array of `width` 32-bit words and succeeds only when
+// every word equals 0xFFFFFF, i.e. every sub-test bit produced by
+// getTestString() was set. The third parameter is unused.
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+                                      deUint32 width, deUint32)
+{
+    const deUint32        allBitsPassed = 0xFFFFFF;
+    const deUint32* const results       = reinterpret_cast<const deUint32*>(datas[0]);
+
+    // Advance past every passing entry; stopping early means a failure was found.
+    deUint32 ndx = 0;
+    while (ndx < width && results[ndx] == allBitsPassed)
+        ++ndx;
+
+    return ndx == width;
+}
+
+// Result checker for the compute variant.
+// Walks every invocation of the dispatch (numWorkgroups x localSize), maps it
+// to the same linear offset the compute shader writes to
+// (sizeX * (sizeY * z + y) + x over global invocation ids) and requires the
+// word stored there to be 0xFFFFFF. The last parameter is unused.
+static bool checkCompute(std::vector<const void*> datas,
+                         const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                         deUint32)
+{
+    const deUint32        allBitsPassed = 0xFFFFFF;
+    const deUint32* const results       = reinterpret_cast<const deUint32*>(datas[0]);
+
+    // Global extent in X and Y; Z is not needed for the linearisation.
+    const deUint32 sizeX = numWorkgroups[0] * localSize[0];
+    const deUint32 sizeY = numWorkgroups[1] * localSize[1];
+
+    for (deUint32 groupX = 0; groupX < numWorkgroups[0]; ++groupX)
+    for (deUint32 groupY = 0; groupY < numWorkgroups[1]; ++groupY)
+    for (deUint32 groupZ = 0; groupZ < numWorkgroups[2]; ++groupZ)
+    for (deUint32 localX = 0; localX < localSize[0]; ++localX)
+    for (deUint32 localY = 0; localY < localSize[1]; ++localY)
+    for (deUint32 localZ = 0; localZ < localSize[2]; ++localZ)
+    {
+        const deUint32 globalX = groupX * localSize[0] + localX;
+        const deUint32 globalY = groupY * localSize[1] + localY;
+        const deUint32 globalZ = groupZ * localSize[2] + localZ;
+
+        const deUint32 linearIndex = sizeX * ((sizeY * globalZ) + globalY) + globalX;
+
+        if (results[linearIndex] != allBitsPassed)
+            return false;
+    }
+
+    return true;
+}
+
+// Maps an OpType value to the name of the corresponding non-partitioned GLSL
+// subgroup built-in. Unknown values trigger DE_FATAL and yield an empty string.
+std::string getOpTypeName(int opType)
+{
+    switch (opType)
+    {
+        case OPTYPE_ADD:            return "subgroupAdd";
+        case OPTYPE_MUL:            return "subgroupMul";
+        case OPTYPE_MIN:            return "subgroupMin";
+        case OPTYPE_MAX:            return "subgroupMax";
+        case OPTYPE_AND:            return "subgroupAnd";
+        case OPTYPE_OR:             return "subgroupOr";
+        case OPTYPE_XOR:            return "subgroupXor";
+
+        case OPTYPE_INCLUSIVE_ADD:  return "subgroupInclusiveAdd";
+        case OPTYPE_INCLUSIVE_MUL:  return "subgroupInclusiveMul";
+        case OPTYPE_INCLUSIVE_MIN:  return "subgroupInclusiveMin";
+        case OPTYPE_INCLUSIVE_MAX:  return "subgroupInclusiveMax";
+        case OPTYPE_INCLUSIVE_AND:  return "subgroupInclusiveAnd";
+        case OPTYPE_INCLUSIVE_OR:   return "subgroupInclusiveOr";
+        case OPTYPE_INCLUSIVE_XOR:  return "subgroupInclusiveXor";
+
+        case OPTYPE_EXCLUSIVE_ADD:  return "subgroupExclusiveAdd";
+        case OPTYPE_EXCLUSIVE_MUL:  return "subgroupExclusiveMul";
+        case OPTYPE_EXCLUSIVE_MIN:  return "subgroupExclusiveMin";
+        case OPTYPE_EXCLUSIVE_MAX:  return "subgroupExclusiveMax";
+        case OPTYPE_EXCLUSIVE_AND:  return "subgroupExclusiveAnd";
+        case OPTYPE_EXCLUSIVE_OR:   return "subgroupExclusiveOr";
+        case OPTYPE_EXCLUSIVE_XOR:  return "subgroupExclusiveXor";
+
+        default:
+            DE_FATAL("Unsupported op type");
+            return "";
+    }
+}
+
+// Maps an OpType value to the name of the corresponding
+// GL_NV_shader_subgroup_partitioned built-in (the "...NV" functions that take
+// an extra ballot argument). Unknown values trigger DE_FATAL and yield an
+// empty string.
+std::string getOpTypeNamePartitioned(int opType)
+{
+    switch (opType)
+    {
+        case OPTYPE_ADD:            return "subgroupPartitionedAddNV";
+        case OPTYPE_MUL:            return "subgroupPartitionedMulNV";
+        case OPTYPE_MIN:            return "subgroupPartitionedMinNV";
+        case OPTYPE_MAX:            return "subgroupPartitionedMaxNV";
+        case OPTYPE_AND:            return "subgroupPartitionedAndNV";
+        case OPTYPE_OR:             return "subgroupPartitionedOrNV";
+        case OPTYPE_XOR:            return "subgroupPartitionedXorNV";
+
+        case OPTYPE_INCLUSIVE_ADD:  return "subgroupPartitionedInclusiveAddNV";
+        case OPTYPE_INCLUSIVE_MUL:  return "subgroupPartitionedInclusiveMulNV";
+        case OPTYPE_INCLUSIVE_MIN:  return "subgroupPartitionedInclusiveMinNV";
+        case OPTYPE_INCLUSIVE_MAX:  return "subgroupPartitionedInclusiveMaxNV";
+        case OPTYPE_INCLUSIVE_AND:  return "subgroupPartitionedInclusiveAndNV";
+        case OPTYPE_INCLUSIVE_OR:   return "subgroupPartitionedInclusiveOrNV";
+        case OPTYPE_INCLUSIVE_XOR:  return "subgroupPartitionedInclusiveXorNV";
+
+        case OPTYPE_EXCLUSIVE_ADD:  return "subgroupPartitionedExclusiveAddNV";
+        case OPTYPE_EXCLUSIVE_MUL:  return "subgroupPartitionedExclusiveMulNV";
+        case OPTYPE_EXCLUSIVE_MIN:  return "subgroupPartitionedExclusiveMinNV";
+        case OPTYPE_EXCLUSIVE_MAX:  return "subgroupPartitionedExclusiveMaxNV";
+        case OPTYPE_EXCLUSIVE_AND:  return "subgroupPartitionedExclusiveAndNV";
+        case OPTYPE_EXCLUSIVE_OR:   return "subgroupPartitionedExclusiveOrNV";
+        case OPTYPE_EXCLUSIVE_XOR:  return "subgroupPartitionedExclusiveXorNV";
+
+        default:
+            DE_FATAL("Unsupported op type");
+            return "";
+    }
+}
+
+// Returns a GLSL expression for the identity element of `opType` in the GLSL
+// type that corresponds to `format`, written as "<type>(<literal>)".
+//
+//  add / or / xor : 0
+//  mul            : 1
+//  and            : ~0
+//  min            : +infinity (float), 0x7fffffff (int), 0xffffffffu (uint)
+//  max            : -infinity (float), 0x80000000 (int), 0 (uint)
+//
+// The 8-bit USCALED formats stand for bool types and have no min/max
+// identity; asking for one hits DE_FATAL. Unhandled formats or op types also
+// hit DE_FATAL; in all of these cases an empty string is returned.
+std::string getIdentity(int opType, vk::VkFormat format)
+{
+    enum NumericKind
+    {
+        KIND_NONE,      // bool-like formats: neither int, uint nor float
+        KIND_SINT,
+        KIND_UINT,
+        KIND_FLOAT
+    };
+
+    NumericKind kind = KIND_NONE;
+
+    switch (format)
+    {
+        case VK_FORMAT_R32_SINT:
+        case VK_FORMAT_R32G32_SINT:
+        case VK_FORMAT_R32G32B32_SINT:
+        case VK_FORMAT_R32G32B32A32_SINT:
+            kind = KIND_SINT;
+            break;
+
+        case VK_FORMAT_R32_UINT:
+        case VK_FORMAT_R32G32_UINT:
+        case VK_FORMAT_R32G32B32_UINT:
+        case VK_FORMAT_R32G32B32A32_UINT:
+            kind = KIND_UINT;
+            break;
+
+        case VK_FORMAT_R32_SFLOAT:
+        case VK_FORMAT_R32G32_SFLOAT:
+        case VK_FORMAT_R32G32B32_SFLOAT:
+        case VK_FORMAT_R32G32B32A32_SFLOAT:
+        case VK_FORMAT_R64_SFLOAT:
+        case VK_FORMAT_R64G64_SFLOAT:
+        case VK_FORMAT_R64G64B64_SFLOAT:
+        case VK_FORMAT_R64G64B64A64_SFLOAT:
+            kind = KIND_FLOAT;
+            break;
+
+        case VK_FORMAT_R8_USCALED:
+        case VK_FORMAT_R8G8_USCALED:
+        case VK_FORMAT_R8G8B8_USCALED:
+        case VK_FORMAT_R8G8B8A8_USCALED:
+            break; // bool types are not anything
+
+        default:
+            DE_FATAL("Unhandled format!");
+            return "";
+    }
+
+    // Literal placed inside the type constructor; chosen per operation below.
+    const char* literal = DE_NULL;
+
+    switch (opType)
+    {
+        case OPTYPE_ADD:
+        case OPTYPE_INCLUSIVE_ADD:
+        case OPTYPE_EXCLUSIVE_ADD:
+        case OPTYPE_OR:
+        case OPTYPE_INCLUSIVE_OR:
+        case OPTYPE_EXCLUSIVE_OR:
+        case OPTYPE_XOR:
+        case OPTYPE_INCLUSIVE_XOR:
+        case OPTYPE_EXCLUSIVE_XOR:
+            literal = "0";
+            break;
+
+        case OPTYPE_MUL:
+        case OPTYPE_INCLUSIVE_MUL:
+        case OPTYPE_EXCLUSIVE_MUL:
+            literal = "1";
+            break;
+
+        case OPTYPE_AND:
+        case OPTYPE_INCLUSIVE_AND:
+        case OPTYPE_EXCLUSIVE_AND:
+            literal = "~0";
+            break;
+
+        case OPTYPE_MIN:
+        case OPTYPE_INCLUSIVE_MIN:
+        case OPTYPE_EXCLUSIVE_MIN:
+            if (kind == KIND_FLOAT)
+                literal = "intBitsToFloat(0x7f800000)";
+            else if (kind == KIND_SINT)
+                literal = "0x7fffffff";
+            else if (kind == KIND_UINT)
+                literal = "0xffffffffu";
+            else
+            {
+                DE_FATAL("Unhandled case");
+                return "";
+            }
+            break;
+
+        case OPTYPE_MAX:
+        case OPTYPE_INCLUSIVE_MAX:
+        case OPTYPE_EXCLUSIVE_MAX:
+            if (kind == KIND_FLOAT)
+                literal = "intBitsToFloat(0xff800000)";
+            else if (kind == KIND_SINT)
+                literal = "0x80000000";
+            else if (kind == KIND_UINT)
+                literal = "0";
+            else
+            {
+                DE_FATAL("Unhandled case");
+                return "";
+            }
+            break;
+
+        default:
+            DE_FATAL("Unsupported op type");
+            return "";
+    }
+
+    return subgroups::getFormatNameForGLSL(format) + "(" + literal + ")";
+}
+
+// Builds a GLSL boolean expression that compares `lhs` with `rhs`.
+//
+//  - scalar bool / int / uint formats: exact "==";
+//  - float and double formats: exact comparison for min/max operations,
+//    otherwise an absolute tolerance of 0.00001 (component-wise for vectors);
+//  - everything else (integer and bool vectors): all(equal(...)).
+std::string getCompare(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
+{
+    const std::string formatName  = subgroups::getFormatNameForGLSL(format);
+    const std::string exactScalar = "(" + lhs + " == " + rhs + ")";
+    const std::string exactVector = "all(equal(" + lhs + ", " + rhs + "))";
+
+    // Min and max return one of their inputs, so they can be compared exactly
+    // even for floating-point data.
+    bool isMinOrMax = false;
+    switch (opType)
+    {
+        case OPTYPE_MIN:
+        case OPTYPE_INCLUSIVE_MIN:
+        case OPTYPE_EXCLUSIVE_MIN:
+        case OPTYPE_MAX:
+        case OPTYPE_INCLUSIVE_MAX:
+        case OPTYPE_EXCLUSIVE_MAX:
+            isMinOrMax = true;
+            break;
+        default:
+            break;
+    }
+
+    switch (format)
+    {
+        case VK_FORMAT_R8_USCALED:
+        case VK_FORMAT_R32_UINT:
+        case VK_FORMAT_R32_SINT:
+            return exactScalar;
+
+        case VK_FORMAT_R32_SFLOAT:
+        case VK_FORMAT_R64_SFLOAT:
+            if (isMinOrMax)
+                return exactScalar;
+            return "(abs(" + lhs + " - " + rhs + ") < 0.00001)";
+
+        case VK_FORMAT_R32G32_SFLOAT:
+        case VK_FORMAT_R32G32B32_SFLOAT:
+        case VK_FORMAT_R32G32B32A32_SFLOAT:
+        case VK_FORMAT_R64G64_SFLOAT:
+        case VK_FORMAT_R64G64B64_SFLOAT:
+        case VK_FORMAT_R64G64B64A64_SFLOAT:
+            if (isMinOrMax)
+                return exactVector;
+            return "all(lessThan(abs(" + lhs + " - " + rhs + "), " + formatName + "(0.00001)))";
+
+        default:
+            return exactVector;
+    }
+}
+
+// Parameters that identify one generated test case; passed by value to the
+// program-init, support-check and test functions of this file.
+struct CaseDefinition
+{
+ int opType; // an OpType enumerator value (the test factory passes its loop index)
+ VkShaderStageFlags shaderStage; // a single stage bit, or VK_SHADER_STAGE_ALL_GRAPHICS for the "_graphic" cases
+ VkFormat format; // selects the GLSL type of the input data via getFormatNameForGLSL()
+};
+
+// Generates the GLSL statements shared by every shader in this file.
+//
+// The emitted code expects `mask` (a uvec4 ballot) and `data[]` to be declared
+// by the surrounding shader and leaves its verdict in `uint tempResult`.
+// Each sub-test sets its own bit(s); a fully passing invocation ends up with
+// 0xFFFFFF:
+//
+//   0x1                 partition == whole subgroup matches the non-partitioned op
+//   0x2                 same, executed only by even invocations (odd ones set the bit directly)
+//   0x4                 every invocation in its own subset
+//   0x4     << N        hashed partitions, N = 1..15
+//   0x20000 << N        hashed partitions inside flow control, N = 1..6,
+//                       executed by odd invocations (even ones set 0xFC0000)
+//
+// NOTE: tempResult can't have anything in bits 31:24 to avoid int->float
+// conversion overflow in framebuffer tests.
+string getTestString(const CaseDefinition &caseDef)
+{
+    const string valueType      = subgroups::getFormatNameForGLSL(caseDef.format);
+    const string partitionedOp  = getOpTypeNamePartitioned(caseDef.opType);
+    const string referenceOp    = getOpTypeName(caseDef.opType);
+    const string ownValue       = "data[gl_SubgroupInvocationID]";
+
+    // This "hash" of the invocation id produces interesting/randomish partitions.
+    const string hashExpr       = "((id%N)+(id%(N+1))-(id%2)+(id/2))%((N+1)/2)";
+
+    // With each invocation alone in its subset, an exclusive op yields the
+    // identity while a reduce/inclusive op yields the invocation's own value.
+    const bool   isExclusive    = (caseDef.opType >= OPTYPE_EXCLUSIVE_ADD) &&
+                                  (caseDef.opType <= OPTYPE_EXCLUSIVE_XOR);
+    const string selfExpected   = isExclusive ? getIdentity(caseDef.opType, caseDef.format) : ownValue;
+
+    const string allVsRef       = getCompare(caseDef.opType, caseDef.format, "allResult", "refResult");
+    const string selfVsExpected = getCompare(caseDef.opType, caseDef.format, "selfResult", selfExpected);
+    const string hashVsIndex    = getCompare(caseDef.opType, caseDef.format, "idhashFmt", "iFmt");
+    const string partVsSubset   = getCompare(caseDef.opType, caseDef.format, "partitionedResult", "subsetResult");
+
+    // Computes the partitioned op over the all-invocations ballot and the
+    // non-partitioned reference, then opens the "results agree" branch.
+    const string wholeSubgroupCheck =
+        " " + valueType + " allResult = " + partitionedOp + "(" + ownValue + ", allBallot);\n"
+        " " + valueType + " refResult = " + referenceOp + "(" + ownValue + ");\n"
+        " if (" + allVsRef + ") {\n";
+
+    // Body of a "for N" loop over hashed partitions, up to the bit that gets
+    // shifted by N; hashLoopTail finishes the statement and closes the scopes.
+    const string hashLoopBody =
+        " " + valueType + " idhashFmt = " + valueType + "(" + hashExpr + ");\n"
+        " uvec4 partitionBallot = subgroupPartitionNV(idhashFmt) & mask;\n"
+        " " + valueType + " partitionedResult = " + partitionedOp + "(" + ownValue + ", partitionBallot);\n"
+        " for (uint i = 0; i < N; ++i) {\n"
+        " " + valueType + " iFmt = " + valueType + "(i);\n"
+        " if (" + hashVsIndex + ") {\n"
+        " " + valueType + " subsetResult = " + referenceOp + "(" + ownValue + ");\n"
+        " tempResult |= " + partVsSubset + " ? (";
+    const string hashLoopTail =
+        " << N) : 0;\n"
+        " }\n"
+        " }\n"
+        " }\n";
+
+    string src;
+
+    src += " uint tempResult = 0;\n"
+           " uint id = gl_SubgroupInvocationID;\n";
+
+    // Test the case where the partition has a single subset with all invocations in it.
+    // This should generate the same result as the non-partitioned function.
+    src += " uvec4 allBallot = mask;\n" + wholeSubgroupCheck +
+           " tempResult |= 0x1;\n"
+           " }\n";
+
+    // The definition of a partition doesn't forbid bits corresponding to inactive
+    // invocations being in the subset with active invocations. In other words, test that
+    // bits corresponding to inactive invocations are ignored.
+    src += " if (0 == (gl_SubgroupInvocationID % 2)) {\n" + wholeSubgroupCheck +
+           " tempResult |= 0x2;\n"
+           " }\n"
+           " } else {\n"
+           " tempResult |= 0x2;\n"
+           " }\n";
+
+    // Test the case where the partition has each invocation in a unique subset.
+    src += " uvec4 selfBallot = subgroupPartitionNV(gl_SubgroupInvocationID);\n"
+           " " + valueType + " selfResult = " + partitionedOp + "(" + ownValue + ", selfBallot);\n"
+           " if (" + selfVsExpected + ") {\n"
+           " tempResult |= 0x4;\n"
+           " }\n";
+
+    // Test "random" partitions based on a hash of the invocation id.
+    src += " for (uint N = 1; N < 16; ++N) {\n" + hashLoopBody + "0x4" + hashLoopTail;
+
+    // tests in flow control:
+    src += " if (1 == (gl_SubgroupInvocationID % 2)) {\n"
+           " for (uint N = 1; N < 7; ++N) {\n" + hashLoopBody + "0x20000" + hashLoopTail +
+           " } else {\n"
+           " tempResult |= 0xFC0000;\n"
+           " }\n";
+
+    return src;
+}
+
+// Registers the shader sources for the "_framebuffer" test variants.
+//
+// Exactly one stage (caseDef.shaderStage: vertex, geometry, tessellation
+// control or tessellation evaluation) runs the generated subgroup test body
+// and forwards tempResult, converted to float, through `out_color`. The
+// remaining stages come from the subgroups:: helper functions:
+//  - the fragment shader is always added via setFragmentShaderFrameBuffer();
+//  - a helper vertex shader is added unless the vertex stage is under test;
+//  - a tessellation-control test also needs the helper evaluation shader, and
+//    a tessellation-evaluation test the helper control shader.
+// Input data is read from a uniform block sized by maxSupportedSubgroupSize().
+// Any other stage hits DE_FATAL.
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
+{
+    const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+    std::ostringstream bdy;
+
+    subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+    if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+        subgroups::setVertexShaderFrameBuffer(programCollection);
+
+    bdy << getTestString(caseDef);
+
+    if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+    {
+        std::ostringstream vertexSrc;
+        vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+            << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
+            << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+            << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+            << "layout(location = 0) in highp vec4 in_position;\n"
+            << "layout(location = 0) out float out_color;\n"
+            << "layout(set = 0, binding = 0) uniform Buffer1\n"
+            << "{\n"
+            << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+            << "};\n"
+            << "\n"
+            << "void main (void)\n"
+            << "{\n"
+            << " uvec4 mask = subgroupBallot(true);\n"
+            << bdy.str()
+            << " out_color = float(tempResult);\n"
+            << " gl_Position = in_position;\n"
+            << " gl_PointSize = 1.0f;\n"
+            << "}\n";
+        programCollection.glslSources.add("vert")
+            << glu::VertexSource(vertexSrc.str()) << buildOptions;
+    }
+    else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+    {
+        std::ostringstream geometry;
+
+        geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+            << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
+            << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+            << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+            << "layout(points) in;\n"
+            << "layout(points, max_vertices = 1) out;\n"
+            << "layout(location = 0) out float out_color;\n"
+            << "layout(set = 0, binding = 0) uniform Buffer\n"
+            << "{\n"
+            << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+            << "};\n"
+            << "\n"
+            << "void main (void)\n"
+            << "{\n"
+            << " uvec4 mask = subgroupBallot(true);\n"
+            << bdy.str()
+            << " out_color = float(tempResult);\n"
+            << " gl_Position = gl_in[0].gl_Position;\n"
+            << " EmitVertex();\n"
+            << " EndPrimitive();\n"
+            << "}\n";
+
+        programCollection.glslSources.add("geometry")
+            << glu::GeometrySource(geometry.str()) << buildOptions;
+    }
+    else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+    {
+        std::ostringstream controlSource;
+        controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+            << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
+            << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+            << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+            << "layout(vertices = 2) out;\n"
+            << "layout(location = 0) out float out_color[];\n"
+            << "layout(set = 0, binding = 0) uniform Buffer1\n"
+            << "{\n"
+            << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+            << "};\n"
+            << "\n"
+            << "void main (void)\n"
+            << "{\n"
+            << " if (gl_InvocationID == 0)\n"
+            <<" {\n"
+            << " gl_TessLevelOuter[0] = 1.0f;\n"
+            << " gl_TessLevelOuter[1] = 1.0f;\n"
+            << " }\n"
+            << " uvec4 mask = subgroupBallot(true);\n"
+            << bdy.str()
+            // Terminate the line so the generated source keeps one statement
+            // per line, like every other shader emitted here.
+            << " out_color[gl_InvocationID] = float(tempResult);\n"
+            << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+            << "}\n";
+
+        programCollection.glslSources.add("tesc")
+            << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+        subgroups::setTesEvalShaderFrameBuffer(programCollection);
+    }
+    else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+    {
+        std::ostringstream evaluationSource;
+        evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+            << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
+            << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+            << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+            << "layout(isolines, equal_spacing, ccw ) in;\n"
+            << "layout(location = 0) out float out_color;\n"
+            << "layout(set = 0, binding = 0) uniform Buffer1\n"
+            << "{\n"
+            << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+            << "};\n"
+            << "\n"
+            << "void main (void)\n"
+            << "{\n"
+            << " uvec4 mask = subgroupBallot(true);\n"
+            << bdy.str()
+            << " out_color = float(tempResult);\n"
+            << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+            << "}\n";
+
+        subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+        programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+    }
+    else
+    {
+        DE_FATAL("Unsupported shader stage");
+    }
+}
+
+// Registers the shader sources for the SSBO-based test variants.
+//
+// Compute case: a single "comp" shader that writes tempResult to
+// result[offset], where offset linearises gl_GlobalInvocationID.
+//
+// Otherwise: vertex, tessellation control/evaluation, geometry (expanded from
+// a ${TOPOLOGY} template) and fragment shaders are all added. Each runs the
+// generated test body against the read-only input buffer at binding 4; the
+// non-fragment stages store tempResult in their own result buffer
+// (bindings 0..3) while the fragment stage writes it to its colour output.
+// Finally addNoSubgroupShader() adds the helper shaders.
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+    const string                 testBody = getTestString(caseDef);
+    const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+
+    // Text fragments shared by all shaders below.
+    const std::string preamble =
+        "#version 450\n"
+        "#extension GL_NV_shader_subgroup_partitioned: enable\n"
+        "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
+        "#extension GL_KHR_shader_subgroup_ballot: enable\n";
+    const std::string resultMembers =
+        "{\n"
+        " uint result[];\n"
+        "};\n";
+    const std::string dataMembers =
+        "{\n"
+        " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
+        "};\n";
+    const std::string inputBlock =
+        "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n" + dataMembers;
+    const std::string mainOpen =
+        "void main (void)\n"
+        "{\n"
+        " uvec4 mask = subgroupBallot(true);\n";
+
+    if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+    {
+        const std::string compute =
+            preamble +
+            "layout (local_size_x_id = 0, local_size_y_id = 1, "
+            "local_size_z_id = 2) in;\n"
+            "layout(set = 0, binding = 0, std430) buffer Buffer1\n" + resultMembers +
+            "layout(set = 0, binding = 1, std430) buffer Buffer2\n" + dataMembers +
+            "\n"
+            "void main (void)\n"
+            "{\n"
+            " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+            " highp uint offset = globalSize.x * ((globalSize.y * "
+            "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+            "gl_GlobalInvocationID.x;\n"
+            " uvec4 mask = subgroupBallot(true);\n"
+            + testBody +
+            " result[offset] = tempResult;\n"
+            "}\n";
+
+        programCollection.glslSources.add("comp")
+            << glu::ComputeSource(compute) << buildOptions;
+        return;
+    }
+
+    // Vertex stage: one result per vertex, one point per pixel of a 1024-wide row.
+    const std::string vertex =
+        preamble +
+        "layout(set = 0, binding = 0, std430) buffer Buffer1\n" + resultMembers +
+        inputBlock +
+        "\n" + mainOpen + testBody +
+        " result[gl_VertexIndex] = tempResult;\n"
+        " float pixelSize = 2.0f/1024.0f;\n"
+        " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+        " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+        " gl_PointSize = 1.0f;\n"
+        "}\n";
+    programCollection.glslSources.add("vert")
+        << glu::VertexSource(vertex) << buildOptions;
+
+    // Tessellation control stage: one result per patch.
+    const std::string tesc =
+        preamble +
+        "layout(vertices=1) out;\n"
+        "layout(set = 0, binding = 1, std430) buffer Buffer1\n" + resultMembers +
+        inputBlock +
+        "\n" + mainOpen + testBody +
+        " result[gl_PrimitiveID] = tempResult;\n"
+        " if (gl_InvocationID == 0)\n"
+        " {\n"
+        " gl_TessLevelOuter[0] = 1.0f;\n"
+        " gl_TessLevelOuter[1] = 1.0f;\n"
+        " }\n"
+        " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+        "}\n";
+    programCollection.glslSources.add("tesc")
+        << glu::TessellationControlSource(tesc) << buildOptions;
+
+    // Tessellation evaluation stage: two results per isoline (one per end point).
+    const std::string tese =
+        preamble +
+        "layout(isolines) in;\n"
+        "layout(set = 0, binding = 2, std430) buffer Buffer1\n" + resultMembers +
+        inputBlock +
+        "\n" + mainOpen + testBody +
+        " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
+        " float pixelSize = 2.0f/1024.0f;\n"
+        " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+        "}\n";
+    programCollection.glslSources.add("tese")
+        << glu::TessellationEvaluationSource(tese) << buildOptions;
+
+    // Geometry stage: template with a ${TOPOLOGY} placeholder, expanded by the helper.
+    const std::string geometry =
+        preamble +
+        "layout(${TOPOLOGY}) in;\n"
+        "layout(points, max_vertices = 1) out;\n"
+        "layout(set = 0, binding = 3, std430) buffer Buffer1\n" + resultMembers +
+        inputBlock +
+        "\n" + mainOpen + testBody +
+        " result[gl_PrimitiveIDIn] = tempResult;\n"
+        " gl_Position = gl_in[0].gl_Position;\n"
+        " EmitVertex();\n"
+        " EndPrimitive();\n"
+        "}\n";
+    subgroups::addGeometryShadersFromTemplate(geometry, buildOptions,
+                                              programCollection.glslSources);
+
+    // Fragment stage: the result goes to the colour attachment instead of an SSBO.
+    const std::string fragment =
+        preamble +
+        "layout(location = 0) out uint result;\n" +
+        inputBlock +
+        mainOpen + testBody +
+        " result = tempResult;\n"
+        "}\n";
+    programCollection.glslSources.add("fragment")
+        << glu::FragmentSource(fragment) << buildOptions;
+
+    subgroups::addNoSubgroupShader(programCollection);
+}
+
+// Support check run before each case. Throws NotSupportedError when the
+// device lacks subgroup support, the NV partitioned subgroup feature, or
+// (for double formats only) double support. Checks run in that order.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+    if (!subgroups::isSubgroupSupported(context))
+    {
+        TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+    }
+
+    const bool hasPartitioned =
+        subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV);
+    if (!hasPartitioned)
+    {
+        TCU_THROW(NotSupportedError, "Device does not support subgroup partitioned operations");
+    }
+
+    // Only query double support when the case actually uses a double format.
+    if (subgroups::isDoubleFormat(caseDef.format))
+    {
+        if (!subgroups::isDoubleSupportedForDevice(context))
+        {
+            TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+        }
+    }
+}
+
+// Runs a "_framebuffer" case for the single stage in caseDef.shaderStage.
+//
+// If the device does not support subgroup operations in that stage the case
+// fails when support is required for the stage and is reported as
+// not supported otherwise. The input buffer holds
+// maxSupportedSubgroupSize() non-zero-initialised elements of caseDef.format,
+// and results are validated with checkVertexPipelineStages.
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+    const bool stageSupported =
+        subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage);
+
+    if (!stageSupported)
+    {
+        if (!subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+        {
+            TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+        }
+
+        return tcu::TestStatus::fail(
+            "Shader stage " +
+            subgroups::getShaderStageName(caseDef.shaderStage) +
+            " is required to support subgroup operations!");
+    }
+
+    subgroups::SSBOData inputData;
+    inputData.format         = caseDef.format;
+    inputData.numElements    = subgroups::maxSupportedSubgroupSize();
+    inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
+
+    if (caseDef.shaderStage == VK_SHADER_STAGE_VERTEX_BIT)
+        return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+
+    if (caseDef.shaderStage == VK_SHADER_STAGE_GEOMETRY_BIT)
+        return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+
+    // Both tessellation stages share one helper; the last argument selects the stage under test.
+    if (caseDef.shaderStage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
+        return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+
+    if (caseDef.shaderStage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
+        return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+
+    TCU_THROW(InternalError, "Unhandled shader stage");
+}
+
+// Tells whether subgroup operations can be used in caseDef.shaderStage.
+// Returns true when supported and false when unsupported although support is
+// required for that stage (the caller turns this into a failure). Throws
+// NotSupportedError when the stage is unsupported and support is not required.
+bool checkShaderStages (Context& context, const CaseDefinition& caseDef)
+{
+    if (subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+        return true;
+
+    if (!subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+    {
+        TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+    }
+
+    return false;
+}
+
+// Runs an SSBO-based case.
+//
+// Compute: verifies stage support, then dispatches through makeComputeTest()
+// with checkCompute as the validator.
+//
+// Graphics: intersects the requested stages with the stages that the device
+// reports as subgroup-capable. If the device cannot write SSBOs from vertex
+// pipeline stages, the test falls back to the fragment stage alone, or is
+// reported as not supported when the fragment stage is not in the set.
+// The remaining stages are run through allStages().
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+    if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+    {
+        if (!checkShaderStages(context, caseDef))
+        {
+            return tcu::TestStatus::fail(
+                "Shader stage " +
+                subgroups::getShaderStageName(caseDef.shaderStage) +
+                " is required to support subgroup operations!");
+        }
+
+        subgroups::SSBOData computeInput;
+        computeInput.format         = caseDef.format;
+        computeInput.numElements    = subgroups::maxSupportedSubgroupSize();
+        computeInput.initializeType = subgroups::SSBOData::InitializeNonZero;
+
+        return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &computeInput, 1, checkCompute);
+    }
+
+    // Query which stages support subgroup operations.
+    VkPhysicalDeviceSubgroupProperties subgroupProperties;
+    subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+    subgroupProperties.pNext = DE_NULL;
+
+    VkPhysicalDeviceProperties2 properties;
+    properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+    properties.pNext = &subgroupProperties;
+
+    context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+    VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProperties.supportedStages);
+
+    // Anything other than "fragment only" needs SSBO writes from vertex pipeline stages.
+    if (stages != VK_SHADER_STAGE_FRAGMENT_BIT)
+    {
+        if (!subgroups::isVertexSSBOSupportedForDevice(context))
+        {
+            const bool hasFragment = (stages & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
+
+            if (!hasFragment)
+                TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+
+            stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+        }
+    }
+
+    if (stages == (VkShaderStageFlagBits)0u)
+        TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+    subgroups::SSBOData graphicsInput;
+    graphicsInput.format         = caseDef.format;
+    graphicsInput.numElements    = subgroups::maxSupportedSubgroupSize();
+    graphicsInput.initializeType = subgroups::SSBOData::InitializeNonZero;
+    graphicsInput.binding        = 4u;
+    graphicsInput.stages         = stages;
+
+    return subgroups::allStages(context, VK_FORMAT_R32_UINT, &graphicsInput,
+                                1, checkVertexPipelineStages, stages);
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+// Builds the "partitioned" test group.
+//
+// For every (format, operation) pair that makes sense -- float formats skip
+// the bitwise operations, bool-like (8-bit USCALED) formats use only the
+// bitwise operations -- the following cases are added, in this order:
+//   <op>_<type>_<compute stage name>        SSBO-based compute case
+//   <op>_<type>_graphic                     SSBO-based all-graphics case
+//   <op>_<type>_<stage name>_framebuffer    one per entry of stages[]
+// where <op> is the lower-cased non-partitioned op name.
+tcu::TestCaseGroup* createSubgroupsPartitionedTests(tcu::TestContext& testCtx)
+{
+    de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+        testCtx, "partitioned", "NV_shader_subgroup_partitioned category tests"));
+
+    const VkShaderStageFlags stages[] =
+    {
+        VK_SHADER_STAGE_VERTEX_BIT,
+        VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+        VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+        VK_SHADER_STAGE_GEOMETRY_BIT,
+    };
+
+    const VkFormat formats[] =
+    {
+        VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
+        VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
+        VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
+        VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
+        VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
+        VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
+        VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
+        VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED,
+        VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
+    };
+
+    for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
+    {
+        const VkFormat    format     = formats[formatIndex];
+        const std::string formatName = subgroups::getFormatNameForGLSL(format);
+
+        // The classification depends on the format only, so do it once per format.
+        bool isBool  = false;
+        bool isFloat = false;
+
+        switch (format)
+        {
+            case VK_FORMAT_R32_SFLOAT:
+            case VK_FORMAT_R32G32_SFLOAT:
+            case VK_FORMAT_R32G32B32_SFLOAT:
+            case VK_FORMAT_R32G32B32A32_SFLOAT:
+            case VK_FORMAT_R64_SFLOAT:
+            case VK_FORMAT_R64G64_SFLOAT:
+            case VK_FORMAT_R64G64B64_SFLOAT:
+            case VK_FORMAT_R64G64B64A64_SFLOAT:
+                isFloat = true;
+                break;
+
+            case VK_FORMAT_R8_USCALED:
+            case VK_FORMAT_R8G8_USCALED:
+            case VK_FORMAT_R8G8B8_USCALED:
+            case VK_FORMAT_R8G8B8A8_USCALED:
+                isBool = true;
+                break;
+
+            default:
+                break;
+        }
+
+        for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
+        {
+            bool isBitwiseOp = false;
+
+            switch (opTypeIndex)
+            {
+                case OPTYPE_AND:
+                case OPTYPE_INCLUSIVE_AND:
+                case OPTYPE_EXCLUSIVE_AND:
+                case OPTYPE_OR:
+                case OPTYPE_INCLUSIVE_OR:
+                case OPTYPE_EXCLUSIVE_OR:
+                case OPTYPE_XOR:
+                case OPTYPE_INCLUSIVE_XOR:
+                case OPTYPE_EXCLUSIVE_XOR:
+                    isBitwiseOp = true;
+                    break;
+
+                default:
+                    break;
+            }
+
+            // Skip float with the bitwise category, and bool with everything
+            // that is not the bitwise category.
+            const bool skipCombination = isBitwiseOp ? isFloat : isBool;
+            if (skipCombination)
+                continue;
+
+            // Common "<op>_<type>" prefix of every case name for this combination.
+            const std::string casePrefix = de::toLower(getOpTypeName(opTypeIndex)) + "_" + formatName;
+
+            {
+                const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format};
+                addFunctionCaseWithPrograms(group.get(),
+                                            casePrefix + "_" + getShaderStageName(caseDef.shaderStage),
+                                            "", supportedCheck, initPrograms, test, caseDef);
+            }
+
+            {
+                const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format};
+                addFunctionCaseWithPrograms(group.get(),
+                                            casePrefix + "_graphic",
+                                            "", supportedCheck, initPrograms, test, caseDef);
+            }
+
+            for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
+            {
+                const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
+                addFunctionCaseWithPrograms(group.get(),
+                                            casePrefix + "_" + getShaderStageName(caseDef.shaderStage) + "_framebuffer",
+                                            "", supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+            }
+        }
+    }
+
+    return group.release();
+}
+
+} // subgroups
+} // vkt
+
--- /dev/null
+#ifndef _VKTSUBGROUPSPARTITIONEDTESTS_HPP
+#define _VKTSUBGROUPSPARTITIONEDTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ * Copyright (c) 2018 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Declaration only: takes the test context and returns a pointer to a tcu::TestCaseGroup.
+// NOTE(review): by its name this builds the "partitioned" subgroup test group; who owns the
+// returned pointer is not visible from this header - confirm at the call site.
+tcu::TestCaseGroup* createSubgroupsPartitionedTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSPARTITIONEDTESTS_HPP
--- /dev/null
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsQuadTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Quad operations exercised by this file. The enumerator values are used as array
+// indices (the per-operation swapTable strings) and OPTYPE_LAST bounds the loops over
+// all operations, so the values must stay contiguous and start at 0.
+enum OpType
+{
+ OPTYPE_QUAD_BROADCAST = 0, // getOpTypeName() -> "subgroupQuadBroadcast"
+ OPTYPE_QUAD_SWAP_HORIZONTAL, // getOpTypeName() -> "subgroupQuadSwapHorizontal"
+ OPTYPE_QUAD_SWAP_VERTICAL, // getOpTypeName() -> "subgroupQuadSwapVertical"
+ OPTYPE_QUAD_SWAP_DIAGONAL, // getOpTypeName() -> "subgroupQuadSwapDiagonal"
+ OPTYPE_LAST // count of operations; not an operation itself
+};
+
+// Result checker handed to the framebuffer and all-stages test runners.
+// Forwards the result buffers and the width to subgroups::check() with a fixed
+// third argument of 1; the trailing unnamed deUint32 parameter is ignored.
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+									  deUint32 width, deUint32)
+{
+	const bool passed = vkt::subgroups::check(datas, width, 1);
+
+	return passed;
+}
+
+// Result checker handed to the compute test runner.
+// Forwards the result buffers and the dispatch dimensions to subgroups::checkCompute()
+// with a fixed last argument of 1; the trailing unnamed deUint32 parameter is ignored.
+static bool checkCompute(std::vector<const void*> datas,
+						 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+						 deUint32)
+{
+	const bool passed = vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
+
+	return passed;
+}
+
+// Translates an OpType value into the GLSL built-in name that is pasted into the
+// generated shaders (and lower-cased to form test case names).
+// Any value that is not a real operation hits DE_FATAL and yields an empty string.
+std::string getOpTypeName(int opType)
+{
+	std::string name;
+
+	switch (opType)
+	{
+		case OPTYPE_QUAD_BROADCAST:
+			name = "subgroupQuadBroadcast";
+			break;
+		case OPTYPE_QUAD_SWAP_HORIZONTAL:
+			name = "subgroupQuadSwapHorizontal";
+			break;
+		case OPTYPE_QUAD_SWAP_VERTICAL:
+			name = "subgroupQuadSwapVertical";
+			break;
+		case OPTYPE_QUAD_SWAP_DIAGONAL:
+			name = "subgroupQuadSwapDiagonal";
+			break;
+		default:
+			DE_FATAL("Unsupported op type");
+			break;
+	}
+
+	return name;
+}
+
+// Parameters of one quad test case. Instances are brace-initialised in
+// createSubgroupsQuadTests(), so the member order below is part of the contract.
+struct CaseDefinition
+{
+ // One of the OpType enumerators (stored as int).
+ int opType;
+ // VK_SHADER_STAGE_COMPUTE_BIT, VK_SHADER_STAGE_ALL_GRAPHICS, or a single
+ // vertex-pipeline stage bit for the framebuffer variants.
+ VkShaderStageFlags shaderStage;
+ // Format of the input data; its GLSL type name comes from subgroups::getFormatNameForGLSL().
+ VkFormat format;
+ // Second argument of subgroupQuadBroadcast (0..3); the swap operations do not use it.
+ int direction;
+};
+
+// Builds the GLSL statements that are identical in every framebuffer shader stage:
+//  - for the swap operations, the swapTable[] declaration naming each lane's partner,
+//  - "op", the result of the quad operation under test,
+//  - "otherID", the subgroup invocation the value of "op" is expected to come from.
+// The text is inserted right after the "mask" ballot in each shader's main().
+static std::string getQuadOpTestSource (const CaseDefinition& caseDef)
+{
+	std::ostringstream src;
+
+	// Broadcast (and any unknown op) needs no table; swaps index it with the quad-local ID.
+	switch (caseDef.opType)
+	{
+		case OPTYPE_QUAD_SWAP_HORIZONTAL:
+			src << " const uint swapTable[4] = {1, 0, 3, 2};\n";
+			break;
+		case OPTYPE_QUAD_SWAP_VERTICAL:
+			src << " const uint swapTable[4] = {2, 3, 0, 1};\n";
+			break;
+		case OPTYPE_QUAD_SWAP_DIAGONAL:
+			src << " const uint swapTable[4] = {3, 2, 1, 0};\n";
+			break;
+		default:
+			break;
+	}
+
+	if (OPTYPE_QUAD_BROADCAST == caseDef.opType)
+	{
+		// Broadcast reads from the lane at index "direction" within the quad.
+		src << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
+			<< getOpTypeName(caseDef.opType) << "(data[gl_SubgroupInvocationID], " << caseDef.direction << ");\n"
+			<< " uint otherID = (gl_SubgroupInvocationID & ~0x3) + " << caseDef.direction << ";\n";
+	}
+	else
+	{
+		// Swaps read from the partner lane given by swapTable.
+		src << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
+			<< getOpTypeName(caseDef.opType) << "(data[gl_SubgroupInvocationID]);\n"
+			<< " uint otherID = (gl_SubgroupInvocationID & ~0x3) + swapTable[gl_SubgroupInvocationID & 0x3];\n";
+	}
+
+	return src.str();
+}
+
+// Registers the shaders for the framebuffer (no-SSBO) variant of a quad test.
+//
+// The stage named by caseDef.shaderStage gets a shader that runs the quad operation on
+// data read from a uniform buffer and writes 1.0 (match, or source lane inactive) or 0.0
+// (mismatch) to its output. The other stages are filled in with the pass-through shaders
+// provided by the subgroups:: helpers. Only vertex, geometry, tessellation control and
+// tessellation evaluation are handled; anything else hits DE_FATAL.
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
+{
+	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+
+	subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+	// When the vertex stage is the one under test, its shader is generated below instead.
+	if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+		subgroups::setVertexShaderFrameBuffer(programCollection);
+
+	if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+	{
+		std::ostringstream vertexSrc;
+		vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+			<< "#extension GL_KHR_shader_subgroup_quad: enable\n"
+			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+			<< "layout(location = 0) in highp vec4 in_position;\n"
+			<< "layout(location = 0) out float result;\n"
+			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
+			<< "{\n"
+			<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+			<< "};\n"
+			<< "\n"
+			<< "void main (void)\n"
+			<< "{\n"
+			<< " uvec4 mask = subgroupBallot(true);\n"
+			<< getQuadOpTestSource(caseDef)
+			<< " if (subgroupBallotBitExtract(mask, otherID))\n"
+			<< " {\n"
+			<< " result = (op == data[otherID]) ? 1.0f : 0.0f;\n"
+			<< " }\n"
+			<< " else\n"
+			<< " {\n"
+			<< " result = 1.0f;\n" // Invocation we read from was inactive, so we can't verify results!
+			<< " }\n"
+			<< " gl_Position = in_position;\n"
+			<< " gl_PointSize = 1.0f;\n"
+			<< "}\n";
+		programCollection.glslSources.add("vert")
+			<< glu::VertexSource(vertexSrc.str()) << buildOptions;
+	}
+	else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+	{
+		std::ostringstream geometry;
+
+		geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+			<< "#extension GL_KHR_shader_subgroup_quad: enable\n"
+			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+			<< "layout(points) in;\n"
+			<< "layout(points, max_vertices = 1) out;\n"
+			<< "layout(location = 0) out float out_color;\n"
+
+			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
+			<< "{\n"
+			<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+			<< "};\n"
+			<< "\n"
+			<< "void main (void)\n"
+			<< "{\n"
+			<< " uvec4 mask = subgroupBallot(true);\n"
+			<< getQuadOpTestSource(caseDef)
+			<< " if (subgroupBallotBitExtract(mask, otherID))\n"
+			<< " {\n"
+			<< " out_color = (op == data[otherID]) ? 1.0 : 0.0;\n"
+			<< " }\n"
+			<< " else\n"
+			<< " {\n"
+			<< " out_color = 1.0;\n" // Invocation we read from was inactive, so we can't verify results!
+			<< " }\n"
+			<< " gl_Position = gl_in[0].gl_Position;\n"
+			<< " EmitVertex();\n"
+			<< " EndPrimitive();\n"
+			<< "}\n";
+
+		programCollection.glslSources.add("geometry")
+			<< glu::GeometrySource(geometry.str()) << buildOptions;
+	}
+	else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+	{
+		std::ostringstream controlSource;
+
+		controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+			<< "#extension GL_KHR_shader_subgroup_quad: enable\n"
+			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+			<< "layout(vertices = 2) out;\n"
+			<< "layout(location = 0) out float out_color[];\n"
+			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
+			<< "{\n"
+			<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+			<< "};\n"
+			<< "\n"
+			<< "void main (void)\n"
+			<< "{\n"
+			<< " if (gl_InvocationID == 0)\n"
+			<<" {\n"
+			<< " gl_TessLevelOuter[0] = 1.0f;\n"
+			<< " gl_TessLevelOuter[1] = 1.0f;\n"
+			<< " }\n"
+			<< " uvec4 mask = subgroupBallot(true);\n"
+			<< getQuadOpTestSource(caseDef)
+			<< " if (subgroupBallotBitExtract(mask, otherID))\n"
+			<< " {\n"
+			<< " out_color[gl_InvocationID] = (op == data[otherID]) ? 1.0 : 0.0;\n"
+			<< " }\n"
+			<< " else\n"
+			<< " {\n"
+			<< " out_color[gl_InvocationID] = 1.0; \n"// Invocation we read from was inactive, so we can't verify results!
+			<< " }\n"
+			<< " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+			<< "}\n";
+
+		programCollection.glslSources.add("tesc")
+			<< glu::TessellationControlSource(controlSource.str()) << buildOptions;
+		subgroups::setTesEvalShaderFrameBuffer(programCollection);
+	}
+	else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+	{
+		std::ostringstream evaluationSource;
+
+		evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+			<< "#extension GL_KHR_shader_subgroup_quad: enable\n"
+			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+			<< "layout(isolines, equal_spacing, ccw ) in;\n"
+			<< "layout(location = 0) out float out_color;\n"
+			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
+			<< "{\n"
+			<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+			<< "};\n"
+			<< "\n"
+			<< "void main (void)\n"
+			<< "{\n"
+			<< " uvec4 mask = subgroupBallot(true);\n"
+			<< getQuadOpTestSource(caseDef)
+			<< " if (subgroupBallotBitExtract(mask, otherID))\n"
+			<< " {\n"
+			<< " out_color = (op == data[otherID]) ? 1.0 : 0.0;\n"
+			<< " }\n"
+			<< " else\n"
+			<< " {\n"
+			<< " out_color = 1.0;\n" // Invocation we read from was inactive, so we can't verify results!
+			<< " }\n"
+			<< " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+			<< "}\n";
+
+		subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+		programCollection.glslSources.add("tese")
+			<< glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+	}
+	else
+	{
+		DE_FATAL("Unsupported shader stage");
+	}
+}
+
+// Registers the shaders for the SSBO-based variants of a quad test.
+//
+// For VK_SHADER_STAGE_COMPUTE_BIT a single "comp" shader is added. For any other stage
+// value a full set of graphics shaders (vert, tesc, tese, geometry template, fragment)
+// is added, followed by the shaders from subgroups::addNoSubgroupShader(). Every shader
+// runs the quad operation on data[] and records 1 (match, or source lane inactive) or 0
+// (mismatch) in its result.
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+	const std::string				formatName		= subgroups::getFormatNameForGLSL(caseDef.format);
+
+	// Version and extension preamble common to all generated shaders.
+	const std::string				header			=
+		"#version 450\n"
+		"#extension GL_KHR_shader_subgroup_quad: enable\n"
+		"#extension GL_KHR_shader_subgroup_ballot: enable\n";
+
+	// Partner-lane table for the swap operations; broadcast needs none.
+	std::string swapTable[OPTYPE_LAST];
+	swapTable[OPTYPE_QUAD_BROADCAST]		= "";
+	swapTable[OPTYPE_QUAD_SWAP_HORIZONTAL]	= " const uint swapTable[4] = {1, 0, 3, 2};\n";
+	swapTable[OPTYPE_QUAD_SWAP_VERTICAL]	= " const uint swapTable[4] = {2, 3, 0, 1};\n";
+	swapTable[OPTYPE_QUAD_SWAP_DIAGONAL]	= " const uint swapTable[4] = {3, 2, 1, 0};\n";
+
+	// Statements defining "op" (value under test) and "otherID" (lane it should come from),
+	// preceded by the swap table when one is needed.
+	std::ostringstream opSrc;
+	opSrc << swapTable[caseDef.opType]
+		  << " " << formatName << " op = " << getOpTypeName(caseDef.opType) << "(data[gl_SubgroupInvocationID]";
+	if (OPTYPE_QUAD_BROADCAST == caseDef.opType)
+	{
+		opSrc << ", " << caseDef.direction << ");\n"
+			  << " uint otherID = (gl_SubgroupInvocationID & ~0x3) + " << caseDef.direction << ";\n";
+	}
+	else
+	{
+		opSrc << ");\n"
+			  << " uint otherID = (gl_SubgroupInvocationID & ~0x3) + swapTable[gl_SubgroupInvocationID & 0x3];\n";
+	}
+	const std::string testSource = opSrc.str();
+
+	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+	{
+		std::ostringstream comp;
+
+		comp << header
+			<< "layout (local_size_x_id = 0, local_size_y_id = 1, "
+			"local_size_z_id = 2) in;\n"
+			<< "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+			<< "{\n"
+			<< " uint result[];\n"
+			<< "};\n"
+			<< "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
+			<< "{\n"
+			<< " " << formatName << " data[];\n"
+			<< "};\n"
+			<< "\n"
+			<< "void main (void)\n"
+			<< "{\n"
+			<< " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+			<< " highp uint offset = globalSize.x * ((globalSize.y * "
+			"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+			"gl_GlobalInvocationID.x;\n"
+			<< " uvec4 mask = subgroupBallot(true);\n"
+			<< testSource
+			<< " if (subgroupBallotBitExtract(mask, otherID))\n"
+			<< " {\n"
+			<< " result[offset] = (op == data[otherID]) ? 1 : 0;\n"
+			<< " }\n"
+			<< " else\n"
+			<< " {\n"
+			<< " result[offset] = 1; // Invocation we read from was inactive, so we can't verify results!\n"
+			<< " }\n"
+			<< "}\n";
+
+		programCollection.glslSources.add("comp")
+			<< glu::ComputeSource(comp.str()) << buildOptions;
+		return;
+	}
+
+	// Graphics: every stage reads the same read-only input buffer at binding 4 and
+	// (except fragment) writes to its own result buffer at bindings 0..3.
+	const std::string inputBuffer =
+		"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+		"{\n"
+		" " + formatName + " data[];\n"
+		"};\n";
+
+	{
+		const std::string vertex = header +
+			"layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+			"{\n"
+			" uint result[];\n"
+			"};\n"
+			+ inputBuffer +
+			"\n"
+			"void main (void)\n"
+			"{\n"
+			" uvec4 mask = subgroupBallot(true);\n"
+			+ testSource +
+			" if (subgroupBallotBitExtract(mask, otherID))\n"
+			" {\n"
+			" result[gl_VertexIndex] = (op == data[otherID]) ? 1 : 0;\n"
+			" }\n"
+			" else\n"
+			" {\n"
+			" result[gl_VertexIndex] = 1; // Invocation we read from was inactive, so we can't verify results!\n"
+			" }\n"
+			" float pixelSize = 2.0f/1024.0f;\n"
+			" float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+			" gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+			"}\n";
+		programCollection.glslSources.add("vert")
+			<< glu::VertexSource(vertex) << buildOptions;
+	}
+
+	{
+		const std::string tesc = header +
+			"layout(vertices=1) out;\n"
+			"layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+			"{\n"
+			" uint result[];\n"
+			"};\n"
+			+ inputBuffer +
+			"\n"
+			"void main (void)\n"
+			"{\n"
+			" uvec4 mask = subgroupBallot(true);\n"
+			+ testSource +
+			" if (subgroupBallotBitExtract(mask, otherID))\n"
+			" {\n"
+			" result[gl_PrimitiveID] = (op == data[otherID]) ? 1 : 0;\n"
+			" }\n"
+			" else\n"
+			" {\n"
+			" result[gl_PrimitiveID] = 1; // Invocation we read from was inactive, so we can't verify results!\n"
+			" }\n"
+			" if (gl_InvocationID == 0)\n"
+			" {\n"
+			" gl_TessLevelOuter[0] = 1.0f;\n"
+			" gl_TessLevelOuter[1] = 1.0f;\n"
+			" }\n"
+			" gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+			"}\n";
+		programCollection.glslSources.add("tesc")
+			<< glu::TessellationControlSource(tesc) << buildOptions;
+	}
+
+	{
+		const std::string tese = header +
+			"layout(isolines) in;\n"
+			"layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+			"{\n"
+			" uint result[];\n"
+			"};\n"
+			+ inputBuffer +
+			"\n"
+			"void main (void)\n"
+			"{\n"
+			" uvec4 mask = subgroupBallot(true);\n"
+			+ testSource +
+			" if (subgroupBallotBitExtract(mask, otherID))\n"
+			" {\n"
+			" result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = (op == data[otherID]) ? 1 : 0;\n"
+			" }\n"
+			" else\n"
+			" {\n"
+			" result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = 1; // Invocation we read from was inactive, so we can't verify results!\n"
+			" }\n"
+			" float pixelSize = 2.0f/1024.0f;\n"
+			" gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+			"}\n";
+		programCollection.glslSources.add("tese")
+			<< glu::TessellationEvaluationSource(tese) << buildOptions;
+	}
+
+	{
+		// ${TOPOLOGY} is left for subgroups::addGeometryShadersFromTemplate() to substitute.
+		const std::string geometry = header +
+			"layout(${TOPOLOGY}) in;\n"
+			"layout(points, max_vertices = 1) out;\n"
+			"layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+			"{\n"
+			" uint result[];\n"
+			"};\n"
+			+ inputBuffer +
+			"\n"
+			"void main (void)\n"
+			"{\n"
+			" uvec4 mask = subgroupBallot(true);\n"
+			+ testSource +
+			" if (subgroupBallotBitExtract(mask, otherID))\n"
+			" {\n"
+			" result[gl_PrimitiveIDIn] = (op == data[otherID]) ? 1 : 0;\n"
+			" }\n"
+			" else\n"
+			" {\n"
+			" result[gl_PrimitiveIDIn] = 1; // Invocation we read from was inactive, so we can't verify results!\n"
+			" }\n"
+			" gl_Position = gl_in[0].gl_Position;\n"
+			" EmitVertex();\n"
+			" EndPrimitive();\n"
+			"}\n";
+		subgroups::addGeometryShadersFromTemplate(geometry, buildOptions,
+												  programCollection.glslSources);
+	}
+
+	{
+		const std::string fragment = header +
+			"layout(location = 0) out uint result;\n"
+			+ inputBuffer +
+			"void main (void)\n"
+			"{\n"
+			" uvec4 mask = subgroupBallot(true);\n"
+			+ testSource +
+			" if (subgroupBallotBitExtract(mask, otherID))\n"
+			" {\n"
+			" result = (op == data[otherID]) ? 1 : 0;\n"
+			" }\n"
+			" else\n"
+			" {\n"
+			" result = 1; // Invocation we read from was inactive, so we can't verify results!\n"
+			" }\n"
+			"}\n";
+		programCollection.glslSources.add("fragment")
+			<< glu::FragmentSource(fragment)<< buildOptions;
+	}
+
+	subgroups::addNoSubgroupShader(programCollection);
+}
+
+// Support gate run before a quad test case: throws NotSupportedError when subgroups,
+// the quad feature, or (for double formats only) double operations are unavailable.
+// Returns normally when the case can run.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+	if (!subgroups::isSubgroupSupported(context))
+	{
+		TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+	}
+
+	const bool quadSupported = subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_QUAD_BIT);
+	if (!quadSupported)
+	{
+		TCU_THROW(NotSupportedError, "Device does not support subgroup quad operations");
+	}
+
+	// The double query is only made for double formats.
+	if (!subgroups::isDoubleFormat(caseDef.format))
+		return;
+
+	if (!subgroups::isDoubleSupportedForDevice(context))
+	{
+		TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+	}
+}
+
+// Runs the framebuffer (no-SSBO) variant of a quad test for a single shader stage.
+//
+// If the device lacks subgroup support in that stage the case fails when the stage is
+// one that is required to support it, and is reported as not supported otherwise.
+// The input is a non-zero-initialised buffer of maxSupportedSubgroupSize() elements of
+// caseDef.format. Throws InternalError for a stage without a framebuffer runner.
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+	const VkShaderStageFlags stage = caseDef.shaderStage;
+
+	if (!subgroups::areSubgroupOperationsSupportedForStage(context, stage))
+	{
+		if (!subgroups::areSubgroupOperationsRequiredForStage(stage))
+		{
+			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+		}
+
+		return tcu::TestStatus::fail(
+				   "Shader stage " +
+				   subgroups::getShaderStageName(stage) +
+				   " is required to support subgroup operations!");
+	}
+
+	subgroups::SSBOData input;
+	input.format			= caseDef.format;
+	input.numElements		= subgroups::maxSupportedSubgroupSize();
+	input.initializeType	= subgroups::SSBOData::InitializeNonZero;
+
+	if (VK_SHADER_STAGE_VERTEX_BIT == stage)
+		return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &input, 1, checkVertexPipelineStages);
+
+	if (VK_SHADER_STAGE_GEOMETRY_BIT == stage)
+		return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &input, 1, checkVertexPipelineStages);
+
+	// Both tessellation stages share one runner, told which stage is under test.
+	if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == stage)
+		return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &input, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+
+	if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == stage)
+		return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &input, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+
+	TCU_THROW(InternalError, "Unhandled shader stage");
+}
+
+
+// Runs the SSBO-based variant of a quad test.
+//
+// Compute: fails if the compute stage lacks subgroup support, otherwise runs
+// subgroups::makeComputeTest().
+// Anything else: intersects the requested stages with the device's
+// supportedStages, falls back to fragment-only when vertex-stage SSBO writes are
+// unavailable (or throws NotSupportedError if fragment is not in the set), and runs
+// subgroups::allStages() with the input buffer at binding 4.
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+	{
+		if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+		{
+			return tcu::TestStatus::fail(
+					   "Shader stage " +
+					   subgroups::getShaderStageName(caseDef.shaderStage) +
+					   " is required to support subgroup operations!");
+		}
+
+		subgroups::SSBOData computeInput;
+		computeInput.format			= caseDef.format;
+		computeInput.numElements	= subgroups::maxSupportedSubgroupSize();
+		computeInput.initializeType	= subgroups::SSBOData::InitializeNonZero;
+
+		return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &computeInput, 1, checkCompute);
+	}
+
+	// Query which stages support subgroup operations on this device.
+	VkPhysicalDeviceSubgroupProperties subgroupProps;
+	subgroupProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+	subgroupProps.pNext = DE_NULL;
+
+	VkPhysicalDeviceProperties2 deviceProps;
+	deviceProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+	deviceProps.pNext = &subgroupProps;
+
+	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &deviceProps);
+
+	VkShaderStageFlagBits activeStages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProps.supportedStages);
+
+	// Without vertex-stage SSBO writes only the fragment stage can report results.
+	if (VK_SHADER_STAGE_FRAGMENT_BIT != activeStages && !subgroups::isVertexSSBOSupportedForDevice(context))
+	{
+		const bool fragmentAvailable = (activeStages & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
+
+		if (!fragmentAvailable)
+		{
+			TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+		}
+
+		activeStages = VK_SHADER_STAGE_FRAGMENT_BIT;
+	}
+
+	if ((VkShaderStageFlagBits)0u == activeStages)
+	{
+		TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+	}
+
+	subgroups::SSBOData graphicsInput;
+	graphicsInput.format			= caseDef.format;
+	graphicsInput.numElements		= subgroups::maxSupportedSubgroupSize();
+	graphicsInput.initializeType	= subgroups::SSBOData::InitializeNonZero;
+	graphicsInput.binding			= 4u;
+	graphicsInput.stages			= activeStages;
+
+	return subgroups::allStages(context, VK_FORMAT_R32_UINT, &graphicsInput, 1, checkVertexPipelineStages, activeStages);
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+// Builds the "quad" test group with three children: "graphics", "compute" and
+// "framebuffer".
+//
+// One case is generated per (operation, format) pair; subgroupQuadBroadcast is
+// additionally expanded over the four broadcast directions (0..3), which appear in the
+// case name. Framebuffer cases are further split per vertex-pipeline stage.
+// The returned pointer has been released from its MovePtr.
+tcu::TestCaseGroup* createSubgroupsQuadTests(tcu::TestContext& testCtx)
+{
+	// Descriptions name the quad category (they previously said "arithmetic",
+	// a leftover from the file this one was derived from).
+	de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+		testCtx, "graphics", "Subgroup quad category tests: graphics"));
+	de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+		testCtx, "compute", "Subgroup quad category tests: compute"));
+	de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+		testCtx, "framebuffer", "Subgroup quad category tests: framebuffer"));
+
+	const VkFormat formats[] =
+	{
+		VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
+		VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
+		VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
+		VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
+		VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
+		VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
+		VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
+		VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED,
+		VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
+	};
+
+	const VkShaderStageFlags stages[] =
+	{
+		VK_SHADER_STAGE_VERTEX_BIT,
+		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+		VK_SHADER_STAGE_GEOMETRY_BIT,
+	};
+
+	// A quad has four lanes, hence four broadcast directions.
+	for (int direction = 0; direction < 4; ++direction)
+	{
+		for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
+		{
+			const VkFormat format = formats[formatIndex];
+
+			for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
+			{
+				const bool isBroadcast = (OPTYPE_QUAD_BROADCAST == opTypeIndex);
+
+				// We don't need direction for swap operations: emit them once only.
+				if (!isBroadcast && 0 != direction)
+					continue;
+
+				// Case name: <op>[_<direction>]_<glsl type>
+				std::ostringstream name;
+				name << de::toLower(getOpTypeName(opTypeIndex));
+
+				if (isBroadcast)
+					name << "_" << direction;
+
+				name << "_" << subgroups::getFormatNameForGLSL(format);
+
+				{
+					const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, direction};
+					addFunctionCaseWithPrograms(computeGroup.get(), name.str(), "", supportedCheck, initPrograms, test, caseDef);
+				}
+
+				{
+					const CaseDefinition caseDef =
+					{
+						opTypeIndex,
+						VK_SHADER_STAGE_ALL_GRAPHICS,
+						format,
+						direction
+					};
+					addFunctionCaseWithPrograms(graphicGroup.get(), name.str(), "", supportedCheck, initPrograms, test, caseDef);
+				}
+
+				for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
+				{
+					const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, direction};
+					addFunctionCaseWithPrograms(framebufferGroup.get(), name.str()+"_"+ getShaderStageName(caseDef.shaderStage), "",
+												supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+				}
+			}
+		}
+	}
+
+	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+		testCtx, "quad", "Subgroup quad category tests"));
+
+	group->addChild(graphicGroup.release());
+	group->addChild(computeGroup.release());
+	group->addChild(framebufferGroup.release());
+
+	return group.release();
+}
+} // subgroups
+} // vkt
--- /dev/null
+#ifndef _VKTSUBGROUPSQUADTESTS_HPP
+#define _VKTSUBGROUPSQUADTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Declaration only: takes the test context and returns a pointer to a tcu::TestCaseGroup.
+// NOTE(review): by its name this builds the "quad" subgroup test group; who owns the
+// returned pointer is not visible from this header - confirm at the call site.
+tcu::TestCaseGroup* createSubgroupsQuadTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSQUADTESTS_HPP
--- /dev/null
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsShapeTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Result checker for the graphics paths of the shape tests.
+// Forwards the result buffers and the width to subgroups::check() with a fixed
+// third argument of 1; the trailing unnamed deUint32 parameter is ignored.
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+									  deUint32 width, deUint32)
+{
+	const bool passed = vkt::subgroups::check(datas, width, 1);
+
+	return passed;
+}
+
+// Result checker for the compute path of the shape tests.
+// Forwards the result buffers and the dispatch dimensions to subgroups::checkCompute()
+// with a fixed last argument of 1; the trailing unnamed deUint32 parameter is ignored.
+static bool checkCompute(std::vector<const void*> datas,
+						 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+						 deUint32)
+{
+	const bool passed = vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
+
+	return passed;
+}
+
+// Operation families covered by the shape tests. OPTYPE_LAST is the count of
+// real entries, not an operation.
+enum OpType
+{
+ OPTYPE_CLUSTERED = 0, // getOpTypeName() -> "clustered"
+ OPTYPE_QUAD, // getOpTypeName() -> "quad"
+ OPTYPE_LAST
+};
+
+// Returns the short name ("clustered" or "quad") for an OpType value.
+// Any other value hits DE_FATAL and yields an empty string.
+std::string getOpTypeName(int opType)
+{
+	std::string name;
+
+	switch (opType)
+	{
+		case OPTYPE_CLUSTERED:
+			name = "clustered";
+			break;
+		case OPTYPE_QUAD:
+			name = "quad";
+			break;
+		default:
+			DE_FATAL("Unsupported op type");
+			break;
+	}
+
+	return name;
+}
+
+// Parameters of one shape test case. Instances are brace-initialised in
+// createSubgroupsShapeTests(), so the field order must not change.
+struct CaseDefinition
+{
+ int opType; // one of the OpType enumerators
+ VkShaderStageFlags shaderStage; // single stage, or VK_SHADER_STAGE_ALL_GRAPHICS for the "graphics" group
+};
+
+// Registers the GLSL sources for a single-stage "framebuffer" shape test.
+//
+// A shared body (bdy) is generated from caseDef.opType and spliced into main()
+// of the stage named by caseDef.shaderStage; the shader writes float(tempResult)
+// to an output at location 0. The remaining stages are supplied by the
+// subgroups::set*ShaderFrameBuffer() helpers. Only vertex, geometry,
+// tessellation control and tessellation evaluation are handled; any other
+// stage hits DE_FATAL.
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
+{
+ const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ std::ostringstream bdy;
+ // Clustered cases enable the clustered extension, all others the quad extension.
+ std::string extension = (OPTYPE_CLUSTERED == caseDef.opType) ?
+ "#extension GL_KHR_shader_subgroup_clustered: enable\n" :
+ "#extension GL_KHR_shader_subgroup_quad: enable\n";
+
+ subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+ // When the vertex stage is not the one under test, use the helper-provided vertex shader.
+ if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+ subgroups::setVertexShaderFrameBuffer(programCollection);
+
+ // The generated body always uses subgroupBallot / subgroupBallotBitExtract.
+ extension += "#extension GL_KHR_shader_subgroup_ballot: enable\n";
+
+ // tempResult starts at 1 and is overwritten when a mismatch is detected below.
+ bdy << " uint tempResult = 0x1;\n"
+ << " uvec4 mask = subgroupBallot(true);\n";
+
+ if (OPTYPE_CLUSTERED == caseDef.opType)
+ {
+ // Emit one check per power-of-two cluster size up to maxSupportedSubgroupSize().
+ // Each invocation contributes the bit of its own invocation ID, the bits are
+ // OR-ed across the cluster, and every slot of the invocation's cluster must
+ // have the same state in the OR result as in the ballot mask.
+ for (deUint32 i = 1; i <= subgroups::maxSupportedSubgroupSize(); i *= 2)
+ {
+ bdy << " if (gl_SubgroupSize >= " << i << ")\n"
+ << " {\n"
+ << " uvec4 contribution = uvec4(0);\n"
+ << " const uint modID = gl_SubgroupInvocationID % 32;\n"
+ << " switch (gl_SubgroupInvocationID / 32)\n"
+ << " {\n"
+ << " case 0: contribution.x = 1 << modID; break;\n"
+ << " case 1: contribution.y = 1 << modID; break;\n"
+ << " case 2: contribution.z = 1 << modID; break;\n"
+ << " case 3: contribution.w = 1 << modID; break;\n"
+ << " }\n"
+ << " uvec4 result = subgroupClusteredOr(contribution, " << i << ");\n"
+ << " uint rootID = gl_SubgroupInvocationID & ~(" << i - 1 << ");\n"
+ << " for (uint i = 0; i < " << i << "; i++)\n"
+ << " {\n"
+ << " uint nextID = rootID + i;\n"
+ << " if (subgroupBallotBitExtract(mask, nextID) ^^ subgroupBallotBitExtract(result, nextID))\n"
+ << " {\n"
+ << " tempResult = 0;\n"
+ << " }\n"
+ << " }\n"
+ << " }\n";
+ }
+ }
+ else
+ {
+ // Quad case: broadcast the invocation ID from each of the four quad lanes and
+ // compare it with the ID computed from the quad's first lane (rootID + i),
+ // for lanes that are set in the ballot mask.
+ // NOTE(review): a mismatch stores mask.x rather than 0 - confirm this is intended.
+ bdy << " uint cluster[4] =\n"
+ << " {\n"
+ << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 0),\n"
+ << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 1),\n"
+ << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 2),\n"
+ << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 3)\n"
+ << " };\n"
+ << " uint rootID = gl_SubgroupInvocationID & ~0x3;\n"
+ << " for (uint i = 0; i < 4; i++)\n"
+ << " {\n"
+ << " uint nextID = rootID + i;\n"
+ << " if (subgroupBallotBitExtract(mask, nextID) && (cluster[i] != nextID))\n"
+ << " {\n"
+ << " tempResult = mask.x;\n"
+ << " }\n"
+ << " }\n";
+ }
+
+ if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+ {
+ // Vertex stage under test: passes the input position through and writes the result.
+ std::ostringstream vertexSrc;
+ vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << extension
+ << "layout(location = 0) in highp vec4 in_position;\n"
+ << "layout(location = 0) out float result;\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << bdy.str()
+ << " result = float(tempResult);\n"
+ << " gl_Position = in_position;\n"
+ << " gl_PointSize = 1.0f;\n"
+ << "}\n";
+ programCollection.glslSources.add("vert")
+ << glu::VertexSource(vertexSrc.str()) << buildOptions;
+ }
+ else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+ {
+ // Geometry stage under test: one point in, one point out.
+ std::ostringstream geometry;
+
+ geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << extension
+ << "layout(points) in;\n"
+ << "layout(points, max_vertices = 1) out;\n"
+ << "layout(location = 0) out float out_color;\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << bdy.str()
+ << " out_color = float(tempResult);\n"
+ << " gl_Position = gl_in[0].gl_Position;\n"
+ << " EmitVertex();\n"
+ << " EndPrimitive();\n"
+ << "}\n";
+
+ programCollection.glslSources.add("geometry")
+ << glu::GeometrySource(geometry.str()) << buildOptions;
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+ {
+ // Tessellation control stage under test; the evaluation shader comes from the helper.
+ std::ostringstream controlSource;
+
+ controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << extension
+ << "layout(vertices = 2) out;\n"
+ << "layout(location = 0) out float out_color[];\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " if (gl_InvocationID == 0)\n"
+ <<" {\n"
+ << " gl_TessLevelOuter[0] = 1.0f;\n"
+ << " gl_TessLevelOuter[1] = 1.0f;\n"
+ << " }\n"
+ << bdy.str()
+ << " out_color[gl_InvocationID] = float(tempResult);\n"
+ << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ << "}\n";
+
+ programCollection.glslSources.add("tesc")
+ << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+ subgroups::setTesEvalShaderFrameBuffer(programCollection);
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+ {
+ // Tessellation evaluation stage under test; the control shader comes from the helper.
+ std::ostringstream evaluationSource;
+
+ evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << extension
+ << "layout(isolines, equal_spacing, ccw ) in;\n"
+ << "layout(location = 0) out float out_color;\n"
+ << "void main (void)\n"
+ << "{\n"
+ << bdy.str()
+ << " out_color = float(tempResult);\n"
+ << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+ << "}\n";
+
+ subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+ programCollection.glslSources.add("tese")
+ << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+ }
+ else
+ {
+ DE_FATAL("Unsupported shader stage");
+ }
+}
+
+// Registers the GLSL sources for the SSBO-based shape tests.
+//
+// For VK_SHADER_STAGE_COMPUTE_BIT a single "comp" shader is added. For any
+// other stage value, shaders for every graphics stage are added ("vert",
+// "tesc", "tese", the geometry variants produced from a ${TOPOLOGY} template
+// and "fragment"), followed by subgroups::addNoSubgroupShader().
+//
+// Every shader runs the same generated body, which leaves its verdict in
+// tempResult, and stores that verdict in its result buffer (or colour output
+// for the fragment stage).
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+    // All shaders are compiled with identical options.
+    const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+
+    // Clustered cases enable the clustered extension, all others the quad extension.
+    std::string extension = (OPTYPE_CLUSTERED == caseDef.opType) ?
+        "#extension GL_KHR_shader_subgroup_clustered: enable\n" :
+        "#extension GL_KHR_shader_subgroup_quad: enable\n";
+
+    // The generated body always uses subgroupBallot / subgroupBallotBitExtract.
+    extension += "#extension GL_KHR_shader_subgroup_ballot: enable\n";
+
+    std::ostringstream bdy;
+
+    // tempResult starts at 1 and is overwritten when a mismatch is detected below.
+    bdy << " uint tempResult = 0x1;\n"
+        << " uvec4 mask = subgroupBallot(true);\n";
+
+    if (OPTYPE_CLUSTERED == caseDef.opType)
+    {
+        // One check per power-of-two cluster size: each invocation contributes the
+        // bit of its own ID, the bits are OR-ed across the cluster, and every slot
+        // of the cluster must agree between the OR result and the ballot mask.
+        for (deUint32 i = 1; i <= subgroups::maxSupportedSubgroupSize(); i *= 2)
+        {
+            bdy << " if (gl_SubgroupSize >= " << i << ")\n"
+                << " {\n"
+                << " uvec4 contribution = uvec4(0);\n"
+                << " const uint modID = gl_SubgroupInvocationID % 32;\n"
+                << " switch (gl_SubgroupInvocationID / 32)\n"
+                << " {\n"
+                << " case 0: contribution.x = 1 << modID; break;\n"
+                << " case 1: contribution.y = 1 << modID; break;\n"
+                << " case 2: contribution.z = 1 << modID; break;\n"
+                << " case 3: contribution.w = 1 << modID; break;\n"
+                << " }\n"
+                << " uvec4 result = subgroupClusteredOr(contribution, " << i << ");\n"
+                << " uint rootID = gl_SubgroupInvocationID & ~(" << i - 1 << ");\n"
+                << " for (uint i = 0; i < " << i << "; i++)\n"
+                << " {\n"
+                << " uint nextID = rootID + i;\n"
+                << " if (subgroupBallotBitExtract(mask, nextID) ^^ subgroupBallotBitExtract(result, nextID))\n"
+                << " {\n"
+                << " tempResult = 0;\n"
+                << " }\n"
+                << " }\n"
+                << " }\n";
+        }
+    }
+    else
+    {
+        // Quad case: broadcast the invocation ID from each quad lane and compare
+        // it with the ID derived from the quad's first lane, for active lanes.
+        // NOTE(review): a mismatch stores mask.x rather than 0 - confirm this is intended.
+        bdy << " uint cluster[4] =\n"
+            << " {\n"
+            << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 0),\n"
+            << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 1),\n"
+            << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 2),\n"
+            << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 3)\n"
+            << " };\n"
+            << " uint rootID = gl_SubgroupInvocationID & ~0x3;\n"
+            << " for (uint i = 0; i < 4; i++)\n"
+            << " {\n"
+            << " uint nextID = rootID + i;\n"
+            << " if (subgroupBallotBitExtract(mask, nextID) && (cluster[i] != nextID))\n"
+            << " {\n"
+            << " tempResult = mask.x;\n"
+            << " }\n"
+            << " }\n";
+    }
+
+    if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+    {
+        std::ostringstream src;
+
+        // Each invocation writes its verdict at its linearised global index.
+        src << "#version 450\n"
+            << extension
+            << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+            "local_size_z_id = 2) in;\n"
+            << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+            << "{\n"
+            << " uint result[];\n"
+            << "};\n"
+            << "\n"
+            << "void main (void)\n"
+            << "{\n"
+            << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+            << " highp uint offset = globalSize.x * ((globalSize.y * "
+            "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+            "gl_GlobalInvocationID.x;\n"
+            << bdy.str()
+            << " result[offset] = tempResult;\n"
+            << "}\n";
+
+        programCollection.glslSources.add("comp")
+            << glu::ComputeSource(src.str()) << buildOptions;
+    }
+    else
+    {
+        // Vertex stage: result buffer at binding 0.
+        {
+            const std::string vertex =
+                "#version 450\n"
+                + extension +
+                "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                "{\n"
+                " uint result[];\n"
+                "};\n"
+                "\n"
+                "void main (void)\n"
+                "{\n"
+                + bdy.str() +
+                " result[gl_VertexIndex] = tempResult;\n"
+                " float pixelSize = 2.0f/1024.0f;\n"
+                " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+                " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+                "}\n";
+
+            programCollection.glslSources.add("vert")
+                << glu::VertexSource(vertex) << buildOptions;
+        }
+
+        // Tessellation control stage: result buffer at binding 1.
+        {
+            const std::string tesc =
+                "#version 450\n"
+                + extension +
+                "layout(vertices=1) out;\n"
+                "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+                "{\n"
+                " uint result[];\n"
+                "};\n"
+                "\n"
+                "void main (void)\n"
+                "{\n"
+                + bdy.str() +
+                // Store the computed verdict; writing a literal 1 here would make
+                // the check pass regardless of what the subgroup operations returned.
+                " result[gl_PrimitiveID] = tempResult;\n"
+                " if (gl_InvocationID == 0)\n"
+                " {\n"
+                " gl_TessLevelOuter[0] = 1.0f;\n"
+                " gl_TessLevelOuter[1] = 1.0f;\n"
+                " }\n"
+                " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                "}\n";
+
+            programCollection.glslSources.add("tesc")
+                << glu::TessellationControlSource(tesc) << buildOptions;
+        }
+
+        // Tessellation evaluation stage: result buffer at binding 2.
+        {
+            const std::string tese =
+                "#version 450\n"
+                + extension +
+                "layout(isolines) in;\n"
+                "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+                "{\n"
+                " uint result[];\n"
+                "};\n"
+                "\n"
+                "void main (void)\n"
+                "{\n"
+                + bdy.str() +
+                // As for the control stage, store the computed verdict rather than a constant.
+                " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
+                " float pixelSize = 2.0f/1024.0f;\n"
+                " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+                "}\n";
+
+            programCollection.glslSources.add("tese")
+                << glu::TessellationEvaluationSource(tese) << buildOptions;
+        }
+
+        // Geometry stage: result buffer at binding 3. ${TOPOLOGY} is substituted by
+        // subgroups::addGeometryShadersFromTemplate().
+        {
+            const std::string geometry =
+                "#version 450\n"
+                + extension +
+                "layout(${TOPOLOGY}) in;\n"
+                "layout(points, max_vertices = 1) out;\n"
+                "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+                "{\n"
+                " uint result[];\n"
+                "};\n"
+                "\n"
+                "void main (void)\n"
+                "{\n"
+                + bdy.str() +
+                " result[gl_PrimitiveIDIn] = tempResult;\n"
+                " gl_Position = gl_in[0].gl_Position;\n"
+                " EmitVertex();\n"
+                " EndPrimitive();\n"
+                "}\n";
+
+            subgroups::addGeometryShadersFromTemplate(geometry, buildOptions,
+                                                      programCollection.glslSources);
+        }
+
+        // Fragment stage: verdict goes to the colour output at location 0.
+        {
+            const std::string fragment =
+                "#version 450\n"
+                + extension +
+                "layout(location = 0) out uint result;\n"
+                "void main (void)\n"
+                "{\n"
+                + bdy.str() +
+                " result = tempResult;\n"
+                "}\n";
+
+            programCollection.glslSources.add("fragment")
+                << glu::FragmentSource(fragment) << buildOptions;
+        }
+        subgroups::addNoSubgroupShader(programCollection);
+    }
+}
+
+// Throws NotSupportedError when the device lacks what a shape test case needs:
+// subgroup support in general, ballot operations, and the clustered or quad
+// feature selected by caseDef.opType. The checks run in that order.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+    if (!subgroups::isSubgroupSupported(context))
+        TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+
+    const bool ballotSupported = subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT);
+    if (!ballotSupported)
+        TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
+
+    switch (caseDef.opType)
+    {
+        case OPTYPE_CLUSTERED:
+            if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_CLUSTERED_BIT))
+                TCU_THROW(NotSupportedError, "Subgroup shape tests require that clustered operations are supported!");
+            break;
+
+        case OPTYPE_QUAD:
+            if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_QUAD_BIT))
+                TCU_THROW(NotSupportedError, "Subgroup shape tests require that quad operations are supported!");
+            break;
+
+        default:
+            // Other op types have no additional feature requirement here.
+            break;
+    }
+}
+
+// Runs a framebuffer-based shape test for the single stage in caseDef.shaderStage.
+//
+// If subgroup operations are unavailable for the stage, the test fails when the
+// stage is required to support them and throws NotSupportedError otherwise.
+// An unhandled stage throws InternalError.
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+    const VkShaderStageFlags stage = caseDef.shaderStage;
+
+    if (!subgroups::areSubgroupOperationsSupportedForStage(context, stage))
+    {
+        if (!subgroups::areSubgroupOperationsRequiredForStage(stage))
+            TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+
+        return tcu::TestStatus::fail(
+            "Shader stage " +
+            subgroups::getShaderStageName(stage) +
+            " is required to support subgroup operations!");
+    }
+
+    switch (stage)
+    {
+        case VK_SHADER_STAGE_VERTEX_BIT:
+            return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+
+        case VK_SHADER_STAGE_GEOMETRY_BIT:
+            return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+
+        // Both tessellation stages share one helper; the last argument selects the stage under test.
+        case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+            return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+
+        case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+            return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+
+        default:
+            TCU_THROW(InternalError, "Unhandled shader stage");
+    }
+}
+
+
+// Runs an SSBO-based shape test.
+//
+// Fails if the device lacks the basic subgroup feature. For the compute stage
+// the test fails when compute does not support subgroup operations, otherwise
+// it runs subgroups::makeComputeTest(). For graphics, the requested stages are
+// intersected with VkPhysicalDeviceSubgroupProperties::supportedStages and the
+// remaining stages are run through subgroups::allStages().
+// Throws NotSupportedError when no usable graphics stage remains.
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+    if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BASIC_BIT))
+    {
+        // Name the feature directly: getShaderStageName() expects a shader stage
+        // bit, so passing a subgroup feature bit to it names the wrong thing.
+        return tcu::TestStatus::fail(
+            "Subgroup feature VK_SUBGROUP_FEATURE_BASIC_BIT is a required capability!");
+    }
+
+    if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+    {
+        if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+        {
+            return tcu::TestStatus::fail(
+                "Shader stage " +
+                subgroups::getShaderStageName(caseDef.shaderStage) +
+                " is required to support subgroup operations!");
+        }
+        return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkCompute);
+    }
+    else
+    {
+        // Query the stages for which the device supports subgroup operations.
+        VkPhysicalDeviceSubgroupProperties subgroupProperties;
+        subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+        subgroupProperties.pNext = DE_NULL;
+
+        VkPhysicalDeviceProperties2 properties;
+        properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+        properties.pNext = &subgroupProperties;
+
+        context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+        VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProperties.supportedStages);
+
+        // Without vertex-pipeline SSBO support only the fragment stage can be
+        // tested; bail out if the fragment stage is not among the candidates.
+        if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
+        {
+            if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
+                TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+            else
+                stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+        }
+
+        if ((VkShaderStageFlagBits)0u == stages)
+            TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+        return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, stages);
+    }
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+// Builds the "shape" test group with its three children, added in the order
+// "graphics", "compute", "framebuffer".
+//
+// For every op type one compute case and one all-graphics case are registered,
+// plus one framebuffer case per stage listed in framebufferStages.
+// The returned pointer is released from its MovePtr.
+tcu::TestCaseGroup* createSubgroupsShapeTests(tcu::TestContext& testCtx)
+{
+    de::MovePtr<tcu::TestCaseGroup> graphicsTests(new tcu::TestCaseGroup(
+        testCtx, "graphics", "Subgroup shape category tests: graphics"));
+    de::MovePtr<tcu::TestCaseGroup> computeTests(new tcu::TestCaseGroup(
+        testCtx, "compute", "Subgroup shape category tests: compute"));
+    de::MovePtr<tcu::TestCaseGroup> framebufferTests(new tcu::TestCaseGroup(
+        testCtx, "framebuffer", "Subgroup shape category tests: framebuffer"));
+
+    // Stages exercised one at a time by the framebuffer variants.
+    const VkShaderStageFlags framebufferStages[] =
+    {
+        VK_SHADER_STAGE_VERTEX_BIT,
+        VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+        VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+        VK_SHADER_STAGE_GEOMETRY_BIT,
+    };
+
+    for (int opType = 0; opType < OPTYPE_LAST; ++opType)
+    {
+        const std::string opName = de::toLower(getOpTypeName(opType));
+
+        const CaseDefinition computeCase = {opType, VK_SHADER_STAGE_COMPUTE_BIT};
+        addFunctionCaseWithPrograms(computeTests.get(), opName, "", supportedCheck, initPrograms, test, computeCase);
+
+        const CaseDefinition graphicsCase = {opType, VK_SHADER_STAGE_ALL_GRAPHICS};
+        addFunctionCaseWithPrograms(graphicsTests.get(), opName, "", supportedCheck, initPrograms, test, graphicsCase);
+
+        for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(framebufferStages); ++stageNdx)
+        {
+            const CaseDefinition framebufferCase = {opType, framebufferStages[stageNdx]};
+            const std::string    caseName        = opName + "_" + getShaderStageName(framebufferCase.shaderStage);
+
+            addFunctionCaseWithPrograms(framebufferTests.get(), caseName, "",
+                                        supportedCheck, initFrameBufferPrograms, noSSBOtest, framebufferCase);
+        }
+    }
+
+    de::MovePtr<tcu::TestCaseGroup> shapeTests(new tcu::TestCaseGroup(
+        testCtx, "shape", "Subgroup shape category tests"));
+
+    shapeTests->addChild(graphicsTests.release());
+    shapeTests->addChild(computeTests.release());
+    shapeTests->addChild(framebufferTests.release());
+
+    return shapeTests.release();
+}
+
+} // subgroups
+} // vkt
--- /dev/null
+#ifndef _VKTSUBGROUPSSHAPETESTS_HPP
+#define _VKTSUBGROUPSSHAPETESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+//! Entry point for the subgroup shape test group: takes the test context and
+//! returns a pointer to a tcu::TestCaseGroup. Only declared in this header.
+//! NOTE(review): presumably the caller takes ownership of the returned group - confirm at the call site.
+tcu::TestCaseGroup* createSubgroupsShapeTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSSHAPETESTS_HPP
--- /dev/null
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsShuffleTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Shuffle operation a test case exercises. The values index the per-op table
+// in TestSource(), so they must stay dense and start at zero.
+enum OpType
+{
+    OPTYPE_SHUFFLE      = 0,   // subgroupShuffle
+    OPTYPE_SHUFFLE_XOR  = 1,   // subgroupShuffleXor
+    OPTYPE_SHUFFLE_UP   = 2,   // subgroupShuffleUp
+    OPTYPE_SHUFFLE_DOWN = 3,   // subgroupShuffleDown
+    OPTYPE_LAST                // one past the last valid value; used as an array size
+};
+
+// Result checker used by the graphics-stage shuffle tests.
+// Forwards the captured buffers and the width to subgroups::check() with the
+// constant 1 as third argument; the trailing deUint32 parameter is ignored.
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+                                      deUint32 width, deUint32 /* unused */)
+{
+    const bool passed = subgroups::check(datas, width, 1);
+    return passed;
+}
+
+// Result checker used by the compute shuffle tests.
+// Forwards the buffers and both size triples to subgroups::checkCompute() with
+// the constant 1 as last argument; the trailing deUint32 parameter is ignored.
+static bool checkCompute(std::vector<const void*> datas,
+                         const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                         deUint32 /* unused */)
+{
+    const bool passed = subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
+    return passed;
+}
+
+// Returns the GLSL built-in function name for an OpType value.
+// Any unknown value raises DE_FATAL and yields an empty string.
+std::string getOpTypeName(int opType)
+{
+    switch (opType)
+    {
+        case OPTYPE_SHUFFLE:        return "subgroupShuffle";
+        case OPTYPE_SHUFFLE_XOR:    return "subgroupShuffleXor";
+        case OPTYPE_SHUFFLE_UP:     return "subgroupShuffleUp";
+        case OPTYPE_SHUFFLE_DOWN:   return "subgroupShuffleDown";
+        default:
+            break;
+    }
+
+    DE_FATAL("Unsupported op type");
+    return "";
+}
+
+// Parameters of one shuffle test case.
+// NOTE(review): instances are presumably brace-initialised by the test-group builder (not visible here), so keep the field order.
+struct CaseDefinition
+{
+ int opType; // one of the OpType enumerators
+ VkShaderStageFlags shaderStage; // stage(s) the case targets
+ VkFormat format; // format of the shuffled data; mapped to a GLSL type via subgroups::getFormatNameForGLSL()
+};
+
+// Formats an integer as its decimal string representation.
+const std::string to_string(int x)
+{
+    std::ostringstream formatted;
+    formatted << x;
+    const std::string text = formatted.str();
+    return text;
+}
+
+// Returns the GLSL declarations of the two read-only input buffers:
+// Buffer2 (data1, element type taken from caseDef.format) at binding
+// baseBinding, and Buffer3 (uint data2) at binding baseBinding + 1.
+const std::string DeclSource(CaseDefinition caseDef, int baseBinding)
+{
+    std::ostringstream decl;
+
+    decl << "layout(set = 0, binding = " << baseBinding << ", std430) readonly buffer Buffer2\n"
+         << "{\n"
+         << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[];\n"
+         << "};\n"
+         << "layout(set = 0, binding = " << (baseBinding + 1) << ", std430) readonly buffer Buffer3\n"
+         << "{\n"
+         << " uint data2[];\n"
+         << "};\n";
+
+    return decl.str();
+}
+
+// Returns the GLSL statements shared by every shuffle shader.
+//
+// The generated code reads a shuffle argument from data2 (masked with
+// gl_SubgroupSize - 1), applies the shuffle built-in selected by
+// caseDef.opType to data1, and leaves 1 in temp_res when the value matches
+// data1 at the source invocation, 0 when it does not. When the source
+// invocation is out of range or inactive, temp_res is set to 1.
+const std::string TestSource(CaseDefinition caseDef)
+{
+    // GLSL expression for the invocation each op reads from.
+    std::string sourceIdExpr[OPTYPE_LAST];
+    sourceIdExpr[OPTYPE_SHUFFLE]      = "id_in";
+    sourceIdExpr[OPTYPE_SHUFFLE_XOR]  = "gl_SubgroupInvocationID ^ id_in";
+    sourceIdExpr[OPTYPE_SHUFFLE_UP]   = "gl_SubgroupInvocationID - id_in";
+    sourceIdExpr[OPTYPE_SHUFFLE_DOWN] = "gl_SubgroupInvocationID + id_in";
+
+    const std::string formatName = subgroups::getFormatNameForGLSL(caseDef.format);
+    const std::string opName     = getOpTypeName(caseDef.opType);
+
+    std::ostringstream body;
+    body << " uint temp_res;\n"
+         << " uvec4 mask = subgroupBallot(true);\n"
+         << " uint id_in = data2[gl_SubgroupInvocationID] & (gl_SubgroupSize - 1);\n"
+         << " " << formatName << " op = " << opName << "(data1[gl_SubgroupInvocationID], id_in);\n"
+         << " uint id = " << sourceIdExpr[caseDef.opType] << ";\n"
+         << " if ((id < gl_SubgroupSize) && subgroupBallotBitExtract(mask, id))\n"
+         << " {\n"
+         << " temp_res = (op == data1[id]) ? 1 : 0;\n"
+         << " }\n"
+         << " else\n"
+         << " {\n"
+         << " temp_res = 1; // Invocation we read from was inactive, so we can't verify results!\n"
+         << " }\n";
+
+    return body.str();
+}
+
+// Registers the GLSL sources for a single-stage "framebuffer" shuffle test.
+//
+// The stage named by caseDef.shaderStage runs the statements from TestSource()
+// against two uniform blocks (data1 / data2, each sized to
+// subgroups::maxSupportedSubgroupSize()) and writes temp_res to an output at
+// location 0. The remaining stages are supplied by the
+// subgroups::set*ShaderFrameBuffer() helpers. Only vertex, geometry,
+// tessellation control and tessellation evaluation are handled; any other
+// stage hits DE_FATAL.
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
+{
+    const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+
+    subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+    // When the vertex stage is not the one under test, use the helper-provided vertex shader.
+    if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+        subgroups::setVertexShaderFrameBuffer(programCollection);
+
+    // Shuffle and shuffle-xor use the shuffle extension, up/down the relative one.
+    const std::string extSource =
+        (OPTYPE_SHUFFLE == caseDef.opType || OPTYPE_SHUFFLE_XOR == caseDef.opType) ?
+        "#extension GL_KHR_shader_subgroup_shuffle: enable\n" :
+        "#extension GL_KHR_shader_subgroup_shuffle_relative: enable\n";
+
+    // Extension directives shared by every stage. They are emitted directly after
+    // the #version line: GLSL requires #extension directives to appear before any
+    // non-preprocessor token.
+    const std::string extensions = extSource + "#extension GL_KHR_shader_subgroup_ballot: enable\n";
+
+    // Input uniform blocks shared by every stage.
+    std::ostringstream inputDecls;
+    inputDecls << "layout(set = 0, binding = 0) uniform Buffer1\n"
+               << "{\n"
+               << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data1[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+               << "};\n"
+               << "layout(set = 0, binding = 1) uniform Buffer2\n"
+               << "{\n"
+               << " uint data2[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+               << "};\n";
+
+    const std::string testSource = TestSource(caseDef);
+
+    if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+    {
+        // Vertex stage under test: passes the input position through and writes the result.
+        std::ostringstream vertexSrc;
+        vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                  << extensions
+                  << "layout(location = 0) in highp vec4 in_position;\n"
+                  << "layout(location = 0) out float result;\n"
+                  << inputDecls.str()
+                  << "\n"
+                  << "void main (void)\n"
+                  << "{\n"
+                  << testSource
+                  << " result = temp_res;\n"
+                  << " gl_Position = in_position;\n"
+                  << " gl_PointSize = 1.0f;\n"
+                  << "}\n";
+        programCollection.glslSources.add("vert")
+            << glu::VertexSource(vertexSrc.str()) << buildOptions;
+    }
+    else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+    {
+        // Geometry stage under test: one point in, one point out.
+        std::ostringstream geometry;
+
+        geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                 << extensions
+                 << "layout(points) in;\n"
+                 << "layout(points, max_vertices = 1) out;\n"
+                 << "layout(location = 0) out float out_color;\n"
+                 << inputDecls.str()
+                 << "\n"
+                 << "void main (void)\n"
+                 << "{\n"
+                 << testSource
+                 << " out_color = temp_res;\n"
+                 << " gl_Position = gl_in[0].gl_Position;\n"
+                 << " EmitVertex();\n"
+                 << " EndPrimitive();\n"
+                 << "}\n";
+
+        programCollection.glslSources.add("geometry")
+            << glu::GeometrySource(geometry.str()) << buildOptions;
+    }
+    else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+    {
+        // Tessellation control stage under test; the evaluation shader comes from the helper.
+        std::ostringstream controlSource;
+
+        controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                      << extensions
+                      << "layout(vertices = 2) out;\n"
+                      << "layout(location = 0) out float out_color[];\n"
+                      << inputDecls.str()
+                      << "\n"
+                      << "void main (void)\n"
+                      << "{\n"
+                      << " if (gl_InvocationID == 0)\n"
+                      << " {\n"
+                      << " gl_TessLevelOuter[0] = 1.0f;\n"
+                      << " gl_TessLevelOuter[1] = 1.0f;\n"
+                      << " }\n"
+                      << testSource
+                      << " out_color[gl_InvocationID] = temp_res;\n"
+                      << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+                      << "}\n";
+
+        programCollection.glslSources.add("tesc")
+            << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+        subgroups::setTesEvalShaderFrameBuffer(programCollection);
+    }
+    else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+    {
+        // Tessellation evaluation stage under test; the control shader comes from the helper.
+        std::ostringstream evaluationSource;
+
+        evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+                         << extensions
+                         << "layout(isolines, equal_spacing, ccw ) in;\n"
+                         << "layout(location = 0) out float out_color;\n"
+                         << inputDecls.str()
+                         << "\n"
+                         << "void main (void)\n"
+                         << "{\n"
+                         << testSource
+                         << " out_color = temp_res;\n"
+                         << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+                         << "}\n";
+
+        subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+        programCollection.glslSources.add("tese")
+            << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+    }
+    else
+    {
+        DE_FATAL("Unsupported shader stage");
+    }
+}
+
+// Registers the GLSL sources for the SSBO-based shuffle tests.
+//
+// For VK_SHADER_STAGE_COMPUTE_BIT a single "comp" shader is added, with the
+// input buffers starting at binding 1. For any other stage value, shaders for
+// every graphics stage are added ("vert", "tesc", "tese", the geometry variants
+// produced from a ${TOPOLOGY} template and "fragment"), with the input buffers
+// starting at binding 4, followed by subgroups::addNoSubgroupShader().
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+    // Identical build options for every shader registered here.
+    const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+
+    // Shuffle and shuffle-xor use the shuffle extension, up/down the relative one.
+    const bool plainShuffle = (OPTYPE_SHUFFLE == caseDef.opType || OPTYPE_SHUFFLE_XOR == caseDef.opType);
+
+    // Common shader header: version, ballot extension, then the op-specific extension.
+    std::string header =
+        "#version 450\n"
+        "#extension GL_KHR_shader_subgroup_ballot: enable\n";
+    header += plainShuffle ?
+        "#extension GL_KHR_shader_subgroup_shuffle: enable\n" :
+        "#extension GL_KHR_shader_subgroup_shuffle_relative: enable\n";
+
+    const std::string checkBody = TestSource(caseDef);
+
+    if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+    {
+        std::ostringstream compute;
+
+        // Each invocation writes its verdict at its linearised global index.
+        compute << header
+                << "layout (local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n"
+                << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+                << "{\n"
+                << " uint result[];\n"
+                << "};\n"
+                << DeclSource(caseDef, 1)
+                << "\n"
+                << "void main (void)\n"
+                << "{\n"
+                << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+                << " highp uint offset = globalSize.x * ((globalSize.y * "
+                "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+                "gl_GlobalInvocationID.x;\n"
+                << checkBody
+                << " result[offset] = temp_res;\n"
+                << "}\n";
+
+        programCollection.glslSources.add("comp")
+            << glu::ComputeSource(compute.str()) << buildOptions;
+        return;
+    }
+
+    // Graphics: the four result buffers use bindings 0-3, so inputs start at 4.
+    const std::string inputDecls = DeclSource(caseDef, 4);
+
+    // Vertex stage: result buffer at binding 0.
+    {
+        const std::string vertexShader =
+            header +
+            "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+            "{\n"
+            " uint result[];\n"
+            "};\n"
+            + inputDecls +
+            "\n"
+            "void main (void)\n"
+            "{\n"
+            + checkBody +
+            " result[gl_VertexIndex] = temp_res;\n"
+            " float pixelSize = 2.0f/1024.0f;\n"
+            " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+            " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+            " gl_PointSize = 1.0f;\n"
+            "}\n";
+
+        programCollection.glslSources.add("vert")
+            << glu::VertexSource(vertexShader) << buildOptions;
+    }
+
+    // Tessellation control stage: result buffer at binding 1.
+    {
+        const std::string controlShader =
+            header +
+            "layout(vertices=1) out;\n"
+            "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+            "{\n"
+            " uint result[];\n"
+            "};\n"
+            + inputDecls +
+            "\n"
+            "void main (void)\n"
+            "{\n"
+            + checkBody +
+            " result[gl_PrimitiveID] = temp_res;\n"
+            " if (gl_InvocationID == 0)\n"
+            " {\n"
+            " gl_TessLevelOuter[0] = 1.0f;\n"
+            " gl_TessLevelOuter[1] = 1.0f;\n"
+            " }\n"
+            " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+            "}\n";
+
+        programCollection.glslSources.add("tesc")
+            << glu::TessellationControlSource(controlShader) << buildOptions;
+    }
+
+    // Tessellation evaluation stage: result buffer at binding 2.
+    {
+        const std::string evaluationShader =
+            header +
+            "layout(isolines) in;\n"
+            "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+            "{\n"
+            " uint result[];\n"
+            "};\n"
+            + inputDecls +
+            "\n"
+            "void main (void)\n"
+            "{\n"
+            + checkBody +
+            " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = temp_res;\n"
+            " float pixelSize = 2.0f/1024.0f;\n"
+            " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+            "}\n";
+
+        programCollection.glslSources.add("tese")
+            << glu::TessellationEvaluationSource(evaluationShader) << buildOptions;
+    }
+
+    // Geometry stage: result buffer at binding 3. ${TOPOLOGY} is substituted by
+    // subgroups::addGeometryShadersFromTemplate().
+    {
+        const std::string geometryTemplate =
+            header +
+            "layout(${TOPOLOGY}) in;\n"
+            "layout(points, max_vertices = 1) out;\n"
+            "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+            "{\n"
+            " uint result[];\n"
+            "};\n"
+            + inputDecls +
+            "\n"
+            "void main (void)\n"
+            "{\n"
+            + checkBody +
+            " result[gl_PrimitiveIDIn] = temp_res;\n"
+            " gl_Position = gl_in[0].gl_Position;\n"
+            " EmitVertex();\n"
+            " EndPrimitive();\n"
+            "}\n";
+
+        subgroups::addGeometryShadersFromTemplate(geometryTemplate, buildOptions,
+                                                  programCollection.glslSources);
+    }
+
+    // Fragment stage: verdict goes to the colour output at location 0.
+    {
+        const std::string fragmentShader =
+            header +
+            "layout(location = 0) out uint result;\n"
+            + inputDecls +
+            "void main (void)\n"
+            "{\n"
+            + checkBody +
+            " result = temp_res;\n"
+            "}\n";
+
+        programCollection.glslSources.add("fragment")
+            << glu::FragmentSource(fragmentShader) << buildOptions;
+    }
+
+    subgroups::addNoSubgroupShader(programCollection);
+}
+
+// Throws NotSupportedError when the device lacks what a shuffle test case
+// needs: subgroup support in general, the shuffle feature (shuffle and
+// shuffle-xor) or the shuffle-relative feature (all other op types), and
+// double support when caseDef.format is a double format.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+    if (!subgroups::isSubgroupSupported(context))
+        TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+
+    const bool plainShuffle = (OPTYPE_SHUFFLE == caseDef.opType) || (OPTYPE_SHUFFLE_XOR == caseDef.opType);
+
+    if (plainShuffle)
+    {
+        if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_SHUFFLE_BIT))
+            TCU_THROW(NotSupportedError, "Device does not support subgroup shuffle operations");
+    }
+    else
+    {
+        if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT))
+            TCU_THROW(NotSupportedError, "Device does not support subgroup shuffle relative operations");
+    }
+
+    if (subgroups::isDoubleFormat(caseDef.format))
+    {
+        if (!subgroups::isDoubleSupportedForDevice(context))
+            TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+    }
+}
+
+// Runs the framebuffer variant of a shuffle case for a single shader stage.
+// Fails if the stage lacks subgroup support although support is required for
+// it, throws NotSupportedError if support is merely optional, and throws
+// InternalError for a stage this function has no runner for.
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+	if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+	{
+		if (!subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+
+		return tcu::TestStatus::fail(
+				   "Shader stage " +
+				   subgroups::getShaderStageName(caseDef.shaderStage) +
+				   " is required to support subgroup operations!");
+	}
+
+	// Two inputs with the same element count: one in the format under test
+	// and one of 32-bit unsigned integers.
+	subgroups::SSBOData inputData[2];
+
+	inputData[0].format			= caseDef.format;
+	inputData[0].numElements	= subgroups::maxSupportedSubgroupSize();
+
+	inputData[1].format			= VK_FORMAT_R32_UINT;
+	inputData[1].numElements	= inputData[0].numElements;
+
+	for (int inputNdx = 0; inputNdx < DE_LENGTH_OF_ARRAY(inputData); ++inputNdx)
+		inputData[inputNdx].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+	switch (caseDef.shaderStage)
+	{
+		case VK_SHADER_STAGE_VERTEX_BIT:
+			return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 2, checkVertexPipelineStages);
+
+		case VK_SHADER_STAGE_GEOMETRY_BIT:
+			return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 2, checkVertexPipelineStages);
+
+		// Both tessellation stages go through the same runner; the last
+		// argument names the stage being tested.
+		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+			return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 2, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+
+		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+			return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 2, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+
+		default:
+			TCU_THROW(InternalError, "Unhandled shader stage");
+	}
+}
+
+
+// Runs an SSBO-based shuffle case. A compute case is dispatched through
+// makeComputeTest; any other case is treated as a graphics case and run via
+// allStages on the requested stages that the device reports as supporting
+// subgroup operations.
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+	// OPTYPE_SHUFFLE and OPTYPE_SHUFFLE_XOR need the plain shuffle feature;
+	// every other op type needs the shuffle-relative feature.
+	const bool needsPlainShuffle = (OPTYPE_SHUFFLE == caseDef.opType) || (OPTYPE_SHUFFLE_XOR == caseDef.opType);
+
+	if (needsPlainShuffle)
+	{
+		if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_SHUFFLE_BIT))
+			TCU_THROW(NotSupportedError, "Device does not support subgroup shuffle operations");
+	}
+	else if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT))
+	{
+		TCU_THROW(NotSupportedError, "Device does not support subgroup shuffle relative operations");
+	}
+
+	if (subgroups::isDoubleFormat(caseDef.format) && !subgroups::isDoubleSupportedForDevice(context))
+		TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+
+	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+	{
+		// Missing compute-stage support is reported as a failure, not as "not supported".
+		if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+		{
+			return tcu::TestStatus::fail(
+					   "Shader stage " +
+					   subgroups::getShaderStageName(caseDef.shaderStage) +
+					   " is required to support subgroup operations!");
+		}
+
+		subgroups::SSBOData computeInputs[2];
+
+		computeInputs[0].format			= caseDef.format;
+		computeInputs[0].numElements	= subgroups::maxSupportedSubgroupSize();
+
+		computeInputs[1].format			= VK_FORMAT_R32_UINT;
+		computeInputs[1].numElements	= computeInputs[0].numElements;
+
+		for (int inputNdx = 0; inputNdx < DE_LENGTH_OF_ARRAY(computeInputs); ++inputNdx)
+			computeInputs[inputNdx].initializeType = subgroups::SSBOData::InitializeNonZero;
+
+		return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, computeInputs, 2, checkCompute);
+	}
+
+	// Graphics path: query which stages support subgroup operations.
+	VkPhysicalDeviceSubgroupProperties subgroupProperties;
+	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+	subgroupProperties.pNext = DE_NULL;
+
+	VkPhysicalDeviceProperties2 properties;
+	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+	properties.pNext = &subgroupProperties;
+
+	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+	// Keep only the requested stages that the device supports.
+	VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProperties.supportedStages);
+
+	if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
+	{
+		// Without vertex-stage SSBO support the test is narrowed to the
+		// fragment stage, provided that stage is among the candidates.
+		if (0 == (stages & VK_SHADER_STAGE_FRAGMENT_BIT))
+			TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+
+		stages = VK_SHADER_STAGE_FRAGMENT_BIT;
+	}
+
+	if ((VkShaderStageFlagBits)0u == stages)
+		TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+	subgroups::SSBOData graphicsInputs[2];
+
+	graphicsInputs[0].format		= caseDef.format;
+	graphicsInputs[0].numElements	= subgroups::maxSupportedSubgroupSize();
+
+	graphicsInputs[1].format		= VK_FORMAT_R32_UINT;
+	graphicsInputs[1].numElements	= graphicsInputs[0].numElements;
+
+	// The two inputs use consecutive bindings 4 and 5 and the same stages.
+	for (deUint32 inputNdx = 0u; inputNdx < 2u; ++inputNdx)
+	{
+		graphicsInputs[inputNdx].initializeType	= subgroups::SSBOData::InitializeNonZero;
+		graphicsInputs[inputNdx].binding		= 4u + inputNdx;
+		graphicsInputs[inputNdx].stages			= stages;
+	}
+
+	return subgroups::allStages(context, VK_FORMAT_R32_UINT, graphicsInputs, 2, checkVertexPipelineStages, stages);
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+// Creates the "shuffle" test group with "graphics", "compute" and
+// "framebuffer" sub-groups. For every format / op type pair one case is added
+// to the graphics and compute groups, and one per listed shader stage to the
+// framebuffer group. The returned pointer has been released from its MovePtr.
+tcu::TestCaseGroup* createSubgroupsShuffleTests(tcu::TestContext& testCtx)
+{
+	de::MovePtr<tcu::TestCaseGroup> shuffleGroup(new tcu::TestCaseGroup(
+		testCtx, "shuffle", "Subgroup shuffle category tests"));
+
+	de::MovePtr<tcu::TestCaseGroup> graphicsCases(new tcu::TestCaseGroup(
+		testCtx, "graphics", "Subgroup shuffle category tests: graphics"));
+	de::MovePtr<tcu::TestCaseGroup> computeCases(new tcu::TestCaseGroup(
+		testCtx, "compute", "Subgroup shuffle category tests: compute"));
+	de::MovePtr<tcu::TestCaseGroup> framebufferCases(new tcu::TestCaseGroup(
+		testCtx, "framebuffer", "Subgroup shuffle category tests: framebuffer"));
+
+	const VkFormat testedFormats[] =
+	{
+		VK_FORMAT_R32_SINT,
+		VK_FORMAT_R32G32_SINT,
+		VK_FORMAT_R32G32B32_SINT,
+		VK_FORMAT_R32G32B32A32_SINT,
+		VK_FORMAT_R32_UINT,
+		VK_FORMAT_R32G32_UINT,
+		VK_FORMAT_R32G32B32_UINT,
+		VK_FORMAT_R32G32B32A32_UINT,
+		VK_FORMAT_R32_SFLOAT,
+		VK_FORMAT_R32G32_SFLOAT,
+		VK_FORMAT_R32G32B32_SFLOAT,
+		VK_FORMAT_R32G32B32A32_SFLOAT,
+		VK_FORMAT_R64_SFLOAT,
+		VK_FORMAT_R64G64_SFLOAT,
+		VK_FORMAT_R64G64B64_SFLOAT,
+		VK_FORMAT_R64G64B64A64_SFLOAT,
+		VK_FORMAT_R8_USCALED,
+		VK_FORMAT_R8G8_USCALED,
+		VK_FORMAT_R8G8B8_USCALED,
+		VK_FORMAT_R8G8B8A8_USCALED,
+	};
+
+	// Stages that get their own framebuffer case.
+	const VkShaderStageFlags framebufferStages[] =
+	{
+		VK_SHADER_STAGE_VERTEX_BIT,
+		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+		VK_SHADER_STAGE_GEOMETRY_BIT,
+	};
+
+	for (int fmtNdx = 0; fmtNdx < DE_LENGTH_OF_ARRAY(testedFormats); ++fmtNdx)
+	{
+		const VkFormat format = testedFormats[fmtNdx];
+
+		for (int opNdx = 0; opNdx < OPTYPE_LAST; ++opNdx)
+		{
+			// Case name is "<lower-case op name>_<GLSL format name>".
+			const string caseName =
+				de::toLower(getOpTypeName(opNdx)) +
+				"_" + subgroups::getFormatNameForGLSL(format);
+
+			{
+				const CaseDefinition graphicsDef = {opNdx, VK_SHADER_STAGE_ALL_GRAPHICS, format};
+				addFunctionCaseWithPrograms(graphicsCases.get(), caseName, "", supportedCheck, initPrograms, test, graphicsDef);
+			}
+
+			{
+				const CaseDefinition computeDef = {opNdx, VK_SHADER_STAGE_COMPUTE_BIT, format};
+				addFunctionCaseWithPrograms(computeCases.get(), caseName, "", supportedCheck, initPrograms, test, computeDef);
+			}
+
+			for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(framebufferStages); ++stageNdx)
+			{
+				const CaseDefinition	framebufferDef	= {opNdx, framebufferStages[stageNdx], format};
+				const string			stageCaseName	= caseName + "_" + getShaderStageName(framebufferDef.shaderStage);
+
+				addFunctionCaseWithPrograms(framebufferCases.get(), stageCaseName, "",
+											supportedCheck, initFrameBufferPrograms, noSSBOtest, framebufferDef);
+			}
+		}
+	}
+
+	shuffleGroup->addChild(graphicsCases.release());
+	shuffleGroup->addChild(computeCases.release());
+	shuffleGroup->addChild(framebufferCases.release());
+
+	return shuffleGroup.release();
+}
+
+} // subgroups
+} // vkt
--- /dev/null
+#ifndef _VKTSUBGROUPSSHUFFLETESTS_HPP
+#define _VKTSUBGROUPSSHUFFLETESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Creates the "shuffle" subgroup test group, which holds "graphics",
+// "compute" and "framebuffer" child groups. The returned group is a raw
+// pointer that is no longer held by a smart pointer.
+tcu::TestCaseGroup* createSubgroupsShuffleTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSSHUFFLETESTS_HPP
--- /dev/null
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsTests.hpp"
+#include "vktSubgroupsBuiltinVarTests.hpp"
+#include "vktSubgroupsBuiltinMaskVarTests.hpp"
+#include "vktSubgroupsBasicTests.hpp"
+#include "vktSubgroupsVoteTests.hpp"
+#include "vktSubgroupsBallotTests.hpp"
+#include "vktSubgroupsBallotBroadcastTests.hpp"
+#include "vktSubgroupsBallotOtherTests.hpp"
+#include "vktSubgroupsArithmeticTests.hpp"
+#include "vktSubgroupsClusteredTests.hpp"
+#include "vktSubgroupsPartitionedTests.hpp"
+#include "vktSubgroupsShuffleTests.hpp"
+#include "vktSubgroupsQuadTests.hpp"
+#include "vktSubgroupsShapeTests.hpp"
+#include "vktTestGroupUtil.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+namespace
+{
+
+// Populates the given group with one child group per subgroup test category,
+// in a fixed order.
+void createChildren(tcu::TestCaseGroup* subgroupsTests)
+{
+	tcu::TestCaseGroup&	root	= *subgroupsTests;
+	tcu::TestContext&	testCtx	= root.getTestContext();
+
+	root.addChild(createSubgroupsBuiltinVarTests(testCtx));
+	root.addChild(createSubgroupsBuiltinMaskVarTests(testCtx));
+	root.addChild(createSubgroupsBasicTests(testCtx));
+	root.addChild(createSubgroupsVoteTests(testCtx));
+	root.addChild(createSubgroupsBallotTests(testCtx));
+	root.addChild(createSubgroupsBallotBroadcastTests(testCtx));
+	root.addChild(createSubgroupsBallotOtherTests(testCtx));
+	root.addChild(createSubgroupsArithmeticTests(testCtx));
+	root.addChild(createSubgroupsClusteredTests(testCtx));
+	root.addChild(createSubgroupsPartitionedTests(testCtx));
+	root.addChild(createSubgroupsShuffleTests(testCtx));
+	root.addChild(createSubgroupsQuadTests(testCtx));
+	root.addChild(createSubgroupsShapeTests(testCtx));
+}
+
+} // anonymous
+
+// Creates the top-level "subgroups" group; createChildren is handed to
+// createTestGroup to fill it.
+tcu::TestCaseGroup* createTests(tcu::TestContext& testCtx)
+{
+	tcu::TestCaseGroup* const subgroupsGroup =
+		createTestGroup(testCtx, "subgroups", "Subgroups tests", createChildren);
+
+	return subgroupsGroup;
+}
+
+} // subgroups
+} // vkt
--- /dev/null
+#ifndef _VKTSUBGROUPSTESTS_HPP
+#define _VKTSUBGROUPSTESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "tcuTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Creates the root test group for the subgroups tests in the given test context.
+tcu::TestCaseGroup* createTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSTESTS_HPP
--- /dev/null
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests Utils
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsTestsUtils.hpp"
+#include "deRandom.hpp"
+#include "tcuCommandLine.hpp"
+#include "tcuStringTemplate.hpp"
+#include "vkBarrierUtil.hpp"
+#include "vkImageUtil.hpp"
+#include "vkTypeUtil.hpp"
+#include "vkCmdUtil.hpp"
+#include "vkObjUtil.hpp"
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Returns the per-element size in bytes used for data of the given format.
+// Three-component formats get the same size as their four-component
+// counterparts (presumably to match the std430 layout used by the shaders --
+// TODO confirm). Unknown formats hit DE_FATAL and yield 0.
+deUint32 getFormatSizeInBytes(const VkFormat format)
+{
+	// The *_USCALED formats stand in for bool and bvec* types. Those are
+	// passed to the shader as int and ivec* types before the calculations are
+	// done as booleans, hence they share the 32-bit integer sizes. A distinct
+	// format is needed so the shader generators can switch on it and emit the
+	// correct shader source.
+	switch (format)
+	{
+		// One 32-bit component.
+		case VK_FORMAT_R32_SINT:
+		case VK_FORMAT_R32_UINT:
+		case VK_FORMAT_R32_SFLOAT:
+		case VK_FORMAT_R8_USCALED:
+			return 4u;
+
+		// Two 32-bit components, or one 64-bit component.
+		case VK_FORMAT_R32G32_SINT:
+		case VK_FORMAT_R32G32_UINT:
+		case VK_FORMAT_R32G32_SFLOAT:
+		case VK_FORMAT_R64_SFLOAT:
+		case VK_FORMAT_R8G8_USCALED:
+			return 8u;
+
+		// Three or four 32-bit components, or two 64-bit components.
+		case VK_FORMAT_R32G32B32_SINT:
+		case VK_FORMAT_R32G32B32_UINT:
+		case VK_FORMAT_R32G32B32_SFLOAT:
+		case VK_FORMAT_R32G32B32A32_SINT:
+		case VK_FORMAT_R32G32B32A32_UINT:
+		case VK_FORMAT_R32G32B32A32_SFLOAT:
+		case VK_FORMAT_R64G64_SFLOAT:
+		case VK_FORMAT_R8G8B8_USCALED:
+		case VK_FORMAT_R8G8B8A8_USCALED:
+			return 16u;
+
+		// Three or four 64-bit components.
+		case VK_FORMAT_R64G64B64_SFLOAT:
+		case VK_FORMAT_R64G64B64A64_SFLOAT:
+			return 32u;
+
+		default:
+			DE_FATAL("Unhandled format!");
+			return 0;
+	}
+}
+
+// Creates a pipeline layout with exactly one descriptor set layout and no
+// push constant ranges.
+Move<VkPipelineLayout> makePipelineLayout(
+	Context& context, const VkDescriptorSetLayout descriptorSetLayout)
+{
+	vk::VkPipelineLayoutCreateInfo layoutInfo;
+
+	layoutInfo.sType					= VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+	layoutInfo.pNext					= DE_NULL;
+	layoutInfo.flags					= 0u;
+	layoutInfo.setLayoutCount			= 1u;
+	layoutInfo.pSetLayouts				= &descriptorSetLayout;
+	layoutInfo.pushConstantRangeCount	= 0u;
+	layoutInfo.pPushConstantRanges		= DE_NULL;
+
+	return createPipelineLayout(context.getDeviceInterface(), context.getDevice(), &layoutInfo);
+}
+
+// Creates a render pass with a single subpass and a single color attachment
+// of the given format. The attachment is cleared on load, stored at the end,
+// and finishes in TRANSFER_SRC_OPTIMAL layout.
+Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
+{
+	VkAttachmentDescription colorAttachment =
+	{
+		0u,										// flags
+		format,									// format
+		VK_SAMPLE_COUNT_1_BIT,					// samples
+		VK_ATTACHMENT_LOAD_OP_CLEAR,			// loadOp
+		VK_ATTACHMENT_STORE_OP_STORE,			// storeOp
+		VK_ATTACHMENT_LOAD_OP_DONT_CARE,		// stencilLoadOp
+		VK_ATTACHMENT_STORE_OP_DONT_CARE,		// stencilStoreOp
+		VK_IMAGE_LAYOUT_UNDEFINED,				// initialLayout
+		VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL	// finalLayout
+	};
+
+	VkAttachmentReference colorAttachmentRef =
+	{
+		0,											// attachment
+		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL	// layout
+	};
+
+	const VkSubpassDescription subpass =
+	{
+		0u,									// flags
+		VK_PIPELINE_BIND_POINT_GRAPHICS,	// pipelineBindPoint
+		0,									// inputAttachmentCount
+		DE_NULL,							// pInputAttachments
+		1,									// colorAttachmentCount
+		&colorAttachmentRef,				// pColorAttachments
+		DE_NULL,							// pResolveAttachments
+		DE_NULL,							// pDepthStencilAttachment
+		0,									// preserveAttachmentCount
+		DE_NULL								// pPreserveAttachments
+	};
+
+	// One dependency into the subpass from outside and one back out of it.
+	const VkSubpassDependency dependencies[2] =
+	{
+		{
+			VK_SUBPASS_EXTERNAL,								// srcSubpass
+			0u,													// dstSubpass
+			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,				// srcStageMask
+			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,		// dstStageMask
+			VK_ACCESS_MEMORY_READ_BIT,							// srcAccessMask
+			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,				// dstAccessMask
+			VK_DEPENDENCY_BY_REGION_BIT							// dependencyFlags
+		},
+		{
+			0u,													// srcSubpass
+			VK_SUBPASS_EXTERNAL,								// dstSubpass
+			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,		// srcStageMask
+			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,				// dstStageMask
+			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,				// srcAccessMask
+			VK_ACCESS_MEMORY_READ_BIT,							// dstAccessMask
+			VK_DEPENDENCY_BY_REGION_BIT							// dependencyFlags
+		},
+	};
+
+	const VkRenderPassCreateInfo renderPassInfo =
+	{
+		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,	// sType
+		DE_NULL,									// pNext
+		0u,											// flags
+		1,											// attachmentCount
+		&colorAttachment,							// pAttachments
+		1,											// subpassCount
+		&subpass,									// pSubpasses
+		2,											// dependencyCount
+		dependencies								// pDependencies
+	};
+
+	return createRenderPass(context.getDeviceInterface(), context.getDevice(), &renderPassInfo);
+}
+
+// Creates a single-layer framebuffer of the given size with one image view
+// attachment, for use with the given render pass.
+Move<VkFramebuffer> makeFramebuffer(Context& context,
+									const VkRenderPass renderPass, const VkImageView imageView, deUint32 width,
+									deUint32 height)
+{
+	const VkFramebufferCreateInfo framebufferInfo =
+	{
+		VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,	// sType
+		DE_NULL,									// pNext
+		0u,											// flags
+		renderPass,									// renderPass
+		1,											// attachmentCount
+		&imageView,									// pAttachments
+		width,										// width
+		height,										// height
+		1											// layers
+	};
+
+	return createFramebuffer(context.getDeviceInterface(), context.getDevice(), &framebufferInfo);
+}
+
+// Wraps vk::makeGraphicsPipeline for the subgroup tests.
+//
+// Viewports and scissors are passed as empty vectors; rasterization,
+// multisample and depth/stencil state are passed as DE_NULL. At most one
+// vertex binding and one vertex attribute are described, each only when the
+// corresponding pointer is non-null. Blending is disabled and the color write
+// mask covers as many channels as attachmentFormat uses.
+Move<VkPipeline> makeGraphicsPipeline(Context&									context,
+									  const VkPipelineLayout					pipelineLayout,
+									  const VkShaderStageFlags					stages,
+									  const VkShaderModule						vertexShaderModule,
+									  const VkShaderModule						fragmentShaderModule,
+									  const VkShaderModule						geometryShaderModule,
+									  const VkShaderModule						tessellationControlModule,
+									  const VkShaderModule						tessellationEvaluationModule,
+									  const VkRenderPass						renderPass,
+									  const VkPrimitiveTopology					topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+									  const VkVertexInputBindingDescription*	vertexInputBindingDescription = DE_NULL,
+									  const VkVertexInputAttributeDescription*	vertexInputAttributeDescriptions = DE_NULL,
+									  const bool								frameBufferTests = false,
+									  const vk::VkFormat						attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT)
+{
+	const std::vector<VkViewport>	emptyViewports;
+	const std::vector<VkRect2D>		emptyScissors;
+
+	const deUint32 bindingCount		= (vertexInputBindingDescription == DE_NULL) ? 0u : 1u;
+	const deUint32 attributeCount	= (vertexInputAttributeDescriptions == DE_NULL) ? 0u : 1u;
+
+	const VkPipelineVertexInputStateCreateInfo vertexInputState =
+	{
+		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// sType
+		DE_NULL,													// pNext
+		0u,															// flags
+		bindingCount,												// vertexBindingDescriptionCount
+		vertexInputBindingDescription,								// pVertexBindingDescriptions
+		attributeCount,												// vertexAttributeDescriptionCount
+		vertexInputAttributeDescriptions,							// pVertexAttributeDescriptions
+	};
+
+	// Enable writes only for the channels the attachment format uses.
+	const deUint32			numChannels	= getNumUsedChannels(mapVkFormat(attachmentFormat).order);
+	VkColorComponentFlags	writeMask	= VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
+
+	switch (numChannels)
+	{
+		case 1u:
+			writeMask = VK_COLOR_COMPONENT_R_BIT;
+			break;
+		case 2u:
+			writeMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT;
+			break;
+		case 3u:
+			writeMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT;
+			break;
+		default:
+			break;
+	}
+
+	const VkPipelineColorBlendAttachmentState blendAttachment =
+	{
+		VK_FALSE,				// blendEnable
+		VK_BLEND_FACTOR_ZERO,	// srcColorBlendFactor
+		VK_BLEND_FACTOR_ZERO,	// dstColorBlendFactor
+		VK_BLEND_OP_ADD,		// colorBlendOp
+		VK_BLEND_FACTOR_ZERO,	// srcAlphaBlendFactor
+		VK_BLEND_FACTOR_ZERO,	// dstAlphaBlendFactor
+		VK_BLEND_OP_ADD,		// alphaBlendOp
+		writeMask				// colorWriteMask
+	};
+
+	const VkPipelineColorBlendStateCreateInfo colorBlendState =
+	{
+		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	// sType
+		DE_NULL,													// pNext
+		0u,															// flags
+		VK_FALSE,													// logicOpEnable
+		VK_LOGIC_OP_CLEAR,											// logicOp
+		1,															// attachmentCount
+		&blendAttachment,											// pAttachments
+		{ 0.0f, 0.0f, 0.0f, 0.0f }									// blendConstants
+	};
+
+	// Two control points per patch only for framebuffer tests whose stage
+	// mask includes the fragment stage; one otherwise.
+	const bool		fragmentFrameBufferTest	= ((VK_SHADER_STAGE_FRAGMENT_BIT & stages) != 0) && frameBufferTests;
+	const deUint32	patchControlPoints		= fragmentFrameBufferTest ? 2u : 1u;
+
+	return vk::makeGraphicsPipeline(context.getDeviceInterface(),	// const DeviceInterface&							vk
+									context.getDevice(),			// const VkDevice									device
+									pipelineLayout,					// const VkPipelineLayout							pipelineLayout
+									vertexShaderModule,				// const VkShaderModule								vertexShaderModule
+									tessellationControlModule,		// const VkShaderModule								tessellationControlShaderModule
+									tessellationEvaluationModule,	// const VkShaderModule								tessellationEvalShaderModule
+									geometryShaderModule,			// const VkShaderModule								geometryShaderModule
+									fragmentShaderModule,			// const VkShaderModule								fragmentShaderModule
+									renderPass,						// const VkRenderPass								renderPass
+									emptyViewports,					// const std::vector<VkViewport>&					viewports
+									emptyScissors,					// const std::vector<VkRect2D>&						scissors
+									topology,						// const VkPrimitiveTopology						topology
+									0u,								// const deUint32									subpass
+									patchControlPoints,				// const deUint32									patchControlPoints
+									&vertexInputState,				// const VkPipelineVertexInputStateCreateInfo*		vertexInputStateCreateInfo
+									DE_NULL,						// const VkPipelineRasterizationStateCreateInfo*	rasterizationStateCreateInfo
+									DE_NULL,						// const VkPipelineMultisampleStateCreateInfo*		multisampleStateCreateInfo
+									DE_NULL,						// const VkPipelineDepthStencilStateCreateInfo*		depthStencilStateCreateInfo
+									&colorBlendState);				// const VkPipelineColorBlendStateCreateInfo*		colorBlendStateCreateInfo
+}
+
+// Creates a compute pipeline for the given shader module (entry point
+// "main"). The three local size values are supplied to the shader as
+// specialization constants with IDs 0, 1 and 2 respectively.
+Move<VkPipeline> makeComputePipeline(Context& context,
+									 const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
+									 deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ)
+{
+	const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
+
+	// Entry N maps constant ID N onto the N-th deUint32 of localSize.
+	vk::VkSpecializationMapEntry entries[3];
+
+	for (deUint32 dim = 0u; dim < 3u; ++dim)
+	{
+		entries[dim].constantID	= dim;
+		entries[dim].offset		= static_cast<deUint32>(sizeof(deUint32) * dim);
+		entries[dim].size		= sizeof(deUint32);
+	}
+
+	const vk::VkSpecializationInfo specializationInfo =
+	{
+		3,					// mapEntryCount
+		entries,			// pMapEntries
+		sizeof(localSize),	// dataSize
+		localSize			// pData
+	};
+
+	const vk::VkPipelineShaderStageCreateInfo shaderStageInfo =
+	{
+		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// sType
+		DE_NULL,												// pNext
+		0u,														// flags
+		VK_SHADER_STAGE_COMPUTE_BIT,							// stage
+		shaderModule,											// module
+		"main",													// pName
+		&specializationInfo,									// pSpecializationInfo
+	};
+
+	const vk::VkComputePipelineCreateInfo pipelineInfo =
+	{
+		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// sType
+		DE_NULL,										// pNext
+		0u,												// flags
+		shaderStageInfo,								// stage
+		pipelineLayout,									// layout
+		DE_NULL,										// basePipelineHandle
+		0,												// basePipelineIndex
+	};
+
+	return createComputePipeline(context.getDeviceInterface(), context.getDevice(), DE_NULL, &pipelineInfo);
+}
+
+// Allocates one descriptor set with the given layout from the given pool.
+Move<VkDescriptorSet> makeDescriptorSet(Context& context,
+										const VkDescriptorPool descriptorPool,
+										const VkDescriptorSetLayout setLayout)
+{
+	VkDescriptorSetAllocateInfo allocateInfo;
+
+	allocateInfo.sType				= VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+	allocateInfo.pNext				= DE_NULL;
+	allocateInfo.descriptorPool		= descriptorPool;
+	allocateInfo.descriptorSetCount	= 1u;
+	allocateInfo.pSetLayouts		= &setLayout;
+
+	return allocateDescriptorSet(context.getDeviceInterface(), context.getDevice(), &allocateInfo);
+}
+
+// Creates a command pool on the universal queue family; the pool is created
+// with VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT.
+Move<VkCommandPool> makeCommandPool(Context& context)
+{
+	VkCommandPoolCreateInfo poolInfo;
+
+	poolInfo.sType				= VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+	poolInfo.pNext				= DE_NULL;
+	poolInfo.flags				= VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
+	poolInfo.queueFamilyIndex	= context.getUniversalQueueFamilyIndex();
+
+	return createCommandPool(context.getDeviceInterface(), context.getDevice(), &poolInfo);
+}
+
+// Allocates a single primary command buffer from the given pool.
+Move<VkCommandBuffer> makeCommandBuffer(
+	Context& context, const VkCommandPool commandPool)
+{
+	VkCommandBufferAllocateInfo allocateInfo;
+
+	allocateInfo.sType				= VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
+	allocateInfo.pNext				= DE_NULL;
+	allocateInfo.commandPool		= commandPool;
+	allocateInfo.level				= VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+	allocateInfo.commandBufferCount	= 1u;
+
+	return allocateCommandBuffer(context.getDeviceInterface(), context.getDevice(), &allocateInfo);
+}
+
+// Submits the command buffer to the universal queue, with no wait or signal
+// semaphores, and returns a newly created fence that the submission signals.
+Move<VkFence> submitCommandBuffer(
+	Context& context, const VkCommandBuffer commandBuffer)
+{
+	VkFenceCreateInfo fenceInfo;
+
+	fenceInfo.sType	= VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+	fenceInfo.pNext	= DE_NULL;
+	fenceInfo.flags	= 0u;
+
+	Move<VkFence> fence(createFence(
+							context.getDeviceInterface(), context.getDevice(), &fenceInfo));
+
+	VkSubmitInfo submitInfo;
+
+	submitInfo.sType				= VK_STRUCTURE_TYPE_SUBMIT_INFO;
+	submitInfo.pNext				= DE_NULL;
+	submitInfo.waitSemaphoreCount	= 0u;
+	submitInfo.pWaitSemaphores		= DE_NULL;
+	submitInfo.pWaitDstStageMask	= (const VkPipelineStageFlags*)DE_NULL;
+	submitInfo.commandBufferCount	= 1u;
+	submitInfo.pCommandBuffers		= &commandBuffer;
+	submitInfo.signalSemaphoreCount	= 0u;
+	submitInfo.pSignalSemaphores	= DE_NULL;
+
+	VK_CHECK(context.getDeviceInterface().queueSubmit(
+				 context.getUniversalQueue(), 1u, &submitInfo, *fence));
+
+	return Move<VkFence>(fence);
+}
+
+// Waits for the given fence with the maximum timeout value (~0ull). The fence
+// is taken by value.
+void waitFence(Context& context, Move<VkFence> fence)
+{
+	const deUint64 noTimeout = ~0ull;
+
+	VK_CHECK(context.getDeviceInterface().waitForFences(
+				 context.getDevice(), 1u, &fence.get(), DE_TRUE, noTimeout));
+}
+
+struct Buffer;
+struct Image;
+
+// Common base of Buffer and Image. It records which of the two the object is
+// and holds the memory allocation backing it. It can only be constructed by
+// derived types.
+struct BufferOrImage
+{
+	// True when this object was constructed as an Image.
+	bool isImage() const
+	{
+		return m_isImage;
+	}
+
+	// Returns this object viewed as a Buffer. Asking an Image for its buffer
+	// view is a programming error and is reported through DE_FATAL.
+	Buffer* getAsBuffer()
+	{
+		if (m_isImage) DE_FATAL("getAsBuffer() called on an image!");
+		return reinterpret_cast<Buffer* >(this);
+	}
+
+	// Returns this object viewed as an Image. Asking a Buffer for its image
+	// view is a programming error and is reported through DE_FATAL.
+	Image* getAsImage()
+	{
+		if (!m_isImage) DE_FATAL("getAsImage() called on a buffer!");
+		return reinterpret_cast<Image*>(this);
+	}
+
+	// Descriptor type matching the resource kind: storage image for images,
+	// storage buffer otherwise. Derived types may override this.
+	virtual VkDescriptorType getType() const
+	{
+		if (m_isImage)
+		{
+			return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+		}
+		else
+		{
+			return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+		}
+	}
+
+	// The memory allocation held in m_allocation.
+	Allocation& getAllocation() const
+	{
+		return *m_allocation;
+	}
+
+	virtual ~BufferOrImage() {}
+
+protected:
+	explicit BufferOrImage(bool image) : m_isImage(image) {}
+
+	bool								m_isImage;
+	de::details::MovePtr<Allocation>	m_allocation;
+};
+
+// A Vulkan buffer with host-visible memory bound to it. The usage flags given
+// at construction decide which descriptor type getType() reports.
+struct Buffer : public BufferOrImage
+{
+	// Creates an exclusive-sharing buffer of sizeInBytes with the given usage,
+	// then allocates and binds host-visible memory for it.
+	explicit Buffer(
+		Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
+		: BufferOrImage		(false)
+		, m_sizeInBytes		(sizeInBytes)
+		, m_usage			(usage)
+	{
+		const DeviceInterface&	vkd		= context.getDeviceInterface();
+		const VkDevice			device	= context.getDevice();
+
+		const vk::VkBufferCreateInfo createInfo =
+		{
+			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// sType
+			DE_NULL,								// pNext
+			0u,										// flags
+			sizeInBytes,							// size
+			m_usage,								// usage
+			VK_SHARING_MODE_EXCLUSIVE,				// sharingMode
+			0u,										// queueFamilyIndexCount
+			DE_NULL,								// pQueueFamilyIndices
+		};
+
+		m_buffer = createBuffer(vkd, device, &createInfo);
+
+		// Twice the reported size is requested from the allocator.
+		// NOTE(review): the reason for the doubling is not evident from this code.
+		vk::VkMemoryRequirements requirements = getBufferMemoryRequirements(vkd, device, *m_buffer);
+		requirements.size *= 2;
+
+		m_allocation = context.getDefaultAllocator().allocate(requirements, MemoryRequirement::HostVisible);
+
+		VK_CHECK(vkd.bindBufferMemory(device, *m_buffer, m_allocation->getMemory(), m_allocation->getOffset()));
+	}
+
+	// Uniform buffer descriptor only when the usage is exactly
+	// VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; storage buffer otherwise.
+	virtual VkDescriptorType getType() const
+	{
+		return (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
+			   ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER
+			   : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+	}
+
+	VkBuffer getBuffer() const
+	{
+		return *m_buffer;
+	}
+
+	const VkBuffer* getBufferPtr() const
+	{
+		return &(*m_buffer);
+	}
+
+	// Size passed to the constructor, not the size of the allocation.
+	VkDeviceSize getSize() const
+	{
+		return m_sizeInBytes;
+	}
+
+private:
+	Move<VkBuffer>				m_buffer;
+	VkDeviceSize				m_sizeInBytes;
+	const VkBufferUsageFlags	m_usage;
+};
+
+struct Image : public BufferOrImage
+{
+ // Builds a 2D, single-mip, single-layer, optimally tiled image of the given
+ // size and format, binds newly allocated memory to it, and then creates an
+ // identity-swizzled colour view and a nearest-filtering, clamp-to-edge
+ // sampler for it.
+ //
+ // context - source of the device interface, device and default allocator
+ // width   - image width in texels
+ // height  - image height in texels
+ // format  - texel format used for both the image and its view
+ // usage   - image usage flags (VK_IMAGE_USAGE_STORAGE_BIT when omitted)
+ explicit Image(Context& context, deUint32 width, deUint32 height,
+                VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
+     : BufferOrImage(true)
+ {
+     const DeviceInterface& vkd    = context.getDeviceInterface();
+     const VkDevice         device = context.getDevice();
+
+     const VkImageCreateInfo imageInfo =
+     {
+         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // sType
+         DE_NULL,                             // pNext
+         0,                                   // flags
+         VK_IMAGE_TYPE_2D,                    // imageType
+         format,                              // format
+         {width, height, 1},                  // extent
+         1,                                   // mipLevels
+         1,                                   // arrayLayers
+         VK_SAMPLE_COUNT_1_BIT,               // samples
+         VK_IMAGE_TILING_OPTIMAL,             // tiling
+         usage,                               // usage
+         VK_SHARING_MODE_EXCLUSIVE,           // sharingMode
+         0u,                                  // queueFamilyIndexCount
+         DE_NULL,                             // pQueueFamilyIndices
+         VK_IMAGE_LAYOUT_UNDEFINED            // initialLayout
+     };
+     m_image = createImage(vkd, device, &imageInfo);
+
+     // The reported size is doubled before allocating, so the backing
+     // allocation is twice as large as the implementation requested.
+     vk::VkMemoryRequirements memReqs = getImageMemoryRequirements(vkd, device, *m_image);
+     memReqs.size *= 2;
+     m_allocation = context.getDefaultAllocator().allocate(memReqs, MemoryRequirement::Any);
+     VK_CHECK(vkd.bindImageMemory(device, *m_image, m_allocation->getMemory(), m_allocation->getOffset()));
+
+     const VkComponentMapping identitySwizzle =
+     {
+         VK_COMPONENT_SWIZZLE_IDENTITY, // r
+         VK_COMPONENT_SWIZZLE_IDENTITY, // g
+         VK_COMPONENT_SWIZZLE_IDENTITY, // b
+         VK_COMPONENT_SWIZZLE_IDENTITY  // a
+     };
+
+     const VkImageViewCreateInfo viewInfo =
+     {
+         VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // sType
+         DE_NULL,                                  // pNext
+         0,                                        // flags
+         *m_image,                                 // image
+         VK_IMAGE_VIEW_TYPE_2D,                    // viewType
+         format,                                   // format (same as the image)
+         identitySwizzle,                          // components
+         {
+             VK_IMAGE_ASPECT_COLOR_BIT,            // aspectMask
+             0,                                    // baseMipLevel
+             1,                                    // levelCount
+             0,                                    // baseArrayLayer
+             1,                                    // layerCount
+         }
+     };
+     m_imageView = createImageView(vkd, device, &viewInfo);
+
+     const struct VkSamplerCreateInfo samplerInfo =
+     {
+         VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,   // sType
+         DE_NULL,                                 // pNext
+         0u,                                      // flags
+         VK_FILTER_NEAREST,                       // magFilter
+         VK_FILTER_NEAREST,                       // minFilter
+         VK_SAMPLER_MIPMAP_MODE_NEAREST,          // mipmapMode
+         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,   // addressModeU
+         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,   // addressModeV
+         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,   // addressModeW
+         0.0f,                                    // mipLodBias
+         VK_FALSE,                                // anisotropyEnable
+         1.0f,                                    // maxAnisotropy
+         DE_FALSE,                                // compareEnable
+         VK_COMPARE_OP_ALWAYS,                    // compareOp
+         0.0f,                                    // minLod
+         0.0f,                                    // maxLod
+         VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, // borderColor
+         VK_FALSE,                                // unnormalizedCoordinates
+     };
+     m_sampler = createSampler(vkd, device, &samplerInfo);
+ }
+
+ // Returns the image handle currently held by m_image.
+ VkImage getImage() const
+ {
+     return *m_image;
+ }
+
+ // Returns the image-view handle currently held by m_imageView.
+ VkImageView getImageView() const
+ {
+     return *m_imageView;
+ }
+
+ // Returns the sampler handle currently held by m_sampler.
+ VkSampler getSampler() const
+ {
+     return *m_sampler;
+ }
+
+private:
+ Move<VkImage> m_image;
+ Move<VkImageView> m_imageView;
+ Move<VkSampler> m_sampler;
+};
+}
+
+// Returns GLSL source text that declares a shared uvec4 array sized by the
+// workgroup dimensions and a sharedMemoryBallot(bool) function which builds a
+// ballot mask in that array using subgroupElect() and atomicOr().
+std::string vkt::subgroups::getSharedMemoryBallotHelper()
+{
+    // Shared storage declaration used by the helper function below.
+    const std::string sharedStorageDecl =
+        "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n";
+
+    // The ballot emulation itself: clear the slot, OR in one bit per voter,
+    // then read the slot back.
+    const std::string ballotFunction =
+        "uvec4 sharedMemoryBallot(bool vote)\n"
+        "{\n"
+        " uint groupOffset = gl_SubgroupID;\n"
+        " // One invocation in the group 0's the whole group's data\n"
+        " if (subgroupElect())\n"
+        " {\n"
+        " superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
+        " }\n"
+        " subgroupMemoryBarrierShared();\n"
+        " if (vote)\n"
+        " {\n"
+        " const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
+        " const highp uint bitToSet = 1u << invocationId;\n"
+        " switch (gl_SubgroupInvocationID / 32)\n"
+        " {\n"
+        " case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
+        " case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
+        " case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
+        " case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
+        " }\n"
+        " }\n"
+        " subgroupMemoryBarrierShared();\n"
+        " return superSecretComputeShaderHelper[groupOffset];\n"
+        "}\n";
+
+    return sharedStorageDecl + ballotFunction;
+}
+
+// Queries the physical device's subgroup properties and returns the reported
+// subgroupSize.
+deUint32 vkt::subgroups::getSubgroupSize(Context& context)
+{
+    const VkPhysicalDevice physicalDevice = context.getPhysicalDevice();
+
+    // Extension struct that receives the subgroup-specific properties.
+    VkPhysicalDeviceSubgroupProperties subgroupProps;
+    subgroupProps.pNext = DE_NULL;
+    subgroupProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+
+    // Core properties struct; the subgroup struct is chained through pNext.
+    VkPhysicalDeviceProperties2 deviceProps;
+    deviceProps.pNext = &subgroupProps;
+    deviceProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+
+    context.getInstanceInterface().getPhysicalDeviceProperties2(physicalDevice, &deviceProps);
+
+    return subgroupProps.subgroupSize;
+}
+
+// Returns the fixed upper bound on subgroup size used by these utilities.
+VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize()
+{
+    const VkDeviceSize upperBound = 128u;
+    return upperBound;
+}
+
+// Maps a single shader-stage value to its short name. Any other value
+// (including combinations of stage bits) triggers DE_FATAL and yields an
+// empty string.
+std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
+{
+    const char* stageName = "";
+
+    switch (stage)
+    {
+        case VK_SHADER_STAGE_VERTEX_BIT:                  stageName = "vertex";       break;
+        case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:    stageName = "tess_control"; break;
+        case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: stageName = "tess_eval";    break;
+        case VK_SHADER_STAGE_GEOMETRY_BIT:                stageName = "geometry";     break;
+        case VK_SHADER_STAGE_FRAGMENT_BIT:                stageName = "fragment";     break;
+        case VK_SHADER_STAGE_COMPUTE_BIT:                 stageName = "compute";      break;
+        default:
+            DE_FATAL("Unhandled stage!");
+            break;
+    }
+
+    return stageName;
+}
+
+// Returns the enumerator name of a subgroup feature bit as a string. An
+// unrecognised bit triggers DE_FATAL and yields an empty string.
+std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
+{
+    const char* featureName = "";
+
+    switch (bit)
+    {
+        case VK_SUBGROUP_FEATURE_BASIC_BIT:
+            featureName = "VK_SUBGROUP_FEATURE_BASIC_BIT";
+            break;
+        case VK_SUBGROUP_FEATURE_VOTE_BIT:
+            featureName = "VK_SUBGROUP_FEATURE_VOTE_BIT";
+            break;
+        case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
+            featureName = "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
+            break;
+        case VK_SUBGROUP_FEATURE_BALLOT_BIT:
+            featureName = "VK_SUBGROUP_FEATURE_BALLOT_BIT";
+            break;
+        case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
+            featureName = "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
+            break;
+        case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
+            featureName = "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
+            break;
+        case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
+            featureName = "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
+            break;
+        case VK_SUBGROUP_FEATURE_QUAD_BIT:
+            featureName = "VK_SUBGROUP_FEATURE_QUAD_BIT";
+            break;
+        default:
+            DE_FATAL("Unknown subgroup feature category!");
+            break;
+    }
+
+    return featureName;
+}
+
+// Registers three SPIR-V assembly programs in programCollection.spirvAsmSources
+// under the names "vert_noSubgroup", "tesc_noSubgroup" and "tese_noSubgroup".
+// Each assembly listing is preceded by a block comment holding GLSL; presumably
+// that GLSL is the source the assembly was generated from - confirm before
+// editing either side.
+void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
+{
+ // Vertex stage: writes gl_Position from the vertex index and sets gl_PointSize.
+ {
+ /*
+ "#version 450\n"
+ "void main (void)\n"
+ "{\n"
+ " float pixelSize = 2.0f/1024.0f;\n"
+ " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+ " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+ " gl_PointSize = 1.0f;\n"
+ "}\n"
+ */
+ // NOTE(review): the "; Bound: 37" header line below is lower than the ids
+ // actually used (up to %39). It appears to be an assembler comment only -
+ // confirm it is ignored by the assembler.
+ const std::string vertNoSubgroup =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 1\n"
+ "; Bound: 37\n"
+ "; Schema: 0\n"
+ "OpCapability Shader\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
+ "OpMemberDecorate %20 0 BuiltIn Position\n"
+ "OpMemberDecorate %20 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
+ "OpDecorate %20 Block\n"
+ "OpDecorate %26 BuiltIn VertexIndex\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeFloat 32\n"
+ "%7 = OpTypePointer Function %6\n"
+ "%9 = OpConstant %6 0.00195313\n"
+ "%12 = OpConstant %6 2\n"
+ "%14 = OpConstant %6 1\n"
+ "%16 = OpTypeVector %6 4\n"
+ "%17 = OpTypeInt 32 0\n"
+ "%18 = OpConstant %17 1\n"
+ "%19 = OpTypeArray %6 %18\n"
+ "%20 = OpTypeStruct %16 %6 %19 %19\n"
+ "%21 = OpTypePointer Output %20\n"
+ "%22 = OpVariable %21 Output\n"
+ "%23 = OpTypeInt 32 1\n"
+ "%24 = OpConstant %23 0\n"
+ "%25 = OpTypePointer Input %23\n"
+ "%26 = OpVariable %25 Input\n"
+ "%33 = OpConstant %6 0\n"
+ "%35 = OpTypePointer Output %16\n"
+ "%37 = OpConstant %23 1\n"
+ "%38 = OpTypePointer Output %6\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%8 = OpVariable %7 Function\n"
+ "%10 = OpVariable %7 Function\n"
+ "OpStore %8 %9\n"
+ "%11 = OpLoad %6 %8\n"
+ "%13 = OpFDiv %6 %11 %12\n"
+ "%15 = OpFSub %6 %13 %14\n"
+ "OpStore %10 %15\n"
+ "%27 = OpLoad %23 %26\n"
+ "%28 = OpConvertSToF %6 %27\n"
+ "%29 = OpLoad %6 %8\n"
+ "%30 = OpFMul %6 %28 %29\n"
+ "%31 = OpLoad %6 %10\n"
+ "%32 = OpFAdd %6 %30 %31\n"
+ "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
+ "%36 = OpAccessChain %35 %22 %24\n"
+ "OpStore %36 %34\n"
+ "%39 = OpAccessChain %38 %22 %37\n"
+ "OpStore %39 %14\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
+ }
+
+ // Tessellation control stage: one output vertex; invocation 0 sets the first
+ // two outer tessellation levels to 1 and every invocation forwards gl_Position.
+ {
+ /*
+ "#version 450\n"
+ "layout(vertices=1) out;\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " if (gl_InvocationID == 0)\n"
+ " {\n"
+ " gl_TessLevelOuter[0] = 1.0f;\n"
+ " gl_TessLevelOuter[1] = 1.0f;\n"
+ " }\n"
+ " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ "}\n"
+ */
+ const std::string tescNoSubgroup =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 1\n"
+ "; Bound: 45\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
+ "OpExecutionMode %4 OutputVertices 1\n"
+ "OpDecorate %8 BuiltIn InvocationId\n"
+ "OpDecorate %20 Patch\n"
+ "OpDecorate %20 BuiltIn TessLevelOuter\n"
+ "OpMemberDecorate %29 0 BuiltIn Position\n"
+ "OpMemberDecorate %29 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
+ "OpDecorate %29 Block\n"
+ "OpMemberDecorate %34 0 BuiltIn Position\n"
+ "OpMemberDecorate %34 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
+ "OpDecorate %34 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 1\n"
+ "%7 = OpTypePointer Input %6\n"
+ "%8 = OpVariable %7 Input\n"
+ "%10 = OpConstant %6 0\n"
+ "%11 = OpTypeBool\n"
+ "%15 = OpTypeFloat 32\n"
+ "%16 = OpTypeInt 32 0\n"
+ "%17 = OpConstant %16 4\n"
+ "%18 = OpTypeArray %15 %17\n"
+ "%19 = OpTypePointer Output %18\n"
+ "%20 = OpVariable %19 Output\n"
+ "%21 = OpConstant %15 1\n"
+ "%22 = OpTypePointer Output %15\n"
+ "%24 = OpConstant %6 1\n"
+ "%26 = OpTypeVector %15 4\n"
+ "%27 = OpConstant %16 1\n"
+ "%28 = OpTypeArray %15 %27\n"
+ "%29 = OpTypeStruct %26 %15 %28 %28\n"
+ "%30 = OpTypeArray %29 %27\n"
+ "%31 = OpTypePointer Output %30\n"
+ "%32 = OpVariable %31 Output\n"
+ "%34 = OpTypeStruct %26 %15 %28 %28\n"
+ "%35 = OpConstant %16 32\n"
+ "%36 = OpTypeArray %34 %35\n"
+ "%37 = OpTypePointer Input %36\n"
+ "%38 = OpVariable %37 Input\n"
+ "%40 = OpTypePointer Input %26\n"
+ "%43 = OpTypePointer Output %26\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%9 = OpLoad %6 %8\n"
+ "%12 = OpIEqual %11 %9 %10\n"
+ "OpSelectionMerge %14 None\n"
+ "OpBranchConditional %12 %13 %14\n"
+ "%13 = OpLabel\n"
+ "%23 = OpAccessChain %22 %20 %10\n"
+ "OpStore %23 %21\n"
+ "%25 = OpAccessChain %22 %20 %24\n"
+ "OpStore %25 %21\n"
+ "OpBranch %14\n"
+ "%14 = OpLabel\n"
+ "%33 = OpLoad %6 %8\n"
+ "%39 = OpLoad %6 %8\n"
+ "%41 = OpAccessChain %40 %38 %39 %10\n"
+ "%42 = OpLoad %26 %41\n"
+ "%44 = OpAccessChain %43 %32 %33 %10\n"
+ "OpStore %44 %42\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
+ }
+
+ // Tessellation evaluation stage: isolines; offsets the first input position
+ // by a fraction of a pixel derived from gl_TessCoord.x.
+ {
+ /*
+ "#version 450\n"
+ "layout(isolines) in;\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " float pixelSize = 2.0f/1024.0f;\n"
+ " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+ "}\n";
+ */
+ const std::string teseNoSubgroup =
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 42\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
+ "OpExecutionMode %4 Isolines\n"
+ "OpExecutionMode %4 SpacingEqual\n"
+ "OpExecutionMode %4 VertexOrderCcw\n"
+ "OpMemberDecorate %14 0 BuiltIn Position\n"
+ "OpMemberDecorate %14 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
+ "OpDecorate %14 Block\n"
+ "OpMemberDecorate %19 0 BuiltIn Position\n"
+ "OpMemberDecorate %19 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
+ "OpDecorate %19 Block\n"
+ "OpDecorate %29 BuiltIn TessCoord\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeFloat 32\n"
+ "%7 = OpTypePointer Function %6\n"
+ "%9 = OpConstant %6 0.00195313\n"
+ "%10 = OpTypeVector %6 4\n"
+ "%11 = OpTypeInt 32 0\n"
+ "%12 = OpConstant %11 1\n"
+ "%13 = OpTypeArray %6 %12\n"
+ "%14 = OpTypeStruct %10 %6 %13 %13\n"
+ "%15 = OpTypePointer Output %14\n"
+ "%16 = OpVariable %15 Output\n"
+ "%17 = OpTypeInt 32 1\n"
+ "%18 = OpConstant %17 0\n"
+ "%19 = OpTypeStruct %10 %6 %13 %13\n"
+ "%20 = OpConstant %11 32\n"
+ "%21 = OpTypeArray %19 %20\n"
+ "%22 = OpTypePointer Input %21\n"
+ "%23 = OpVariable %22 Input\n"
+ "%24 = OpTypePointer Input %10\n"
+ "%27 = OpTypeVector %6 3\n"
+ "%28 = OpTypePointer Input %27\n"
+ "%29 = OpVariable %28 Input\n"
+ "%30 = OpConstant %11 0\n"
+ "%31 = OpTypePointer Input %6\n"
+ "%36 = OpConstant %6 2\n"
+ "%40 = OpTypePointer Output %10\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%8 = OpVariable %7 Function\n"
+ "OpStore %8 %9\n"
+ "%25 = OpAccessChain %24 %23 %18 %18\n"
+ "%26 = OpLoad %10 %25\n"
+ "%32 = OpAccessChain %31 %29 %30\n"
+ "%33 = OpLoad %6 %32\n"
+ "%34 = OpLoad %6 %8\n"
+ "%35 = OpFMul %6 %33 %34\n"
+ "%37 = OpFDiv %6 %35 %36\n"
+ "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
+ "%39 = OpFAdd %10 %26 %38\n"
+ "%41 = OpAccessChain %40 %16 %18\n"
+ "OpStore %41 %39\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+ programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
+ }
+
+}
+
+
+// Returns the GLSL vertex shader used alongside a test of the given stage:
+// a positioning shader for fragment tests and an empty main() for geometry
+// and tessellation tests. Any other value triggers DE_FATAL and yields an
+// empty string.
+std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
+{
+    // Pass-through vertex shader with an empty body, shared by the geometry
+    // and both tessellation stages.
+    static const char* const emptyMainSource =
+        "#version 450\n"
+        "void main (void)\n"
+        "{\n"
+        "}\n";
+
+    // Vertex shader that places one vertex per pixel along a 1024-wide row.
+    static const char* const pixelRowSource =
+        "#version 450\n"
+        "void main (void)\n"
+        "{\n"
+        " float pixelSize = 2.0f/1024.0f;\n"
+        " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+        " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+        "}\n";
+
+    switch (stage)
+    {
+        case VK_SHADER_STAGE_FRAGMENT_BIT:
+            return pixelRowSource;
+
+        case VK_SHADER_STAGE_GEOMETRY_BIT:
+        case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+        case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+            return emptyMainSource;
+
+        default:
+            DE_FATAL("Unhandled stage!");
+            return "";
+    }
+}
+
+// Reports whether the context supports API version 1.1.0.
+bool vkt::subgroups::isSubgroupSupported(Context& context)
+{
+    const bool supportsVersion11 = context.contextSupports(vk::ApiVersion(1, 1, 0));
+    return supportsVersion11;
+}
+
+// Returns true when at least one bit of `stage` is present in the device's
+// reported subgroup supportedStages mask.
+bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
+    Context& context, const VkShaderStageFlags stage)
+{
+    const VkPhysicalDevice physicalDevice = context.getPhysicalDevice();
+
+    // Extension struct that receives the subgroup-specific properties.
+    VkPhysicalDeviceSubgroupProperties subgroupProps;
+    subgroupProps.pNext = DE_NULL;
+    subgroupProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+
+    // Core properties struct; the subgroup struct is chained through pNext.
+    VkPhysicalDeviceProperties2 deviceProps;
+    deviceProps.pNext = &subgroupProps;
+    deviceProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+
+    context.getInstanceInterface().getPhysicalDeviceProperties2(physicalDevice, &deviceProps);
+
+    return (stage & subgroupProps.supportedStages) != 0u;
+}
+
+// Returns true only when `stage` is exactly the compute stage; every other
+// value yields false.
+bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
+    VkShaderStageFlags stage)
+{
+    const VkShaderStageFlags computeOnly = static_cast<VkShaderStageFlags>(VK_SHADER_STAGE_COMPUTE_BIT);
+    return stage == computeOnly;
+}
+
+// Returns true when `bit` is present in the device's reported subgroup
+// supportedOperations mask.
+bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
+    Context& context,
+    VkSubgroupFeatureFlagBits bit)
+{
+    const VkPhysicalDevice physicalDevice = context.getPhysicalDevice();
+
+    // Extension struct that receives the subgroup-specific properties.
+    VkPhysicalDeviceSubgroupProperties subgroupProps;
+    subgroupProps.pNext = DE_NULL;
+    subgroupProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+
+    // Core properties struct; the subgroup struct is chained through pNext.
+    VkPhysicalDeviceProperties2 deviceProps;
+    deviceProps.pNext = &subgroupProps;
+    deviceProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+
+    context.getInstanceInterface().getPhysicalDeviceProperties2(physicalDevice, &deviceProps);
+
+    return (bit & subgroupProps.supportedOperations) != 0u;
+}
+
+// Reports whether the physical device enables fragmentStoresAndAtomics.
+bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
+{
+    const VkPhysicalDeviceFeatures deviceFeatures =
+        getPhysicalDeviceFeatures(context.getInstanceInterface(), context.getPhysicalDevice());
+
+    return deviceFeatures.fragmentStoresAndAtomics != 0u;
+}
+
+// Reports whether the physical device enables vertexPipelineStoresAndAtomics.
+bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
+{
+    const VkPhysicalDeviceFeatures deviceFeatures =
+        getPhysicalDeviceFeatures(context.getInstanceInterface(), context.getPhysicalDevice());
+
+    return deviceFeatures.vertexPipelineStoresAndAtomics != 0u;
+}
+
+// Reports whether the physical device enables shaderFloat64.
+bool vkt::subgroups::isDoubleSupportedForDevice(Context& context)
+{
+    const VkPhysicalDeviceFeatures deviceFeatures =
+        getPhysicalDeviceFeatures(context.getInstanceInterface(), context.getPhysicalDevice());
+
+    return deviceFeatures.shaderFloat64 != 0u;
+}
+
+// Returns true for the one- to four-component 64-bit float formats and false
+// for every other format.
+bool vkt::subgroups::isDoubleFormat(VkFormat format)
+{
+    const bool is64BitFloat =
+        (format == VK_FORMAT_R64_SFLOAT)       ||
+        (format == VK_FORMAT_R64G64_SFLOAT)    ||
+        (format == VK_FORMAT_R64G64B64_SFLOAT) ||
+        (format == VK_FORMAT_R64G64B64A64_SFLOAT);
+
+    return is64BitFloat;
+}
+
+// Maps a format to the GLSL type name used for it in generated shaders.
+// The 8-bit USCALED formats map to the bool/bvec types. An unhandled format
+// triggers DE_FATAL and yields an empty string.
+std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
+{
+    const char* glslType = "";
+
+    switch (format)
+    {
+        // Signed 32-bit integers
+        case VK_FORMAT_R32_SINT:            glslType = "int";    break;
+        case VK_FORMAT_R32G32_SINT:         glslType = "ivec2";  break;
+        case VK_FORMAT_R32G32B32_SINT:      glslType = "ivec3";  break;
+        case VK_FORMAT_R32G32B32A32_SINT:   glslType = "ivec4";  break;
+
+        // Unsigned 32-bit integers
+        case VK_FORMAT_R32_UINT:            glslType = "uint";   break;
+        case VK_FORMAT_R32G32_UINT:         glslType = "uvec2";  break;
+        case VK_FORMAT_R32G32B32_UINT:      glslType = "uvec3";  break;
+        case VK_FORMAT_R32G32B32A32_UINT:   glslType = "uvec4";  break;
+
+        // 32-bit floats
+        case VK_FORMAT_R32_SFLOAT:          glslType = "float";  break;
+        case VK_FORMAT_R32G32_SFLOAT:       glslType = "vec2";   break;
+        case VK_FORMAT_R32G32B32_SFLOAT:    glslType = "vec3";   break;
+        case VK_FORMAT_R32G32B32A32_SFLOAT: glslType = "vec4";   break;
+
+        // 64-bit floats
+        case VK_FORMAT_R64_SFLOAT:          glslType = "double"; break;
+        case VK_FORMAT_R64G64_SFLOAT:       glslType = "dvec2";  break;
+        case VK_FORMAT_R64G64B64_SFLOAT:    glslType = "dvec3";  break;
+        case VK_FORMAT_R64G64B64A64_SFLOAT: glslType = "dvec4";  break;
+
+        // Booleans
+        case VK_FORMAT_R8_USCALED:          glslType = "bool";   break;
+        case VK_FORMAT_R8G8_USCALED:        glslType = "bvec2";  break;
+        case VK_FORMAT_R8G8B8_USCALED:      glslType = "bvec3";  break;
+        case VK_FORMAT_R8G8B8A8_USCALED:    glslType = "bvec4";  break;
+
+        default:
+            DE_FATAL("Unhandled format!");
+            break;
+    }
+
+    return glslType;
+}
+
+// Registers a SPIR-V assembly vertex shader under the name "vert" in
+// programCollection.spirvAsmSources. The shader copies the vec4 input at
+// location 0 to the Position built-in. The block comment holds GLSL that the
+// assembly presumably was generated from - confirm before editing.
+void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
+{
+ /*
+ "layout(location = 0) in highp vec4 in_position;\n"
+ "void main (void)\n"
+ "{\n"
+ " gl_Position = in_position;\n"
+ "}\n";
+ */
+ programCollection.spirvAsmSources.add("vert") <<
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 21\n"
+ "; Schema: 0\n"
+ "OpCapability Shader\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
+ "OpMemberDecorate %11 0 BuiltIn Position\n"
+ "OpMemberDecorate %11 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
+ "OpDecorate %11 Block\n"
+ "OpDecorate %17 Location 0\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeFloat 32\n"
+ "%7 = OpTypeVector %6 4\n"
+ "%8 = OpTypeInt 32 0\n"
+ "%9 = OpConstant %8 1\n"
+ "%10 = OpTypeArray %6 %9\n"
+ "%11 = OpTypeStruct %7 %6 %10 %10\n"
+ "%12 = OpTypePointer Output %11\n"
+ "%13 = OpVariable %12 Output\n"
+ "%14 = OpTypeInt 32 1\n"
+ "%15 = OpConstant %14 0\n"
+ "%16 = OpTypePointer Input %7\n"
+ "%17 = OpVariable %16 Input\n"
+ "%19 = OpTypePointer Output %7\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%18 = OpLoad %7 %17\n"
+ "%20 = OpAccessChain %19 %13 %15\n"
+ "OpStore %20 %18\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+}
+
+// Registers a SPIR-V assembly fragment shader under the name "fragment" in
+// programCollection.spirvAsmSources. The shader converts the float input at
+// location 0 to an unsigned integer and writes it to the output at location 0.
+// The block comment holds GLSL that the assembly presumably was generated
+// from - confirm before editing.
+void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
+{
+ /*
+ "layout(location = 0) in float in_color;\n"
+ "layout(location = 0) out uint out_color;\n"
+ "void main()\n"
+ "{\n"
+ " out_color = uint(in_color);\n"
+ "}\n";
+ */
+ programCollection.spirvAsmSources.add("fragment") <<
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 14\n"
+ "; Schema: 0\n"
+ "OpCapability Shader\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
+ "OpExecutionMode %4 OriginUpperLeft\n"
+ "OpDecorate %8 Location 0\n"
+ "OpDecorate %11 Location 0\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 0\n"
+ "%7 = OpTypePointer Output %6\n"
+ "%8 = OpVariable %7 Output\n"
+ "%9 = OpTypeFloat 32\n"
+ "%10 = OpTypePointer Input %9\n"
+ "%11 = OpVariable %10 Input\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%12 = OpLoad %9 %11\n"
+ "%13 = OpConvertFToU %6 %12\n"
+ "OpStore %8 %13\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+}
+
+// Registers a SPIR-V assembly tessellation control shader under the name
+// "tesc" in programCollection.spirvAsmSources. The shader declares two output
+// vertices; invocation 0 sets the first two outer tessellation levels to 1
+// and every invocation forwards its input position. The block comment holds
+// GLSL that the assembly presumably was generated from - confirm before editing.
+void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
+{
+ /*
+ "#extension GL_KHR_shader_subgroup_basic: enable\n"
+ "#extension GL_EXT_tessellation_shader : require\n"
+ "layout(vertices = 2) out;\n"
+ "void main (void)\n"
+ "{\n"
+ " if (gl_InvocationID == 0)\n"
+ "{\n"
+ " gl_TessLevelOuter[0] = 1.0f;\n"
+ " gl_TessLevelOuter[1] = 1.0f;\n"
+ " }\n"
+ " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ "}\n";
+ */
+ programCollection.spirvAsmSources.add("tesc") <<
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 46\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
+ "OpExecutionMode %4 OutputVertices 2\n"
+ "OpDecorate %8 BuiltIn InvocationId\n"
+ "OpDecorate %20 Patch\n"
+ "OpDecorate %20 BuiltIn TessLevelOuter\n"
+ "OpMemberDecorate %29 0 BuiltIn Position\n"
+ "OpMemberDecorate %29 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
+ "OpDecorate %29 Block\n"
+ "OpMemberDecorate %35 0 BuiltIn Position\n"
+ "OpMemberDecorate %35 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
+ "OpDecorate %35 Block\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeInt 32 1\n"
+ "%7 = OpTypePointer Input %6\n"
+ "%8 = OpVariable %7 Input\n"
+ "%10 = OpConstant %6 0\n"
+ "%11 = OpTypeBool\n"
+ "%15 = OpTypeFloat 32\n"
+ "%16 = OpTypeInt 32 0\n"
+ "%17 = OpConstant %16 4\n"
+ "%18 = OpTypeArray %15 %17\n"
+ "%19 = OpTypePointer Output %18\n"
+ "%20 = OpVariable %19 Output\n"
+ "%21 = OpConstant %15 1\n"
+ "%22 = OpTypePointer Output %15\n"
+ "%24 = OpConstant %6 1\n"
+ "%26 = OpTypeVector %15 4\n"
+ "%27 = OpConstant %16 1\n"
+ "%28 = OpTypeArray %15 %27\n"
+ "%29 = OpTypeStruct %26 %15 %28 %28\n"
+ "%30 = OpConstant %16 2\n"
+ "%31 = OpTypeArray %29 %30\n"
+ "%32 = OpTypePointer Output %31\n"
+ "%33 = OpVariable %32 Output\n"
+ "%35 = OpTypeStruct %26 %15 %28 %28\n"
+ "%36 = OpConstant %16 32\n"
+ "%37 = OpTypeArray %35 %36\n"
+ "%38 = OpTypePointer Input %37\n"
+ "%39 = OpVariable %38 Input\n"
+ "%41 = OpTypePointer Input %26\n"
+ "%44 = OpTypePointer Output %26\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%9 = OpLoad %6 %8\n"
+ "%12 = OpIEqual %11 %9 %10\n"
+ "OpSelectionMerge %14 None\n"
+ "OpBranchConditional %12 %13 %14\n"
+ "%13 = OpLabel\n"
+ "%23 = OpAccessChain %22 %20 %10\n"
+ "OpStore %23 %21\n"
+ "%25 = OpAccessChain %22 %20 %24\n"
+ "OpStore %25 %21\n"
+ "OpBranch %14\n"
+ "%14 = OpLabel\n"
+ "%34 = OpLoad %6 %8\n"
+ "%40 = OpLoad %6 %8\n"
+ "%42 = OpAccessChain %41 %39 %40 %10\n"
+ "%43 = OpLoad %26 %42\n"
+ "%45 = OpAccessChain %44 %33 %34 %10\n"
+ "OpStore %45 %43\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+}
+
+// Registers a SPIR-V assembly tessellation evaluation shader under the name
+// "tese" in programCollection.spirvAsmSources. The shader uses isolines with
+// equal spacing, interpolates the position between the first two inputs using
+// gl_TessCoord.x, and forwards the first element of the location-0 input array
+// to the location-0 output. The block comment holds GLSL that the assembly
+// presumably was generated from - confirm before editing.
+void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
+{
+ /*
+ "#extension GL_KHR_shader_subgroup_ballot: enable\n"
+ "#extension GL_EXT_tessellation_shader : require\n"
+ "layout(isolines, equal_spacing, ccw ) in;\n"
+ "layout(location = 0) in float in_color[];\n"
+ "layout(location = 0) out float out_color;\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+ " out_color = in_color[0];\n"
+ "}\n";
+ */
+ programCollection.spirvAsmSources.add("tese") <<
+ "; SPIR-V\n"
+ "; Version: 1.3\n"
+ "; Generator: Khronos Glslang Reference Front End; 2\n"
+ "; Bound: 45\n"
+ "; Schema: 0\n"
+ "OpCapability Tessellation\n"
+ "%1 = OpExtInstImport \"GLSL.std.450\"\n"
+ "OpMemoryModel Logical GLSL450\n"
+ "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
+ "OpExecutionMode %4 Isolines\n"
+ "OpExecutionMode %4 SpacingEqual\n"
+ "OpExecutionMode %4 VertexOrderCcw\n"
+ "OpMemberDecorate %11 0 BuiltIn Position\n"
+ "OpMemberDecorate %11 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
+ "OpDecorate %11 Block\n"
+ "OpMemberDecorate %16 0 BuiltIn Position\n"
+ "OpMemberDecorate %16 1 BuiltIn PointSize\n"
+ "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
+ "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
+ "OpDecorate %16 Block\n"
+ "OpDecorate %29 BuiltIn TessCoord\n"
+ "OpDecorate %39 Location 0\n"
+ "OpDecorate %42 Location 0\n"
+ "%2 = OpTypeVoid\n"
+ "%3 = OpTypeFunction %2\n"
+ "%6 = OpTypeFloat 32\n"
+ "%7 = OpTypeVector %6 4\n"
+ "%8 = OpTypeInt 32 0\n"
+ "%9 = OpConstant %8 1\n"
+ "%10 = OpTypeArray %6 %9\n"
+ "%11 = OpTypeStruct %7 %6 %10 %10\n"
+ "%12 = OpTypePointer Output %11\n"
+ "%13 = OpVariable %12 Output\n"
+ "%14 = OpTypeInt 32 1\n"
+ "%15 = OpConstant %14 0\n"
+ "%16 = OpTypeStruct %7 %6 %10 %10\n"
+ "%17 = OpConstant %8 32\n"
+ "%18 = OpTypeArray %16 %17\n"
+ "%19 = OpTypePointer Input %18\n"
+ "%20 = OpVariable %19 Input\n"
+ "%21 = OpTypePointer Input %7\n"
+ "%24 = OpConstant %14 1\n"
+ "%27 = OpTypeVector %6 3\n"
+ "%28 = OpTypePointer Input %27\n"
+ "%29 = OpVariable %28 Input\n"
+ "%30 = OpConstant %8 0\n"
+ "%31 = OpTypePointer Input %6\n"
+ "%36 = OpTypePointer Output %7\n"
+ "%38 = OpTypePointer Output %6\n"
+ "%39 = OpVariable %38 Output\n"
+ "%40 = OpTypeArray %6 %17\n"
+ "%41 = OpTypePointer Input %40\n"
+ "%42 = OpVariable %41 Input\n"
+ "%4 = OpFunction %2 None %3\n"
+ "%5 = OpLabel\n"
+ "%22 = OpAccessChain %21 %20 %15 %15\n"
+ "%23 = OpLoad %7 %22\n"
+ "%25 = OpAccessChain %21 %20 %24 %15\n"
+ "%26 = OpLoad %7 %25\n"
+ "%32 = OpAccessChain %31 %29 %30\n"
+ "%33 = OpLoad %6 %32\n"
+ "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
+ "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
+ "%37 = OpAccessChain %36 %13 %15\n"
+ "OpStore %37 %35\n"
+ "%43 = OpAccessChain %31 %42 %15\n"
+ "%44 = OpLoad %6 %43\n"
+ "OpStore %39 %44\n"
+ "OpReturn\n"
+ "OpFunctionEnd\n";
+}
+
+// Specialises a GLSL geometry-shader template twice, substituting the
+// TOPOLOGY parameter with "lines" and then "points", and adds the results to
+// `collection` as "geometry_lines" and "geometry_points" with the given
+// build options.
+void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options, vk::GlslSourceCollection& collection)
+{
+    tcu::StringTemplate shaderTemplate(glslTemplate);
+    map<string, string> templateParams;
+
+    // Line-input variant
+    templateParams["TOPOLOGY"] = "lines";
+    collection.add("geometry_lines") << glu::GeometrySource(shaderTemplate.specialize(templateParams)) << options;
+
+    // Point-input variant
+    templateParams["TOPOLOGY"] = "points";
+    collection.add("geometry_points") << glu::GeometrySource(shaderTemplate.specialize(templateParams)) << options;
+}
+
+// Specialises a SPIR-V assembly geometry-shader template twice, substituting
+// the TOPOLOGY parameter with "InputLines" and then "InputPoints", and adds
+// the results to `collection` as "geometry_lines" and "geometry_points" with
+// the given build options.
+void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
+{
+    tcu::StringTemplate shaderTemplate(spirvTemplate);
+    map<string, string> templateParams;
+
+    // Line-input variant
+    templateParams["TOPOLOGY"] = "InputLines";
+    collection.add("geometry_lines") << shaderTemplate.specialize(templateParams) << options;
+
+    // Point-input variant
+    templateParams["TOPOLOGY"] = "InputPoints";
+    collection.add("geometry_points") << shaderTemplate.specialize(templateParams) << options;
+}
+
+// Fills the memory reachable through alloc.getHostPtr() according to
+// data.initializeType and then flushes the allocation.
+//
+// context - supplies the random seed (command-line base seed) and the device
+//           interface/device used for the flush.
+// alloc   - allocation whose host pointer is written.
+// data    - describes the element format, the element count and the kind of
+//           initialisation requested.
+//
+// InitializeNonZero writes random values of a type chosen from the format,
+// InitializeZero clears every byte, and InitializeNone leaves the memory
+// untouched and skips the flush.
+void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
+{
+    const vk::VkFormat     format = data.format;
+    const vk::VkDeviceSize size   = getFormatSizeInBytes(format) * data.numElements;
+
+    if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
+    {
+        // The generator is re-seeded from the base seed on every call, so the
+        // random sequence is the same for each buffer that is initialised.
+        de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
+
+        switch (format)
+        {
+            default:
+                DE_FATAL("Illegal buffer format");
+                break;
+            case VK_FORMAT_R8_USCALED:
+            case VK_FORMAT_R8G8_USCALED:
+            case VK_FORMAT_R8G8B8_USCALED:
+            case VK_FORMAT_R8G8B8A8_USCALED:
+            case VK_FORMAT_R32_SINT:
+            case VK_FORMAT_R32G32_SINT:
+            case VK_FORMAT_R32G32B32_SINT:
+            case VK_FORMAT_R32G32B32A32_SINT:
+            case VK_FORMAT_R32_UINT:
+            case VK_FORMAT_R32G32_UINT:
+            case VK_FORMAT_R32G32B32_UINT:
+            case VK_FORMAT_R32G32B32A32_UINT:
+            {
+                // All integer-like formats are filled as 32-bit words.
+                deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
+
+                for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
+                {
+                    ptr[k] = rnd.getUint32();
+                }
+            }
+            break;
+            case VK_FORMAT_R32_SFLOAT:
+            case VK_FORMAT_R32G32_SFLOAT:
+            case VK_FORMAT_R32G32B32_SFLOAT:
+            case VK_FORMAT_R32G32B32A32_SFLOAT:
+            {
+                float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
+
+                for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
+                {
+                    ptr[k] = rnd.getFloat();
+                }
+            }
+            break;
+            case VK_FORMAT_R64_SFLOAT:
+            case VK_FORMAT_R64G64_SFLOAT:
+            case VK_FORMAT_R64G64B64_SFLOAT:
+            case VK_FORMAT_R64G64B64A64_SFLOAT:
+            {
+                double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
+
+                for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
+                {
+                    ptr[k] = rnd.getDouble();
+                }
+            }
+            break;
+        }
+    }
+    else if (subgroups::SSBOData::InitializeZero == data.initializeType)
+    {
+        // Clear the whole byte range. The previous word-wise loop ran for
+        // size / 4 iterations and therefore skipped up to three trailing
+        // bytes whenever size was not a multiple of four.
+        deMemset(alloc.getHostPtr(), 0, static_cast<size_t>(size));
+    }
+
+    if (subgroups::SSBOData::InitializeNone != data.initializeType)
+    {
+        flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
+    }
+}
+
+// Maps a shader stage to a fixed index: vertex -> 0, tessellation control -> 1,
+// tessellation evaluation -> 2, geometry -> 3.
+//
+// Any other stage asserts in debug builds and returns the all-ones value
+// (~0u). That is the same value the function used to produce by implicitly
+// converting -1 to deUint32; it is now spelled out explicitly.
+deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
+{
+    switch (shaderStage)
+    {
+        case VK_SHADER_STAGE_VERTEX_BIT:
+            return 0u;
+        case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+            return 1u;
+        case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+            return 2u;
+        case VK_SHADER_STAGE_GEOMETRY_BIT:
+            return 3u;
+        default:
+            break;
+    }
+
+    // Single fallback path for unsupported stages.
+    DE_ASSERT(0);
+    return ~0u;
+}
+
+tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
+ Context& context, VkFormat format, SSBOData* extraData,
+ deUint32 extraDataCount,
+ bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+ const VkShaderStageFlags shaderStage)
+{
+ const deUint32 maxWidth = 1024u;
+ vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
+ DescriptorSetLayoutBuilder layoutBuilder;
+ DescriptorPoolBuilder poolBuilder;
+ DescriptorSetUpdateBuilder updateBuilder;
+ Move <VkDescriptorPool> descriptorPool;
+ Move <VkDescriptorSet> descriptorSet;
+
+ const Unique<VkShaderModule> vertexShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
+ context.getBinaryCollection().get("vert"), 0u));
+ const Unique<VkShaderModule> teCtrlShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
+ context.getBinaryCollection().get("tesc"), 0u));
+ const Unique<VkShaderModule> teEvalShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
+ context.getBinaryCollection().get("tese"), 0u));
+ const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
+ context.getBinaryCollection().get("fragment"), 0u));
+ const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
+
+ const VkVertexInputBindingDescription vertexInputBinding =
+ {
+ 0u, // binding;
+ static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
+ VK_VERTEX_INPUT_RATE_VERTEX // inputRate
+ };
+
+ const VkVertexInputAttributeDescription vertexInputAttribute =
+ {
+ 0u,
+ 0u,
+ VK_FORMAT_R32G32B32A32_SFLOAT,
+ 0u
+ };
+
+ for (deUint32 i = 0u; i < extraDataCount; i++)
+ {
+ if (extraData[i].isImage)
+ {
+ inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
+ }
+ else
+ {
+ vk::VkDeviceSize size = getFormatSizeInBytes(extraData[i].format) * extraData[i].numElements;
+ inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
+ }
+ const Allocation& alloc = inputBuffers[i]->getAllocation();
+ initializeMemory(context, alloc, extraData[i]);
+ }
+
+ for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
+ layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
+
+ const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
+
+ const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(context, *descriptorSetLayout));
+
+ const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
+ VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
+ VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+ *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
+ *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
+
+ for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
+ poolBuilder.addType(inputBuffers[ndx]->getType());
+
+ if (extraDataCount > 0)
+ {
+ descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
+ VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
+ descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
+ }
+
+ for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
+ {
+ if (inputBuffers[buffersNdx]->isImage())
+ {
+ VkDescriptorImageInfo info =
+ makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
+ inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
+
+ updateBuilder.writeSingle(*descriptorSet,
+ DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
+ inputBuffers[buffersNdx]->getType(), &info);
+ }
+ else
+ {
+ VkDescriptorBufferInfo info =
+ makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
+ 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
+
+ updateBuilder.writeSingle(*descriptorSet,
+ DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
+ inputBuffers[buffersNdx]->getType(), &info);
+ }
+ }
+
+ updateBuilder.update(context.getDeviceInterface(), context.getDevice());
+
+ const Unique<VkCommandPool> cmdPool (makeCommandPool(context));
+ const deUint32 subgroupSize = getSubgroupSize(context);
+ const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
+ const vk::VkDeviceSize vertexBufferSize = 2ull * maxWidth * sizeof(tcu::Vec4);
+ Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
+ unsigned totalIterations = 0u;
+ unsigned failedIterations = 0u;
+ Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
+
+ {
+ const Allocation& alloc = vertexBuffer.getAllocation();
+ std::vector<tcu::Vec4> data (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
+ const float pixelSize = 2.0f / static_cast<float>(maxWidth);
+ float leftHandPosition = -1.0f;
+
+ for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
+ {
+ data[ndx][0] = leftHandPosition;
+ leftHandPosition += pixelSize;
+ data[ndx+1][0] = leftHandPosition;
+ }
+
+ deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
+ flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
+ }
+
+ for (deUint32 width = 1u; width < maxWidth; ++width)
+ {
+ const Unique<VkFramebuffer> framebuffer (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
+ const VkViewport viewport = makeViewport(maxWidth, 1u);
+ const VkRect2D scissor = makeRect2D(maxWidth, 1u);
+ const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
+ Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+ const VkDeviceSize vertexBufferOffset = 0u;
+
+ totalIterations++;
+
+ beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+ {
+
+ context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
+ context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
+
+ beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
+
+ context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+ if (extraDataCount > 0)
+ {
+ context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
+ VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
+ &descriptorSet.get(), 0u, DE_NULL);
+ }
+
+ context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
+ context.getDeviceInterface().cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
+
+ endRenderPass(context.getDeviceInterface(), *cmdBuffer);
+
+ copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+ endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+
+ Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
+ waitFence(context, fence);
+ }
+
+ {
+ const Allocation& allocResult = imageBufferResult.getAllocation();
+ invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
+
+ std::vector<const void*> datas;
+ datas.push_back(allocResult.getHostPtr());
+ if (!checkResult(datas, width/2u, subgroupSize))
+ failedIterations++;
+ }
+ }
+
+ if (0 < failedIterations)
+ {
+ context.getTestContext().getLog()
+ << TestLog::Message << (totalIterations - failedIterations) << " / "
+ << totalIterations << " values passed" << TestLog::EndMessage;
+ return tcu::TestStatus::fail("Failed!");
+ }
+
+ return tcu::TestStatus::pass("OK");
+}
+
+// Verifies that the leading `width` 32-bit words of the first result buffer
+// all hold the reference value.
+//
+// datas - host pointers to result buffers; only datas[0] is inspected and it
+//         is read as an array of deUint32.
+// width - number of words to compare.
+// ref   - value every compared word must equal.
+//
+// Returns true when every compared word equals ref (trivially true when
+// width is zero), false as soon as one word differs.
+bool vkt::subgroups::check(std::vector<const void*> datas,
+	deUint32 width, deUint32 ref)
+{
+	const deUint32* const results = reinterpret_cast<const deUint32*>(datas[0]);
+	deUint32 position = 0u;
+
+	// Skip over every word that matches the reference value.
+	while (position < width && results[position] == ref)
+		++position;
+
+	// Reaching the end of the range means no mismatch was encountered.
+	return position == width;
+}
+
+// Compute-dispatch flavour of check(): compares one 32-bit word per global
+// invocation against the reference value.
+//
+// datas         - host pointers to result buffers, forwarded to check().
+// numWorkgroups - workgroup count along x, y and z.
+// localSize     - workgroup size along x, y and z.
+// ref           - value every compared word must equal.
+//
+// Returns whatever check() returns for the total invocation count.
+bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
+	const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+	deUint32 ref)
+{
+	// Total invocations = product over the three axes of (groups * local size).
+	deUint32 invocationCount = 1u;
+
+	for (deUint32 axis = 0u; axis < 3u; ++axis)
+		invocationCount *= numWorkgroups[axis] * localSize[axis];
+
+	return check(datas, invocationCount, ref);
+}
+/* Runs a framebuffer-based subgroup test that exercises the geometry stage.
+ *
+ * A vertex + geometry + fragment pipeline with point-list topology renders
+ * into a maxWidth x 1 colour attachment. For every width in [1, maxWidth)
+ * the function draws `width` vertices, copies the attachment into a
+ * host-readable buffer and hands that buffer to checkResult.
+ *
+ * context        - test context providing the device, shader binaries and log.
+ * format         - format of the colour attachment / read-back pixels.
+ * extraData      - array of extraDataCount descriptors of additional inputs;
+ *                  each becomes an image or a uniform buffer visible to the
+ *                  geometry stage and is re-initialised before every draw.
+ * extraDataCount - number of entries in extraData (may be zero, in which case
+ *                  no descriptor pool/set is created or bound).
+ * checkResult    - callback receiving the read-back pointer in datas[0], the
+ *                  number of vertices drawn and the subgroup size; returns
+ *                  false to flag the iteration as failed.
+ *
+ * Returns TestStatus::pass("OK") when no iteration failed, otherwise logs the
+ * passed/total iteration counts and returns TestStatus::fail("Failed!").
+ */
+tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
+ Context& context, VkFormat format, SSBOData* extraData,
+ deUint32 extraDataCount,
+ bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
+{
+ const deUint32 maxWidth = 1024u; // Attachment width, vertex-buffer length and exclusive upper bound of the width sweep.
+ vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount); // One resource per extraData entry.
+ DescriptorSetLayoutBuilder layoutBuilder;
+ DescriptorPoolBuilder poolBuilder;
+ DescriptorSetUpdateBuilder updateBuilder;
+ Move <VkDescriptorPool> descriptorPool; // Left empty when extraDataCount == 0.
+ Move <VkDescriptorSet> descriptorSet; // Left empty when extraDataCount == 0.
+
+ const Unique<VkShaderModule> vertexShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
+ context.getBinaryCollection().get("vert"), 0u));
+ const Unique<VkShaderModule> geometryShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
+ context.getBinaryCollection().get("geometry"), 0u));
+ const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
+ context.getBinaryCollection().get("fragment"), 0u));
+ const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
+ const VkVertexInputBindingDescription vertexInputBinding =
+ {
+ 0u, // binding;
+ static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
+ VK_VERTEX_INPUT_RATE_VERTEX // inputRate
+ };
+
+ const VkVertexInputAttributeDescription vertexInputAttribute =
+ {
+ 0u, // location
+ 0u, // binding
+ VK_FORMAT_R32G32B32A32_SFLOAT, // format: one Vec4 per vertex
+ 0u // offset
+ };
+
+ for (deUint32 i = 0u; i < extraDataCount; i++) // Create and fill the backing resource for every extra input.
+ {
+ if (extraData[i].isImage)
+ {
+ inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
+ }
+ else
+ {
+ vk::VkDeviceSize size = getFormatSizeInBytes(extraData[i].format) * extraData[i].numElements;
+ inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
+ }
+ const Allocation& alloc = inputBuffers[i]->getAllocation();
+ initializeMemory(context, alloc, extraData[i]);
+ }
+
+ for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++) // NOTE(review): presumably addBinding() numbers bindings 0..N-1 in call order, matching binding(buffersNdx) below - confirm.
+ layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
+
+ const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
+
+ const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(context, *descriptorSetLayout));
+
+ const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
+ VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
+ *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
+ *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
+
+ for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
+ poolBuilder.addType(inputBuffers[ndx]->getType());
+
+ if (extraDataCount > 0) // A pool and set are only created when there is something to bind.
+ {
+ descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
+ VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
+ descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
+ }
+
+ for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++) // Queue one descriptor write per extra input; no writes when there are none.
+ {
+ if (inputBuffers[buffersNdx]->isImage())
+ {
+ VkDescriptorImageInfo info =
+ makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
+ inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
+
+ updateBuilder.writeSingle(*descriptorSet,
+ DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
+ inputBuffers[buffersNdx]->getType(), &info);
+ }
+ else
+ {
+ VkDescriptorBufferInfo info =
+ makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
+ 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
+
+ updateBuilder.writeSingle(*descriptorSet,
+ DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
+ inputBuffers[buffersNdx]->getType(), &info);
+ }
+ }
+
+ updateBuilder.update(context.getDeviceInterface(), context.getDevice());
+
+ const Unique<VkCommandPool> cmdPool (makeCommandPool(context));
+ const deUint32 subgroupSize = getSubgroupSize(context);
+ const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
+ const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4); // One Vec4 position per potential vertex.
+ Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
+ unsigned totalIterations = 0u;
+ unsigned failedIterations = 0u;
+ Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
+
+ { // Fill the vertex buffer once; it is shared by every iteration below.
+ const Allocation& alloc = vertexBuffer.getAllocation();
+ std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
+ const float pixelSize = 2.0f / static_cast<float>(maxWidth); // Width of one of maxWidth equal columns spanning [-1, 1].
+ float leftHandPosition = -1.0f;
+
+ for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
+ {
+ data[ndx][0] = leftHandPosition + pixelSize / 2.0f; // x at the centre of column ndx; y, z, w stay 1.0.
+ leftHandPosition += pixelSize;
+ }
+
+ deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
+ flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
+ }
+
+ for (deUint32 width = 1u; width < maxWidth; width++) // One draw of `width` vertices per iteration; maxWidth itself is never drawn.
+ {
+ totalIterations++;
+ const Unique<VkFramebuffer> framebuffer (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1)); // Recreated on every iteration.
+ const VkViewport viewport = makeViewport(maxWidth, 1u);
+ const VkRect2D scissor = makeRect2D(maxWidth, 1u);
+ const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
+ Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT); // Read-back target for the full attachment row.
+ const VkDeviceSize vertexBufferOffset = 0u;
+
+ for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++) // Restore the extra inputs to their initial contents before each draw.
+ {
+ const Allocation& alloc = inputBuffers[ndx]->getAllocation();
+ initializeMemory(context, alloc, extraData[ndx]);
+ }
+
+ beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer); // NOTE(review): re-recorded each iteration with no explicit reset here; presumably makeCommandPool() allows implicit reset - confirm.
+ {
+ context.getDeviceInterface().cmdSetViewport(
+ *cmdBuffer, 0, 1, &viewport);
+
+ context.getDeviceInterface().cmdSetScissor(
+ *cmdBuffer, 0, 1, &scissor);
+
+ beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
+
+ context.getDeviceInterface().cmdBindPipeline(
+ *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+ if (extraDataCount > 0) // The descriptor set only exists when extra inputs were supplied.
+ {
+ context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
+ VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
+ &descriptorSet.get(), 0u, DE_NULL);
+ }
+
+ context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
+
+ context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u); // Draw the first `width` vertices of the shared buffer.
+
+ endRenderPass(context.getDeviceInterface(), *cmdBuffer);
+
+ copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+
+ endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+ Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
+ waitFence(context, fence); // Block until the draw and copy have finished before reading back.
+ }
+
+ {
+ const Allocation& allocResult = imageBufferResult.getAllocation();
+ invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
+
+ std::vector<const void*> datas;
+ datas.push_back(allocResult.getHostPtr()); // The read-back row is the only entry handed to the checker.
+ if (!checkResult(datas, width, subgroupSize))
+ failedIterations++;
+ }
+ }
+
+ if (0 < failedIterations)
+ {
+ context.getTestContext().getLog()
+ << TestLog::Message << (totalIterations - failedIterations) << " / "
+ << totalIterations << " values passed" << TestLog::EndMessage;
+ return tcu::TestStatus::fail("Failed!");
+ }
+
+ return tcu::TestStatus::pass("OK");
+}
+
+
+// Runs a subgroup test over the graphics stages selected by shaderStageTested.
+//
+// For every width in [1, maxWidth) the function draws `width` vertices with a
+// pipeline made of the tested stages plus any stages they depend on (the
+// dependencies use the "*_noSubgroup" shader binaries), then validates the
+// output of every tested stage with checkResult.
+//
+// context           - test context providing the device, shader binaries and log.
+// format            - element format of the per-stage result buffers and format
+//                     of the colour attachment.
+// extraDatas        - array of extraDatasCount descriptors of additional inputs;
+//                     each entry's `stages` and `binding` fields select where it
+//                     is bound. Every entry is re-initialised before each draw.
+// extraDatasCount   - number of entries in extraDatas.
+// checkResult       - callback receiving, in datas[0], the host pointer of the
+//                     stage's result buffer (or the read-back colour attachment
+//                     for the fragment stage), followed by the host pointers of
+//                     all buffer-backed extra inputs visible to that stage. The
+//                     width argument is the number of vertices drawn, doubled
+//                     for the tessellation evaluation stage.
+// shaderStageTested - mask of stages whose subgroup behaviour is checked.
+//
+// Returns TestStatus::pass("OK") when every width passed, otherwise logs the
+// passed/total width counts and returns TestStatus::fail("Failed!"). A width
+// counts as failed when at least one of its stage checks fails, so the failed
+// count can never exceed the total.
+tcu::TestStatus vkt::subgroups::allStages(
+	Context& context, VkFormat format, SSBOData* extraDatas,
+	deUint32 extraDatasCount,
+	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+	const VkShaderStageFlags shaderStageTested)
+{
+	const deUint32					maxWidth			= 1024u;
+	vector<VkShaderStageFlagBits>	stagesVector;
+	VkShaderStageFlags				shaderStageRequired	= (VkShaderStageFlags)0ull;
+
+	Move<VkShaderModule>	vertexShaderModule;
+	Move<VkShaderModule>	teCtrlShaderModule;
+	Move<VkShaderModule>	teEvalShaderModule;
+	Move<VkShaderModule>	geometryShaderModule;
+	Move<VkShaderModule>	fragmentShaderModule;
+
+	// Collect the tested stages that write a result buffer and, for each one,
+	// note the stages it depends on that were not themselves requested.
+	if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
+	{
+		stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
+	}
+	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
+	{
+		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
+		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
+	}
+	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
+	{
+		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
+		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
+	}
+	if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
+	{
+		stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
+		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
+		shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
+	}
+	if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
+	{
+		// The fragment stage reports through the colour attachment, so it has
+		// no entry in stagesVector; it only pulls in the vertex stage.
+		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
+		shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
+	}
+
+	// At this point shaderStageRequired holds only the untested dependencies,
+	// which select the "_noSubgroup" shader variants.
+	const deUint32	stagesCount	= static_cast<deUint32>(stagesVector.size());
+	const string	vert		= (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT) ? "vert_noSubgroup" : "vert";
+	const string	tesc		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? "tesc_noSubgroup" : "tesc";
+	const string	tese		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? "tese_noSubgroup" : "tese";
+
+	// From here on shaderStageRequired is the full set of pipeline stages.
+	shaderStageRequired = shaderStageTested | shaderStageRequired;
+
+	vertexShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(vert), 0u);
+	if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
+	{
+		teCtrlShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tesc), 0u);
+		teEvalShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tese), 0u);
+	}
+	if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
+	{
+		if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
+		{
+			// tessellation shaders output line primitives
+			geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_lines"), 0u);
+		}
+		else
+		{
+			// otherwise points are processed by geometry shader
+			geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_points"), 0u);
+		}
+	}
+	if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
+		fragmentShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u);
+
+	// Layout of inputBuffers: [0, stagesCount) are per-stage result buffers,
+	// [stagesCount, stagesCount + extraDatasCount) are the extra inputs.
+	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
+
+	DescriptorSetLayoutBuilder layoutBuilder;
+
+	// The implicit result SSBO we use to store our outputs from the shader
+	for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
+	{
+		// The tessellation evaluation stage gets twice as many result slots.
+		const VkDeviceSize shaderSize	= (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
+		const VkDeviceSize size			= getFormatSizeInBytes(format) * shaderSize;
+		inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
+
+		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
+	}
+
+	for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
+	{
+		const deUint32 datasNdx = ndx - stagesCount;
+		if (extraDatas[datasNdx].isImage)
+		{
+			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
+		}
+		else
+		{
+			const vk::VkDeviceSize size = getFormatSizeInBytes(extraDatas[datasNdx].format) * extraDatas[datasNdx].numElements;
+			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
+		}
+
+		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
+		initializeMemory(context, alloc, extraDatas[datasNdx]);
+
+		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
+			extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
+	}
+
+	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
+		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
+
+	const Unique<VkPipelineLayout> pipelineLayout(
+		makePipelineLayout(context, *descriptorSetLayout));
+
+	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
+
+	// Patches are drawn whenever tessellation is part of the pipeline,
+	// points otherwise.
+	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
+		shaderStageRequired,
+		*vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
+		*renderPass,
+		(shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST));
+
+	DescriptorPoolBuilder poolBuilder;
+
+	for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
+	{
+		poolBuilder.addType(inputBuffers[ndx]->getType());
+	}
+
+	const Unique<VkDescriptorPool> descriptorPool(
+		poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
+						  VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
+
+	// Create descriptor set
+	const Unique<VkDescriptorSet> descriptorSet(
+		makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
+
+	DescriptorSetUpdateBuilder updateBuilder;
+
+	// Queue one descriptor write per resource. Result buffers use the binding
+	// reserved for their stage; extra inputs use the binding they declare.
+	for (deUint32 ndx = 0u; ndx < stagesCount + extraDatasCount; ndx++)
+	{
+		const deUint32 bindingNdx = (ndx < stagesCount)
+			? getResultBinding(stagesVector[ndx])
+			: extraDatas[ndx - stagesCount].binding;
+
+		if (inputBuffers[ndx]->isImage())
+		{
+			VkDescriptorImageInfo info =
+				makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
+										inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
+
+			updateBuilder.writeSingle(*descriptorSet,
+									  DescriptorSetUpdateBuilder::Location::binding(bindingNdx),
+									  inputBuffers[ndx]->getType(), &info);
+		}
+		else
+		{
+			VkDescriptorBufferInfo info =
+				makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
+										 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
+
+			updateBuilder.writeSingle(*descriptorSet,
+									  DescriptorSetUpdateBuilder::Location::binding(bindingNdx),
+									  inputBuffers[ndx]->getType(), &info);
+		}
+	}
+	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
+
+	{
+		const Unique<VkCommandPool>		cmdPool				(makeCommandPool(context));
+		const deUint32					subgroupSize		= getSubgroupSize(context);
+		const Unique<VkCommandBuffer>	cmdBuffer			(makeCommandBuffer(context, *cmdPool));
+		unsigned						totalIterations		= 0u;
+		unsigned						failedIterations	= 0u;
+		Image							resultImage			(context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
+		const Unique<VkFramebuffer>		framebuffer			(makeFramebuffer(context, *renderPass, resultImage.getImageView(), maxWidth, 1));
+		const VkViewport				viewport			= makeViewport(maxWidth, 1u);
+		const VkRect2D					scissor				= makeRect2D(maxWidth, 1u);
+		const vk::VkDeviceSize			imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
+		Buffer							imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+		const VkImageSubresourceRange	subresourceRange	=
+		{
+			VK_IMAGE_ASPECT_COLOR_BIT,	//VkImageAspectFlags	aspectMask
+			0u,							//deUint32				baseMipLevel
+			1u,							//deUint32				levelCount
+			0u,							//deUint32				baseArrayLayer
+			1u							//deUint32				layerCount
+		};
+
+		// Moves the attachment from an undefined layout into the colour
+		// attachment layout at the start of every recording.
+		const VkImageMemoryBarrier colorAttachmentBarrier = makeImageMemoryBarrier(
+			(VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+			resultImage.getImage(), subresourceRange);
+
+		for (deUint32 width = 1u; width < maxWidth; width++)
+		{
+			for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
+			{
+				// re-init the data
+				const Allocation& alloc = inputBuffers[ndx]->getAllocation();
+				initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
+			}
+
+			totalIterations++;
+
+			beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+
+			context.getDeviceInterface().cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
+
+			context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
+
+			context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
+
+			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
+
+			context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+			context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
+					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
+					&descriptorSet.get(), 0u, DE_NULL);
+
+			context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1, 0, 0);
+
+			endRenderPass(context.getDeviceInterface(), *cmdBuffer);
+
+			// Only the first `width` pixels are read back for the fragment check.
+			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+
+			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+
+			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
+			waitFence(context, fence);
+
+			// Every check still runs, but a width is recorded as failed at most
+			// once so that failedIterations can never exceed totalIterations
+			// (the unsigned difference logged below would otherwise wrap).
+			bool iterationFailed = false;
+
+			for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
+			{
+				std::vector<const void*> datas;
+				if (!inputBuffers[ndx]->isImage())
+				{
+					const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
+					invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
+					// we always have our result data first
+					datas.push_back(resultAlloc.getHostPtr());
+				}
+
+				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
+				{
+					const deUint32 datasNdx = index - stagesCount;
+					if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
+					{
+						const Allocation& extraAlloc = inputBuffers[index]->getAllocation();
+						invalidateAlloc(context.getDeviceInterface(), context.getDevice(), extraAlloc);
+						// buffer-backed extra inputs visible to this stage follow the result data
+						datas.push_back(extraAlloc.getHostPtr());
+					}
+				}
+
+				if (!checkResult(datas, (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width , subgroupSize))
+					iterationFailed = true;
+			}
+			if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
+			{
+				std::vector<const void*> datas;
+				const Allocation& resultAlloc = imageBufferResult.getAllocation();
+				invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
+
+				// we always have our result data first
+				datas.push_back(resultAlloc.getHostPtr());
+
+				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
+				{
+					const deUint32 datasNdx = index - stagesCount;
+					if ((VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
+					{
+						const Allocation& alloc = inputBuffers[index]->getAllocation();
+						invalidateAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
+						// buffer-backed extra inputs visible to the fragment stage follow the result data
+						datas.push_back(alloc.getHostPtr());
+					}
+				}
+
+				if (!checkResult(datas, width , subgroupSize))
+					iterationFailed = true;
+			}
+
+			if (iterationFailed)
+				failedIterations++;
+
+			// The same command buffer is re-recorded on the next width.
+			context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
+		}
+
+		if (0 < failedIterations)
+		{
+			context.getTestContext().getLog()
+					<< TestLog::Message << (totalIterations - failedIterations) << " / "
+					<< totalIterations << " values passed" << TestLog::EndMessage;
+			return tcu::TestStatus::fail("Failed!");
+		}
+	}
+
+	return tcu::TestStatus::pass("OK");
+}
+
+tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
+ SSBOData* extraData, deUint32 extraDataCount,
+ bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
+{
+ const deUint32 maxWidth = 1024u;
+ vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
+ DescriptorSetLayoutBuilder layoutBuilder;
+ const Unique<VkShaderModule> vertexShaderModule (createShaderModule
+ (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
+ const Unique<VkShaderModule> fragmentShaderModule (createShaderModule
+ (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
+ const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
+
+ const VkVertexInputBindingDescription vertexInputBinding =
+ {
+ 0u, // binding;
+ static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
+ VK_VERTEX_INPUT_RATE_VERTEX // inputRate
+ };
+
+ const VkVertexInputAttributeDescription vertexInputAttribute =
+ {
+ 0u,
+ 0u,
+ VK_FORMAT_R32G32B32A32_SFLOAT,
+ 0u
+ };
+
+ for (deUint32 i = 0u; i < extraDataCount; i++)
+ {
+ if (extraData[i].isImage)
+ {
+ inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
+ }
+ else
+ {
+ vk::VkDeviceSize size = getFormatSizeInBytes(extraData[i].format) * extraData[i].numElements;
+ inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
+ }
+ const Allocation& alloc = inputBuffers[i]->getAllocation();
+ initializeMemory(context, alloc, extraData[i]);
+ }
+
+ for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
+ layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
+
+ const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
+
+ const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(context, *descriptorSetLayout));
+
+ const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
+ VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
+ *vertexShaderModule, *fragmentShaderModule,
+ DE_NULL, DE_NULL, DE_NULL,
+ *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
+ &vertexInputBinding, &vertexInputAttribute, true, format));
+ DescriptorPoolBuilder poolBuilder;
+ DescriptorSetUpdateBuilder updateBuilder;
+
+
+ for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
+ poolBuilder.addType(inputBuffers[ndx]->getType());
+
+ Move <VkDescriptorPool> descriptorPool;
+ Move <VkDescriptorSet> descriptorSet;
+
+ if (extraDataCount > 0)
+ {
+ descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
+ VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
+ descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
+ }
+
+ for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
+ {
+ const Allocation& alloc = inputBuffers[ndx]->getAllocation();
+ initializeMemory(context, alloc, extraData[ndx]);
+ }
+
+ for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
+ {
+ if (inputBuffers[buffersNdx]->isImage())
+ {
+ VkDescriptorImageInfo info =
+ makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
+ inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
+
+ updateBuilder.writeSingle(*descriptorSet,
+ DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
+ inputBuffers[buffersNdx]->getType(), &info);
+ }
+ else
+ {
+ VkDescriptorBufferInfo info =
+ makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
+ 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
+
+ updateBuilder.writeSingle(*descriptorSet,
+ DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
+ inputBuffers[buffersNdx]->getType(), &info);
+ }
+ }
+ updateBuilder.update(context.getDeviceInterface(), context.getDevice());
+
+ const Unique<VkCommandPool> cmdPool (makeCommandPool(context));
+
+ const deUint32 subgroupSize = getSubgroupSize(context);
+
+ const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
+
+ const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
+ Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
+
+ unsigned totalIterations = 0u;
+ unsigned failedIterations = 0u;
+
+ Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
+
+ {
+ const Allocation& alloc = vertexBuffer.getAllocation();
+ std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
+ const float pixelSize = 2.0f / static_cast<float>(maxWidth);
+ float leftHandPosition = -1.0f;
+
+ for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
+ {
+ data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
+ leftHandPosition += pixelSize;
+ }
+
+ deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
+ flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
+ }
+
+ for (deUint32 width = 1u; width < maxWidth; width++)
+ {
+ totalIterations++;
+ const Unique<VkFramebuffer> framebuffer (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
+ const VkViewport viewport = makeViewport(maxWidth, 1u);
+ const VkRect2D scissor = makeRect2D(maxWidth, 1u);
+ const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
+ Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+ const VkDeviceSize vertexBufferOffset = 0u;
+
+ for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
+ {
+ const Allocation& alloc = inputBuffers[ndx]->getAllocation();
+ initializeMemory(context, alloc, extraData[ndx]);
+ }
+
+ beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+ {
+ context.getDeviceInterface().cmdSetViewport(
+ *cmdBuffer, 0, 1, &viewport);
+
+ context.getDeviceInterface().cmdSetScissor(
+ *cmdBuffer, 0, 1, &scissor);
+
+ beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
+
+ context.getDeviceInterface().cmdBindPipeline(
+ *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+ if (extraDataCount > 0)
+ {
+ context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
+ VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
+ &descriptorSet.get(), 0u, DE_NULL);
+ }
+
+ context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
+
+ context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
+
+ endRenderPass(context.getDeviceInterface(), *cmdBuffer);
+
+ copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+
+ endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+ Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
+ waitFence(context, fence);
+ }
+
+ {
+ const Allocation& allocResult = imageBufferResult.getAllocation();
+ invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
+
+ std::vector<const void*> datas;
+ datas.push_back(allocResult.getHostPtr());
+ if (!checkResult(datas, width, subgroupSize))
+ failedIterations++;
+ }
+ }
+
+ if (0 < failedIterations)
+ {
+ context.getTestContext().getLog()
+ << TestLog::Message << (totalIterations - failedIterations) << " / "
+ << totalIterations << " values passed" << TestLog::EndMessage;
+ return tcu::TestStatus::fail("Failed!");
+ }
+
+ return tcu::TestStatus::pass("OK");
+}
+
+
+// Runs a fragment-stage subgroup test by rendering into a colour attachment
+// and validating the pixels on the host.
+//
+// The "vert" and "fragment" binaries from the context's binary collection are
+// used to build one graphics pipeline (triangle strip). Every entry of
+// extraDatas becomes either an Image or a uniform Buffer, initialised through
+// initializeMemory() and exposed to the fragment stage at binding i.
+//
+// For every (width, height) pair taken from the powers of two 8, 16, ... up to
+// and including the subgroup size, the function draws 4 vertices into a
+// width x height attachment of the given format, copies the attachment into a
+// buffer and passes that buffer's host pointer to checkResult.
+//
+// \param context          Test context providing device, queue and binaries.
+// \param format           Format of the colour attachment / result texels.
+// \param extraDatas       Array of extraDatasCount input descriptions.
+// \param extraDatasCount  Number of entries in extraDatas (may be 0).
+// \param checkResult      Callback invoked once per iteration with
+//                         (datas, width, height, subgroupSize); datas holds
+//                         only the result pointer. Returning false marks the
+//                         iteration as failed.
+// \return pass("OK") when no iteration failed, otherwise fail("Failed!")
+//         after logging how many iterations passed.
+//
+// NOTE: when the reported subgroup size is below 8 the loops never execute
+// and the function returns pass without running checkResult.
+tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest (Context& context, VkFormat format, SSBOData* extraDatas,
+	deUint32 extraDatasCount,
+	bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
+						deUint32 height, deUint32 subgroupSize))
+{
+	const Unique<VkShaderModule> vertexShaderModule (createShaderModule
+		(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
+	const Unique<VkShaderModule> fragmentShaderModule (createShaderModule
+		(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
+
+	// One backing resource per extra input, in the same order as extraDatas.
+	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
+
+	for (deUint32 i = 0; i < extraDatasCount; i++)
+	{
+		if (extraDatas[i].isImage)
+		{
+			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
+				static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
+		}
+		else
+		{
+			vk::VkDeviceSize size =
+				getFormatSizeInBytes(extraDatas[i].format) * extraDatas[i].numElements;
+			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
+		}
+
+		const Allocation& alloc = inputBuffers[i]->getAllocation();
+		initializeMemory(context, alloc, extraDatas[i]);
+	}
+
+	// Binding i of set 0 corresponds to extraDatas[i]; only the fragment stage sees it.
+	DescriptorSetLayoutBuilder layoutBuilder;
+
+	for (deUint32 i = 0; i < extraDatasCount; i++)
+	{
+		layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
+			VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
+	}
+
+	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
+		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
+
+	const Unique<VkPipelineLayout> pipelineLayout(
+		makePipelineLayout(context, *descriptorSetLayout));
+
+	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
+	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
+		VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
+		*vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+		DE_NULL, DE_NULL, true));
+
+	DescriptorPoolBuilder poolBuilder;
+
+	// To stop validation complaining, always add at least one type to pool.
+	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+	for (deUint32 i = 0; i < extraDatasCount; i++)
+	{
+		poolBuilder.addType(inputBuffers[i]->getType());
+	}
+
+	// Pool and set are only created when there is something to bind.
+	Move<VkDescriptorPool> descriptorPool;
+	Move<VkDescriptorSet> descriptorSet;
+
+	if (extraDatasCount > 0)
+	{
+		descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
+			VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
+
+		descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
+	}
+
+	DescriptorSetUpdateBuilder updateBuilder;
+
+	for (deUint32 i = 0; i < extraDatasCount; i++)
+	{
+		if (inputBuffers[i]->isImage())
+		{
+			VkDescriptorImageInfo info =
+				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
+					inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
+
+			updateBuilder.writeSingle(*descriptorSet,
+				DescriptorSetUpdateBuilder::Location::binding(i),
+				inputBuffers[i]->getType(), &info);
+		}
+		else
+		{
+			VkDescriptorBufferInfo info =
+				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
+					0ull, inputBuffers[i]->getAsBuffer()->getSize());
+
+			updateBuilder.writeSingle(*descriptorSet,
+				DescriptorSetUpdateBuilder::Location::binding(i),
+				inputBuffers[i]->getType(), &info);
+		}
+	}
+
+	if (extraDatasCount > 0)
+		updateBuilder.update(context.getDeviceInterface(), context.getDevice());
+
+	const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
+
+	const deUint32 subgroupSize = getSubgroupSize(context);
+
+	const Unique<VkCommandBuffer> cmdBuffer(
+		makeCommandBuffer(context, *cmdPool));
+
+	unsigned totalIterations = 0;
+	unsigned failedIterations = 0;
+
+	// Texel size does not depend on the iteration, so query it once.
+	const VkDeviceSize formatSize = getFormatSizeInBytes(format);
+
+	for (deUint32 width = 8; width <= subgroupSize; width *= 2)
+	{
+		for (deUint32 height = 8; height <= subgroupSize; height *= 2)
+		{
+			totalIterations++;
+
+			// re-init the data
+			for (deUint32 i = 0; i < extraDatasCount; i++)
+			{
+				const Allocation& alloc = inputBuffers[i]->getAllocation();
+				initializeMemory(context, alloc, extraDatas[i]);
+			}
+
+			const VkDeviceSize resultImageSizeInBytes =
+				width * height * formatSize;
+
+			Image resultImage(context, width, height, format,
+				VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+				VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
+
+			// Readback target: this is a buffer, so it takes a *buffer* usage
+			// flag (the image-usage flag with the same numeric value was used
+			// here before).
+			Buffer resultBuffer(context, resultImageSizeInBytes,
+				VK_BUFFER_USAGE_TRANSFER_DST_BIT);
+
+			const Unique<VkFramebuffer> framebuffer(makeFramebuffer(context,
+				*renderPass, resultImage.getImageView(), width, height));
+
+			beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+
+			VkViewport viewport = makeViewport(width, height);
+
+			context.getDeviceInterface().cmdSetViewport(
+				*cmdBuffer, 0, 1, &viewport);
+
+			VkRect2D scissor = {{0, 0}, {width, height}};
+
+			context.getDeviceInterface().cmdSetScissor(
+				*cmdBuffer, 0, 1, &scissor);
+
+			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
+
+			context.getDeviceInterface().cmdBindPipeline(
+				*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+			if (extraDatasCount > 0)
+			{
+				context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
+					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
+					&descriptorSet.get(), 0u, DE_NULL);
+			}
+
+			// Four vertices of a triangle strip; positions come from the "vert" shader.
+			context.getDeviceInterface().cmdDraw(*cmdBuffer, 4, 1, 0, 0);
+
+			endRenderPass(context.getDeviceInterface(), *cmdBuffer);
+
+			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+
+			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+
+			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
+
+			waitFence(context, fence);
+
+			std::vector<const void*> datas;
+			{
+				const Allocation& resultAlloc = resultBuffer.getAllocation();
+				invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
+
+				// we always have our result data first
+				datas.push_back(resultAlloc.getHostPtr());
+			}
+
+			if (!checkResult(datas, width, height, subgroupSize))
+			{
+				failedIterations++;
+			}
+
+			// The same command buffer is re-recorded on the next iteration.
+			context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
+		}
+	}
+
+	if (0 < failedIterations)
+	{
+		context.getTestContext().getLog()
+				<< TestLog::Message << (totalIterations - failedIterations) << " / "
+				<< totalIterations << " values passed" << TestLog::EndMessage;
+		return tcu::TestStatus::fail("Failed!");
+	}
+
+	return tcu::TestStatus::pass("OK");
+}
+
+// Runs a compute-stage subgroup test over a fixed list of workgroup sizes.
+//
+// A result buffer of maxSupportedSubgroupSize()^3 elements of `format` is
+// bound at binding 0; each entry of `inputs` is turned into an Image or a
+// Buffer and bound at binding i + 1. The "comp" binary is compiled into one
+// pipeline per local size, and every pipeline is dispatched with 4 x 2 x 2
+// workgroups.
+//
+// After each dispatch checkResult(datas, numWorkgroups, localSize,
+// subgroupSize) is called; datas[0] is the result buffer's host pointer,
+// followed by the host pointers of the non-image inputs in input order.
+//
+// Note that the inputs are initialised once, before the first dispatch; they
+// are not re-initialised between iterations.
+//
+// Returns pass("OK") when no iteration failed, otherwise logs the number of
+// passing iterations and returns fail("Failed!").
+tcu::TestStatus vkt::subgroups::makeComputeTest(
+ Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount,
+ bool (*checkResult)(std::vector<const void*> datas,
+ const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+ deUint32 subgroupSize))
+{
+ VkDeviceSize elementSize = getFormatSizeInBytes(format);
+
+ // Element count of the result buffer: cube of the maximum supported subgroup size.
+ const VkDeviceSize resultBufferSize = maxSupportedSubgroupSize() *
+ maxSupportedSubgroupSize() *
+ maxSupportedSubgroupSize();
+ const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
+
+ Buffer resultBuffer(
+ context, resultBufferSizeInBytes);
+
+ // One backing resource per input, in the same order as `inputs`.
+ std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
+
+ for (deUint32 i = 0; i < inputsCount; i++)
+ {
+ if (inputs[i].isImage)
+ {
+ inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
+ static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
+ }
+ else
+ {
+ vk::VkDeviceSize size =
+ getFormatSizeInBytes(inputs[i].format) * inputs[i].numElements;
+ inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
+ }
+
+ const Allocation& alloc = inputBuffers[i]->getAllocation();
+ initializeMemory(context, alloc, inputs[i]);
+ }
+
+ // Layout: the result buffer is added first, then inputs in order.
+ DescriptorSetLayoutBuilder layoutBuilder;
+ layoutBuilder.addBinding(
+ resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
+
+ for (deUint32 i = 0; i < inputsCount; i++)
+ {
+ layoutBuilder.addBinding(
+ inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
+ }
+
+ const Unique<VkDescriptorSetLayout> descriptorSetLayout(
+ layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
+
+ const Unique<VkShaderModule> shaderModule(
+ createShaderModule(context.getDeviceInterface(), context.getDevice(),
+ context.getBinaryCollection().get("comp"), 0u));
+ const Unique<VkPipelineLayout> pipelineLayout(
+ makePipelineLayout(context, *descriptorSetLayout));
+
+ DescriptorPoolBuilder poolBuilder;
+
+ poolBuilder.addType(resultBuffer.getType());
+
+ for (deUint32 i = 0; i < inputsCount; i++)
+ {
+ poolBuilder.addType(inputBuffers[i]->getType());
+ }
+
+ const Unique<VkDescriptorPool> descriptorPool(
+ poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
+ VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
+
+ // Create descriptor set
+ const Unique<VkDescriptorSet> descriptorSet(
+ makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
+
+ DescriptorSetUpdateBuilder updateBuilder;
+
+ const VkDescriptorBufferInfo resultDescriptorInfo =
+ makeDescriptorBufferInfo(
+ resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
+
+ // Binding 0 is always the result storage buffer.
+ updateBuilder.writeSingle(*descriptorSet,
+ DescriptorSetUpdateBuilder::Location::binding(0u),
+ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
+
+ // Inputs occupy bindings 1..inputsCount.
+ for (deUint32 i = 0; i < inputsCount; i++)
+ {
+ if (inputBuffers[i]->isImage())
+ {
+ VkDescriptorImageInfo info =
+ makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
+ inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
+
+ updateBuilder.writeSingle(*descriptorSet,
+ DescriptorSetUpdateBuilder::Location::binding(i + 1),
+ inputBuffers[i]->getType(), &info);
+ }
+ else
+ {
+ vk::VkDeviceSize size =
+ getFormatSizeInBytes(inputs[i].format) * inputs[i].numElements;
+ VkDescriptorBufferInfo info =
+ makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
+
+ updateBuilder.writeSingle(*descriptorSet,
+ DescriptorSetUpdateBuilder::Location::binding(i + 1),
+ inputBuffers[i]->getType(), &info);
+ }
+ }
+
+ updateBuilder.update(context.getDeviceInterface(), context.getDevice());
+
+ const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
+
+ unsigned totalIterations = 0;
+ unsigned failedIterations = 0;
+
+ const deUint32 subgroupSize = getSubgroupSize(context);
+
+ const Unique<VkCommandBuffer> cmdBuffer(
+ makeCommandBuffer(context, *cmdPool));
+
+ const deUint32 numWorkgroups[3] = {4, 2, 2};
+
+ // Local sizes to exercise. Only the first localSizesToTestCount - 1 entries
+ // are dispatched; the last one exists so that "index + 1" below is always valid.
+ const deUint32 localSizesToTestCount = 15;
+ deUint32 localSizesToTest[localSizesToTestCount][3] =
+ {
+ {1, 1, 1},
+ {32, 4, 1},
+ {32, 1, 4},
+ {1, 32, 4},
+ {1, 4, 32},
+ {4, 1, 32},
+ {4, 32, 1},
+ {subgroupSize, 1, 1},
+ {1, subgroupSize, 1},
+ {1, 1, subgroupSize},
+ {3, 5, 7},
+ {128, 1, 1},
+ {1, 128, 1},
+ {1, 1, 64},
+ {1, 1, 1} // Isn't used, just here to make double buffering checks easier
+ };
+
+ // Pipeline for the first local size; later ones are built inside the loop.
+ Move<VkPipeline> lastPipeline(
+ makeComputePipeline(context, *pipelineLayout, *shaderModule,
+ localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2]));
+
+ for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
+ {
+ const deUint32 nextX = localSizesToTest[index + 1][0];
+ const deUint32 nextY = localSizesToTest[index + 1][1];
+ const deUint32 nextZ = localSizesToTest[index + 1][2];
+
+ // we are running one test
+ totalIterations++;
+
+ beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+
+ context.getDeviceInterface().cmdBindPipeline(
+ *cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *lastPipeline);
+
+ context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
+ VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
+ &descriptorSet.get(), 0u, DE_NULL);
+
+ context.getDeviceInterface().cmdDispatch(*cmdBuffer,
+ numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
+
+ endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
+
+ Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
+
+ // The pipeline for the next local size is created between submitting the
+ // current work and waiting on its fence.
+ Move<VkPipeline> nextPipeline(
+ makeComputePipeline(context, *pipelineLayout, *shaderModule,
+ nextX, nextY, nextZ));
+
+ waitFence(context, fence);
+
+ std::vector<const void*> datas;
+
+ {
+ const Allocation& resultAlloc = resultBuffer.getAllocation();
+ invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
+
+ // we always have our result data first
+ datas.push_back(resultAlloc.getHostPtr());
+ }
+
+ for (deUint32 i = 0; i < inputsCount; i++)
+ {
+ if (!inputBuffers[i]->isImage())
+ {
+ const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
+ invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
+
+ // non-image inputs follow the result pointer, in input order
+ datas.push_back(resultAlloc.getHostPtr());
+ }
+ }
+
+ if (!checkResult(datas, numWorkgroups, localSizesToTest[index], subgroupSize))
+ {
+ failedIterations++;
+ }
+
+ context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
+
+ // Hand the pre-built pipeline over to the next iteration.
+ lastPipeline = nextPipeline;
+ }
+
+ if (0 < failedIterations)
+ {
+ context.getTestContext().getLog()
+ << TestLog::Message << (totalIterations - failedIterations) << " / "
+ << totalIterations << " values passed" << TestLog::EndMessage;
+ return tcu::TestStatus::fail("Failed!");
+ }
+
+ return tcu::TestStatus::pass("OK");
+}
--- /dev/null
+#ifndef _VKTSUBGROUPSTESTSUTILS_HPP
+#define _VKTSUBGROUPSTESTSUTILS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups tests utility classes
+ */ /*--------------------------------------------------------------------*/
+
+#include "vkBuilderUtil.hpp"
+#include "vkDefs.hpp"
+#include "vkDeviceUtil.hpp"
+#include "vkMemUtil.hpp"
+#include "vkPlatform.hpp"
+#include "vkPrograms.hpp"
+#include "vkQueryUtil.hpp"
+#include "vkRef.hpp"
+#include "vkRefUtil.hpp"
+#include "vkStrUtil.hpp"
+#include "vkTypeUtil.hpp"
+#include "vktTestCase.hpp"
+#include "vktTestCaseUtil.hpp"
+
+#include "tcuFormatUtil.hpp"
+#include "tcuTestLog.hpp"
+#include "tcuVectorUtil.hpp"
+
+#include "gluShaderUtil.hpp"
+
+#include "deSharedPtr.hpp"
+#include "deUniquePtr.hpp"
+
+#include <string>
+
+namespace vkt
+{
+namespace subgroups
+{
+// A struct to represent input data to a shader
+// Description of one extra input resource handed to the test drivers
+// (make*FrameBufferTest / makeComputeTest).
+struct SSBOData
+{
+	// Selects how the backing memory should be initialised. The value is
+	// consumed by initializeMemory(), which is defined outside this header.
+	enum InputDataInitializeType
+	{
+		InitializeNone = 0,
+		InitializeNonZero,
+		InitializeZero,
+	};
+
+	// Defaults: nothing to initialise, undefined format, zero elements,
+	// buffer-backed, binding 0, no shader stages.
+	SSBOData()
+		: initializeType	(InitializeNone)
+		, format			(vk::VK_FORMAT_UNDEFINED)
+		, numElements		(0)
+		, isImage			(false)
+		, binding			(0u)
+		, stages			(static_cast<vk::VkShaderStageFlagBits>(0u))
+	{
+	}
+
+	InputDataInitializeType		initializeType;
+
+	// Element format; the drivers multiply its byte size by numElements to size a buffer.
+	vk::VkFormat				format;
+	// Number of elements (used as the image width when isImage is true).
+	vk::VkDeviceSize			numElements;
+	// true: back the input with an Image; false: with a Buffer.
+	bool						isImage;
+	// NOTE(review): presumably the descriptor binding index - confirm against the users.
+	deUint32					binding;
+	// NOTE(review): presumably the stages that access the resource - confirm against the users.
+	vk::VkShaderStageFlagBits	stages;
+};
+
+// ---------------------------------------------------------------------------
+// Shader-text helpers and device/stage/format queries.
+// The definitions are not part of this header.
+// ---------------------------------------------------------------------------
+
+std::string getSharedMemoryBallotHelper();
+
+deUint32 getSubgroupSize(Context& context);
+
+vk::VkDeviceSize maxSupportedSubgroupSize();
+
+std::string getShaderStageName(vk::VkShaderStageFlags stage);
+
+std::string getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit);
+
+void addNoSubgroupShader (vk::SourceCollections& programCollection);
+
+std::string getVertShaderForStage(vk::VkShaderStageFlags stage);//TODO
+
+bool isSubgroupSupported(Context& context);
+
+bool areSubgroupOperationsSupportedForStage(
+	Context& context, vk::VkShaderStageFlags stage);
+
+bool areSubgroupOperationsRequiredForStage(vk::VkShaderStageFlags stage);
+
+bool isSubgroupFeatureSupportedForDevice(Context& context, vk::VkSubgroupFeatureFlagBits bit);
+
+bool isFragmentSSBOSupportedForDevice(Context& context);
+
+bool isVertexSSBOSupportedForDevice(Context& context);
+
+bool isDoubleSupportedForDevice(Context& context);
+
+bool isDoubleFormat(vk::VkFormat format);
+
+std::string getFormatNameForGLSL(vk::VkFormat format);
+
+void addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options, vk::GlslSourceCollection& collection);
+void addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection);
+
+// ---------------------------------------------------------------------------
+// Per-stage shader registration used by the framebuffer test variants for the
+// stages that are not themselves under test.
+// ---------------------------------------------------------------------------
+
+void setVertexShaderFrameBuffer (vk::SourceCollections& programCollection);
+
+void setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection);
+
+void setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection);
+
+void setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection);
+
+// ---------------------------------------------------------------------------
+// Generic result checkers. Callers pass the value every result element is
+// compared against as `ref` (the vote tests, for example, pass 0x1F).
+// ---------------------------------------------------------------------------
+
+bool check(std::vector<const void*> datas,
+	deUint32 width, deUint32 ref);
+
+bool checkCompute(std::vector<const void*> datas,
+	const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+	deUint32 ref);
+
+// ---------------------------------------------------------------------------
+// Test drivers. Each one takes the result format, an array of extraDataCount
+// (inputsCount) SSBOData input descriptions and a checkResult callback, and
+// returns the final test status. In the callbacks datas[0] is the result data.
+// ---------------------------------------------------------------------------
+
+tcu::TestStatus makeTessellationEvaluationFrameBufferTest(Context& context, vk::VkFormat format,
+	SSBOData* extraData, deUint32 extraDataCount,
+	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+	const vk::VkShaderStageFlags shaderStage = vk::VK_SHADER_STAGE_ALL_GRAPHICS);
+
+tcu::TestStatus makeGeometryFrameBufferTest(Context& context, vk::VkFormat format, SSBOData* extraData,
+	deUint32 extraDataCount,
+	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize));
+
+tcu::TestStatus allStages(Context& context, vk::VkFormat format,
+	SSBOData* extraData, deUint32 extraDataCount,
+	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+	const vk::VkShaderStageFlags shaderStage);
+
+tcu::TestStatus makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
+	SSBOData* extraData, deUint32 extraDataCount,
+	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize));
+
+// Renders width x height attachments for power-of-two sizes from 8 up to the
+// subgroup size and calls checkResult(datas, width, height, subgroupSize).
+tcu::TestStatus makeFragmentFrameBufferTest(Context& context, vk::VkFormat format,
+	SSBOData* extraData, deUint32 extraDataCount,
+	bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
+						deUint32 height, deUint32 subgroupSize));
+
+// Dispatches the "comp" shader with several local sizes and calls
+// checkResult(datas, numWorkgroups, localSize, subgroupSize) after each one.
+tcu::TestStatus makeComputeTest(
+	Context& context, vk::VkFormat format, SSBOData* inputs,
+	deUint32 inputsCount,
+	bool (*checkResult)(std::vector<const void*> datas,
+		const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+		deUint32 subgroupSize));
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSTESTSUTILS_HPP
--- /dev/null
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "vktSubgroupsVoteTests.hpp"
+#include "vktSubgroupsTestsUtils.hpp"
+
+#include <string>
+#include <vector>
+
+using namespace tcu;
+using namespace std;
+using namespace vk;
+using namespace vkt;
+
+namespace
+{
+// Vote operation exercised by a test case; see getOpTypeName() for the
+// matching GLSL built-in. OPTYPE_LAST is the number of real operations.
+enum OpType
+{
+	OPTYPE_ALL		= 0,
+	OPTYPE_ANY		= 1,
+	OPTYPE_ALLEQUAL	= 2,
+	OPTYPE_LAST		= 3
+};
+
+// Result checker for the vertex-pipeline framebuffer variants: forwards to the
+// shared subgroups::check() with 0x1F as the reference value. The subgroup
+// size argument is ignored.
+static bool checkVertexPipelineStages(std::vector<const void*> datas,
+									  deUint32 width, deUint32)
+{
+	const deUint32 expectedResult = 0x1F;
+
+	return vkt::subgroups::check(datas, width, expectedResult);
+}
+
+// Result checker for the fragment framebuffer variant.
+//
+// datas[0] is read as width * height 32-bit values. For each value the low
+// five bits must equal 0x1F when bit 0x40 is set (the fragment shader sets
+// 0x40 when subgroupAny(gl_HelperInvocation) is true) and 0x1E otherwise.
+// Returns false on the first mismatch. The subgroup size argument is ignored.
+static bool checkFragmentPipelineStages(std::vector<const void*> datas,
+										deUint32 width, deUint32 height, deUint32)
+{
+	const deUint32* const texels = reinterpret_cast<const deUint32*>(datas[0]);
+
+	for (deUint32 x = 0u; x < width; ++x)
+	{
+		for (deUint32 y = 0u; y < height; ++y)
+		{
+			const deUint32	texel		= texels[x * height + y];
+			// Helper invocation observed -> all five bits expected; otherwise bit 0x1 is absent.
+			const deUint32	expected	= (texel & 0x40) ? 0x1F : 0x1E;
+
+			if ((texel & 0x1F) != expected)
+				return false;
+		}
+	}
+
+	return true;
+}
+
+// Result checker for the compute variant: forwards to the shared
+// subgroups::checkCompute() with 0x1F as the reference value. The subgroup
+// size argument is ignored.
+static bool checkCompute(std::vector<const void*> datas,
+						 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+						 deUint32)
+{
+	const deUint32 expectedResult = 0x1F;
+
+	return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, expectedResult);
+}
+
+// Maps an OpType value to the name of the GLSL vote built-in it tests.
+// Any other value triggers DE_FATAL and yields an empty string.
+std::string getOpTypeName(int opType)
+{
+	switch (opType)
+	{
+		case OPTYPE_ALL:		return "subgroupAll";
+		case OPTYPE_ANY:		return "subgroupAny";
+		case OPTYPE_ALLEQUAL:	return "subgroupAllEqual";
+		default:				break;
+	}
+
+	DE_FATAL("Unsupported op type");
+	return "";
+}
+
+// Parameters of a single vote test case. Plain aggregate: keep the member
+// order stable, since brace initialisation depends on it.
+struct CaseDefinition
+{
+ // One of the OPTYPE_* values (see getOpTypeName()).
+ int opType;
+ // Shader stage the vote operation is executed in.
+ VkShaderStageFlags shaderStage;
+ // Data format; used to pick the GLSL type via subgroups::getFormatNameForGLSL().
+ VkFormat format;
+};
+
+// Registers the GLSL sources for the framebuffer variant of a vote test.
+//
+// The stage named by caseDef.shaderStage receives a shader that embeds the
+// vote snippet for caseDef.opType; the remaining stages are filled in through
+// the subgroups::set*ShaderFrameBuffer() helpers (or, for the fragment case,
+// an inline vertex shader). Sources are added under the names "vert",
+// "geometry", "tesc", "tese" and "fragment".
+//
+// Result encoding produced by the snippets: for subgroupAll / subgroupAny the
+// bits 0x1, 0x1A and 0x4 combine to 0x1F; for subgroupAllEqual the bits 0x1,
+// 0x2, 0x4, 0x8 and 0x10 are set individually. The fragment shader
+// additionally uses 0x20 (derivative result) and 0x40 (a helper invocation
+// was seen by subgroupAny).
+void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
+{
+ const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ // NOTE(review): the 8-bit USCALED formats appear to stand in for bool/bvec
+ // types here - confirm against subgroups::getFormatNameForGLSL().
+ const bool formatIsBoolean =
+ VK_FORMAT_R8_USCALED == caseDef.format || VK_FORMAT_R8G8_USCALED == caseDef.format || VK_FORMAT_R8G8B8_USCALED == caseDef.format || VK_FORMAT_R8G8B8A8_USCALED == caseDef.format;
+
+ // Every non-fragment case uses the shared fragment shader.
+ if (VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
+ subgroups::setFragmentShaderFrameBuffer(programCollection);
+
+ // Fragment case: inline vertex shader deriving positions from gl_VertexIndex.
+ // All other non-vertex cases: shared vertex shader. The vertex case adds its
+ // own "vert" source further below.
+ if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
+ {
+ const string vertex = "#version 450\n"
+ "void main (void)\n"
+ "{\n"
+ " vec2 uv = vec2(float(gl_VertexIndex & 1), float((gl_VertexIndex >> 1) & 1));\n"
+ " gl_Position = vec4(uv * 4.0f -2.0f, 0.0f, 1.0f);\n"
+ " gl_PointSize = 1.0f;\n"
+ "}\n";
+ programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+ else if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
+ subgroups::setVertexShaderFrameBuffer(programCollection);
+
+ // GLSL snippet that computes `result` for the vertex, geometry and
+ // tessellation stages (the fragment stage builds its own variant below).
+ const string source =
+ (OPTYPE_ALL == caseDef.opType) ?
+ " result = " + getOpTypeName(caseDef.opType) +
+ "(true) ? 0x1 : 0;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(false) ? 0 : 0x1A;\n"
+ " result |= 0x4;\n"
+ : (OPTYPE_ANY == caseDef.opType) ?
+ " result = " + getOpTypeName(caseDef.opType) +
+ "(true) ? 0x1 : 0;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(false) ? 0 : 0x1A;\n"
+ " result |= 0x4;\n"
+ : (OPTYPE_ALLEQUAL == caseDef.opType) ?
+ " " + subgroups::getFormatNameForGLSL(caseDef.format) + " valueEqual = " + subgroups::getFormatNameForGLSL(caseDef.format) + "(1.25 * float(data[gl_SubgroupInvocationID]) + 5.0);\n" +
+ " " + subgroups::getFormatNameForGLSL(caseDef.format) + " valueNoEqual = " + subgroups::getFormatNameForGLSL(caseDef.format) + (formatIsBoolean ? "(subgroupElect())\n;" : "(12.0 * float(data[gl_SubgroupInvocationID]) + gl_SubgroupInvocationID);\n") +
+ " result = " + getOpTypeName(caseDef.opType) + "("
+ + subgroups::getFormatNameForGLSL(caseDef.format) + "(1)) ? 0x1 : 0;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(gl_SubgroupInvocationID) ? 0 : 0x2;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(data[0]) ? 0x4 : 0;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(valueEqual) ? 0x8 : 0x0;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(valueNoEqual) ? 0x0 : 0x10;\n"
+ " if (subgroupElect()) result |= 0x2 | 0x10;\n"
+ : "";
+
+ if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
+ {
+ // Vertex stage under test: result is written to out_color.r.
+ std::ostringstream vertexSrc;
+ vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << "#extension GL_KHR_shader_subgroup_vote: enable\n"
+ << "layout(location = 0) out vec4 out_color;\n"
+ << "layout(location = 0) in highp vec4 in_position;\n"
+ << "layout(set = 0, binding = 0) uniform Buffer1\n"
+ << "{\n"
+ << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+ << "};\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " uint result;\n"
+ << source
+ << " out_color.r = float(result);\n"
+ << " gl_Position = in_position;\n"
+ << " gl_PointSize = 1.0f;\n"
+ << "}\n";
+
+ programCollection.glslSources.add("vert") << glu::VertexSource(vertexSrc.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+ else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
+ {
+ // Geometry stage under test: one point in, one point out.
+ std::ostringstream geometry;
+
+ geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << "#extension GL_KHR_shader_subgroup_vote: enable\n"
+ << "layout(points) in;\n"
+ << "layout(points, max_vertices = 1) out;\n"
+ << "layout(location = 0) out float out_color;\n"
+ << "layout(set = 0, binding = 0) uniform Buffer1\n"
+ << "{\n"
+ << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+ << "};\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " uint result;\n"
+ << source
+ << " out_color = float(result);\n"
+ << " gl_Position = gl_in[0].gl_Position;\n"
+ << " EmitVertex();\n"
+ << " EndPrimitive();\n"
+ << "}\n";
+
+ programCollection.glslSources.add("geometry")
+ << glu::GeometrySource(geometry.str()) << buildOptions;
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
+ {
+ // Tessellation control stage under test; the evaluation shader comes
+ // from the shared helper.
+ std::ostringstream controlSource;
+ controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << "#extension GL_KHR_shader_subgroup_vote: enable\n"
+ << "layout(vertices = 2) out;\n"
+ << "layout(location = 0) out float out_color[];\n"
+ << "layout(set = 0, binding = 0) uniform Buffer1\n"
+ << "{\n"
+ << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+ << "};\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " uint result;\n"
+ << " if (gl_InvocationID == 0)\n"
+ <<" {\n"
+ << " gl_TessLevelOuter[0] = 1.0f;\n"
+ << " gl_TessLevelOuter[1] = 1.0f;\n"
+ << " }\n"
+ << source
+ << " out_color[gl_InvocationID] = float(result);"
+ << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ << "}\n";
+
+ programCollection.glslSources.add("tesc")
+ << glu::TessellationControlSource(controlSource.str()) << buildOptions;
+ subgroups::setTesEvalShaderFrameBuffer(programCollection);
+ }
+ else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
+ {
+ // Tessellation evaluation stage under test; the control shader comes
+ // from the shared helper. Note that `offset` is declared in the shader
+ // but not referenced by the vote snippet.
+ std::ostringstream evaluationSource;
+ evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << "#extension GL_KHR_shader_subgroup_vote: enable\n"
+ << "#extension GL_EXT_tessellation_shader : require\n"
+ << "layout(isolines, equal_spacing, ccw ) in;\n"
+ << "layout(location = 0) out float out_color;\n"
+ << "layout(set = 0, binding = 0) uniform Buffer1\n"
+ << "{\n"
+ << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+ << "};\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " uint result;\n"
+ << " highp uint offset = gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5);\n"
+ << source
+ << " out_color = float(result);\n"
+ << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
+ << "}\n";
+
+ subgroups::setTesCtrlShaderFrameBuffer(programCollection);
+ programCollection.glslSources.add("tese")
+ << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
+ }
+ else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
+ {
+ // Fragment-specific snippet: it ORs into `result` (which the shader
+ // pre-initialises) and bases bit 0x1 on gl_HelperInvocation.
+ const string sourceFragment =
+ (OPTYPE_ALL == caseDef.opType) ?
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(!gl_HelperInvocation) ? 0x0 : 0x1;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(false) ? 0 : 0x1A;\n"
+ " result |= 0x4;\n"
+ : (OPTYPE_ANY == caseDef.opType) ?
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(gl_HelperInvocation) ? 0x1 : 0x0;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(false) ? 0 : 0x1A;\n"
+ " result |= 0x4;\n"
+ : (OPTYPE_ALLEQUAL == caseDef.opType) ?
+ " " + subgroups::getFormatNameForGLSL(caseDef.format) + " valueEqual = " + subgroups::getFormatNameForGLSL(caseDef.format) + "(1.25 * float(data[gl_SubgroupInvocationID]) + 5.0);\n" +
+ " " + subgroups::getFormatNameForGLSL(caseDef.format) + " valueNoEqual = " + subgroups::getFormatNameForGLSL(caseDef.format) + (formatIsBoolean ? "(subgroupElect());\n" : "(12.0 * float(data[gl_SubgroupInvocationID]) + int(gl_FragCoord.x*gl_SubgroupInvocationID));\n") +
+ " result |= " + getOpTypeName(caseDef.opType) + "("
+ + subgroups::getFormatNameForGLSL(caseDef.format) + "(1)) ? 0x10 : 0;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(gl_SubgroupInvocationID) ? 0 : 0x2;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(data[0]) ? 0x4 : 0;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(valueEqual) ? 0x8 : 0x0;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(gl_HelperInvocation) ? 0x0 : 0x1;\n"
+ " if (subgroupElect()) result |= 0x2 | 0x10;\n"
+ : "";
+
+ // The shader sets 0x20 from a derivative comparison and 0x40 when any
+ // invocation in the subgroup is a helper, then appends the snippet above.
+ std::ostringstream fragmentSource;
+ fragmentSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
+ << "#extension GL_KHR_shader_subgroup_vote: enable\n"
+ << "layout(location = 0) out uint out_color;\n"
+ << "layout(set = 0, binding = 0) uniform Buffer1\n"
+ << "{\n"
+ << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
+ << "};\n"
+ << ""
+ << "void main()\n"
+ << "{\n"
+ << " uint result = 0u;\n"
+ << " if (dFdx(gl_SubgroupInvocationID * gl_FragCoord.x * gl_FragCoord.y) - dFdy(gl_SubgroupInvocationID * gl_FragCoord.x * gl_FragCoord.y) > 0.0f)\n"
+ << " {\n"
+ << " result |= 0x20;\n" // to be sure that compiler doesn't remove dFdx and dFdy executions
+ << " }\n"
+ << " bool helper = subgroupAny(gl_HelperInvocation);\n"
+ << " if (helper)\n"
+ << " {\n"
+ << " result |= 0x40;\n"
+ << " }\n"
+ << sourceFragment
+ << " out_color = result;\n"
+ << "}\n";
+
+ programCollection.glslSources.add("fragment")
+ << glu::FragmentSource(fragmentSource.str())<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+ else
+ {
+ // Any other stage value is a programming error in the caller.
+ DE_FATAL("Unsupported shader stage");
+ }
+}
+
+// Builds the GLSL sources for the SSBO-based vote cases.
+//
+// When caseDef.shaderStage is the compute stage a single "comp" shader is
+// registered. For any other stage value the whole set "vert", "tesc", "tese",
+// the geometry shaders (through addGeometryShadersFromTemplate) and "fragment"
+// is registered, followed by subgroups::addNoSubgroupShader().
+//
+// Every shader ORs the outcome of several vote calls into a bitmask that is
+// stored in result[offset] (or, for the fragment shader, in the `result`
+// colour output). For an opType other than ALL/ANY/ALLEQUAL the spliced-in
+// snippet is empty.
+void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
+{
+ // The R8*_USCALED formats select the subgroupElect()-based initialiser of
+ // valueNoEqual below instead of the arithmetic one.
+ // NOTE(review): the flag name suggests these formats stand in for GLSL bool
+ // types - confirm against subgroups::getFormatNameForGLSL.
+ const bool formatIsBoolean =
+ VK_FORMAT_R8_USCALED == caseDef.format || VK_FORMAT_R8G8_USCALED == caseDef.format || VK_FORMAT_R8G8B8_USCALED == caseDef.format || VK_FORMAT_R8G8B8A8_USCALED == caseDef.format;
+ if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
+ {
+ std::ostringstream src;
+
+ // Compute shader prologue: result[] lives at binding 0, the input data[]
+ // at binding 1, and the workgroup size is taken from the ids 0..2
+ // (local_size_*_id). `offset` is the flattened global invocation index.
+ src << "#version 450\n"
+ << "#extension GL_KHR_shader_subgroup_vote: enable\n"
+ << "layout (local_size_x_id = 0, local_size_y_id = 1, "
+ "local_size_z_id = 2) in;\n"
+ << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+ << "{\n"
+ << " uint result[];\n"
+ << "};\n"
+ << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
+ << "{\n"
+ << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[];\n"
+ << "};\n"
+ << "\n"
+ << "void main (void)\n"
+ << "{\n"
+ << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
+ << " highp uint offset = globalSize.x * ((globalSize.y * "
+ "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
+ "gl_GlobalInvocationID.x;\n";
+ // Per-operation body: each statement contributes its own bit(s) to
+ // result[offset]; the first statement assigns, the rest OR in.
+ if (OPTYPE_ALL == caseDef.opType)
+ {
+ src << " result[offset] = " << getOpTypeName(caseDef.opType)
+ << "(true) ? 0x1 : 0;\n"
+ << " result[offset] |= " << getOpTypeName(caseDef.opType)
+ << "(false) ? 0 : 0x1A;\n"
+ << " result[offset] |= " << getOpTypeName(caseDef.opType)
+ << "(data[gl_SubgroupInvocationID] > 0) ? 0x4 : 0;\n";
+ }
+ else if (OPTYPE_ANY == caseDef.opType)
+ {
+ src << " result[offset] = " << getOpTypeName(caseDef.opType)
+ << "(true) ? 0x1 : 0;\n"
+ << " result[offset] |= " << getOpTypeName(caseDef.opType)
+ << "(false) ? 0 : 0x1A;\n"
+ << " result[offset] |= " << getOpTypeName(caseDef.opType)
+ << "(data[gl_SubgroupInvocationID] == data[0]) ? 0x4 : 0;\n";
+ }
+
+ else if (OPTYPE_ALLEQUAL == caseDef.opType)
+ {
+ // valueEqual / valueNoEqual are typed locals derived from data[]; the
+ // final subgroupElect() line sets bits 0x2 and 0x10 on the elected
+ // invocation regardless of the votes above.
+ src << " " << subgroups::getFormatNameForGLSL(caseDef.format) <<" valueEqual = " << subgroups::getFormatNameForGLSL(caseDef.format) << "(1.25 * float(data[gl_SubgroupInvocationID]) + 5.0);\n"
+ << " " << subgroups::getFormatNameForGLSL(caseDef.format) <<" valueNoEqual = " << subgroups::getFormatNameForGLSL(caseDef.format) << (formatIsBoolean ? "(subgroupElect());\n" : "(12.0 * float(data[gl_SubgroupInvocationID]) + offset);\n")
+ <<" result[offset] = " << getOpTypeName(caseDef.opType) << "("
+ << subgroups::getFormatNameForGLSL(caseDef.format) << "(1)) ? 0x1 : 0x0;\n"
+ << " result[offset] |= " << getOpTypeName(caseDef.opType)
+ << "(gl_SubgroupInvocationID) ? 0x0 : 0x2;\n"
+ << " result[offset] |= " << getOpTypeName(caseDef.opType)
+ << "(data[0]) ? 0x4 : 0x0;\n"
+ << " result[offset] |= "<< getOpTypeName(caseDef.opType)
+ << "(valueEqual) ? 0x8 : 0x0;\n"
+ << " result[offset] |= "<< getOpTypeName(caseDef.opType)
+ << "(valueNoEqual) ? 0x0 : 0x10;\n"
+ << " if (subgroupElect()) result[offset] |= 0x2 | 0x10;\n";
+ }
+
+ src << "}\n";
+
+ programCollection.glslSources.add("comp")
+ << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+ else
+ {
+ // Snippet spliced into the vertex, tessellation and geometry shaders
+ // below; each of those shaders declares `offset` before it. Unlike the
+ // compute variant, the ALL/ANY cases do not read data[] here and set bit
+ // 0x4 unconditionally.
+ const string source =
+ (OPTYPE_ALL == caseDef.opType) ?
+ " result[offset] = " + getOpTypeName(caseDef.opType) +
+ "(true) ? 0x1 : 0;\n"
+ " result[offset] |= " + getOpTypeName(caseDef.opType) +
+ "(false) ? 0 : 0x1A;\n"
+ " result[offset] |= 0x4;\n"
+ : (OPTYPE_ANY == caseDef.opType) ?
+ " result[offset] = " + getOpTypeName(caseDef.opType) +
+ "(true) ? 0x1 : 0;\n"
+ " result[offset] |= " + getOpTypeName(caseDef.opType) +
+ "(false) ? 0 : 0x1A;\n"
+ " result[offset] |= 0x4;\n"
+ : (OPTYPE_ALLEQUAL == caseDef.opType) ?
+ " " + subgroups::getFormatNameForGLSL(caseDef.format) + " valueEqual = " + subgroups::getFormatNameForGLSL(caseDef.format) + "(1.25 * float(data[gl_SubgroupInvocationID]) + 5.0);\n" +
+ " " + subgroups::getFormatNameForGLSL(caseDef.format) + " valueNoEqual = " + subgroups::getFormatNameForGLSL(caseDef.format) + (formatIsBoolean ? "(subgroupElect());\n" : "(12.0 * float(data[gl_SubgroupInvocationID]) + gl_SubgroupInvocationID);\n") +
+ " result[offset] = " + getOpTypeName(caseDef.opType) + "("
+ + subgroups::getFormatNameForGLSL(caseDef.format) + "(1)) ? 0x1 : 0;\n"
+ " result[offset] |= " + getOpTypeName(caseDef.opType) +
+ "(gl_SubgroupInvocationID) ? 0 : 0x2;\n"
+ " result[offset] |= " + getOpTypeName(caseDef.opType) +
+ "(data[0]) ? 0x4 : 0;\n"
+ " result[offset] |= " + getOpTypeName(caseDef.opType) +
+ "(valueEqual) ? 0x8 : 0x0;\n"
+ " result[offset] |= " + getOpTypeName(caseDef.opType) +
+ "(valueNoEqual) ? 0x0 : 0x10;\n"
+ " if (subgroupElect()) result[offset] |= 0x2 | 0x10;\n"
+ : "";
+
+ const string formatString = subgroups::getFormatNameForGLSL(caseDef.format);
+
+ // Vertex shader: result[] at binding 0, shared input data[] at binding 4;
+ // one result slot per gl_VertexIndex.
+ {
+ const string vertex =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_vote: enable\n"
+ "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
+ "{\n"
+ " uint result[];\n"
+ "};\n"
+ "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+ "{\n"
+ " " + formatString + " data[];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " highp uint offset = gl_VertexIndex;\n"
+ + source +
+ " float pixelSize = 2.0f/1024.0f;\n"
+ " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
+ " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
+ " gl_PointSize = 1.0f;\n"
+ "}\n";
+ programCollection.glslSources.add("vert")
+ << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+
+ // Tessellation control shader: result[] at binding 1, data[] at binding 4;
+ // one result slot per gl_PrimitiveID.
+ {
+ const string tesc =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_vote: enable\n"
+ "layout(vertices=1) out;\n"
+ "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
+ "{\n"
+ " uint result[];\n"
+ "};\n"
+ "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+ "{\n"
+ " " + formatString + " data[];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " highp uint offset = gl_PrimitiveID;\n"
+ + source +
+ " if (gl_InvocationID == 0)\n"
+ " {\n"
+ " gl_TessLevelOuter[0] = 1.0f;\n"
+ " gl_TessLevelOuter[1] = 1.0f;\n"
+ " }\n"
+ " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
+ "}\n";
+
+ programCollection.glslSources.add("tesc")
+ << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+
+ // Tessellation evaluation shader: result[] at binding 2, data[] at
+ // binding 4; two result slots per primitive, chosen by rounding
+ // gl_TessCoord.x.
+ {
+ const string tese =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_vote: enable\n"
+ "layout(isolines) in;\n"
+ "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
+ "{\n"
+ " uint result[];\n"
+ "};\n"
+ "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+ "{\n"
+ " " + formatString + " data[];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " highp uint offset = gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5);\n"
+ + source +
+ " float pixelSize = 2.0f/1024.0f;\n"
+ " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
+ "}\n";
+
+ programCollection.glslSources.add("tese")
+ << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+
+ // Geometry shader template: result[] at binding 3, data[] at binding 4.
+ // The ${TOPOLOGY} placeholder is left in the text and handed to
+ // addGeometryShadersFromTemplate (presumably substituted there - confirm).
+ {
+ const string geometry =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_vote: enable\n"
+ "layout(${TOPOLOGY}) in;\n"
+ "layout(points, max_vertices = 1) out;\n"
+ "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
+ "{\n"
+ " uint result[];\n"
+ "};\n"
+ "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+ "{\n"
+ " " + formatString + " data[];\n"
+ "};\n"
+ "\n"
+ "void main (void)\n"
+ "{\n"
+ " highp uint offset = gl_PrimitiveIDIn;\n"
+ + source +
+ " gl_Position = gl_in[0].gl_Position;\n"
+ " EmitVertex();\n"
+ " EndPrimitive();\n"
+ "}\n";
+
+ subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
+ programCollection.glslSources);
+ }
+
+ // Fragment shader: it has no result[] buffer and writes the bitmask to the
+ // `result` colour output instead, so it needs its own copy of the snippet
+ // (same votes, `result` in place of `result[offset]`).
+ {
+ const string sourceFragment =
+ (OPTYPE_ALL == caseDef.opType) ?
+ " result = " + getOpTypeName(caseDef.opType) +
+ "(true) ? 0x1 : 0;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(false) ? 0 : 0x1A;\n"
+ " result |= 0x4;\n"
+ : (OPTYPE_ANY == caseDef.opType) ?
+ " result = " + getOpTypeName(caseDef.opType) +
+ "(true) ? 0x1 : 0;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(false) ? 0 : 0x1A;\n"
+ " result |= 0x4;\n"
+ : (OPTYPE_ALLEQUAL == caseDef.opType) ?
+ " " + subgroups::getFormatNameForGLSL(caseDef.format) + " valueEqual = " + subgroups::getFormatNameForGLSL(caseDef.format) + "(1.25 * float(data[gl_SubgroupInvocationID]) + 5.0);\n" +
+ " " + subgroups::getFormatNameForGLSL(caseDef.format) + " valueNoEqual = " + subgroups::getFormatNameForGLSL(caseDef.format) + (formatIsBoolean ? "(subgroupElect());\n" : "(12.0 * float(data[gl_SubgroupInvocationID]) + int(gl_FragCoord.x*gl_SubgroupInvocationID));\n") +
+ " result = " + getOpTypeName(caseDef.opType) + "("
+ + subgroups::getFormatNameForGLSL(caseDef.format) + "(1)) ? 0x1 : 0;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(gl_SubgroupInvocationID) ? 0 : 0x2;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(data[0]) ? 0x4 : 0;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(valueEqual) ? 0x8 : 0x0;\n"
+ " result |= " + getOpTypeName(caseDef.opType) +
+ "(valueNoEqual) ? 0x0 : 0x10;\n"
+ " if (subgroupElect()) result |= 0x2 | 0x10;\n"
+ : "";
+ const string fragment =
+ "#version 450\n"
+ "#extension GL_KHR_shader_subgroup_vote: enable\n"
+ "layout(location = 0) out uint result;\n"
+ "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
+ "{\n"
+ " " + formatString + " data[];\n"
+ "};\n"
+ "void main (void)\n"
+ "{\n"
+ + sourceFragment +
+ "}\n";
+
+ programCollection.glslSources.add("fragment")
+ << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
+ }
+
+ subgroups::addNoSubgroupShader(programCollection);
+ }
+}
+
+// Support gate for the vote cases. Throws NotSupportedError, in this order,
+// when the device has no subgroup support, lacks the vote feature bit, or is
+// asked for a double format without double support. Returns normally otherwise.
+void supportedCheck (Context& context, CaseDefinition caseDef)
+{
+	const bool subgroupsAvailable = subgroups::isSubgroupSupported(context);
+	if (!subgroupsAvailable)
+	{
+		TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
+	}
+
+	// Only queried once basic subgroup support has been established.
+	const bool voteAvailable = subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_VOTE_BIT);
+	if (!voteAvailable)
+	{
+		TCU_THROW(NotSupportedError, "Device does not support subgroup vote operations");
+	}
+
+	// Double support is only relevant (and only queried) for double formats.
+	if (subgroups::isDoubleFormat(caseDef.format))
+	{
+		if (!subgroups::isDoubleSupportedForDevice(context))
+		{
+			TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
+		}
+	}
+}
+
+// Runs one framebuffer-based vote case for the single stage named by
+// caseDef.shaderStage.
+//
+// If the device does not report subgroup operations for that stage, the case
+// fails when the stage is required to support them and is reported as not
+// supported otherwise. An unrecognised stage raises InternalError.
+tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
+{
+	if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+	{
+		// Missing support is only tolerated for stages that are optional.
+		if (!subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
+			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
+
+		return tcu::TestStatus::fail(
+			"Shader stage " + subgroups::getShaderStageName(caseDef.shaderStage) + " is required to support subgroup operations!");
+	}
+
+	// Input buffer: one element per possible invocation; all-equal cases start
+	// from zeroed data, the other operations from non-zero data.
+	subgroups::SSBOData inputData;
+	inputData.format		= caseDef.format;
+	inputData.numElements	= subgroups::maxSupportedSubgroupSize();
+	if (OPTYPE_ALLEQUAL == caseDef.opType)
+		inputData.initializeType = subgroups::SSBOData::InitializeZero;
+	else
+		inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
+
+	switch (caseDef.shaderStage)
+	{
+		case VK_SHADER_STAGE_VERTEX_BIT:
+			return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+
+		case VK_SHADER_STAGE_GEOMETRY_BIT:
+			return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+
+		// Both tessellation stages go through the same helper; the trailing
+		// argument tells it which of the two is under test.
+		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+			return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+
+		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+			return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+
+		case VK_SHADER_STAGE_FRAGMENT_BIT:
+			return subgroups::makeFragmentFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkFragmentPipelineStages);
+
+		default:
+			TCU_THROW(InternalError, "Unhandled shader stage");
+	}
+}
+
+
+// Runs one SSBO-based vote case.
+//
+// Compute: fails if the compute stage does not support subgroup operations,
+// otherwise runs makeComputeTest.
+// Graphics: intersects the requested stages with the stages the device reports
+// in VkPhysicalDeviceSubgroupProperties::supportedStages, falls back to the
+// fragment stage alone when vertex-stage SSBO writes are unavailable, and runs
+// allStages on whatever remains (NotSupportedError if nothing remains).
+tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
+{
+	if (VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
+	{
+		// Query the stages for which the device supports subgroup operations.
+		VkPhysicalDeviceSubgroupProperties subgroupProps;
+		subgroupProps.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+		subgroupProps.pNext = DE_NULL;
+
+		VkPhysicalDeviceProperties2 deviceProps2;
+		deviceProps2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+		deviceProps2.pNext = &subgroupProps;
+
+		context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &deviceProps2);
+
+		VkShaderStageFlagBits usableStages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProps.supportedStages);
+
+		if (VK_SHADER_STAGE_FRAGMENT_BIT != usableStages)
+		{
+			if (!subgroups::isVertexSSBOSupportedForDevice(context))
+			{
+				// Without vertex-stage SSBO writes only the fragment stage can
+				// be exercised; give up if it is not among the usable stages.
+				if ((usableStages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
+					TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
+
+				usableStages = VK_SHADER_STAGE_FRAGMENT_BIT;
+			}
+		}
+
+		if ((VkShaderStageFlagBits)0u == usableStages)
+			TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
+
+		subgroups::SSBOData graphicsInput;
+		graphicsInput.format		= caseDef.format;
+		graphicsInput.numElements	= subgroups::maxSupportedSubgroupSize();
+		if (OPTYPE_ALLEQUAL == caseDef.opType)
+			graphicsInput.initializeType = subgroups::SSBOData::InitializeZero;
+		else
+			graphicsInput.initializeType = subgroups::SSBOData::InitializeNonZero;
+		graphicsInput.binding		= 4u;
+		graphicsInput.stages		= usableStages;
+
+		return subgroups::allStages(context, VK_FORMAT_R32_UINT, &graphicsInput, 1, checkVertexPipelineStages, usableStages);
+	}
+
+	// Compute path: lack of subgroup support in this stage is reported as a failure.
+	if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
+	{
+		return tcu::TestStatus::fail(
+			"Shader stage " + subgroups::getShaderStageName(caseDef.shaderStage) + " is required to support subgroup operations!");
+	}
+
+	subgroups::SSBOData computeInput;
+	computeInput.format			= caseDef.format;
+	computeInput.numElements	= subgroups::maxSupportedSubgroupSize();
+	if (OPTYPE_ALLEQUAL == caseDef.opType)
+		computeInput.initializeType = subgroups::SSBOData::InitializeZero;
+	else
+		computeInput.initializeType = subgroups::SSBOData::InitializeNonZero;
+
+	return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &computeInput, 1, checkCompute);
+}
+}
+
+namespace vkt
+{
+namespace subgroups
+{
+// Creates the "vote" test group and returns it; ownership is released to the
+// caller.
+//
+// The group has four children:
+//   graphics    - all graphics stages at once, SSBO based (initPrograms / test)
+//   compute     - compute stage, SSBO based (initPrograms / test)
+//   framebuffer - one case per vertex, tessellation and geometry stage
+//                 (initFrameBufferPrograms / noSSBOtest)
+//   frag_helper - fragment stage (initFrameBufferPrograms / noSSBOtest)
+//
+// Every operation is registered for VK_FORMAT_R32_UINT; the remaining formats
+// are registered for the all-equal operation only.
+tcu::TestCaseGroup* createSubgroupsVoteTests(tcu::TestContext& testCtx)
+{
+	// The descriptions name the vote category; earlier text said "arithmetic",
+	// which was a copy-paste slip. Group names are unchanged.
+	de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
+		testCtx, "graphics", "Subgroup vote category tests: graphics"));
+	de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
+		testCtx, "compute", "Subgroup vote category tests: compute"));
+	de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
+		testCtx, "framebuffer", "Subgroup vote category tests: framebuffer"));
+
+	de::MovePtr<tcu::TestCaseGroup> fragHelperGroup(new tcu::TestCaseGroup(
+		testCtx, "frag_helper", "Subgroup vote category tests: fragment helper invocation"));
+
+	// Stages covered one at a time by the framebuffer group.
+	const VkShaderStageFlags stages[] =
+	{
+		VK_SHADER_STAGE_VERTEX_BIT,
+		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+		VK_SHADER_STAGE_GEOMETRY_BIT,
+	};
+
+	const VkFormat formats[] =
+	{
+		VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
+		VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
+		VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
+		VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
+		VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
+		VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
+		VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
+		VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED,
+		VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
+	};
+
+	for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
+	{
+		const VkFormat		format		= formats[formatIndex];
+		const std::string	formatName	= subgroups::getFormatNameForGLSL(format);
+
+		for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
+		{
+			// Skip the typed tests for all but subgroupAllEqual()
+			if ((VK_FORMAT_R32_UINT != format) && (OPTYPE_ALLEQUAL != opTypeIndex))
+			{
+				continue;
+			}
+
+			const std::string op		= de::toLower(getOpTypeName(opTypeIndex));
+			// Common "<op>_<format>" prefix shared by every case name below.
+			const std::string caseName	= op + "_" + formatName;
+
+			{
+				const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format};
+				addFunctionCaseWithPrograms(computeGroup.get(), caseName,
+											"", supportedCheck, initPrograms, test, caseDef);
+			}
+
+			{
+				const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format};
+				addFunctionCaseWithPrograms(graphicGroup.get(), caseName,
+											"", supportedCheck, initPrograms, test, caseDef);
+			}
+
+			for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
+			{
+				const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
+				addFunctionCaseWithPrograms(framebufferGroup.get(),
+											caseName + "_" + getShaderStageName(caseDef.shaderStage), "",
+											supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+			}
+
+			{
+				const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_FRAGMENT_BIT, format};
+				addFunctionCaseWithPrograms(fragHelperGroup.get(),
+											caseName + "_" + getShaderStageName(caseDef.shaderStage), "",
+											supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+			}
+		}
+	}
+
+	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
+		testCtx, "vote", "Subgroup vote category tests"));
+
+	group->addChild(graphicGroup.release());
+	group->addChild(computeGroup.release());
+	group->addChild(framebufferGroup.release());
+	group->addChild(fragHelperGroup.release());
+
+	return group.release();
+}
+
+} // subgroups
+} // vkt
--- /dev/null
+#ifndef _VKTSUBGROUPSVOTETESTS_HPP
+#define _VKTSUBGROUPSVOTETESTS_HPP
+/*------------------------------------------------------------------------
+ * Vulkan Conformance Tests
+ * ------------------------
+ *
+ * Copyright (c) 2017 The Khronos Group Inc.
+ * Copyright (c) 2017 Codeplay Software Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */ /*!
+ * \file
+ * \brief Subgroups Tests
+ */ /*--------------------------------------------------------------------*/
+
+#include "tcuDefs.hpp"
+#include "vktTestCase.hpp"
+
+namespace vkt
+{
+namespace subgroups
+{
+
+// Builds the "vote" test group (child groups: graphics, compute, framebuffer,
+// frag_helper) for the subgroup vote operations.
+// The definition hands the group back via release(), so the caller is
+// expected to take ownership of the returned pointer.
+tcu::TestCaseGroup* createSubgroupsVoteTests(tcu::TestContext& testCtx);
+
+} // subgroups
+} // vkt
+
+#endif // _VKTSUBGROUPSVOTETESTS_HPP