Add tests for VK_EXT_subgroup_size_control
author Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Thu, 25 Jul 2019 13:34:26 +0000 (15:34 +0200)
committer Alexander Galazin <Alexander.Galazin@arm.com>
Thu, 26 Nov 2020 17:02:45 +0000 (12:02 -0500)
For compute and graphics pipelines:

* Check that the required subgroup size set by the tests is honored by
  the driver (including for the full subgroups and allow varying cases
  for compute shaders).
* Check that enabling allow varying subgroup size reports a subgroup
  size that falls within the limits of the driver.
* For compute pipeline, check that require full subgroups works as
  expected.

On top of that, added a requiredSubgroupSize case to the subgroup compute
shader tests that checks all possible required subgroup sizes announced
by the driver.

Also, there is a test that checks the sanity of the subgroup size control limits.

Affected tests:

  dEQP-VK.subgroups.*

Components: Vulkan
VK-GL-CTS issue: 1864

Change-Id: Ia48deb9318420889754bd97545045f85b3c6fed7

16 files changed:
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsArithmeticTests.cpp
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsBallotBroadcastTests.cpp
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsBallotMasksTests.cpp
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsBallotOtherTests.cpp
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsBallotTests.cpp
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsBasicTests.cpp
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsBuiltinMaskVarTests.cpp
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsBuiltinVarTests.cpp
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsClusteredTests.cpp
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsPartitionedTests.cpp
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsQuadTests.cpp
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsShapeTests.cpp
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsShuffleTests.cpp
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsTestsUtils.cpp
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsTestsUtils.hpp
external/vulkancts/modules/vulkan/subgroups/vktSubgroupsVoteTests.cpp

index 97cb335..a2bcfd3 100755 (executable)
@@ -62,16 +62,18 @@ enum OpType
        OPTYPE_LAST
 };
 
-static bool checkVertexPipelineStages(std::vector<const void*> datas,
+static bool checkVertexPipelineStages(const void* internalData, std::vector<const void*> datas,
                                                                          deUint32 width, deUint32)
 {
+       DE_UNREF(internalData);
        return vkt::subgroups::check(datas, width, 0x3);
 }
 
-static bool checkCompute(std::vector<const void*> datas,
+static bool checkCompute(const void* internalData, std::vector<const void*> datas,
                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                                                 deUint32)
 {
+       DE_UNREF(internalData);
        return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 0x3);
 }
 
@@ -462,6 +464,7 @@ struct CaseDefinition
        VkShaderStageFlags      shaderStage;
        VkFormat                        format;
        de::SharedPtr<bool>     geometryPointSizeSupported;
+       deBool                          requiredSubgroupSize;
 };
 
 std::string getExtHeader(CaseDefinition caseDef)
@@ -616,6 +619,27 @@ void supportedCheck (Context& context, CaseDefinition caseDef)
        if (!subgroups::isFormatSupportedForDevice(context, caseDef.format))
                TCU_THROW(NotSupportedError, "Device does not support the specified format in subgroup operations");
 
+       if (caseDef.requiredSubgroupSize)
+       {
+               if (!context.requireDeviceFunctionality("VK_EXT_subgroup_size_control"))
+                       TCU_THROW(NotSupportedError, "Device does not support VK_EXT_subgroup_size_control extension");
+               VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures;
+               subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+               subgroupSizeControlFeatures.pNext = DE_NULL;
+
+               VkPhysicalDeviceFeatures2 features;
+               features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+               features.pNext = &subgroupSizeControlFeatures;
+
+               context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features);
+
+               if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
+
+               if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
+       }
+
        *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
 }
 
@@ -645,13 +669,13 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
        inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
 
        if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
-               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
-               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
        else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context,  VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
        else
                TCU_THROW(InternalError, "Unhandled shader stage");
 }
@@ -691,7 +715,35 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData.numElements = subgroups::maxSupportedSubgroupSize();
                inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
 
-               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkCompute);
+               if (caseDef.requiredSubgroupSize == DE_FALSE)
+                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkCompute);
+
+               tcu::TestLog& log = context.getTestContext().getLog();
+               VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+               subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+               subgroupSizeControlProperties.pNext = DE_NULL;
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupSizeControlProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                       << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+               // According to the spec, requiredSubgroupSize must be a power-of-two integer.
+               for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+               {
+                       tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkCompute,
+                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                       if (result.getCode() != QP_TEST_RESULT_PASS)
+                       {
+                               log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                               return result;
+                       }
+               }
+
+               return tcu::TestStatus::pass("OK");
        }
        else
        {
@@ -726,7 +778,8 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData.binding                       = 4u;
                inputData.stages                        = stages;
 
-               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, stages);
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData,
+                                                                       1, DE_NULL, checkVertexPipelineStages, stages);
        }
 }
 }
@@ -822,15 +875,16 @@ tcu::TestCaseGroup* createSubgroupsArithmeticTests(tcu::TestContext& testCtx)
                        std::string op = getOpTypeName(opTypeIndex);
 
                        {
-                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, de::SharedPtr<bool>(new bool)};
+                               CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, de::SharedPtr<bool>(new bool), DE_FALSE};
                                addFunctionCaseWithPrograms(computeGroup.get(),
                                                                                        de::toLower(op) + "_" +
                                                                                        subgroups::getFormatNameForGLSL(format),
                                                                                        "", supportedCheck, initPrograms, test, caseDef);
+                               caseDef.requiredSubgroupSize = DE_TRUE;
                        }
 
                        {
-                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format, de::SharedPtr<bool>(new bool)};
+                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format, de::SharedPtr<bool>(new bool), DE_FALSE};
                                addFunctionCaseWithPrograms(graphicGroup.get(),
                                                                                        de::toLower(op) + "_" +
                                                                                        subgroups::getFormatNameForGLSL(format),
@@ -839,7 +893,7 @@ tcu::TestCaseGroup* createSubgroupsArithmeticTests(tcu::TestContext& testCtx)
 
                        for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
                        {
-                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, de::SharedPtr<bool>(new bool)};
+                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, de::SharedPtr<bool>(new bool), DE_FALSE};
                                addFunctionCaseWithPrograms(framebufferGroup.get(), de::toLower(op) + "_" + subgroups::getFormatNameForGLSL(format) +
                                                                                        "_" + getShaderStageName(caseDef.shaderStage), "",
                                                                                        supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
index 07ae70d..585c198 100755 (executable)
@@ -44,16 +44,18 @@ enum OpType
        OPTYPE_LAST
 };
 
-static bool checkVertexPipelineStages(std::vector<const void*> datas,
+static bool checkVertexPipelineStages(const void* internalData, std::vector<const void*> datas,
                                                                          deUint32 width, deUint32)
 {
+       DE_UNREF(internalData);
        return vkt::subgroups::check(datas, width, 3);
 }
 
-static bool checkCompute(std::vector<const void*> datas,
+static bool checkCompute(const void* internalData, std::vector<const void*> datas,
                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                                                 deUint32)
 {
+       DE_UNREF(internalData);
        return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 3);
 }
 
@@ -81,6 +83,7 @@ struct CaseDefinition
        VkFormat                        format;
        de::SharedPtr<bool>     geometryPointSizeSupported;
        deBool                          extShaderSubGroupBallotTests;
+       deBool                          requiredSubgroupSize;
 };
 
 std::string getExtHeader(CaseDefinition caseDef)
@@ -262,7 +265,30 @@ void supportedCheck (Context& context, CaseDefinition caseDef)
        if ((caseDef.opType == OPTYPE_BROADCAST_NONCONST) && !subgroups::isSubgroupBroadcastDynamicIdSupported(context))
                TCU_THROW(NotSupportedError, "Device does not support SubgroupBroadcastDynamicId");
 
+       if (caseDef.requiredSubgroupSize)
+       {
+               if (!context.requireDeviceFunctionality("VK_EXT_subgroup_size_control"))
+                       TCU_THROW(NotSupportedError, "Device does not support VK_EXT_subgroup_size_control extension");
+               VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures;
+               subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+               subgroupSizeControlFeatures.pNext = DE_NULL;
+
+               VkPhysicalDeviceFeatures2 features;
+               features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+               features.pNext = &subgroupSizeControlFeatures;
+
+               context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features);
+
+               if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
+
+               if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
+       }
+
        *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
+
+
 }
 
 tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
@@ -289,13 +315,13 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
        inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
 
        if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
-               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
-               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
        else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
        else
                TCU_THROW(InternalError, "Unhandled shader stage");
 }
@@ -318,7 +344,35 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData.numElements = caseDef.extShaderSubGroupBallotTests ? 64u : subgroups::maxSupportedSubgroupSize();
                inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
 
-               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkCompute);
+               if (caseDef.requiredSubgroupSize == DE_FALSE)
+                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkCompute);
+
+               tcu::TestLog& log       = context.getTestContext().getLog();
+               VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+               subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+               subgroupSizeControlProperties.pNext = DE_NULL;
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupSizeControlProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                       << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+               // According to the spec, requiredSubgroupSize must be a power-of-two integer.
+               for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+               {
+                       tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkCompute,
+                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                       if (result.getCode() != QP_TEST_RESULT_PASS)
+                       {
+                               log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                               return result;
+                       }
+               }
+
+               return tcu::TestStatus::pass("OK");
        }
        else
        {
@@ -353,7 +407,7 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData.binding                       = 4u;
                inputData.stages                        = stages;
 
-               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, stages);
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, stages);
        }
 }
 }
@@ -400,16 +454,15 @@ tcu::TestCaseGroup* createSubgroupsBallotBroadcastTests(tcu::TestContext& testCt
                        const std::string name = getOpTypeCaseName(opTypeIndex) + "_" + subgroups::getFormatNameForGLSL(format);
 
                        {
-                               CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, de::SharedPtr<bool>(new bool), DE_FALSE};
+                               CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, de::SharedPtr<bool>(new bool), DE_FALSE, DE_FALSE};
                                addFunctionCaseWithPrograms(computeGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
                                caseDef.extShaderSubGroupBallotTests = DE_TRUE;
                                if (formatTypeIsSupportedARB)
                                        addFunctionCaseWithPrograms(computeGroupARB.get(), name, "", supportedCheck, initPrograms, test, caseDef);
-
                        }
 
                        {
-                               CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format, de::SharedPtr<bool>(new bool), DE_FALSE};
+                               CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format, de::SharedPtr<bool>(new bool), DE_FALSE, DE_FALSE};
                                addFunctionCaseWithPrograms(graphicGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
                                caseDef.extShaderSubGroupBallotTests = DE_TRUE;
                                if (formatTypeIsSupportedARB)
@@ -419,7 +472,7 @@ tcu::TestCaseGroup* createSubgroupsBallotBroadcastTests(tcu::TestContext& testCt
 
                        for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
                        {
-                               CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, de::SharedPtr<bool>(new bool), DE_FALSE};
+                               CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, de::SharedPtr<bool>(new bool), DE_FALSE, DE_FALSE};
                                addFunctionCaseWithPrograms(framebufferGroup.get(), name + getShaderStageName(caseDef.shaderStage), "",
                                                        supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
                                caseDef.extShaderSubGroupBallotTests = DE_TRUE;
index cf0ef40..85a057f 100755 (executable)
@@ -46,16 +46,18 @@ enum MaskType
        MASKTYPE_LAST
 };
 
-static bool checkVertexPipelineStages(std::vector<const void*> datas,
+static bool checkVertexPipelineStages(const void* internalData, std::vector<const void*> datas,
                                                                          deUint32 width, deUint32)
 {
+       DE_UNREF(internalData);
        return vkt::subgroups::check(datas, width, 0xf);
 }
 
-static bool checkCompute(std::vector<const void*> datas,
+static bool checkCompute(const void* internalData, std::vector<const void*> datas,
                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                                                 deUint32)
 {
+       DE_UNREF(internalData);
        return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 0xf);
 }
 
@@ -85,6 +87,7 @@ struct CaseDefinition
        int                                     maskType;
        VkShaderStageFlags      shaderStage;
        de::SharedPtr<bool>     geometryPointSizeSupported;
+       deBool                          requiredSubgroupSize;
 };
 
 std::string getBodySource(CaseDefinition caseDef)
@@ -403,6 +406,27 @@ void supportedCheck (Context& context, CaseDefinition caseDef)
        if (!subgroups::isInt64SupportedForDevice(context))
                TCU_THROW(NotSupportedError, "Int64 is not supported");
 
+       if (caseDef.requiredSubgroupSize)
+       {
+               if (!context.requireDeviceFunctionality("VK_EXT_subgroup_size_control"))
+                       TCU_THROW(NotSupportedError, "Device does not support VK_EXT_subgroup_size_control extension");
+               VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures;
+               subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+               subgroupSizeControlFeatures.pNext = DE_NULL;
+
+               VkPhysicalDeviceFeatures2 features;
+               features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+               features.pNext = &subgroupSizeControlFeatures;
+
+               context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features);
+
+               if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
+
+               if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
+       }
+
        *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
 }
 
@@ -425,11 +449,11 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
        }
 
        if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
-               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
-               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages);
        else if ((VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) & caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages);
        else
                TCU_THROW(InternalError, "Unhandled shader stage");
 }
@@ -445,7 +469,35 @@ tcu::TestStatus test (Context& context, const CaseDefinition caseDef)
                                subgroups::getShaderStageName(caseDef.shaderStage) +
                                " is required to support subgroup operations!");
                }
-               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkCompute);
+               if (caseDef.requiredSubgroupSize == DE_FALSE)
+                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkCompute);
+
+               tcu::TestLog& log       = context.getTestContext().getLog();
+               VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+               subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+               subgroupSizeControlProperties.pNext = DE_NULL;
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupSizeControlProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                       << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+               // According to the spec, requiredSubgroupSize must be a power-of-two integer.
+               for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+               {
+                       tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0u, DE_NULL, checkCompute,
+                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                       if (result.getCode() != QP_TEST_RESULT_PASS)
+                       {
+                               log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                               return result;
+                       }
+               }
+
+               return tcu::TestStatus::pass("OK");
        }
        else
        {
@@ -472,7 +524,7 @@ tcu::TestStatus test (Context& context, const CaseDefinition caseDef)
                if ((VkShaderStageFlagBits)0u == stages)
                        TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
 
-               return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, stages);
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages, stages);
        }
        return tcu::TestStatus::pass("OK");
 }
@@ -503,18 +555,18 @@ tcu::TestCaseGroup* createSubgroupsBallotMasksTests(tcu::TestContext& testCtx)
                const string mask = de::toLower(getMaskTypeName(maskTypeIndex));
 
                {
-                       const CaseDefinition caseDef = {maskTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool)};
+                       CaseDefinition caseDef = {maskTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(computeGroup.get(), mask, "", supportedCheck, initPrograms, test, caseDef);
                }
 
                {
-                       const CaseDefinition caseDef = {maskTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, de::SharedPtr<bool>(new bool)};
+                       const CaseDefinition caseDef = {maskTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(graphicGroup.get(), mask, "", supportedCheck, initPrograms, test, caseDef);
                }
 
                for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
                {
-                       const CaseDefinition caseDef = {maskTypeIndex, stages[stageIndex], de::SharedPtr<bool>(new bool)};
+                       const CaseDefinition caseDef = {maskTypeIndex, stages[stageIndex], de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(framebufferGroup.get(), mask + "_" + getShaderStageName(caseDef.shaderStage), "", supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
                }
        }
index 930b1ce..b0ee3f1 100755 (executable)
@@ -48,16 +48,18 @@ enum OpType
        OPTYPE_LAST
 };
 
-static bool checkVertexPipelineStages(std::vector<const void*> datas,
+static bool checkVertexPipelineStages(const void* internalData, std::vector<const void*> datas,
                                                                          deUint32 width, deUint32)
 {
+       DE_UNREF(internalData);
        return vkt::subgroups::check(datas, width, 0xf);
 }
 
-static bool checkCompute(std::vector<const void*> datas,
+static bool checkCompute(const void* internalData, std::vector<const void*> datas,
                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                                                 deUint32)
 {
+       DE_UNREF(internalData);
        return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 0xf);
 }
 
@@ -90,6 +92,7 @@ struct CaseDefinition
        int                                     opType;
        VkShaderStageFlags      shaderStage;
        de::SharedPtr<bool>     geometryPointSizeSupported;
+       deBool                          requiredSubgroupSize;
 };
 
 std::string getBodySource(CaseDefinition caseDef)
@@ -487,6 +490,27 @@ void supportedCheck (Context& context, CaseDefinition caseDef)
                TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
        }
 
+       if (caseDef.requiredSubgroupSize)
+       {
+               if (!context.requireDeviceFunctionality("VK_EXT_subgroup_size_control"))
+                       TCU_THROW(NotSupportedError, "Device does not support VK_EXT_subgroup_size_control extension");
+               VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures;
+               subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+               subgroupSizeControlFeatures.pNext = DE_NULL;
+
+               VkPhysicalDeviceFeatures2 features;
+               features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+               features.pNext = &subgroupSizeControlFeatures;
+
+               context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features);
+
+               if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
+
+               if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
+       }
+
        *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
 }
 
@@ -509,11 +533,11 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
        }
 
        if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
-               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
-               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages);
        else if ((VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) & caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages);
        else
                TCU_THROW(InternalError, "Unhandled shader stage");
 }
@@ -529,7 +553,35 @@ tcu::TestStatus test (Context& context, const CaseDefinition caseDef)
                                subgroups::getShaderStageName(caseDef.shaderStage) +
                                " is required to support subgroup operations!");
                }
-               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkCompute);
+               if (caseDef.requiredSubgroupSize == DE_FALSE)
+                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkCompute);
+
+               tcu::TestLog& log       = context.getTestContext().getLog();
+               VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+               subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+               subgroupSizeControlProperties.pNext = DE_NULL;
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupSizeControlProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                       << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+               // According to the spec, requiredSubgroupSize must be a power-of-two integer.
+               for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+               {
+                       tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0u, DE_NULL, checkCompute,
+                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                       if (result.getCode() != QP_TEST_RESULT_PASS)
+                       {
+                               log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                               return result;
+                       }
+               }
+
+               return tcu::TestStatus::pass("OK");
        }
        else
        {
@@ -556,7 +608,7 @@ tcu::TestStatus test (Context& context, const CaseDefinition caseDef)
                if ((VkShaderStageFlagBits)0u == stages)
                        TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
 
-               return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, stages);
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages, stages);
        }
        return tcu::TestStatus::pass("OK");
 }
@@ -587,18 +639,18 @@ tcu::TestCaseGroup* createSubgroupsBallotOtherTests(tcu::TestContext& testCtx)
        {
                const string op = de::toLower(getOpTypeName(opTypeIndex));
                {
-                       const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool)};
+                       CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(computeGroup.get(), op, "", supportedCheck, initPrograms, test, caseDef);
                }
 
                {
-                       const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, de::SharedPtr<bool>(new bool)};
+                       const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(graphicGroup.get(), op, "", supportedCheck, initPrograms, test, caseDef);
                }
 
                for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
                {
-                       const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], de::SharedPtr<bool>(new bool)};
+                       const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(framebufferGroup.get(), op + "_" + getShaderStageName(caseDef.shaderStage), "", supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
                }
        }
index 7b11007..5ef1cb2 100755 (executable)
@@ -36,16 +36,18 @@ using namespace vkt;
 
 namespace
 {
-static bool checkVertexPipelineStages(std::vector<const void*> datas,
+static bool checkVertexPipelineStages(const void* internalData, std::vector<const void*> datas,
                                                                          deUint32 width, deUint32)
 {
+       DE_UNREF(internalData);
        return vkt::subgroups::check(datas, width, 0x7);
 }
 
-static bool checkCompute(std::vector<const void*> datas,
+static bool checkCompute(const void* internalData, std::vector<const void*> datas,
                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                                                 deUint32)
 {
+       DE_UNREF(internalData);
        return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 0x7);
 }
 
@@ -54,8 +56,10 @@ struct CaseDefinition
        VkShaderStageFlags      shaderStage;
        de::SharedPtr<bool>     geometryPointSizeSupported;
        deBool                          extShaderSubGroupBallotTests;
+       deBool                          requiredSubgroupSize;
 };
 
+
 void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
 {
        const vk::SpirVAsmBuildOptions  buildOptionsSpr (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3);
@@ -916,6 +920,27 @@ void supportedCheck (Context& context, CaseDefinition caseDef)
                TCU_THROW(NotSupportedError, "Device does not support int64 data types");
        }
 
+       if (caseDef.requiredSubgroupSize)
+       {
+               if (!context.requireDeviceFunctionality("VK_EXT_subgroup_size_control"))
+                       TCU_THROW(NotSupportedError, "Device does not support VK_EXT_subgroup_size_control extension");
+               VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures;
+               subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+               subgroupSizeControlFeatures.pNext = DE_NULL;
+
+               VkPhysicalDeviceFeatures2 features;
+               features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+               features.pNext = &subgroupSizeControlFeatures;
+
+               context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features);
+
+               if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
+
+               if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
+       }
+
        *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
 }
 
@@ -944,13 +969,13 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
        inputData[0].initializeType = subgroups::SSBOData::InitializeNonZero;
 
        if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
-               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, checkVertexPipelineStages);
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
-               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, checkVertexPipelineStages);
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
        else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 1, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
        else
                TCU_THROW(InternalError, "Unhandled shader stage");
 }
@@ -972,7 +997,35 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData[0].numElements = subgroups::maxSupportedSubgroupSize();
                inputData[0].initializeType = subgroups::SSBOData::InitializeNonZero;
 
-               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputData, 1, checkCompute);
+               if (caseDef.requiredSubgroupSize == DE_FALSE)
+                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputData, 1, DE_NULL, checkCompute);
+
+               tcu::TestLog& log       = context.getTestContext().getLog();
+               VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+               subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+               subgroupSizeControlProperties.pNext = DE_NULL;
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupSizeControlProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                       << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+               // According to the spec, requiredSubgroupSize must be a power-of-two integer.
+               for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+               {
+                       tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputData, 1, DE_NULL, checkCompute,
+                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                       if (result.getCode() != QP_TEST_RESULT_PASS)
+                       {
+                               log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                               return result;
+                       }
+               }
+
+               return tcu::TestStatus::pass("OK");
        }
        else
        {
@@ -1007,7 +1060,7 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData.binding                       = 4u;
                inputData.stages                        = stages;
 
-               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, stages);
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, stages);
        }
 }
 }
@@ -1040,16 +1093,16 @@ tcu::TestCaseGroup* createSubgroupsBallotTests(tcu::TestContext& testCtx)
                VK_SHADER_STAGE_VERTEX_BIT
        };
 
+
        {
-               CaseDefinition caseDef = {VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool), DE_FALSE};
+               CaseDefinition caseDef = {VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool), DE_FALSE, DE_FALSE};
                addFunctionCaseWithPrograms(computeGroup.get(), getShaderStageName(caseDef.shaderStage), "", supportedCheck, initPrograms, test, caseDef);
                caseDef.extShaderSubGroupBallotTests = DE_TRUE;
                addFunctionCaseWithPrograms(computeGroupEXT.get(), getShaderStageName(caseDef.shaderStage), "", supportedCheck, initPrograms, test, caseDef);
-
        }
 
        {
-               CaseDefinition caseDef = {VK_SHADER_STAGE_ALL_GRAPHICS, de::SharedPtr<bool>(new bool), DE_FALSE};
+               CaseDefinition caseDef = {VK_SHADER_STAGE_ALL_GRAPHICS, de::SharedPtr<bool>(new bool), DE_FALSE, DE_FALSE};
                addFunctionCaseWithPrograms(graphicGroup.get(), "graphic", "", supportedCheck, initPrograms, test, caseDef);
                caseDef.extShaderSubGroupBallotTests = DE_TRUE;
                addFunctionCaseWithPrograms(graphicGroupEXT.get(), "graphic", "", supportedCheck, initPrograms, test, caseDef);
@@ -1057,7 +1110,7 @@ tcu::TestCaseGroup* createSubgroupsBallotTests(tcu::TestContext& testCtx)
 
        for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
        {
-               CaseDefinition caseDef = {stages[stageIndex],de::SharedPtr<bool>(new bool), DE_FALSE};
+               CaseDefinition caseDef = {stages[stageIndex],de::SharedPtr<bool>(new bool), DE_FALSE, DE_FALSE}; // base (non-EXT) case first; extShaderSubGroupBallotTests is set to DE_TRUE below for the EXT case
                addFunctionCaseWithPrograms(framebufferGroup.get(), getShaderStageName(caseDef.shaderStage), "",
                                        supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
                caseDef.extShaderSubGroupBallotTests = DE_TRUE;
index 8388d5a..4d3be7a 100755 (executable)
@@ -71,21 +71,24 @@ static bool _checkFragmentSubgroupBarriersNoSSBO(std::vector<const void*> datas,
        return true;
 }
 
-static bool checkFragmentSubgroupBarriersNoSSBO(std::vector<const void*> datas,
+static bool checkFragmentSubgroupBarriersNoSSBO(const void *internalData, std::vector<const void*> datas,
                deUint32 width, deUint32 height, deUint32)
 {
+       DE_UNREF(internalData);
        return _checkFragmentSubgroupBarriersNoSSBO(datas, width, height, false);
 }
 
-static bool checkFragmentSubgroupBarriersWithImageNoSSBO(std::vector<const void*> datas,
+static bool checkFragmentSubgroupBarriersWithImageNoSSBO(const void* internalData, std::vector<const void*> datas,
                deUint32 width, deUint32 height, deUint32)
 {
+       DE_UNREF(internalData);
        return _checkFragmentSubgroupBarriersNoSSBO(datas, width, height, true);
 }
 
-static bool checkVertexPipelineStagesSubgroupElectNoSSBO(std::vector<const void*> datas,
+static bool checkVertexPipelineStagesSubgroupElectNoSSBO(const void* internalData, std::vector<const void*> datas,
                deUint32 width, deUint32)
 {
+       DE_UNREF(internalData);
        const float* const      resultData                      = reinterpret_cast<const float*>(datas[0]);
        float                           poisonValuesFound       = 0.0f;
        float                           numSubgroupsUsed        = 0.0f;
@@ -110,9 +113,10 @@ static bool checkVertexPipelineStagesSubgroupElectNoSSBO(std::vector<const void*
        return numSubgroupsUsed == poisonValuesFound;
 }
 
-static bool checkVertexPipelineStagesSubgroupElect(std::vector<const void*> datas,
+static bool checkVertexPipelineStagesSubgroupElect(const void* internalData, std::vector<const void*> datas,
                deUint32 width, deUint32)
 {
+       DE_UNREF(internalData);
        const deUint32* const resultData =
                reinterpret_cast<const deUint32*>(datas[0]);
        deUint32 poisonValuesFound = 0;
@@ -141,9 +145,10 @@ static bool checkVertexPipelineStagesSubgroupElect(std::vector<const void*> data
        return numSubgroupsUsed == poisonValuesFound;
 }
 
-static bool checkVertexPipelineStagesSubgroupBarriers(std::vector<const void*> datas,
+static bool checkVertexPipelineStagesSubgroupBarriers(const void* internalData, std::vector<const void*> datas,
                deUint32 width, deUint32)
 {
+       DE_UNREF(internalData);
        const deUint32* const resultData = reinterpret_cast<const deUint32*>(datas[0]);
 
        // We used this SSBO to generate our unique value!
@@ -185,15 +190,17 @@ static bool _checkVertexPipelineStagesSubgroupBarriersNoSSBO(std::vector<const v
        return true;
 }
 
-static bool checkVertexPipelineStagesSubgroupBarriersNoSSBO(std::vector<const void*> datas,
+static bool checkVertexPipelineStagesSubgroupBarriersNoSSBO(const void* internalData, std::vector<const void*> datas,
                deUint32 width, deUint32)
 {
+       DE_UNREF(internalData);
        return _checkVertexPipelineStagesSubgroupBarriersNoSSBO(datas, width, false);
 }
 
-static bool checkVertexPipelineStagesSubgroupBarriersWithImageNoSSBO(std::vector<const void*> datas,
+static bool checkVertexPipelineStagesSubgroupBarriersWithImageNoSSBO(const void* internalData, std::vector<const void*> datas,
                deUint32 width, deUint32)
 {
+       DE_UNREF(internalData);
        return _checkVertexPipelineStagesSubgroupBarriersNoSSBO(datas, width, true);
 }
 
@@ -217,29 +224,33 @@ static bool _checkTessellationEvaluationSubgroupBarriersNoSSBO(std::vector<const
        return true;
 }
 
-static bool checkTessellationEvaluationSubgroupBarriersWithImageNoSSBO(std::vector<const void*> datas,
+static bool checkTessellationEvaluationSubgroupBarriersWithImageNoSSBO(const void* internalData, std::vector<const void*> datas,
        deUint32 width, deUint32 height)
 {
+       DE_UNREF(internalData);
        return _checkTessellationEvaluationSubgroupBarriersNoSSBO(datas, width, height, true);
 }
 
-static bool checkTessellationEvaluationSubgroupBarriersNoSSBO(std::vector<const void*> datas,
+static bool checkTessellationEvaluationSubgroupBarriersNoSSBO(const void* internalData, std::vector<const void*> datas,
                deUint32 width, deUint32 height)
 {
+       DE_UNREF(internalData);
        return _checkTessellationEvaluationSubgroupBarriersNoSSBO(datas, width, height, false);
 }
 
-static bool checkComputeSubgroupElect(std::vector<const void*> datas,
+static bool checkComputeSubgroupElect(const void* internalData, std::vector<const void*> datas,
                                                                          const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                                                                          deUint32)
 {
+       DE_UNREF(internalData);
        return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
 }
 
-static bool checkComputeSubgroupBarriers(std::vector<const void*> datas,
+static bool checkComputeSubgroupBarriers(const void* internalData, std::vector<const void*> datas,
                const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                deUint32)
 {
+       DE_UNREF(internalData);
        // We used this SSBO to generate our unique value!
        const deUint32 ref = *reinterpret_cast<const deUint32*>(datas[2]);
        return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, ref);
@@ -283,6 +294,7 @@ struct CaseDefinition
        int                                     opType;
        VkShaderStageFlags      shaderStage;
        de::SharedPtr<bool>     geometryPointSizeSupported;
+       deBool                          requiredSubgroupSize;
 };
 
 void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
@@ -1837,6 +1849,27 @@ void supportedCheck (Context& context, CaseDefinition caseDef)
        if (!subgroups::isSubgroupSupported(context))
                TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
 
+       if (caseDef.requiredSubgroupSize)
+       {
+               if (!context.requireDeviceFunctionality("VK_EXT_subgroup_size_control"))
+                       TCU_THROW(NotSupportedError, "Device does not support VK_EXT_subgroup_size_control extension");
+               VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures;
+               subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+               subgroupSizeControlFeatures.pNext = DE_NULL;
+
+               VkPhysicalDeviceFeatures2 features;
+               features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+               features.pNext = &subgroupSizeControlFeatures;
+
+               context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features);
+
+               if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
+
+               if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
+       }
+
        *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
 }
 
@@ -1900,9 +1933,9 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
        if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
        {
                if (OPTYPE_ELECT == caseDef.opType)
-                       return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO);
+                       return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, DE_NULL, checkVertexPipelineStagesSubgroupElectNoSSBO);
                else
-                       return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount,
+                       return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, DE_NULL,
                                (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType) ?
                                        checkVertexPipelineStagesSubgroupBarriersWithImageNoSSBO :
                                        checkVertexPipelineStagesSubgroupBarriersNoSSBO
@@ -1910,7 +1943,7 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
        }
        else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
        {
-               return subgroups::makeFragmentFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount,
+               return subgroups::makeFragmentFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, DE_NULL,
                        (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType) ?
                                checkFragmentSubgroupBarriersWithImageNoSSBO :
                                checkFragmentSubgroupBarriersNoSSBO
@@ -1919,9 +1952,9 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
        else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
        {
                if (OPTYPE_ELECT == caseDef.opType)
-                       return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO);
+                       return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, DE_NULL, checkVertexPipelineStagesSubgroupElectNoSSBO);
                else
-                       return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount,
+                       return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, DE_NULL,
                                (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType) ?
                                        checkVertexPipelineStagesSubgroupBarriersWithImageNoSSBO :
                                        checkVertexPipelineStagesSubgroupBarriersNoSSBO
@@ -1929,9 +1962,9 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
        }
 
        if (OPTYPE_ELECT == caseDef.opType)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO, caseDef.shaderStage);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT, DE_NULL, 0u, DE_NULL, checkVertexPipelineStagesSubgroupElectNoSSBO, caseDef.shaderStage);
 
-       return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount,
+       return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, DE_NULL,
                (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage) ?
                        ((OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType) ?
                                checkVertexPipelineStagesSubgroupBarriersWithImageNoSSBO :
@@ -1971,7 +2004,35 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
 
                if (OPTYPE_ELECT == caseDef.opType)
                {
-                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkComputeSubgroupElect);
+                       if (caseDef.requiredSubgroupSize == DE_FALSE)
+                               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkComputeSubgroupElect);
+
+                       tcu::TestLog& log       = context.getTestContext().getLog();
+                       VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+                       subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+                       subgroupSizeControlProperties.pNext = DE_NULL;
+                       VkPhysicalDeviceProperties2 properties;
+                       properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+                       properties.pNext = &subgroupSizeControlProperties;
+
+                       context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+                       log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                               << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+                       // According to the spec, requiredSubgroupSize must be a power-of-two integer.
+                       for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+                               {
+                                       tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0u, DE_NULL, checkComputeSubgroupElect,
+                                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                                       if (result.getCode() != QP_TEST_RESULT_PASS)
+                                               {
+                                                       log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                                                       return result;
+                                               }
+                               }
+
+                       return tcu::TestStatus::pass("OK");
                }
                else
                {
@@ -1993,7 +2054,35 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                        inputDatas[2].initializeType = subgroups::SSBOData::InitializeNone;
                        inputDatas[2].isImage = true;
 
-                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, checkComputeSubgroupBarriers);
+                       if (caseDef.requiredSubgroupSize == DE_FALSE)
+                               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, DE_NULL, checkComputeSubgroupBarriers);
+
+                       tcu::TestLog& log = context.getTestContext().getLog();
+                       VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+                       subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+                       subgroupSizeControlProperties.pNext = DE_NULL;
+                       VkPhysicalDeviceProperties2 properties;
+                       properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+                       properties.pNext = &subgroupSizeControlProperties;
+
+                       context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+                       log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                               << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+                       // According to the spec, requiredSubgroupSize must be a power-of-two integer.
+                       for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+                       {
+                               tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, DE_NULL, checkComputeSubgroupBarriers,
+                                                                                                                                       size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                               if (result.getCode() != QP_TEST_RESULT_PASS)
+                               {
+                                       log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                                       return result;
+                               }
+                       }
+
+                       return tcu::TestStatus::pass("OK");
                }
        }
        else
@@ -2066,7 +2155,7 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                        inputData[4].binding            = 8u;
                        inputData[4].stages                     = VK_SHADER_STAGE_FRAGMENT_BIT;
 
-                       return subgroups::allStages(context, VK_FORMAT_R32_UINT, inputData, inputCount, checkVertexPipelineStagesSubgroupElect, stages);
+                       return subgroups::allStages(context, VK_FORMAT_R32_UINT, inputData, inputCount, DE_NULL, checkVertexPipelineStagesSubgroupElect, stages);
                }
                else
                {
@@ -2115,7 +2204,7 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                                inputDatas[index + 3].stages                    = stagesBits[ndx];
                        }
 
-                       return subgroups::allStages(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, checkVertexPipelineStagesSubgroupBarriers, stages);
+                       return subgroups::allStages(context, VK_FORMAT_R32_UINT, inputDatas, inputDatasCount, DE_NULL, checkVertexPipelineStagesSubgroupBarriers, stages);
                }
        }
 }
@@ -2149,7 +2238,7 @@ tcu::TestCaseGroup* createSubgroupsBasicTests(tcu::TestContext& testCtx)
                const std::string op = de::toLower(getOpTypeName(opTypeIndex));
 
                {
-                       const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool)};
+                       CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(computeGroup.get(), op, "",
                                                                                supportedCheck, initPrograms, test, caseDef);
                }
@@ -2161,7 +2250,7 @@ tcu::TestCaseGroup* createSubgroupsBasicTests(tcu::TestContext& testCtx)
                }
 
                {
-                       const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, de::SharedPtr<bool>(new bool)};
+                       const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(graphicGroup.get(),
                                                                                op, "",
                                                                                supportedCheck, initPrograms, test, caseDef);
@@ -2172,10 +2261,10 @@ tcu::TestCaseGroup* createSubgroupsBasicTests(tcu::TestContext& testCtx)
                        if (OPTYPE_ELECT == opTypeIndex && stageIndex == 0)
                                continue;               // This is not tested. I don't know why.
 
-                       const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], de::SharedPtr<bool>(new bool)};
+                       const CaseDefinition caseDefFrag = {opTypeIndex, stages[stageIndex], de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(framebufferGroup.get(),
-                                               op + "_" + getShaderStageName(caseDef.shaderStage), "",
-                                               supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
+                                               op + "_" + getShaderStageName(caseDefFrag.shaderStage), "",
+                                               supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDefFrag);
                }
        }
 
index 209904b..0cac8e9 100755 (executable)
@@ -38,16 +38,18 @@ namespace vkt
 namespace subgroups
 {
 
-static bool checkVertexPipelineStages(std::vector<const void*> datas,
+static bool checkVertexPipelineStages(const void* internalData, std::vector<const void*> datas,
                                                                          deUint32 width, deUint32)
 {
+       DE_UNREF(internalData);
        return check(datas, width, 1);
 }
 
-static bool checkComputeStage(std::vector<const void*> datas,
+static bool checkComputeStage(const void* internalData, std::vector<const void*> datas,
                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                                                 deUint32)
 {
+       DE_UNREF(internalData);
        return checkCompute(datas, numWorkgroups, localSize, 1);
 }
 
@@ -58,6 +60,7 @@ struct CaseDefinition
        std::string                     varName;
        VkShaderStageFlags      shaderStage;
        de::SharedPtr<bool>     geometryPointSizeSupported;
+       deBool                          requiredSubgroupSize;
 };
 }
 
@@ -1356,6 +1359,27 @@ void supportedCheck (Context& context, CaseDefinition caseDef)
        if (!subgroups::isSubgroupSupported(context))
                TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
 
+       if (caseDef.requiredSubgroupSize)
+       {
+               if (!context.requireDeviceFunctionality("VK_EXT_subgroup_size_control"))
+                       TCU_THROW(NotSupportedError, "Device does not support VK_EXT_subgroup_size_control extension");
+               VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures;
+               subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+               subgroupSizeControlFeatures.pNext = DE_NULL;
+
+               VkPhysicalDeviceFeatures2 features;
+               features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+               features.pNext = &subgroupSizeControlFeatures;
+
+               context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features);
+
+               if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
+
+               if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
+       }
+
        *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
 }
 
@@ -1382,11 +1406,11 @@ tcu::TestStatus noSSBOtest(Context& context, const CaseDefinition caseDef)
        }
 
        if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
-               return makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+               return makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages);
        else if ((VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) & caseDef.shaderStage )
-               return makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+               return makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages);
 
-       return makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+       return makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages);
 }
 
 
@@ -1405,7 +1429,36 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                                                   "Shader stage " + getShaderStageName(caseDef.shaderStage) +
                                                   " is required to support subgroup operations!");
                }
-               return makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkComputeStage);
+
+               if (caseDef.requiredSubgroupSize == DE_FALSE)
+                       return makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkComputeStage);
+
+               tcu::TestLog& log       = context.getTestContext().getLog();
+               VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+               subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+               subgroupSizeControlProperties.pNext = DE_NULL;
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupSizeControlProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                       << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+               // According to the spec, requiredSubgroupSize must be a power-of-two integer.
+               for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+               {
+                       tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkComputeStage,
+                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                       if (result.getCode() != QP_TEST_RESULT_PASS)
+                       {
+                               log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                               return result;
+                       }
+               }
+
+               return tcu::TestStatus::pass("OK");
        }
        else
        {
@@ -1432,7 +1485,7 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                if ((VkShaderStageFlagBits)0u == stages)
                        TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
 
-               return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, stages);
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages, stages);
        }
 }
 
@@ -1469,14 +1522,14 @@ de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
                const std::string varLower = de::toLower(var);
 
                {
-                       const CaseDefinition caseDef = {"gl_" + var, VK_SHADER_STAGE_ALL_GRAPHICS, de::SharedPtr<bool>(new bool)};
+                       const CaseDefinition caseDef = {"gl_" + var, VK_SHADER_STAGE_ALL_GRAPHICS, de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(graphicGroup.get(),
                                                                                varLower, "",
                                                                                supportedCheck, initPrograms, test, caseDef);
                }
 
                {
-                       const CaseDefinition caseDef = {"gl_" + var, VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool)};
+                       CaseDefinition caseDef = {"gl_" + var, VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(computeGroup.get(),
                                                                                varLower, "",
                                                                                supportedCheck, initPrograms, test, caseDef);
@@ -1484,7 +1537,7 @@ de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
 
                for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
                {
-                       const CaseDefinition caseDef = {"gl_" + var, stages[stageIndex], de::SharedPtr<bool>(new bool)};
+                       const CaseDefinition caseDef = {"gl_" + var, stages[stageIndex], de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(framebufferGroup.get(),
                                                varLower + "_" +
                                                getShaderStageName(caseDef.shaderStage), "",
index db34b6e..7e8b64d 100755 (executable)
@@ -38,9 +38,10 @@ namespace vkt
 namespace subgroups
 {
 
-bool checkVertexPipelineStagesSubgroupSize(std::vector<const void*> datas,
+bool checkVertexPipelineStagesSubgroupSize(const void* internalData, std::vector<const void*> datas,
                deUint32 width, deUint32 subgroupSize)
 {
+       DE_UNREF(internalData);
        const deUint32* data =
                reinterpret_cast<const deUint32*>(datas[0]);
        for (deUint32 x = 0; x < width; ++x)
@@ -54,9 +55,10 @@ bool checkVertexPipelineStagesSubgroupSize(std::vector<const void*> datas,
        return true;
 }
 
-bool checkVertexPipelineStagesSubgroupInvocationID(std::vector<const void*> datas,
+bool checkVertexPipelineStagesSubgroupInvocationID(const void* internalData, std::vector<const void*> datas,
                deUint32 width, deUint32 subgroupSize)
 {
+       DE_UNREF(internalData);
        const deUint32* data =
                reinterpret_cast<const deUint32*>(datas[0]);
        vector<deUint32> subgroupInvocationHits(subgroupSize, 0);
@@ -84,10 +86,11 @@ bool checkVertexPipelineStagesSubgroupInvocationID(std::vector<const void*> data
        return true;
 }
 
-static bool checkComputeSubgroupSize(std::vector<const void*> datas,
+static bool checkComputeSubgroupSize(const void* internalData, std::vector<const void*> datas,
                                                                         const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                                                                         deUint32 subgroupSize)
 {
+       DE_UNREF(internalData);
        const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
 
        for (deUint32 nX = 0; nX < numWorkgroups[0]; ++nX)
@@ -134,10 +137,11 @@ static bool checkComputeSubgroupSize(std::vector<const void*> datas,
        return true;
 }
 
-static bool checkComputeSubgroupInvocationID(std::vector<const void*> datas,
+static bool checkComputeSubgroupInvocationID(const void* internalData, std::vector<const void*> datas,
                const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                deUint32 subgroupSize)
 {
+       DE_UNREF(internalData);
        const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
 
        for (deUint32 nX = 0; nX < numWorkgroups[0]; ++nX)
@@ -201,11 +205,13 @@ static bool checkComputeSubgroupInvocationID(std::vector<const void*> datas,
        return true;
 }
 
-static bool checkComputeNumSubgroups   (std::vector<const void*>       datas,
+static bool checkComputeNumSubgroups   (const void*                            internalData,
+                                                                               std::vector<const void*>        datas,
                                                                                const deUint32                          numWorkgroups[3],
                                                                                const deUint32                          localSize[3],
                                                                                deUint32)
 {
+       DE_UNREF(internalData);
        const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
 
        for (deUint32 nX = 0; nX < numWorkgroups[0]; ++nX)
@@ -257,11 +263,13 @@ static bool checkComputeNumSubgroups      (std::vector<const void*>       datas,
        return true;
 }
 
-static bool checkComputeSubgroupID     (std::vector<const void*>       datas,
+static bool checkComputeSubgroupID     (const void*                            internalData,
+                                                                       std::vector<const void*>        datas,
                                                                        const deUint32                          numWorkgroups[3],
                                                                        const deUint32                          localSize[3],
                                                                        deUint32)
 {
+       DE_UNREF(internalData);
        const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
 
        for (deUint32 nX = 0; nX < numWorkgroups[0]; ++nX)
@@ -318,6 +326,7 @@ struct CaseDefinition
        std::string                     varName;
        VkShaderStageFlags      shaderStage;
        de::SharedPtr<bool>     geometryPointSizeSupported;
+       deBool                          requiredSubgroupSize;
 };
 }
 
@@ -1580,6 +1589,27 @@ void supportedCheck (Context& context, CaseDefinition caseDef)
        if (!subgroups::isSubgroupSupported(context))
                TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
 
+       if (caseDef.requiredSubgroupSize)
+       {
+               if (!context.requireDeviceFunctionality("VK_EXT_subgroup_size_control"))
+                       TCU_THROW(NotSupportedError, "Device does not support VK_EXT_subgroup_size_control extension");
+               VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures;
+               subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+               subgroupSizeControlFeatures.pNext = DE_NULL;
+
+               VkPhysicalDeviceFeatures2 features;
+               features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+               features.pNext = &subgroupSizeControlFeatures;
+
+               context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features);
+
+               if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
+
+               if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
+       }
+
        *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
 }
 
@@ -1605,12 +1635,12 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
                if ("gl_SubgroupSize" == caseDef.varName)
                {
                        return makeVertexFrameBufferTest(
-                                          context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupSize);
+                                          context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStagesSubgroupSize);
                }
                else if ("gl_SubgroupInvocationID" == caseDef.varName)
                {
                        return makeVertexFrameBufferTest(
-                                          context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupInvocationID);
+                                          context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStagesSubgroupInvocationID);
                }
                else
                {
@@ -1624,12 +1654,12 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
                if ("gl_SubgroupSize" == caseDef.varName)
                {
                        return makeTessellationEvaluationFrameBufferTest(
-                                       context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupSize);
+                                          context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStagesSubgroupSize);
                }
                else if ("gl_SubgroupInvocationID" == caseDef.varName)
                {
                        return makeTessellationEvaluationFrameBufferTest(
-                                       context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupInvocationID);
+                                          context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStagesSubgroupInvocationID);
                }
                else
                {
@@ -1643,12 +1673,12 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
                if ("gl_SubgroupSize" == caseDef.varName)
                {
                        return makeGeometryFrameBufferTest(
-                                       context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupSize);
+                                   context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStagesSubgroupSize);
                }
                else if ("gl_SubgroupInvocationID" == caseDef.varName)
                {
                        return makeGeometryFrameBufferTest(
-                                       context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupInvocationID);
+                                       context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStagesSubgroupInvocationID);
                }
                else
                {
@@ -1677,19 +1707,131 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
 
                if ("gl_SubgroupSize" == caseDef.varName)
                {
-                       return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkComputeSubgroupSize);
+                       if (caseDef.requiredSubgroupSize == DE_FALSE)
+                               return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkComputeSubgroupSize);
+
+                       tcu::TestLog& log       = context.getTestContext().getLog();
+                       VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+                       subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+                       subgroupSizeControlProperties.pNext = DE_NULL;
+                       VkPhysicalDeviceProperties2 properties;
+                       properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+                       properties.pNext = &subgroupSizeControlProperties;
+
+                       context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+                       log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                               << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+                       // Per the spec, requiredSubgroupSize must be a power-of-two integer. Keep VK_FORMAT_R32G32B32A32_UINT as in the default path above: the same checker consumes the result buffer in both paths.
+                       for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+                       {
+                               tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkComputeSubgroupSize,
+                                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                               if (result.getCode() != QP_TEST_RESULT_PASS)
+                               {
+                                       log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                                       return result;
+                               }
+                       }
+
+                       return tcu::TestStatus::pass("OK");
                }
                else if ("gl_SubgroupInvocationID" == caseDef.varName)
                {
-                       return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkComputeSubgroupInvocationID);
+                       if (caseDef.requiredSubgroupSize == DE_FALSE)
+                               return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkComputeSubgroupInvocationID);
+
+                       tcu::TestLog& log       = context.getTestContext().getLog();
+                       VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+                       subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+                       subgroupSizeControlProperties.pNext = DE_NULL;
+                       VkPhysicalDeviceProperties2 properties;
+                       properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+                       properties.pNext = &subgroupSizeControlProperties;
+
+                       context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+                       log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                               << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+                       // Per the spec, requiredSubgroupSize must be a power-of-two integer. Keep VK_FORMAT_R32G32B32A32_UINT as in the default path above: the same checker consumes the result buffer in both paths.
+                       for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+                       {
+                               tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkComputeSubgroupInvocationID,
+                                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                               if (result.getCode() != QP_TEST_RESULT_PASS)
+                               {
+                                       log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                                       return result;
+                               }
+                       }
+
+                       return tcu::TestStatus::pass("OK");
                }
                else if ("gl_NumSubgroups" == caseDef.varName)
                {
-                       return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkComputeNumSubgroups);
+                       if (caseDef.requiredSubgroupSize == DE_FALSE)
+                               return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkComputeNumSubgroups);
+
+                       tcu::TestLog& log       = context.getTestContext().getLog();
+                       VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+                       subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+                       subgroupSizeControlProperties.pNext = DE_NULL;
+                       VkPhysicalDeviceProperties2 properties;
+                       properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+                       properties.pNext = &subgroupSizeControlProperties;
+
+                       context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+                       log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                               << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+                       // Per the spec, requiredSubgroupSize must be a power-of-two integer. Keep VK_FORMAT_R32G32B32A32_UINT as in the default path above: the same checker consumes the result buffer in both paths.
+                       for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+                       {
+                               tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkComputeNumSubgroups,
+                                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                               if (result.getCode() != QP_TEST_RESULT_PASS)
+                               {
+                                       log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                                       return result;
+                               }
+                       }
+
+                       return tcu::TestStatus::pass("OK");
                }
                else if ("gl_SubgroupID" == caseDef.varName)
                {
-                       return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkComputeSubgroupID);
+                       if (caseDef.requiredSubgroupSize == DE_FALSE)
+                               return makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkComputeSubgroupID);
+
+                       tcu::TestLog& log       = context.getTestContext().getLog();
+                       VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+                       subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+                       subgroupSizeControlProperties.pNext = DE_NULL;
+                       VkPhysicalDeviceProperties2 properties;
+                       properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+                       properties.pNext = &subgroupSizeControlProperties;
+
+                       context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+                       log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                               << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+                       // Per the spec, requiredSubgroupSize must be a power-of-two integer. Keep VK_FORMAT_R32G32B32A32_UINT as in the default path above: the same checker consumes the result buffer in both paths.
+                       for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+                       {
+                               tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkComputeSubgroupID,
+                                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                               if (result.getCode() != QP_TEST_RESULT_PASS)
+                               {
+                                       log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                                       return result;
+                               }
+                       }
+
+                       return tcu::TestStatus::pass("OK");
                }
                else
                {
@@ -1725,11 +1867,11 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
 
                if ("gl_SubgroupSize" == caseDef.varName)
                {
-                       return subgroups::allStages(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupSize, stages);
+                       return subgroups::allStages(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStagesSubgroupSize, stages);
                }
                else if ("gl_SubgroupInvocationID" == caseDef.varName)
                {
-                       return subgroups::allStages(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, checkVertexPipelineStagesSubgroupInvocationID, stages);
+                       return subgroups::allStages(context, VK_FORMAT_R32G32B32A32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStagesSubgroupInvocationID, stages);
                }
                else
                {
@@ -1775,7 +1917,7 @@ tcu::TestCaseGroup* createSubgroupsBuiltinVarTests(tcu::TestContext& testCtx)
                const std::string varLower = de::toLower(var);
 
                {
-                       const CaseDefinition caseDef = { "gl_" + var, VK_SHADER_STAGE_ALL_GRAPHICS, de::SharedPtr<bool>(new bool)};
+                       const CaseDefinition caseDef = { "gl_" + var, VK_SHADER_STAGE_ALL_GRAPHICS, de::SharedPtr<bool>(new bool), DE_FALSE};
 
                        addFunctionCaseWithPrograms(graphicGroup.get(),
                                                                                varLower, "",
@@ -1783,7 +1925,7 @@ tcu::TestCaseGroup* createSubgroupsBuiltinVarTests(tcu::TestContext& testCtx)
                }
 
                {
-                       const CaseDefinition caseDef = {"gl_" + var, VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool)};
+                       CaseDefinition caseDef = {"gl_" + var, VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(computeGroup.get(),
                                                varLower + "_" + getShaderStageName(caseDef.shaderStage), "",
                                                supportedCheck, initPrograms, test, caseDef);
@@ -1791,7 +1933,7 @@ tcu::TestCaseGroup* createSubgroupsBuiltinVarTests(tcu::TestContext& testCtx)
 
                for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
                {
-                       const CaseDefinition caseDef = {"gl_" + var, stages[stageIndex], de::SharedPtr<bool>(new bool)};
+                       const CaseDefinition caseDef = {"gl_" + var, stages[stageIndex], de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(framebufferGroup.get(),
                                                varLower + "_" + getShaderStageName(caseDef.shaderStage), "",
                                                supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
@@ -1802,7 +1944,7 @@ tcu::TestCaseGroup* createSubgroupsBuiltinVarTests(tcu::TestContext& testCtx)
        {
                const std::string var = compute_only_vars[a];
 
-               const CaseDefinition caseDef = {"gl_" + var, VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool)};
+               CaseDefinition caseDef = {"gl_" + var, VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool), DE_FALSE};
 
                addFunctionCaseWithPrograms(computeGroup.get(), de::toLower(var), "",
                                                                        supportedCheck, initPrograms, test, caseDef);
index 005fe97..4f48407 100755 (executable)
@@ -48,16 +48,18 @@ enum OpType
        OPTYPE_CLUSTERED_LAST
 };
 
-static bool checkVertexPipelineStages(std::vector<const void*> datas,
+static bool checkVertexPipelineStages(const void* internalData, std::vector<const void*> datas, // Result checker handed to the framebuffer and allStages test builders in this file; forwards to vkt::subgroups::check.
                                                                          deUint32 width, deUint32)
 {
+       DE_UNREF(internalData); // internalData is not read by this checker; call sites in this change insert DE_NULL ahead of the checker, presumably as this argument.
        return vkt::subgroups::check(datas, width, 1);
 }
 
-static bool checkCompute(std::vector<const void*> datas,
+static bool checkCompute(const void* internalData, std::vector<const void*> datas, // Compute result checker handed to subgroups::makeComputeTest in this file; forwards to vkt::subgroups::checkCompute.
                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                                                 deUint32)
 {
+       DE_UNREF(internalData); // internalData is not read by this checker; the trailing unnamed deUint32 parameter is likewise unused.
        return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
 }
 
@@ -373,6 +375,7 @@ struct CaseDefinition
        VkShaderStageFlags      shaderStage;
        VkFormat                        format;
        de::SharedPtr<bool>     geometryPointSizeSupported;
+       deBool                          requiredSubgroupSize;
 };
 
 std::string getExtHeader(CaseDefinition caseDef)
@@ -450,6 +453,27 @@ void supportedCheck (Context& context, CaseDefinition caseDef)
        if (!subgroups::isFormatSupportedForDevice(context, caseDef.format))
                TCU_THROW(NotSupportedError, "Device does not support the specified format in subgroup operations");
 
+       if (caseDef.requiredSubgroupSize)
+       {
+               if (!context.requireDeviceFunctionality("VK_EXT_subgroup_size_control"))
+                       TCU_THROW(NotSupportedError, "Device does not support VK_EXT_subgroup_size_control extension");
+               VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures;
+               subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+               subgroupSizeControlFeatures.pNext = DE_NULL;
+
+               VkPhysicalDeviceFeatures2 features;
+               features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+               features.pNext = &subgroupSizeControlFeatures;
+
+               context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features);
+
+               if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
+
+               if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
+       }
+
        *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
 }
 
@@ -477,13 +501,13 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
        inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
 
        if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
-               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
-               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
        else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
        else
                TCU_THROW(InternalError, "Unhandled shader stage");
 }
@@ -506,7 +530,35 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData.numElements = subgroups::maxSupportedSubgroupSize();
                inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
 
-               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkCompute);
+               if (caseDef.requiredSubgroupSize == DE_FALSE)
+                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkCompute);
+
+               tcu::TestLog& log = context.getTestContext().getLog();
+               VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+               subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+               subgroupSizeControlProperties.pNext = DE_NULL;
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupSizeControlProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                       << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+               // According to the spec, requiredSubgroupSize must be a power-of-two integer.
+               for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+               {
+                       tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkCompute,
+                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                       if (result.getCode() != QP_TEST_RESULT_PASS)
+                       {
+                               log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                               return result;
+                       }
+               }
+
+               return tcu::TestStatus::pass("OK");
        }
        else
        {
@@ -541,7 +593,7 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData.binding                       = 4u;
                inputData.stages                        = stages;
 
-               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, stages);
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, stages);
        }
 }
 }
@@ -633,19 +685,19 @@ tcu::TestCaseGroup* createSubgroupsClusteredTests(tcu::TestContext& testCtx)
                                +"_" + subgroups::getFormatNameForGLSL(format);
 
                        {
-                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, de::SharedPtr<bool>(new bool)};
+                               CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, de::SharedPtr<bool>(new bool), DE_FALSE};
                                addFunctionCaseWithPrograms(computeGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
                        }
 
                        {
-                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format, de::SharedPtr<bool>(new bool)};
+                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format, de::SharedPtr<bool>(new bool), DE_FALSE};
                                addFunctionCaseWithPrograms(graphicGroup.get(), name,
                                                                                "", supportedCheck, initPrograms, test, caseDef);
                        }
 
                        for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
                        {
-                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, de::SharedPtr<bool>(new bool)};
+                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, de::SharedPtr<bool>(new bool), DE_FALSE};
                                addFunctionCaseWithPrograms(framebufferGroup.get(), name +"_" + getShaderStageName(caseDef.shaderStage), "",
                                                        supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
                        }
index 26ae1a3..9c7f05e 100755 (executable)
@@ -63,9 +63,10 @@ enum OpType
        OPTYPE_LAST
 };
 
-static bool checkVertexPipelineStages(std::vector<const void*> datas,
+static bool checkVertexPipelineStages(const void* internalData, std::vector<const void*> datas,
                                                                          deUint32 width, deUint32)
 {
+       DE_UNREF(internalData);
        const deUint32* data =
                reinterpret_cast<const deUint32*>(datas[0]);
        for (deUint32 x = 0; x < width; ++x)
@@ -81,10 +82,11 @@ static bool checkVertexPipelineStages(std::vector<const void*> datas,
        return true;
 }
 
-static bool checkCompute(std::vector<const void*> datas,
+static bool checkCompute(const void* internalData, std::vector<const void*> datas,
                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                                                 deUint32)
 {
+       DE_UNREF(internalData);
        const deUint32* data =
                reinterpret_cast<const deUint32*>(datas[0]);
 
@@ -460,6 +462,7 @@ struct CaseDefinition
        VkShaderStageFlags      shaderStage;
        VkFormat                        format;
        de::SharedPtr<bool>     geometryPointSizeSupported;
+       deBool                          requiredSubgroupSize;
 };
 
 string getTestString(const CaseDefinition &caseDef)
@@ -887,6 +890,27 @@ void supportedCheck (Context& context, CaseDefinition caseDef)
        if (!subgroups::isFormatSupportedForDevice(context, caseDef.format))
                TCU_THROW(NotSupportedError, "Device does not support the specified format in subgroup operations");
 
+       if (caseDef.requiredSubgroupSize)
+       {
+               if (!context.requireDeviceFunctionality("VK_EXT_subgroup_size_control"))
+                       TCU_THROW(NotSupportedError, "Device does not support VK_EXT_subgroup_size_control extension");
+               VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures;
+               subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+               subgroupSizeControlFeatures.pNext = DE_NULL;
+
+               VkPhysicalDeviceFeatures2 features;
+               features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+               features.pNext = &subgroupSizeControlFeatures;
+
+               context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features);
+
+               if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
+
+               if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
+       }
+
        *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
 }
 
@@ -916,13 +940,13 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
        inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
 
        if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
-               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
-               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
        else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context,  VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context,  VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
        else
                TCU_THROW(InternalError, "Unhandled shader stage");
 }
@@ -962,7 +986,35 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData.numElements = subgroups::maxSupportedSubgroupSize();
                inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
 
-               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkCompute);
+               if (caseDef.requiredSubgroupSize == DE_FALSE)
+                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkCompute);
+
+               tcu::TestLog& log = context.getTestContext().getLog();
+               VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+               subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+               subgroupSizeControlProperties.pNext = DE_NULL;
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupSizeControlProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                       << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+               // According to the spec, requiredSubgroupSize must be a power-of-two integer.
+               for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+               {
+                       tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkCompute,
+                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                       if (result.getCode() != QP_TEST_RESULT_PASS)
+                       {
+                               log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                               return result;
+                       }
+               }
+
+               return tcu::TestStatus::pass("OK");
        }
        else
        {
@@ -998,7 +1050,7 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData.stages                        = stages;
 
                return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData,
-                                                                                1, checkVertexPipelineStages, stages);
+                                                                       1, DE_NULL, checkVertexPipelineStages, stages);
        }
 }
 }
@@ -1094,7 +1146,7 @@ tcu::TestCaseGroup* createSubgroupsPartitionedTests(tcu::TestContext& testCtx)
                        std::string op = getOpTypeName(opTypeIndex);
 
                        {
-                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, de::SharedPtr<bool>(new bool)};
+                               CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, de::SharedPtr<bool>(new bool), DE_FALSE};
                                addFunctionCaseWithPrograms(computeGroup.get(),
                                                                                        de::toLower(op) + "_" +
                                                                                        subgroups::getFormatNameForGLSL(format),
@@ -1102,7 +1154,7 @@ tcu::TestCaseGroup* createSubgroupsPartitionedTests(tcu::TestContext& testCtx)
                        }
 
                        {
-                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format, de::SharedPtr<bool>(new bool)};
+                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format, de::SharedPtr<bool>(new bool), DE_FALSE};
                                addFunctionCaseWithPrograms(graphicGroup.get(),
                                                                                        de::toLower(op) + "_" +
                                                                                        subgroups::getFormatNameForGLSL(format),
@@ -1111,7 +1163,7 @@ tcu::TestCaseGroup* createSubgroupsPartitionedTests(tcu::TestContext& testCtx)
 
                        for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
                        {
-                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, de::SharedPtr<bool>(new bool)};
+                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, de::SharedPtr<bool>(new bool), DE_FALSE};
                                addFunctionCaseWithPrograms(framebufferGroup.get(), de::toLower(op) + "_" + subgroups::getFormatNameForGLSL(format) +
                                                                                        "_" + getShaderStageName(caseDef.shaderStage), "",
                                                                                        supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
@@ -1131,4 +1183,3 @@ tcu::TestCaseGroup* createSubgroupsPartitionedTests(tcu::TestContext& testCtx)
 
 } // subgroups
 } // vkt
-
index e9e1db0..7c6fcaa 100755 (executable)
@@ -46,16 +46,18 @@ enum OpType
        OPTYPE_LAST
 };
 
-static bool checkVertexPipelineStages(std::vector<const void*> datas,
+static bool checkVertexPipelineStages(const void* internalData, std::vector<const void*> datas, // Result checker handed to the framebuffer and allStages test builders in this file; forwards to vkt::subgroups::check.
                                                                          deUint32 width, deUint32)
 {
+       DE_UNREF(internalData); // internalData is not read by this checker; call sites in this change insert DE_NULL ahead of the checker, presumably as this argument.
        return vkt::subgroups::check(datas, width, 1);
 }
 
-static bool checkCompute(std::vector<const void*> datas,
+static bool checkCompute(const void* internalData, std::vector<const void*> datas, // Compute result checker handed to subgroups::makeComputeTest in this file; forwards to vkt::subgroups::checkCompute.
                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                                                 deUint32)
 {
+       DE_UNREF(internalData); // internalData is not read by this checker; the trailing unnamed deUint32 parameter is likewise unused.
        return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
 }
 
@@ -104,6 +106,7 @@ struct CaseDefinition
        VkShaderStageFlags      shaderStage;
        VkFormat                        format;
        de::SharedPtr<bool>     geometryPointSizeSupported;
+       deBool                          requiredSubgroupSize;
 };
 
 std::string getExtHeader(VkFormat format)
@@ -215,6 +218,27 @@ void supportedCheck (Context& context, CaseDefinition caseDef)
        if ((caseDef.opType == OPTYPE_QUAD_BROADCAST_NONCONST) && !subgroups::isSubgroupBroadcastDynamicIdSupported(context))
                TCU_THROW(NotSupportedError, "Device does not support SubgroupBroadcastDynamicId");
 
+       if (caseDef.requiredSubgroupSize)
+       {
+               if (!context.requireDeviceFunctionality("VK_EXT_subgroup_size_control"))
+                       TCU_THROW(NotSupportedError, "Device does not support VK_EXT_subgroup_size_control extension");
+               VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures;
+               subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+               subgroupSizeControlFeatures.pNext = DE_NULL;
+
+               VkPhysicalDeviceFeatures2 features;
+               features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+               features.pNext = &subgroupSizeControlFeatures;
+
+               context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features);
+
+               if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
+
+               if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
+       }
+
        *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
 }
 
@@ -242,13 +266,13 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
        inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
 
        if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
-               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
-               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
        else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
        else
                TCU_THROW(InternalError, "Unhandled shader stage");
 }
@@ -271,7 +295,35 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData.numElements = subgroups::maxSupportedSubgroupSize();
                inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
 
-               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkCompute);
+               if (caseDef.requiredSubgroupSize == DE_FALSE)
+                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkCompute);
+
+               tcu::TestLog& log = context.getTestContext().getLog();
+               VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+               subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+               subgroupSizeControlProperties.pNext = DE_NULL;
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupSizeControlProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                       << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+               // According to the spec, requiredSubgroupSize must be a power-of-two integer.
+               for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+               {
+                       tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkCompute,
+                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                       if (result.getCode() != QP_TEST_RESULT_PASS)
+                       {
+                               log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                               return result;
+                       }
+               }
+
+               return tcu::TestStatus::pass("OK");
        }
        else
        {
@@ -306,7 +358,7 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData.binding                       = 4u;
                inputData.stages                        = stages;
 
-               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, stages);
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, stages);
        }
 }
 }
@@ -346,7 +398,7 @@ tcu::TestCaseGroup* createSubgroupsQuadTests(tcu::TestContext& testCtx)
                        name << "_" << subgroups::getFormatNameForGLSL(format);
 
                        {
-                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, de::SharedPtr<bool>(new bool)};
+                               CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, de::SharedPtr<bool>(new bool), DE_FALSE};
                                addFunctionCaseWithPrograms(computeGroup.get(), name.str(), "", supportedCheck, initPrograms, test, caseDef);
                        }
 
@@ -356,13 +408,14 @@ tcu::TestCaseGroup* createSubgroupsQuadTests(tcu::TestContext& testCtx)
                                        opTypeIndex,
                                        VK_SHADER_STAGE_ALL_GRAPHICS,
                                        format,
-                                       de::SharedPtr<bool>(new bool)
+                                       de::SharedPtr<bool>(new bool),
+                                       DE_FALSE
                                };
                                addFunctionCaseWithPrograms(graphicGroup.get(), name.str(), "", supportedCheck, initPrograms, test, caseDef);
                        }
                        for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
                        {
-                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, de::SharedPtr<bool>(new bool)};
+                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, de::SharedPtr<bool>(new bool), DE_FALSE};
                                addFunctionCaseWithPrograms(framebufferGroup.get(), name.str()+"_"+ getShaderStageName(caseDef.shaderStage), "",
                                                                                        supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
                        }
index 66e4569..2ac7e8b 100755 (executable)
@@ -36,16 +36,18 @@ using namespace vkt;
 
 namespace
 {
-static bool checkVertexPipelineStages(std::vector<const void*> datas,
+static bool checkVertexPipelineStages(const void* internalData, std::vector<const void*> datas,
                                                                          deUint32 width, deUint32)
 {
+       DE_UNREF(internalData); // not consulted by this checker; only datas and width are forwarded below
        return vkt::subgroups::check(datas, width, 1);
 }
 
-static bool checkCompute(std::vector<const void*> datas,
+static bool checkCompute(const void* internalData, std::vector<const void*> datas,
                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                                                 deUint32)
 {
+       DE_UNREF(internalData); // not consulted by this checker; datas, numWorkgroups and localSize are forwarded below
        return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
 }
 
@@ -75,6 +77,7 @@ struct CaseDefinition
        int                                     opType;
        VkShaderStageFlags      shaderStage;
        de::SharedPtr<bool>     geometryPointSizeSupported;
+       deBool                          requiredSubgroupSize;
 };
 
 void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
@@ -467,6 +470,27 @@ void supportedCheck (Context& context, CaseDefinition caseDef)
                }
        }
 
+       if (caseDef.requiredSubgroupSize) // extra capability checks, only for cases that request a required subgroup size
+       {
+               if (!context.requireDeviceFunctionality("VK_EXT_subgroup_size_control"))
+                       TCU_THROW(NotSupportedError, "Device does not support VK_EXT_subgroup_size_control extension");
+               VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures;
+               subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+               subgroupSizeControlFeatures.pNext = DE_NULL;
+
+               VkPhysicalDeviceFeatures2 features;
+               features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+               features.pNext = &subgroupSizeControlFeatures; // chained so the query below also fills subgroupSizeControlFeatures
+
+               context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features);
+
+               if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
+
+               if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE) // needed because test() passes REQUIRE_FULL_SUBGROUPS for these cases
+                       TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
+       }
+
        *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
 }
 
@@ -490,13 +514,13 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
        }
 
        if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
-               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
-               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
        else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context,  VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context,  VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
        else
                TCU_THROW(InternalError, "Unhandled shader stage");
 }
@@ -521,7 +545,36 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                                           subgroups::getShaderStageName(caseDef.shaderStage) +
                                           " is required to support subgroup operations!");
                }
-               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkCompute);
+
+               if (caseDef.requiredSubgroupSize == DE_FALSE) // regular case: a single run with no required size or stage flags passed
+                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkCompute);
+
+               tcu::TestLog& log = context.getTestContext().getLog();
+               VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+               subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+               subgroupSizeControlProperties.pNext = DE_NULL;
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupSizeControlProperties; // chained so the query below also fills subgroupSizeControlProperties
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                       << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+               // According to the spec, requiredSubgroupSize must be a power-of-two integer.
+               for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2) // doubles from min up to and including max
+               {
+                       tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkCompute,
+                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                       if (result.getCode() != QP_TEST_RESULT_PASS) // stop at the first size that does not pass and return its status
+                       {
+                               log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                               return result;
+                       }
+               }
+
+               return tcu::TestStatus::pass("OK"); // every size in the range passed
        }
        else
        {
@@ -548,7 +601,7 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                if ((VkShaderStageFlagBits)0u == stages)
                        TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
 
-               return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, stages);
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, DE_NULL, 0, DE_NULL, checkVertexPipelineStages, stages);
        }
 }
 }
@@ -579,7 +632,7 @@ tcu::TestCaseGroup* createSubgroupsShapeTests(tcu::TestContext& testCtx)
                const std::string op = de::toLower(getOpTypeName(opTypeIndex));
 
                {
-                       const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool)};
+                       CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(computeGroup.get(), op, "", supportedCheck, initPrograms, test, caseDef);
                }
 
@@ -588,7 +641,8 @@ tcu::TestCaseGroup* createSubgroupsShapeTests(tcu::TestContext& testCtx)
                        {
                                opTypeIndex,
                                VK_SHADER_STAGE_ALL_GRAPHICS,
-                               de::SharedPtr<bool>(new bool)
+                               de::SharedPtr<bool>(new bool),
+                               DE_FALSE
                        };
                        addFunctionCaseWithPrograms(graphicGroup.get(),
                                                                        op, "",
@@ -597,7 +651,7 @@ tcu::TestCaseGroup* createSubgroupsShapeTests(tcu::TestContext& testCtx)
 
                for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
                {
-                       const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], de::SharedPtr<bool>(new bool)};
+                       const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], de::SharedPtr<bool>(new bool), DE_FALSE};
                        addFunctionCaseWithPrograms(framebufferGroup.get(),op + "_" + getShaderStageName(caseDef.shaderStage), "",
                                                                                supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
                }
index eef837f..e2f2514 100755 (executable)
@@ -45,16 +45,18 @@ enum OpType
        OPTYPE_LAST
 };
 
-static bool checkVertexPipelineStages(std::vector<const void*> datas,
+static bool checkVertexPipelineStages(const void* internalData, std::vector<const void*> datas,
                                                                          deUint32 width, deUint32)
 {
+       DE_UNREF(internalData); // not consulted by this checker; only datas and width are forwarded below
        return vkt::subgroups::check(datas, width, 1);
 }
 
-static bool checkCompute(std::vector<const void*> datas,
+static bool checkCompute(const void* internalData, std::vector<const void*> datas,
                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                                                 deUint32)
 {
+       DE_UNREF(internalData); // not consulted by this checker; datas, numWorkgroups and localSize are forwarded below
        return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
 }
 
@@ -82,6 +84,7 @@ struct CaseDefinition
        VkShaderStageFlags      shaderStage;
        VkFormat                        format;
        de::SharedPtr<bool>     geometryPointSizeSupported;
+       deBool                          requiredSubgroupSize;
 };
 
 const std::string to_string(int x) {
@@ -463,6 +466,27 @@ void supportedCheck (Context& context, CaseDefinition caseDef)
        if (!subgroups::isFormatSupportedForDevice(context, caseDef.format))
                TCU_THROW(NotSupportedError, "Device does not support the specified format in subgroup operations");
 
+       if (caseDef.requiredSubgroupSize) // extra capability checks, only for cases that request a required subgroup size
+       {
+               if (!context.requireDeviceFunctionality("VK_EXT_subgroup_size_control"))
+                       TCU_THROW(NotSupportedError, "Device does not support VK_EXT_subgroup_size_control extension");
+               VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures;
+               subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+               subgroupSizeControlFeatures.pNext = DE_NULL;
+
+               VkPhysicalDeviceFeatures2 features;
+               features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+               features.pNext = &subgroupSizeControlFeatures; // chained so the query below also fills subgroupSizeControlFeatures
+
+               context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features);
+
+               if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
+
+               if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE) // needed because test() passes REQUIRE_FULL_SUBGROUPS for these cases
+                       TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
+       }
+
        *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
 }
 
@@ -497,13 +521,13 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
        inputData[1].initializeType = subgroups::SSBOData::InitializeNonZero;
 
        if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
-               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 2, checkVertexPipelineStages);
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 2, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
-               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 2, checkVertexPipelineStages);
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 2, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 2, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, inputData, 2, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
        else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context,  VK_FORMAT_R32_UINT, inputData, 2, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context,  VK_FORMAT_R32_UINT, inputData, 2, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
        else
                TCU_THROW(InternalError, "Unhandled shader stage");
 }
@@ -531,7 +555,35 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData[1].numElements = inputData[0].numElements;
                inputData[1].initializeType = subgroups::SSBOData::InitializeNonZero;
 
-               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputData, 2, checkCompute);
+               if (caseDef.requiredSubgroupSize == DE_FALSE) // regular case: a single run with no required size or stage flags passed
+                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputData, 2, DE_NULL, checkCompute);
+
+               tcu::TestLog& log = context.getTestContext().getLog();
+               VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+               subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+               subgroupSizeControlProperties.pNext = DE_NULL;
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupSizeControlProperties; // chained so the query below also fills subgroupSizeControlProperties
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                       << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+               // According to the spec, requiredSubgroupSize must be a power-of-two integer.
+               for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2) // doubles from min up to and including max
+               {
+                       tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, inputData, 2, DE_NULL, checkCompute,
+                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                       if (result.getCode() != QP_TEST_RESULT_PASS) // stop at the first size that does not pass and return its status
+                       {
+                               log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                               return result;
+                       }
+               }
+
+               return tcu::TestStatus::pass("OK"); // every size in the range passed
        }
 
        else
@@ -574,7 +626,7 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData[1].binding            = 5u;
                inputData[1].stages                     = stages;
 
-               return subgroups::allStages(context, VK_FORMAT_R32_UINT, inputData, 2, checkVertexPipelineStages, stages);
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, inputData, 2, DE_NULL, checkVertexPipelineStages, stages);
        }
 }
 }
@@ -620,19 +672,20 @@ tcu::TestCaseGroup* createSubgroupsShuffleTests(tcu::TestContext& testCtx)
                                        opTypeIndex,
                                        VK_SHADER_STAGE_ALL_GRAPHICS,
                                        format,
-                                       de::SharedPtr<bool>(new bool)
+                                       de::SharedPtr<bool>(new bool),
+                                       DE_FALSE
                                };
                                addFunctionCaseWithPrograms(graphicGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
                        }
 
                        {
-                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, de::SharedPtr<bool>(new bool)};
+                               CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, de::SharedPtr<bool>(new bool), DE_FALSE};
                                addFunctionCaseWithPrograms(computeGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
                        }
 
                        for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
                        {
-                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, de::SharedPtr<bool>(new bool)};
+                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, de::SharedPtr<bool>(new bool), DE_FALSE};
                                addFunctionCaseWithPrograms(framebufferGroup.get(), name + "_" + getShaderStageName(caseDef.shaderStage), "",
                                                                                        supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
                        }
index 7d57e5a..24457d0 100644 (file)
@@ -191,6 +191,303 @@ Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
                                                        &renderPassCreateInfo);
 }
 
+Move<VkPipeline> makeGraphicsPipeline(const DeviceInterface&                                           vk,
+                                                                         const VkDevice                                                                device,
+                                                                         const VkPipelineLayout                                                pipelineLayout,
+                                                                         const VkShaderModule                                                  vertexShaderModule,
+                                                                         const VkShaderModule                                                  tessellationControlShaderModule,
+                                                                         const VkShaderModule                                                  tessellationEvalShaderModule,
+                                                                         const VkShaderModule                                                  geometryShaderModule,
+                                                                         const VkShaderModule                                                  fragmentShaderModule,
+                                                                         const VkRenderPass                                                    renderPass,
+                                                                         const std::vector<VkViewport>&                                viewports,
+                                                                         const std::vector<VkRect2D>&                                  scissors,
+                                                                         const VkPrimitiveTopology                                             topology,
+                                                                         const deUint32                                                                subpass,
+                                                                         const deUint32                                                                patchControlPoints,
+                                                                         const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo,
+                                                                         const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo,
+                                                                         const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo,
+                                                                         const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo,
+                                                                         const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo,
+                                                                         const VkPipelineDynamicStateCreateInfo*               dynamicStateCreateInfo,
+                                                                         const deUint32                                                                vertexShaderStageCreateFlags,
+                                                                         const deUint32                                                                tessellationControlShaderStageCreateFlags,
+                                                                         const deUint32                                                                tessellationEvalShaderStageCreateFlags,
+                                                                         const deUint32                                                                geometryShaderStageCreateFlags,
+                                                                         const deUint32                                                                fragmentShaderStageCreateFlags,
+                                                                         const deUint32                                                                requiredSubgroupSize[5])
+{
+       const VkBool32                                                                  disableRasterization                            = (fragmentShaderModule == DE_NULL);
+       const bool                                                                              hasTessellation                                         = (tessellationControlShaderModule != DE_NULL || tessellationEvalShaderModule != DE_NULL);
+
+       VkPipelineShaderStageCreateInfo                                 stageCreateInfo                                         =
+       {
+               VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,    // VkStructureType                     sType
+               DE_NULL,                                                                                                // const void*                         pNext
+               0u,                                                                                                             // VkPipelineShaderStageCreateFlags    flags
+               VK_SHADER_STAGE_VERTEX_BIT,                                                             // VkShaderStageFlagBits               stage
+               DE_NULL,                                                                                                // VkShaderModule                      module
+               "main",                                                                                                 // const char*                         pName
+               DE_NULL                                                                                                 // const VkSpecializationInfo*         pSpecializationInfo
+       };
+
+       std::vector<VkPipelineShaderStageCreateInfo>    pipelineShaderStageParams;
+
+       const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT requiredSubgroupSizeCreateInfo[5] =
+               {
+                       {
+                               VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
+                               DE_NULL,
+                               requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[0] : 0u,
+                       },
+                       {
+                               VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
+                               DE_NULL,
+                               requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[1] : 0u,
+                       },
+                       {
+                               VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
+                               DE_NULL,
+                               requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[2] : 0u,
+                       },
+                       {
+                               VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
+                               DE_NULL,
+                               requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[3] : 0u,
+                       },
+                       {
+                               VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
+                               DE_NULL,
+                               requiredSubgroupSize != DE_NULL ? requiredSubgroupSize[4] : 0u,
+                       },
+               };
+       {
+               stageCreateInfo.pNext   = (requiredSubgroupSizeCreateInfo[0].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[0] : DE_NULL;
+               stageCreateInfo.flags   = vertexShaderStageCreateFlags;
+               stageCreateInfo.stage   = VK_SHADER_STAGE_VERTEX_BIT;
+               stageCreateInfo.module  = vertexShaderModule;
+               pipelineShaderStageParams.push_back(stageCreateInfo);
+       }
+
+       if (tessellationControlShaderModule != DE_NULL)
+       {
+               stageCreateInfo.pNext   = (requiredSubgroupSizeCreateInfo[1].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[1] : DE_NULL;
+               stageCreateInfo.flags   = tessellationControlShaderStageCreateFlags;
+               stageCreateInfo.stage   = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
+               stageCreateInfo.module  = tessellationControlShaderModule;
+               pipelineShaderStageParams.push_back(stageCreateInfo);
+       }
+
+       if (tessellationEvalShaderModule != DE_NULL)
+       {
+               stageCreateInfo.pNext   = (requiredSubgroupSize != DE_NULL && requiredSubgroupSizeCreateInfo[2].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[2] : DE_NULL;
+               stageCreateInfo.flags   = tessellationEvalShaderStageCreateFlags;
+               stageCreateInfo.stage   = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
+               stageCreateInfo.module  = tessellationEvalShaderModule;
+               pipelineShaderStageParams.push_back(stageCreateInfo);
+       }
+
+       if (geometryShaderModule != DE_NULL)
+       {
+               stageCreateInfo.pNext   = (requiredSubgroupSizeCreateInfo[3].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[3] : DE_NULL;
+               stageCreateInfo.flags   = geometryShaderStageCreateFlags;
+               stageCreateInfo.stage   = VK_SHADER_STAGE_GEOMETRY_BIT;
+               stageCreateInfo.module  = geometryShaderModule;
+               pipelineShaderStageParams.push_back(stageCreateInfo);
+       }
+
+       if (fragmentShaderModule != DE_NULL)
+       {
+               stageCreateInfo.pNext   = (requiredSubgroupSizeCreateInfo[4].requiredSubgroupSize != 0u) ? &requiredSubgroupSizeCreateInfo[4] : DE_NULL;
+               stageCreateInfo.flags   = fragmentShaderStageCreateFlags;
+               stageCreateInfo.stage   = VK_SHADER_STAGE_FRAGMENT_BIT;
+               stageCreateInfo.module  = fragmentShaderModule;
+               pipelineShaderStageParams.push_back(stageCreateInfo);
+       }
+
+       const VkVertexInputBindingDescription                   vertexInputBindingDescription           =
+       {
+               0u,                                                             // deUint32             binding
+               sizeof(tcu::Vec4),                              // deUint32             stride
+               VK_VERTEX_INPUT_RATE_VERTEX,    // VkVertexInputRate    inputRate
+       };
+
+       const VkVertexInputAttributeDescription                 vertexInputAttributeDescription         =
+       {
+               0u,                                                             // deUint32    location
+               0u,                                                             // deUint32    binding
+               VK_FORMAT_R32G32B32A32_SFLOAT,  // VkFormat    format
+               0u                                                              // deUint32    offset
+       };
+
+       const VkPipelineVertexInputStateCreateInfo              vertexInputStateCreateInfoDefault       =
+       {
+               VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,      // VkStructureType                             sType
+               DE_NULL,                                                                                                        // const void*                                 pNext
+               (VkPipelineVertexInputStateCreateFlags)0,                                       // VkPipelineVertexInputStateCreateFlags       flags
+               1u,                                                                                                                     // deUint32                                    vertexBindingDescriptionCount
+               &vertexInputBindingDescription,                                                         // const VkVertexInputBindingDescription*      pVertexBindingDescriptions
+               1u,                                                                                                                     // deUint32                                    vertexAttributeDescriptionCount
+               &vertexInputAttributeDescription                                                        // const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions
+       };
+
+       const VkPipelineInputAssemblyStateCreateInfo    inputAssemblyStateCreateInfo            =
+       {
+               VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,    // VkStructureType                            sType
+               DE_NULL,                                                                                                                // const void*                                pNext
+               0u,                                                                                                                             // VkPipelineInputAssemblyStateCreateFlags    flags
+               topology,                                                                                                               // VkPrimitiveTopology                        topology
+               VK_FALSE                                                                                                                // VkBool32                                   primitiveRestartEnable
+       };
+
+       const VkPipelineTessellationStateCreateInfo             tessStateCreateInfo                                     =
+       {
+               VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,      // VkStructureType                           sType
+               DE_NULL,                                                                                                        // const void*                               pNext
+               0u,                                                                                                                     // VkPipelineTessellationStateCreateFlags    flags
+               patchControlPoints                                                                                      // deUint32                                  patchControlPoints
+       };
+
+       const VkPipelineViewportStateCreateInfo                 viewportStateCreateInfo                         =
+       {
+               VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,  // VkStructureType                             sType
+               DE_NULL,                                                                                                // const void*                                 pNext
+               (VkPipelineViewportStateCreateFlags)0,                                  // VkPipelineViewportStateCreateFlags          flags
+               viewports.empty() ? 1u : (deUint32)viewports.size(),    // deUint32                                    viewportCount
+               viewports.empty() ? DE_NULL : &viewports[0],                    // const VkViewport*                           pViewports
+               viewports.empty() ? 1u : (deUint32)scissors.size(),             // deUint32                                    scissorCount
+               scissors.empty() ? DE_NULL : &scissors[0]                               // const VkRect2D*                             pScissors
+       };
+
+       const VkPipelineRasterizationStateCreateInfo    rasterizationStateCreateInfoDefault     =
+       {
+               VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,     // VkStructureType                            sType
+               DE_NULL,                                                                                                        // const void*                                pNext
+               0u,                                                                                                                     // VkPipelineRasterizationStateCreateFlags    flags
+               VK_FALSE,                                                                                                       // VkBool32                                   depthClampEnable
+               disableRasterization,                                                                           // VkBool32                                   rasterizerDiscardEnable
+               VK_POLYGON_MODE_FILL,                                                                           // VkPolygonMode                              polygonMode
+               VK_CULL_MODE_NONE,                                                                                      // VkCullModeFlags                            cullMode
+               VK_FRONT_FACE_COUNTER_CLOCKWISE,                                                        // VkFrontFace                                frontFace
+               VK_FALSE,                                                                                                       // VkBool32                                   depthBiasEnable
+               0.0f,                                                                                                           // float                                      depthBiasConstantFactor
+               0.0f,                                                                                                           // float                                      depthBiasClamp
+               0.0f,                                                                                                           // float                                      depthBiasSlopeFactor
+               1.0f                                                                                                            // float                                      lineWidth
+       };
+
+       const VkPipelineMultisampleStateCreateInfo              multisampleStateCreateInfoDefault       =
+       {
+               VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,       // VkStructureType                          sType
+               DE_NULL,                                                                                                        // const void*                              pNext
+               0u,                                                                                                                     // VkPipelineMultisampleStateCreateFlags    flags
+               VK_SAMPLE_COUNT_1_BIT,                                                                          // VkSampleCountFlagBits                    rasterizationSamples
+               VK_FALSE,                                                                                                       // VkBool32                                 sampleShadingEnable
+               1.0f,                                                                                                           // float                                    minSampleShading
+               DE_NULL,                                                                                                        // const VkSampleMask*                      pSampleMask
+               VK_FALSE,                                                                                                       // VkBool32                                 alphaToCoverageEnable
+               VK_FALSE                                                                                                        // VkBool32                                 alphaToOneEnable
+       };
+
+       const VkStencilOpState                                                  stencilOpState                                          =
+       {
+               VK_STENCIL_OP_KEEP,             // VkStencilOp    failOp
+               VK_STENCIL_OP_KEEP,             // VkStencilOp    passOp
+               VK_STENCIL_OP_KEEP,             // VkStencilOp    depthFailOp
+               VK_COMPARE_OP_NEVER,    // VkCompareOp    compareOp
+               0,                                              // deUint32       compareMask
+               0,                                              // deUint32       writeMask
+               0                                               // deUint32       reference
+       };
+
+       const VkPipelineDepthStencilStateCreateInfo             depthStencilStateCreateInfoDefault      =
+       {
+               VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,     // VkStructureType                          sType
+               DE_NULL,                                                                                                        // const void*                              pNext
+               0u,                                                                                                                     // VkPipelineDepthStencilStateCreateFlags   flags
+               VK_FALSE,                                                                                                       // VkBool32                                 depthTestEnable
+               VK_FALSE,                                                                                                       // VkBool32                                 depthWriteEnable
+               VK_COMPARE_OP_LESS_OR_EQUAL,                                                            // VkCompareOp                              depthCompareOp
+               VK_FALSE,                                                                                                       // VkBool32                                 depthBoundsTestEnable
+               VK_FALSE,                                                                                                       // VkBool32                                 stencilTestEnable
+               stencilOpState,                                                                                         // VkStencilOpState                         front
+               stencilOpState,                                                                                         // VkStencilOpState                         back
+               0.0f,                                                                                                           // float                                    minDepthBounds
+               1.0f,                                                                                                           // float                                    maxDepthBounds
+       };
+
+       const VkPipelineColorBlendAttachmentState               colorBlendAttachmentState                       =
+       {
+               VK_FALSE,                                       // VkBool32                 blendEnable
+               VK_BLEND_FACTOR_ZERO,           // VkBlendFactor            srcColorBlendFactor
+               VK_BLEND_FACTOR_ZERO,           // VkBlendFactor            dstColorBlendFactor
+               VK_BLEND_OP_ADD,                        // VkBlendOp                colorBlendOp
+               VK_BLEND_FACTOR_ZERO,           // VkBlendFactor            srcAlphaBlendFactor
+               VK_BLEND_FACTOR_ZERO,           // VkBlendFactor            dstAlphaBlendFactor
+               VK_BLEND_OP_ADD,                        // VkBlendOp                alphaBlendOp
+               VK_COLOR_COMPONENT_R_BIT        // VkColorComponentFlags    colorWriteMask
+               | VK_COLOR_COMPONENT_G_BIT
+               | VK_COLOR_COMPONENT_B_BIT
+               | VK_COLOR_COMPONENT_A_BIT
+       };
+
+       const VkPipelineColorBlendStateCreateInfo               colorBlendStateCreateInfoDefault        =
+       {
+               VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,       // VkStructureType                               sType
+               DE_NULL,                                                                                                        // const void*                                   pNext
+               0u,                                                                                                                     // VkPipelineColorBlendStateCreateFlags          flags
+               VK_FALSE,                                                                                                       // VkBool32                                      logicOpEnable
+               VK_LOGIC_OP_CLEAR,                                                                                      // VkLogicOp                                     logicOp
+               1u,                                                                                                                     // deUint32                                      attachmentCount
+               &colorBlendAttachmentState,                                                                     // const VkPipelineColorBlendAttachmentState*    pAttachments
+               { 0.0f, 0.0f, 0.0f, 0.0f }                                                                      // float                                         blendConstants[4]
+       };
+
+       std::vector<VkDynamicState>                                             dynamicStates;
+
+       if (viewports.empty())
+               dynamicStates.push_back(VK_DYNAMIC_STATE_VIEWPORT);
+       if (scissors.empty())
+               dynamicStates.push_back(VK_DYNAMIC_STATE_SCISSOR);
+
+       const VkPipelineDynamicStateCreateInfo                  dynamicStateCreateInfoDefault           =
+       {
+               VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,   // VkStructureType                      sType
+               DE_NULL,                                                                                                // const void*                          pNext
+               0u,                                                                                                             // VkPipelineDynamicStateCreateFlags    flags
+               (deUint32)dynamicStates.size(),                                                 // deUint32                             dynamicStateCount
+               dynamicStates.empty() ? DE_NULL : &dynamicStates[0]             // const VkDynamicState*                pDynamicStates
+       };
+
+       const VkPipelineDynamicStateCreateInfo*                 dynamicStateCreateInfoDefaultPtr        = dynamicStates.empty() ? DE_NULL : &dynamicStateCreateInfoDefault;
+
+       const VkGraphicsPipelineCreateInfo                              pipelineCreateInfo                                      =
+       {
+               VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,                                                                                                                // VkStructureType                                  sType
+               DE_NULL,                                                                                                                                                                                                // const void*                                      pNext
+               0u,                                                                                                                                                                                                             // VkPipelineCreateFlags                            flags
+               (deUint32)pipelineShaderStageParams.size(),                                                                                                                             // deUint32                                         stageCount
+               &pipelineShaderStageParams[0],                                                                                                                                                  // const VkPipelineShaderStageCreateInfo*           pStages
+               vertexInputStateCreateInfo ? vertexInputStateCreateInfo : &vertexInputStateCreateInfoDefault,                   // const VkPipelineVertexInputStateCreateInfo*      pVertexInputState
+               &inputAssemblyStateCreateInfo,                                                                                                                                                  // const VkPipelineInputAssemblyStateCreateInfo*    pInputAssemblyState
+               hasTessellation ? &tessStateCreateInfo : DE_NULL,                                                                                                               // const VkPipelineTessellationStateCreateInfo*     pTessellationState
+               &viewportStateCreateInfo,                                                                                                                                                               // const VkPipelineViewportStateCreateInfo*         pViewportState
+               rasterizationStateCreateInfo ? rasterizationStateCreateInfo : &rasterizationStateCreateInfoDefault,             // const VkPipelineRasterizationStateCreateInfo*    pRasterizationState
+               multisampleStateCreateInfo ? multisampleStateCreateInfo: &multisampleStateCreateInfoDefault,                    // const VkPipelineMultisampleStateCreateInfo*      pMultisampleState
+               depthStencilStateCreateInfo ? depthStencilStateCreateInfo : &depthStencilStateCreateInfoDefault,                // const VkPipelineDepthStencilStateCreateInfo*     pDepthStencilState
+               colorBlendStateCreateInfo ? colorBlendStateCreateInfo : &colorBlendStateCreateInfoDefault,                              // const VkPipelineColorBlendStateCreateInfo*       pColorBlendState
+               dynamicStateCreateInfo ? dynamicStateCreateInfo : dynamicStateCreateInfoDefaultPtr,                                             // const VkPipelineDynamicStateCreateInfo*          pDynamicState
+               pipelineLayout,                                                                                                                                                                                 // VkPipelineLayout                                 layout
+               renderPass,                                                                                                                                                                                             // VkRenderPass                                     renderPass
+               subpass,                                                                                                                                                                                                // deUint32                                         subpass
+               DE_NULL,                                                                                                                                                                                                // VkPipeline                                       basePipelineHandle
+               0                                                                                                                                                                                                               // deInt32                                          basePipelineIndex;
+       };
+
+       return createGraphicsPipeline(vk, device, DE_NULL, &pipelineCreateInfo);
+}
+
 Move<VkPipeline> makeGraphicsPipeline(Context&                                                                 context,
                                                                          const VkPipelineLayout                                        pipelineLayout,
                                                                          const VkShaderStageFlags                                      stages,
@@ -204,7 +501,13 @@ Move<VkPipeline> makeGraphicsPipeline(Context&                                                                     context,
                                                                          const VkVertexInputBindingDescription*        vertexInputBindingDescription = DE_NULL,
                                                                          const VkVertexInputAttributeDescription*      vertexInputAttributeDescriptions = DE_NULL,
                                                                          const bool                                                            frameBufferTests = false,
-                                                                         const vk::VkFormat                                            attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT)
+                                                                         const vk::VkFormat                                            attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT,
+                                                                         const deUint32                                                        vertexShaderStageCreateFlags = 0u,
+                                                                         const deUint32                                                        tessellationControlShaderStageCreateFlags = 0u,
+                                                                         const deUint32                                                        tessellationEvalShaderStageCreateFlags = 0u,
+                                                                         const deUint32                                                        geometryShaderStageCreateFlags = 0u,
+                                                                         const deUint32                                                        fragmentShaderStageCreateFlags = 0u,
+                                                                         const deUint32                                                        requiredSubgroupSize[5] = DE_NULL)
 {
        std::vector<VkViewport> noViewports;
        std::vector<VkRect2D>   noScissors;
@@ -243,73 +546,32 @@ Move<VkPipeline> makeGraphicsPipeline(Context&                                                                    context,
 
        const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
 
-       return vk::makeGraphicsPipeline(context.getDeviceInterface(),   // const DeviceInterface&                        vk
-                                                                       context.getDevice(),                    // const VkDevice                                device
-                                                                       pipelineLayout,                                 // const VkPipelineLayout                        pipelineLayout
-                                                                       vertexShaderModule,                             // const VkShaderModule                          vertexShaderModule
-                                                                       tessellationControlModule,              // const VkShaderModule                          tessellationControlShaderModule
-                                                                       tessellationEvaluationModule,   // const VkShaderModule                          tessellationEvalShaderModule
-                                                                       geometryShaderModule,                   // const VkShaderModule                          geometryShaderModule
-                                                                       fragmentShaderModule,                   // const VkShaderModule                          fragmentShaderModule
-                                                                       renderPass,                                             // const VkRenderPass                            renderPass
-                                                                       noViewports,                                    // const std::vector<VkViewport>&                viewports
-                                                                       noScissors,                                             // const std::vector<VkRect2D>&                  scissors
-                                                                       topology,                                               // const VkPrimitiveTopology                     topology
-                                                                       0u,                                                             // const deUint32                                subpass
-                                                                       patchControlPoints,                             // const deUint32                                patchControlPoints
-                                                                       &vertexInputStateCreateInfo,    // const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
-                                                                       DE_NULL,                                                // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
-                                                                       DE_NULL,                                                // const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
-                                                                       DE_NULL,                                                // const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
-                                                                       &colorBlendStateCreateInfo);    // const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
-}
-
-Move<VkPipeline> makeComputePipeline(Context& context,
-                                                                        const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
-                                                                        const deUint32 pipelineCreateFlags, VkPipeline basePipelineHandle,
-                                                                        deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ)
-{
-       const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
-
-       const vk::VkSpecializationMapEntry entries[3] =
-       {
-               {0, sizeof(deUint32) * 0, sizeof(deUint32)},
-               {1, sizeof(deUint32) * 1, sizeof(deUint32)},
-               {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
-       };
-
-       const vk::VkSpecializationInfo info =
-       {
-               /* mapEntryCount = */ 3,
-               /* pMapEntries   = */ entries,
-               /* dataSize      = */ sizeof(localSize),
-               /* pData         = */ localSize
-       };
-
-       const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
-       {
-               VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,                            // VkStructureType                                      sType;
-               DE_NULL,                                                                                                                        // const void*                                          pNext;
-               0u,                                                                                                                                     // VkPipelineShaderStageCreateFlags     flags;
-               VK_SHADER_STAGE_COMPUTE_BIT,                                                                            // VkShaderStageFlagBits                        stage;
-               shaderModule,                                                                                                           // VkShaderModule                                       module;
-               "main",                                                                                                                         // const char*                                          pName;
-               &info,                                                                                                                          // const VkSpecializationInfo*          pSpecializationInfo;
-       };
-
-       const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
-       {
-               VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType      sType;
-               DE_NULL,                                                                                // const void*                                          pNext;
-               pipelineCreateFlags,                                                    // VkPipelineCreateFlags                        flags;
-               pipelineShaderStageParams,                                              // VkPipelineShaderStageCreateInfo      stage;
-               pipelineLayout,                                                                 // VkPipelineLayout                                     layout;
-               basePipelineHandle,                                                             // VkPipeline                                           basePipelineHandle;
-               -1,                                                                                             // deInt32                                                      basePipelineIndex;
-       };
-
-       return createComputePipeline(context.getDeviceInterface(),
-                                                                context.getDevice(), DE_NULL, &pipelineCreateInfo);
+       return makeGraphicsPipeline(context.getDeviceInterface(),       // const DeviceInterface&                        vk
+                                                               context.getDevice(),                    // const VkDevice                                device
+                                                               pipelineLayout,                                 // const VkPipelineLayout                        pipelineLayout
+                                                               vertexShaderModule,                             // const VkShaderModule                          vertexShaderModule
+                                                               tessellationControlModule,              // const VkShaderModule                          tessellationControlShaderModule
+                                                               tessellationEvaluationModule,   // const VkShaderModule                          tessellationEvalShaderModule
+                                                               geometryShaderModule,                   // const VkShaderModule                          geometryShaderModule
+                                                               fragmentShaderModule,                   // const VkShaderModule                          fragmentShaderModule
+                                                               renderPass,                                             // const VkRenderPass                            renderPass
+                                                               noViewports,                                    // const std::vector<VkViewport>&                viewports
+                                                               noScissors,                                             // const std::vector<VkRect2D>&                  scissors
+                                                               topology,                                               // const VkPrimitiveTopology                     topology
+                                                               0u,                                                             // const deUint32                                subpass
+                                                               patchControlPoints,                             // const deUint32                                patchControlPoints
+                                                               &vertexInputStateCreateInfo,    // const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
+                                                               DE_NULL,                                                // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
+                                                               DE_NULL,                                                // const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
+                                                               DE_NULL,                                                // const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
+                                                               &colorBlendStateCreateInfo,             // const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
+                                                               DE_NULL,                                                // const VkPipelineDynamicStateCreateInfo*
+                                                               vertexShaderStageCreateFlags,   // const deUint32                                                                vertexShaderStageCreateFlags,
+                                                               tessellationControlShaderStageCreateFlags,      // const deUint32                                        tessellationControlShaderStageCreateFlags
+                                                               tessellationEvalShaderStageCreateFlags,         // const deUint32                                        tessellationEvalShaderStageCreateFlags
+                                                               geometryShaderStageCreateFlags, // const deUint32                                                                geometryShaderStageCreateFlags
+                                                               fragmentShaderStageCreateFlags, // const deUint32                                                                fragmentShaderStageCreateFlags
+                                                               requiredSubgroupSize);                  // const deUint32                                                                requiredSubgroupSize[5]
 }
 
 Move<VkCommandBuffer> makeCommandBuffer(
@@ -2287,12 +2549,21 @@ deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
        return -1;
 }
 
-tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest (
+tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
        Context& context, VkFormat format, SSBOData* extraData,
-       deUint32 extraDataCount,
-       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
        const VkShaderStageFlags shaderStage)
 {
+       return makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage, 0u, 0u);
+}
+
+tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(
+       Context& context, VkFormat format, SSBOData* extraData,
+       deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       const VkShaderStageFlags shaderStage, const deUint32 tessShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
+{
        const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
        const VkDevice                                                  device                                  = context.getDevice();
        const deUint32                                                  maxWidth                                = getMaxWidth();
@@ -2350,11 +2621,20 @@ tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest (
 
        const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(vk, device, *descriptorSetLayout));
 
+       const deUint32 requiredSubgroupSizes[5] = {0u,
+                                                                                          ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? requiredSubgroupSize : 0u),
+                                                                                          ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? requiredSubgroupSize : 0u),
+                                                                                          0u,
+                                                                                          0u};
+
        const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
-                                                                                                                                       VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
-                                                                                                                                       VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
-                                                                                                                                       *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
-                                                                                                                                       *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
+                                                                                                                                                                                 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
+                                                                                                                                                                                 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+                                                                                                                                                                                 *vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
+                                                                                                                                                                                 *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
+                                                                                                                                                                                 0u, ((shaderStage & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? tessShaderStageCreateFlags : 0u),
+                                                                                                                                                                                 ((shaderStage & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? tessShaderStageCreateFlags : 0u),
+                                                                                                                                                                                 0u, 0u, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
 
        for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
                poolBuilder.addType(inputBuffers[ndx]->getType());
@@ -2465,7 +2745,7 @@ tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest (
 
                        std::vector<const void*> datas;
                        datas.push_back(allocResult.getHostPtr());
-                       if (!checkResult(datas, width/2u, subgroupSize))
+                       if (!checkResult(internalData, datas, width/2u, subgroupSize))
                                failedIterations++;
                }
        }
@@ -2512,8 +2792,18 @@ bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
 
 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
        Context& context, VkFormat format, SSBOData* extraData,
-       deUint32 extraDataCount,
-       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
+       deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
+{
+       return makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult,
+                                                                                                                  0u, 0u);
+}
+
+tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(
+       Context& context, VkFormat format, SSBOData* extraData,
+       deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       const deUint32 geometryShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
 {
        const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
        const VkDevice                                                  device                                  = context.getDevice();
@@ -2566,10 +2856,14 @@ tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
 
        const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(vk, device, *descriptorSetLayout));
 
+       const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, requiredSubgroupSize, 0u};
+
        const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
-                                                                                                                                       VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
-                                                                                                                                       *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
-                                                                                                                                       *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
+                                                                                                                                                                                 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
+                                                                                                                                                                                 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
+                                                                                                                                                                                 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format,
+                                                                                                                                                                                 0u, 0u, 0u, geometryShaderStageCreateFlags, 0u,
+                                                                                                                                                                                 requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
 
        for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
                poolBuilder.addType(inputBuffers[ndx]->getType());
@@ -2686,7 +2980,7 @@ tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
 
                        std::vector<const void*> datas;
                        datas.push_back(allocResult.getHostPtr());
-                       if (!checkResult(datas, width, subgroupSize))
+                       if (!checkResult(internalData, datas, width, subgroupSize))
                                failedIterations++;
                }
        }
@@ -2705,12 +2999,27 @@ tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
        return tcu::TestStatus::pass("OK");
 }
 
-
 tcu::TestStatus vkt::subgroups::allStages(
+       Context& context, VkFormat format, SSBOData* extraData,
+       deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       const vk::VkShaderStageFlags shaderStage)
+{
+       return vkt::subgroups::allStagesRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult, shaderStage,
+                                                                                                                0u, 0u, 0u, 0u, 0u, DE_NULL);
+}
+
+tcu::TestStatus vkt::subgroups::allStagesRequiredSubgroupSize(
        Context& context, VkFormat format, SSBOData* extraDatas,
-       deUint32 extraDatasCount,
-       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
-       const VkShaderStageFlags shaderStageTested)
+       deUint32 extraDatasCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       const VkShaderStageFlags shaderStageTested,
+       const deUint32 vertexShaderStageCreateFlags,
+       const deUint32 tessellationControlShaderStageCreateFlags,
+       const deUint32 tessellationEvalShaderStageCreateFlags,
+       const deUint32 geometryShaderStageCreateFlags,
+       const deUint32 fragmentShaderStageCreateFlags,
+       const deUint32 requiredSubgroupSize[5])
 {
        const DeviceInterface&                  vk                                      = context.getDeviceInterface();
        const VkDevice                                  device                          = context.getDevice();
@@ -2821,10 +3130,13 @@ tcu::TestStatus vkt::subgroups::allStages(
 
        const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
        const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
-                                                                               shaderStageRequired,
-                                                                               *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
-                                                                               *renderPass,
-                                                                               (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST));
+                                                                                                                  shaderStageRequired,
+                                                                                                                  *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
+                                                                                                                  *renderPass,
+                                                                                                                  (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
+                                                                                                                  DE_NULL, DE_NULL, false, VK_FORMAT_R32G32B32A32_SFLOAT,
+                                                                                                                  vertexShaderStageCreateFlags, tessellationControlShaderStageCreateFlags, tessellationEvalShaderStageCreateFlags,
+                                                                                                                  geometryShaderStageCreateFlags, fragmentShaderStageCreateFlags, requiredSubgroupSize));
 
        Move <VkDescriptorPool> descriptorPool;
        Move <VkDescriptorSet>  descriptorSet;
@@ -2965,7 +3277,7 @@ tcu::TestStatus vkt::subgroups::allStages(
                                        }
                                }
 
-                               if (!checkResult(datas, (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width , subgroupSize))
+                               if (!checkResult(internalData, datas, (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width , subgroupSize))
                                        failedIterations++;
                        }
                        if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
@@ -2989,7 +3301,7 @@ tcu::TestStatus vkt::subgroups::allStages(
                                        }
                                }
 
-                               if (!checkResult(datas, width, subgroupSize))
+                               if (!checkResult(internalData, datas, width, subgroupSize))
                                        failedIterations++;
                        }
 
@@ -3012,8 +3324,17 @@ tcu::TestStatus vkt::subgroups::allStages(
 }
 
 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
-       SSBOData* extraData, deUint32 extraDataCount,
-       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
+       SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
+{
+       return makeVertexFrameBufferTestRequiredSubgroupSize(context, format, extraData, extraDataCount, internalData, checkResult,
+                                                                                                                0u, 0u);
+}
+
+tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(Context& context, vk::VkFormat format,
+       SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       const deUint32 vertexShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
 {
        const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
        const VkDevice                                                  device                                  = context.getDevice();
@@ -3063,12 +3384,15 @@ tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::
 
        const Unique<VkPipelineLayout>                  pipelineLayout                  (makePipelineLayout(vk, device, *descriptorSetLayout));
 
+       const deUint32 requiredSubgroupSizes[5] = {requiredSubgroupSize, 0u, 0u, 0u, 0u};
        const Unique<VkPipeline>                                pipeline                                (makeGraphicsPipeline(context, *pipelineLayout,
-                                                                                                                                               VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
-                                                                                                                                               *vertexShaderModule, *fragmentShaderModule,
-                                                                                                                                               DE_NULL, DE_NULL, DE_NULL,
-                                                                                                                                               *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
-                                                                                                                                               &vertexInputBinding, &vertexInputAttribute, true, format));
+                                                                                                                                                                                 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
+                                                                                                                                                                                 *vertexShaderModule, *fragmentShaderModule,
+                                                                                                                                                                                 DE_NULL, DE_NULL, DE_NULL,
+                                                                                                                                                                                 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
+                                                                                                                                                                                 &vertexInputBinding, &vertexInputAttribute, true, format,
+                                                                                                                                                                                 vertexShaderStageCreateFlags, 0u, 0u, 0u, 0u,
+                                                                                                                                                                                 requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
        DescriptorPoolBuilder                                   poolBuilder;
        DescriptorSetUpdateBuilder                              updateBuilder;
 
@@ -3198,7 +3522,7 @@ tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::
 
                        std::vector<const void*> datas;
                        datas.push_back(allocResult.getHostPtr());
-                       if (!checkResult(datas, width, subgroupSize))
+                       if (!checkResult(internalData, datas, width, subgroupSize))
                                failedIterations++;
                }
        }
@@ -3217,12 +3541,23 @@ tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::
        return tcu::TestStatus::pass("OK");
 }
 
-
-tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest    (Context& context, VkFormat format, SSBOData* extraDatas,
-       deUint32 extraDatasCount,
-       bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
+tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest(
+       Context& context, VkFormat format, SSBOData* extraDatas,
+       deUint32 extraDatasCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width,
                                                deUint32 height, deUint32 subgroupSize))
 {
+       return makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, extraDatas, extraDatasCount, internalData, checkResult,
+                                                                                                                  0u, 0u);
+}
+
+tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(
+       Context& context, VkFormat format, SSBOData* extraDatas,
+       deUint32 extraDatasCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width,
+                                               deUint32 height, deUint32 subgroupSize),
+       const deUint32 fragmentShaderStageCreateFlags, const deUint32 requiredSubgroupSize)
+{
        const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
        const VkDevice                                                  device                                  = context.getDevice();
        const VkQueue                                                   queue                                   = context.getUniversalQueue();
@@ -3267,10 +3602,13 @@ tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest     (Context& context, V
                makePipelineLayout(vk, device, *descriptorSetLayout));
 
        const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
+
+       const deUint32 requiredSubgroupSizes[5] = {0u, 0u, 0u, 0u, requiredSubgroupSize};
        const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
-                                                                         VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
-                                                                         *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
-                                                                         DE_NULL, DE_NULL, true));
+                                                                                                                  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
+                                                                                                                  *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+                                                                                                                  DE_NULL, DE_NULL, true, VK_FORMAT_R32G32B32A32_SFLOAT,
+                                                                                                                  0u, 0u, 0u, 0u, fragmentShaderStageCreateFlags, requiredSubgroupSize != 0u ? requiredSubgroupSizes : DE_NULL));
 
        DescriptorPoolBuilder poolBuilder;
 
@@ -3399,7 +3737,7 @@ tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest       (Context& context, V
                                datas.push_back(resultAlloc.getHostPtr());
                        }
 
-                       if (!checkResult(datas, width, height, subgroupSize))
+                       if (!checkResult(internalData, datas, width, height, subgroupSize))
                        {
                                failedIterations++;
                        }
@@ -3422,11 +3760,68 @@ tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest     (Context& context, V
        return tcu::TestStatus::pass("OK");
 }
 
-tcu::TestStatus vkt::subgroups::makeComputeTest(
-       Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount,
-       bool (*checkResult)(std::vector<const void*> datas,
+// Creates a compute pipeline: specialization constants 0..2 carry the local size; a non-zero requiredSubgroupSize is chained into the stage's pNext.
+Move<VkPipeline> makeComputePipeline(Context& context,
+                                                                        const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
+                                                                        const deUint32 pipelineShaderStageFlags, const deUint32 pipelineCreateFlags, VkPipeline basePipelineHandle,
+                                                                        deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ, deUint32 requiredSubgroupSize)
+{
+       const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
+       // Constant IDs 0, 1, 2 read the X, Y, Z entries of localSize, one deUint32 each.
+       const vk::VkSpecializationMapEntry entries[3] =
+       {
+               {0, sizeof(deUint32) * 0, sizeof(deUint32)},
+               {1, sizeof(deUint32) * 1, sizeof(deUint32)},
+               {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
+       };
+
+       const vk::VkSpecializationInfo info =
+       {
+               /* mapEntryCount = */ 3,
+               /* pMapEntries   = */ entries,
+               /* dataSize      = */ sizeof(localSize),
+               /* pData         = */ localSize
+       };
+       // Always built, but only linked into the stage below when requiredSubgroupSize is non-zero.
+       const vk::VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroupSizeCreateInfo =
+       {
+               VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, // VkStructureType    sType;
+               DE_NULL,                                                                        // void*              pNext;
+               requiredSubgroupSize                                                            // uint32_t           requiredSubgroupSize;
+       };
+
+       const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
+       {
+               VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,                            // VkStructureType                                      sType;
+               (requiredSubgroupSize != 0u ? &subgroupSizeCreateInfo : DE_NULL),       // const void*                                          pNext;
+               pipelineShaderStageFlags,                                                       // VkPipelineShaderStageCreateFlags     flags;
+               VK_SHADER_STAGE_COMPUTE_BIT,                                                    // VkShaderStageFlagBits                        stage;
+               shaderModule,                                                                   // VkShaderModule                                       module;
+               "main",                                                                         // const char*                                          pName;
+               &info,                                                                          // const VkSpecializationInfo*          pSpecializationInfo;
+       };
+
+       const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
+       {
+               VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType                      sType;
+               DE_NULL,                                        // const void*                          pNext;
+               pipelineCreateFlags,                            // VkPipelineCreateFlags                flags;
+               pipelineShaderStageParams,                      // VkPipelineShaderStageCreateInfo      stage;
+               pipelineLayout,                                 // VkPipelineLayout                     layout;
+               basePipelineHandle,                             // VkPipeline                           basePipelineHandle;
+               -1,                                             // deInt32                              basePipelineIndex; -1 = no index-based parent, any derivation goes through basePipelineHandle
+       };
+
+       return createComputePipeline(context.getDeviceInterface(), context.getDevice(), DE_NULL, &pipelineCreateInfo);
+}
+
+tcu::TestStatus vkt::subgroups::makeComputeTestRequiredSubgroupSize(
+       Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas,
                                                const deUint32 numWorkgroups[3], const deUint32 localSize[3],
-                                               deUint32 subgroupSize))
+                                               deUint32 subgroupSize),
+       const deUint32 pipelineShaderStageCreateFlags, const deUint32 numWorkgroups[3],
+       const deBool isRequiredSubgroupSize, const deUint32 subgroupSize, const deUint32 localSizesToTest[][3], const deUint32 localSizesToTestCount)
 {
        const DeviceInterface&                                  vk                                              = context.getDeviceInterface();
        const VkDevice                                                  device                                  = context.getDevice();
@@ -3434,9 +3829,25 @@ tcu::TestStatus vkt::subgroups::makeComputeTest(
        const deUint32                                                  queueFamilyIndex                = context.getUniversalQueueFamilyIndex();
        VkDeviceSize                                                    elementSize                             = getFormatSizeInBytes(format);
 
-       const VkDeviceSize resultBufferSize = maxSupportedSubgroupSize() *
-                                                                                 maxSupportedSubgroupSize() *
-                                                                                 maxSupportedSubgroupSize();
+       VkDeviceSize maxSubgroupSize = maxSupportedSubgroupSize();
+
+       if (isRequiredSubgroupSize)
+       {
+               VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+               subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+               subgroupSizeControlProperties.pNext = DE_NULL;
+
+               VkPhysicalDeviceProperties2 properties2;
+               properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties2.pNext = &subgroupSizeControlProperties;
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties2);
+               maxSubgroupSize = deMax32(subgroupSizeControlProperties.maxSubgroupSize, static_cast<deUint32>(maxSubgroupSize));
+       }
+
+       const VkDeviceSize resultBufferSize = maxSubgroupSize *
+                                                                                 maxSubgroupSize *
+                                                                                 maxSubgroupSize;
+
        const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
 
        Buffer resultBuffer(
@@ -3539,34 +3950,15 @@ tcu::TestStatus vkt::subgroups::makeComputeTest(
        unsigned totalIterations = 0;
        unsigned failedIterations = 0;
 
-       const deUint32 subgroupSize = getSubgroupSize(context);
-
        const Unique<VkCommandBuffer> cmdBuffer(
                makeCommandBuffer(context, *cmdPool));
 
-       const deUint32 numWorkgroups[3] = {4, 2, 2};
-
-       const deUint32 localSizesToTestCount = 8;
-       deUint32 localSizesToTest[localSizesToTestCount][3] =
-       {
-               {1, 1, 1},
-               {subgroupSize, 1, 1},
-               {1, subgroupSize, 1},
-               {1, 1, subgroupSize},
-               {32, 4, 1},
-               {1, 4, 32},
-               {3, 5, 7},
-               {1, 1, 1} // Isn't used, just here to make double buffering checks easier
-       };
-
-       Move<VkPipeline> pipelines[localSizesToTestCount - 1];
-
-       context.getTestContext().touchWatchdog();
+       Move<VkPipeline> *pipelines = new Move<VkPipeline>[localSizesToTestCount - 1];
        pipelines[0] =
                makeComputePipeline(context, *pipelineLayout, *shaderModule,
-                                                       VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT, (VkPipeline) DE_NULL,
-                                                       localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2]);
-       context.getTestContext().touchWatchdog();
+                                                       pipelineShaderStageCreateFlags, VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT, (VkPipeline) DE_NULL,
+                                                       localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2],
+                                                       isRequiredSubgroupSize ? subgroupSize : 0u);
 
        for (deUint32 index = 1; index < (localSizesToTestCount - 1); index++)
        {
@@ -3577,9 +3969,9 @@ tcu::TestStatus vkt::subgroups::makeComputeTest(
                context.getTestContext().touchWatchdog();
                pipelines[index] =
                        makeComputePipeline(context, *pipelineLayout, *shaderModule,
-                                                               VK_PIPELINE_CREATE_DERIVATIVE_BIT, *pipelines[0],
-                                                               nextX, nextY, nextZ);
-               context.getTestContext().touchWatchdog();
+                                                               pipelineShaderStageCreateFlags, VK_PIPELINE_CREATE_DERIVATIVE_BIT, *pipelines[0],
+                                                               nextX, nextY, nextZ,
+                                                               isRequiredSubgroupSize ? subgroupSize : 0u);
        }
 
        for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
@@ -3624,7 +4016,7 @@ tcu::TestStatus vkt::subgroups::makeComputeTest(
                        }
                }
 
-               if (!checkResult(datas, numWorkgroups, localSizesToTest[index], subgroupSize))
+               if (!checkResult(internalData, datas, numWorkgroups, localSizesToTest[index], subgroupSize))
                {
                        failedIterations++;
                }
@@ -3632,6 +4024,8 @@ tcu::TestStatus vkt::subgroups::makeComputeTest(
                vk.resetCommandBuffer(*cmdBuffer, 0);
        }
 
+       delete[] pipelines;
+
        if (0 < failedIterations)
        {
                unsigned valuesPassed = (failedIterations > totalIterations) ? 0u : (totalIterations - failedIterations);
@@ -3645,3 +4039,33 @@ tcu::TestStatus vkt::subgroups::makeComputeTest(
 
        return tcu::TestStatus::pass("OK");
 }
+
+tcu::TestStatus vkt::subgroups::makeComputeTest(
+       Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas,
+                                               const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                                               deUint32 subgroupSize),
+       deUint32 requiredSubgroupSize, const deUint32 pipelineShaderStageCreateFlags)
+{
+       const deUint32 numWorkgroups[3] = {4, 2, 2};
+       deUint32 subgroupSize = requiredSubgroupSize;
+
+       if(requiredSubgroupSize == 0)
+               subgroupSize = vkt::subgroups::getSubgroupSize(context);
+
+       const deUint32 localSizesToTestCount = 8;
+       deUint32 localSizesToTest[localSizesToTestCount][3] =
+       {
+               {1, 1, 1},
+               {subgroupSize, 1, 1},
+               {1, subgroupSize, 1},
+               {1, 1, subgroupSize},
+               {32, 4, 1},
+               {1, 4, 32},
+               {3, 5, 7},
+               {1, 1, 1} // Isn't used, just here to make double buffering checks easier
+       };
+
+       return makeComputeTestRequiredSubgroupSize(context, format, inputs, inputsCount, internalData, checkResult, pipelineShaderStageCreateFlags,
+                                                                                          numWorkgroups, requiredSubgroupSize != 0u, subgroupSize, localSizesToTest, localSizesToTestCount);
+}
index 2637c18..0bdd971 100644 (file)
@@ -180,34 +180,78 @@ bool checkCompute(std::vector<const void*> datas,
        deUint32 ref);
 
 tcu::TestStatus makeTessellationEvaluationFrameBufferTest(Context& context, vk::VkFormat format,
-       SSBOData* extraData, deUint32 extraDataCount,
-       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
        const vk::VkShaderStageFlags shaderStage = vk::VK_SHADER_STAGE_ALL_GRAPHICS);
 
 tcu::TestStatus makeGeometryFrameBufferTest(Context& context, vk::VkFormat format, SSBOData* extraData,
-       deUint32 extraDataCount,
-       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize));
+       deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize));
 
 tcu::TestStatus allStages(Context& context, vk::VkFormat format,
-       SSBOData* extraData, deUint32 extraDataCount,
-       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
        const vk::VkShaderStageFlags shaderStage);
 
 tcu::TestStatus makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
-       SSBOData* extraData, deUint32 extraDataCount,
-       bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize));
+       SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize));
 
 tcu::TestStatus makeFragmentFrameBufferTest(Context& context, vk::VkFormat format,
-       SSBOData* extraData, deUint32 extraDataCount,
-       bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
-                                                                        deUint32 height, deUint32 subgroupSize));
+       SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width,
+                                               deUint32 height, deUint32 subgroupSize));
 
 tcu::TestStatus makeComputeTest(
        Context& context, vk::VkFormat format, SSBOData* inputs,
-       deUint32 inputsCount,
-       bool (*checkResult)(std::vector<const void*> datas,
+       deUint32 inputsCount,const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas,
                const deUint32 numWorkgroups[3], const deUint32 localSize[3],
-               deUint32 subgroupSize));
+               deUint32 subgroupSize),
+       deUint32 requiredSubgroupSize = 0u, const deUint32 pipelineShaderStageCreateFlags = 0u);
+
+/* Functions needed for VK_EXT_subgroup_size_control tests */
+tcu::TestStatus makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(Context& context, vk::VkFormat format,
+       SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       const vk::VkShaderStageFlags shaderStage = vk::VK_SHADER_STAGE_ALL_GRAPHICS,
+       const deUint32 tessShaderStageCreateFlags = 0u, const deUint32 requiredSubgroupSize = 0u);
+
+tcu::TestStatus makeGeometryFrameBufferTestRequiredSubgroupSize(Context& context, vk::VkFormat format, SSBOData* extraData,
+       deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       const deUint32 geometryShaderStageCreateFlags = 0u, const deUint32 requiredSubgroupSize = 0u);
+
+tcu::TestStatus allStagesRequiredSubgroupSize(Context& context, vk::VkFormat format,
+       SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       const vk::VkShaderStageFlags shaderStage,
+       const deUint32 vertexShaderStageCreateFlags,
+       const deUint32 tessellationControlShaderStageCreateFlags,
+       const deUint32 tessellationEvalShaderStageCreateFlags,
+       const deUint32 geometryShaderStageCreateFlags,
+       const deUint32 fragmentShaderStageCreateFlags,
+       const deUint32 requiredSubgroupSize[5]);
+
+tcu::TestStatus makeVertexFrameBufferTestRequiredSubgroupSize(Context& context, vk::VkFormat format,
+       SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
+       const deUint32 vertexShaderStageCreateFlags = 0u,
+       const deUint32 requiredSubgroupSize = 0u);
+
+tcu::TestStatus makeFragmentFrameBufferTestRequiredSubgroupSize(Context& context, vk::VkFormat format,
+       SSBOData* extraData, deUint32 extraDataCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas, deUint32 width,
+                                               deUint32 height, deUint32 subgroupSize),
+       const deUint32 fragmentShaderStageCreateFlags = 0u, const deUint32 requiredSubgroupSize = 0u);
+
+tcu::TestStatus makeComputeTestRequiredSubgroupSize(
+       Context& context, vk::VkFormat format, SSBOData* inputs, deUint32 inputsCount, const void* internalData,
+       bool (*checkResult)(const void* internalData, std::vector<const void*> datas,
+                                               const deUint32 numWorkgroups[3], const deUint32 localSize[3],
+                                               deUint32 subgroupSize),
+       const deUint32 pipelineShaderStageCreateFlags, const deUint32 numWorkgroups[3],
+       const deBool isRequiredSubgroupSize, const deUint32 subgroupSize, const deUint32 localSizesToTest[][3], const deUint32 localSizesToTestCount);
 } // subgroups
 } // vkt
 
index 17134df..29f91c2 100755 (executable)
@@ -48,15 +48,17 @@ enum OpType
        OPTYPE_LAST
 };
 
-static bool checkVertexPipelineStages(std::vector<const void*> datas,
+static bool checkVertexPipelineStages(const void* internalData, std::vector<const void*> datas,
                                                                          deUint32 width, deUint32)
 {
+       DE_UNREF(internalData);
        return vkt::subgroups::check(datas, width, 0x1F);
 }
 
-static bool checkFragmentPipelineStages(std::vector<const void*> datas,
+static bool checkFragmentPipelineStages(const void* internalData, std::vector<const void*> datas,
                                                                          deUint32 width, deUint32 height, deUint32)
 {
+       DE_UNREF(internalData);
        const deUint32* data =
                reinterpret_cast<const deUint32*>(datas[0]);
        for (deUint32 x = 0u; x < width; ++x)
@@ -81,10 +83,11 @@ static bool checkFragmentPipelineStages(std::vector<const void*> datas,
        return true;
 }
 
-static bool checkCompute(std::vector<const void*> datas,
+static bool checkCompute(const void* internalData, std::vector<const void*> datas,
                                                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
                                                 deUint32)
 {
+       DE_UNREF(internalData);
        return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 0x1F);
 }
 
@@ -116,6 +119,7 @@ struct CaseDefinition
        VkShaderStageFlags      shaderStage;
        VkFormat                        format;
        de::SharedPtr<bool>     geometryPointSizeSupported;
+       deBool                          requiredSubgroupSize;
        deBool              requires8BitUniformBuffer;
        deBool              requires16BitUniformBuffer;
 };
@@ -640,6 +644,27 @@ void supportedCheck (Context& context, CaseDefinition caseDef)
                context.requireDeviceFunctionality("VK_EXT_shader_subgroup_vote");
        }
 
+       if (caseDef.requiredSubgroupSize)
+       {
+               if (!context.requireDeviceFunctionality("VK_EXT_subgroup_size_control"))
+                       TCU_THROW(NotSupportedError, "Device does not support VK_EXT_subgroup_size_control extension");
+               VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroupSizeControlFeatures;
+               subgroupSizeControlFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+               subgroupSizeControlFeatures.pNext = DE_NULL;
+
+               VkPhysicalDeviceFeatures2 features;
+               features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
+               features.pNext = &subgroupSizeControlFeatures;
+
+               context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features);
+
+               if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
+
+               if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
+                       TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
+       }
+
        *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
 }
 
@@ -674,15 +699,15 @@ tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
        inputData.initializeType = (OPTYPE_ALLEQUAL == caseDef.opType || OPTYPE_ALLEQUAL_ARB == caseDef.opType) ? subgroups::SSBOData::InitializeZero : subgroups::SSBOData::InitializeNonZero;
 
        if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
-               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+               return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
-               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
+               return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages);
        else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
        else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
-               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
+               return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
        else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
-               return subgroups::makeFragmentFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkFragmentPipelineStages);
+               return subgroups::makeFragmentFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkFragmentPipelineStages);
        else
                TCU_THROW(InternalError, "Unhandled shader stage");
 }
@@ -705,8 +730,35 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData.numElements = subgroups::maxSupportedSubgroupSize();
                inputData.initializeType = (OPTYPE_ALLEQUAL == caseDef.opType || OPTYPE_ALLEQUAL_ARB == caseDef.opType) ? subgroups::SSBOData::InitializeZero : subgroups::SSBOData::InitializeNonZero;
 
-               return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData,
-                                                                                 1, checkCompute);
+               if (caseDef.requiredSubgroupSize == DE_FALSE)
+                       return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData,
+                                                                                         1, DE_NULL, checkCompute);
+
+               tcu::TestLog& log       = context.getTestContext().getLog();
+               VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
+               subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+               subgroupSizeControlProperties.pNext = DE_NULL;
+               VkPhysicalDeviceProperties2 properties;
+               properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+               properties.pNext = &subgroupSizeControlProperties;
+
+               context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
+
+               log << tcu::TestLog::Message << "Testing required subgroup size range [" <<  subgroupSizeControlProperties.minSubgroupSize << ", "
+                       << subgroupSizeControlProperties.maxSubgroupSize << "]" << tcu::TestLog::EndMessage;
+
+               // According to the spec, requiredSubgroupSize must be a power-of-two integer.
+               for (deUint32 size = subgroupSizeControlProperties.minSubgroupSize; size <= subgroupSizeControlProperties.maxSubgroupSize; size *= 2)
+               {
+                       tcu::TestStatus result = subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkCompute,
+                                                                                                                               size, VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);
+                       if (result.getCode() != QP_TEST_RESULT_PASS)
+                       {
+                               log << tcu::TestLog::Message << "subgroupSize " << size << " failed" << tcu::TestLog::EndMessage;
+                               return result;
+                       }
+               }
+               return tcu::TestStatus::pass("OK");
        }
        else
        {
@@ -741,7 +793,7 @@ tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
                inputData.binding                       = 4u;
                inputData.stages                        = stages;
 
-               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, stages);
+               return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, DE_NULL, checkVertexPipelineStages, stages);
        }
 }
 }
@@ -813,7 +865,7 @@ tcu::TestCaseGroup* createSubgroupsVoteTests(tcu::TestContext& testCtx)
                        const std::string op = de::toLower(getOpTypeName(opTypeIndex));
 
                        {
-                               const CaseDefinition caseDef = { opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, de::SharedPtr<bool>(new bool),deBool(false),deBool(false) };
+                               CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format, de::SharedPtr<bool>(new bool), deBool(false), deBool(false), DE_FALSE};
                                if (opTypeIndex < OPTYPE_LAST_NON_ARB)
                                {
                                        addFunctionCaseWithPrograms(computeGroup.get(),
@@ -829,7 +881,7 @@ tcu::TestCaseGroup* createSubgroupsVoteTests(tcu::TestContext& testCtx)
                        }
 
                        {
-                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format, de::SharedPtr<bool>(new bool),deBool(false),deBool(false) };
+                               const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format, de::SharedPtr<bool>(new bool), deBool(false), deBool(false), DE_FALSE};
                                if (opTypeIndex < OPTYPE_LAST_NON_ARB)
                                {
                                        addFunctionCaseWithPrograms(graphicGroup.get(),
@@ -846,7 +898,7 @@ tcu::TestCaseGroup* createSubgroupsVoteTests(tcu::TestContext& testCtx)
 
                        for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
                        {
-                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, de::SharedPtr<bool>(new bool),deBool(false),deBool(false) };
+                               const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format, de::SharedPtr<bool>(new bool), deBool(false), deBool(false), DE_FALSE};
                                if (opTypeIndex < OPTYPE_LAST_NON_ARB)
                                {
                                        addFunctionCaseWithPrograms(framebufferGroup.get(),
@@ -866,7 +918,7 @@ tcu::TestCaseGroup* createSubgroupsVoteTests(tcu::TestContext& testCtx)
                        }
                        bool needs8BitUBOStorage = isFormat8bitTy(format);
                        bool needs16BitUBOStorage = isFormat16BitTy(format);
-                       const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_FRAGMENT_BIT, format, de::SharedPtr<bool>(new bool),deBool(needs8BitUBOStorage),deBool(needs16BitUBOStorage) };
+                       const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_FRAGMENT_BIT, format, de::SharedPtr<bool>(new bool), DE_FALSE, deBool(needs8BitUBOStorage), deBool(needs16BitUBOStorage)}; // positional init: requiredSubgroupSize precedes the 8/16-bit UBO flags in CaseDefinition
                        if (opTypeIndex < OPTYPE_LAST_NON_ARB)
                        {
                                addFunctionCaseWithPrograms(fragHelperGroup.get(),