Merge vk-gl-cts/vulkan-cts-1.2.7 into vk-gl-cts/vulkan-cts-1.2.8
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / subgroups / vktSubgroupsSizeControlTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  * Copyright (c) 2019 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  */ /*!
21  * \file
22  * \brief VK_EXT_subgroup_size_control Tests
23  */ /*--------------------------------------------------------------------*/
24
25 #include "vktSubgroupsSizeControlTests.hpp"
26 #include "vktSubgroupsTestsUtils.hpp"
27 #include "vktTestCaseUtil.hpp"
28 #include "tcuTestLog.hpp"
29
30 #include <string>
31 #include <vector>
32
33 using namespace tcu;
34 using namespace std;
35 using namespace vk;
36 using namespace vkt;
37
38 namespace
39 {
40
// Selects which subgroup size, if any, a test case asks the implementation to use.
// The numeric values are stored in CaseDefinition::requiredSubgroupSizeMode.
enum RequiredSubgroupSizeMode
{
	REQUIRED_SUBGROUP_SIZE_NONE = 0,	// no specific size is required
	REQUIRED_SUBGROUP_SIZE_MIN,			// == 1: require the reported minSubgroupSize
	REQUIRED_SUBGROUP_SIZE_MAX,			// == 2: require the reported maxSubgroupSize
};
47
48 struct CaseDefinition
49 {
50         deUint32                        pipelineShaderStageCreateFlags;
51         VkShaderStageFlags      shaderStage;
52         deBool                          requiresBallot;
53         deUint32                        requiredSubgroupSizeMode;
54         de::SharedPtr<bool>     geometryPointSizeSupported;
55 };
56
57 struct internalDataStruct
58 {
59         const Context*                  context;
60         struct CaseDefinition   caseDef;
61         deUint32                                requiredSubgroupSize;
62 };
63
// Find the greatest common divisor of a and b using the iterative Euclidean algorithm.
//
// A zero operand is handled explicitly by the loop condition: gcd(x, 0) == gcd(0, x) == x
// and gcd(0, 0) == 0. The previous recursive form evaluated "greater % lesser" with
// lesser == 0 whenever a was zero, which is a division by zero.
deUint32 gcd (deUint32 a, deUint32 b)
{
	while (b != 0u)
	{
		const deUint32 remainder = a % b;

		a = b;
		b = remainder;
	}

	return a;
}
79
80 UVec3   getLocalSizes (const VkPhysicalDeviceProperties&        physicalDeviceProperties,
81                                            deUint32                                                             numWorkGroupInvocations)
82 {
83         DE_ASSERT(numWorkGroupInvocations <= physicalDeviceProperties.limits.maxComputeWorkGroupInvocations);
84         const deUint32 localSizeX = gcd(numWorkGroupInvocations, physicalDeviceProperties.limits.maxComputeWorkGroupSize[0]);
85         const deUint32 localSizeY = gcd(deMax32(numWorkGroupInvocations / localSizeX, 1u), physicalDeviceProperties.limits.maxComputeWorkGroupSize[1]);
86         const deUint32 localSizeZ = deMax32(numWorkGroupInvocations / (localSizeX * localSizeY), 1u);
87
88         return UVec3(localSizeX, localSizeY, localSizeZ);
89 }
90
91 deUint32 getRequiredSubgroupSizeFromMode (Context&                                                                                                      context,
92                                                                                   const CaseDefinition&                                                                         caseDef,
93                                                                                   const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&       subgroupSizeControlProperties)
94 {
95         switch (caseDef.requiredSubgroupSizeMode)
96         {
97                 case REQUIRED_SUBGROUP_SIZE_MAX:        return subgroupSizeControlProperties.maxSubgroupSize;
98                 case REQUIRED_SUBGROUP_SIZE_MIN:        return subgroupSizeControlProperties.minSubgroupSize;
99                 case REQUIRED_SUBGROUP_SIZE_NONE:       return subgroups::getSubgroupSize(context);
100                 default:                                                        TCU_THROW(NotSupportedError, "Unsupported Subgroup size");
101         }
102 }
103
104 static bool checkVertexPipelineStages (const void*                      internalData,
105                                                                            vector<const void*>  datas,
106                                                                            deUint32                             width,
107                                                                            deUint32)
108 {
109         const struct internalDataStruct*                                                checkInternalData                               = reinterpret_cast<const struct internalDataStruct *>(internalData);
110         const Context*                                                                                  context                                                 = checkInternalData->context;
111         const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties   = context->getSubgroupSizeControlPropertiesEXT();
112         TestLog&                                                                                                log                                                             = context->getTestContext().getLog();
113         const deUint32*                                                                                 data                                                    = reinterpret_cast<const deUint32*>(datas[0]);
114
115         for (deUint32 i = 0; i < width; i++)
116         {
117                 if (data[i] > subgroupSizeControlProperties.maxSubgroupSize ||
118                         data[i] < subgroupSizeControlProperties.minSubgroupSize)
119                 {
120                         log << TestLog::Message << "gl_SubgroupSize (" << data[i] << ") value is outside limits (" << subgroupSizeControlProperties.minSubgroupSize << ", " << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;
121
122                         return DE_FALSE;
123                 }
124
125                 if (checkInternalData->caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE && data[i] != checkInternalData->requiredSubgroupSize)
126                 {
127                         log << TestLog::Message << "gl_SubgroupSize (" << data[i] << ") is not equal to the required subgroup size value (" << checkInternalData->requiredSubgroupSize << ")" << TestLog::EndMessage;
128
129                         return DE_FALSE;
130                 }
131         }
132
133         return DE_TRUE;
134 }
135
136 static bool checkFragmentPipelineStages (const void*                    internalData,
137                                                                                  vector<const void*>    datas,
138                                                                                  deUint32                               width,
139                                                                                  deUint32                               height,
140                                                                                  deUint32)
141 {
142         const struct internalDataStruct*                                                checkInternalData                               = reinterpret_cast<const struct internalDataStruct *>(internalData);
143         const Context*                                                                                  context                                                 = checkInternalData->context;
144         const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties   = context->getSubgroupSizeControlPropertiesEXT();
145         TestLog&                                                                                                log                                                             = context->getTestContext().getLog();
146         const deUint32*                                                                                 data                                                    = reinterpret_cast<const deUint32*>(datas[0]);
147
148         for (deUint32 x = 0u; x < width; ++x)
149         {
150                 for (deUint32 y = 0u; y < height; ++y)
151                 {
152                         const deUint32 ndx = (x * height + y);
153
154                         if (data[ndx] > subgroupSizeControlProperties.maxSubgroupSize ||
155                                 data[ndx] < subgroupSizeControlProperties.minSubgroupSize)
156                         {
157                                 log << TestLog::Message << "gl_SubgroupSize (" << data[ndx] << ") value is outside limits (" << subgroupSizeControlProperties.minSubgroupSize << ", " << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;
158
159                                 return DE_FALSE;
160                         }
161
162                         if (checkInternalData->caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE &&
163                                 data[ndx] != checkInternalData->requiredSubgroupSize)
164                         {
165                                 log << TestLog::Message << "gl_SubgroupSize (" << data[ndx] << ") is not equal to the required subgroup size value (" << checkInternalData->requiredSubgroupSize << ")" << TestLog::EndMessage;
166
167                                 return DE_FALSE;
168                         }
169                 }
170         }
171         return true;
172 }
173
174 static bool checkCompute (const void*                   internalData,
175                                                   vector<const void*>   datas,
176                                                   const deUint32                numWorkgroups[3],
177                                                   const deUint32                localSize[3],
178                                                   deUint32)
179 {
180         const struct internalDataStruct*                                                checkInternalData                               = reinterpret_cast<const struct internalDataStruct *>(internalData);
181         const Context*                                                                                  context                                                 = checkInternalData->context;
182         const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties   = context->getSubgroupSizeControlPropertiesEXT();
183         TestLog&                                                                                                log                                                             = context->getTestContext().getLog();
184         const deUint32                                                                                  globalSizeX                                             = numWorkgroups[0] * localSize[0];
185         const deUint32                                                                                  globalSizeY                                             = numWorkgroups[1] * localSize[1];
186         const deUint32                                                                                  globalSizeZ                                             = numWorkgroups[2] * localSize[2];
187         const deUint32                                                                                  width                                                   = globalSizeX * globalSizeY * globalSizeZ;
188         const deUint32*                                                                                 data                                                    = reinterpret_cast<const deUint32*>(datas[0]);
189
190         for (deUint32 i = 0; i < width; i++)
191         {
192                 if (data[i] > subgroupSizeControlProperties.maxSubgroupSize ||
193                         data[i] < subgroupSizeControlProperties.minSubgroupSize)
194                 {
195                         log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
196                                 << "gl_SubgroupSize (" << data[i] << ") value is outside limits (" << subgroupSizeControlProperties.minSubgroupSize << ", " << subgroupSizeControlProperties.maxSubgroupSize << ")" << TestLog::EndMessage;
197
198                         return DE_FALSE;
199                 }
200
201                 if (checkInternalData->caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE &&
202                         data[i] != checkInternalData->requiredSubgroupSize)
203                 {
204                         log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
205                                 << "gl_SubgroupSize (" << data[i] << ") is not equal to the required subgroup size value (" << checkInternalData->requiredSubgroupSize << ")" << TestLog::EndMessage;
206
207                         return DE_FALSE;
208                 }
209         }
210
211         return DE_TRUE;
212 }
213
214 static bool checkComputeRequireFull (const void*                        internalData,
215                                                                          vector<const void*>    datas,
216                                                                          const deUint32                 numWorkgroups[3],
217                                                                          const deUint32                 localSize[3],
218                                                                          deUint32)
219 {
220         const struct internalDataStruct*                                                checkInternalData                               = reinterpret_cast<const struct internalDataStruct *>(internalData);
221         const Context*                                                                                  context                                                 = checkInternalData->context;
222         const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties   = context->getSubgroupSizeControlPropertiesEXT();
223         TestLog&                                                                                                log                                                             = context->getTestContext().getLog();
224         const deUint32                                                                                  globalSizeX                                             = numWorkgroups[0] * localSize[0];
225         const deUint32                                                                                  globalSizeY                                             = numWorkgroups[1] * localSize[1];
226         const deUint32                                                                                  globalSizeZ                                             = numWorkgroups[2] * localSize[2];
227         const deUint32                                                                                  width                                                   = globalSizeX * globalSizeY * globalSizeZ;
228         const UVec4*                                                                                    data                                                    = reinterpret_cast<const UVec4*>(datas[0]);
229         const deUint32                                                                                  numSubgroups                                    = (localSize[0] * localSize[1] * localSize[2]) / checkInternalData->requiredSubgroupSize;
230
231         for (deUint32 i = 0; i < width; i++)
232         {
233                 if (data[i].x() > subgroupSizeControlProperties.maxSubgroupSize ||
234                         data[i].x() < subgroupSizeControlProperties.minSubgroupSize)
235                 {
236                         log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
237                                 << "gl_SubgroupSize value ( " << data[i].x() << ") is outside limits [" << subgroupSizeControlProperties.minSubgroupSize << ", " << subgroupSizeControlProperties.maxSubgroupSize << "]" << TestLog::EndMessage;
238                         return DE_FALSE;
239                 }
240
241                 if (data[i].x() != data[i].y())
242                 {
243                         log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
244                                 << "gl_SubgroupSize ( " << data[i].x() << ") does not match the active number of subgroup invocations (" << data[i].y() << ")" << TestLog::EndMessage;
245                         return DE_FALSE;
246                 }
247
248                 if (checkInternalData->caseDef.pipelineShaderStageCreateFlags == VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT &&
249                         data[i].x() != checkInternalData->requiredSubgroupSize)
250                 {
251                         log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
252                                 << "expected subgroupSize (" << checkInternalData->requiredSubgroupSize << ") doesn't match gl_SubgroupSize ( " << data[i].x() << ")" << TestLog::EndMessage;
253                         return DE_FALSE;
254                 }
255
256                 if (checkInternalData->caseDef.pipelineShaderStageCreateFlags == VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT && data[i].z() != numSubgroups)
257                 {
258                         log << TestLog::Message << "[" << localSize[0] << ", " << localSize[1] << ", " << localSize[2] << "] "
259                                 << "expected number of subgroups dispatched (" << numSubgroups << ") doesn't match gl_NumSubgroups (" << data[i].z() << ")" << TestLog::EndMessage;
260                         return DE_FALSE;
261                 }
262         }
263
264         return DE_TRUE;
265 }
266
267 void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
268 {
269         const ShaderBuildOptions        buildOptions    (programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
270
271         if (VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
272                 subgroups::setFragmentShaderFrameBuffer(programCollection);
273
274         if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage && VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
275                 subgroups::setVertexShaderFrameBuffer(programCollection);
276
277         string bdyStr = "uint tempResult = gl_SubgroupSize;\n";
278
279         if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
280         {
281                 ostringstream vertex;
282
283                 vertex << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
284                         << "#extension GL_KHR_shader_subgroup_basic: enable\n"
285                         << "layout(location = 0) in highp vec4 in_position;\n"
286                         << "layout(location = 0) out float out_color;\n"
287                         << "\n"
288                         << "void main (void)\n"
289                         << "{\n"
290                         << bdyStr
291                         << "  out_color = float(tempResult);\n"
292                         << "  gl_Position = in_position;\n"
293                         << "  gl_PointSize = 1.0f;\n"
294                         << "}\n";
295
296                 programCollection.glslSources.add("vert") << glu::VertexSource(vertex.str()) << buildOptions;
297         }
298         else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
299         {
300                 ostringstream geometry;
301
302                 geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
303                         << "#extension GL_KHR_shader_subgroup_basic: enable\n"
304                         << "layout(points) in;\n"
305                         << "layout(points, max_vertices = 1) out;\n"
306                         << "layout(location = 0) out float out_color;\n"
307                         << "void main (void)\n"
308                         << "{\n"
309                         << bdyStr
310                         << "  out_color = float(tempResult);\n"
311                         << "  gl_Position = gl_in[0].gl_Position;\n"
312                         << "  gl_PointSize = 1.0f;"
313                         << "  EmitVertex();\n"
314                         << "  EndPrimitive();\n"
315                         << "}\n";
316
317                 programCollection.glslSources.add("geometry") << glu::GeometrySource(geometry.str()) << buildOptions;
318         }
319         else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
320         {
321                 ostringstream controlSource;
322
323                 controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
324                         << "#extension GL_KHR_shader_subgroup_basic: enable\n"
325                         << "layout(vertices = 2) out;\n"
326                         << "layout(location = 0) out float out_color[];\n"
327                         << "\n"
328                         << "void main (void)\n"
329                         << "{\n"
330                         << "  if (gl_InvocationID == 0)\n"
331                         << "  {\n"
332                         << "    gl_TessLevelOuter[0] = 1.0f;\n"
333                         << "    gl_TessLevelOuter[1] = 1.0f;\n"
334                         << "  }\n"
335                         << bdyStr
336                         << "  out_color[gl_InvocationID ] = float(tempResult);\n"
337                         << "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
338                         << "}\n";
339
340                 programCollection.glslSources.add("tesc") << glu::TessellationControlSource(controlSource.str()) << buildOptions;
341                 subgroups::setTesEvalShaderFrameBuffer(programCollection);
342         }
343         else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
344         {
345                 ostringstream evaluationSource;
346                 evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
347                         << "#extension GL_KHR_shader_subgroup_basic: enable\n"
348                         << "layout(isolines, equal_spacing, ccw ) in;\n"
349                         << "layout(location = 0) out float out_color;\n"
350                         << "void main (void)\n"
351                         << "{\n"
352                         << bdyStr
353                         << "  out_color  = float(tempResult);\n"
354                         << "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
355                         << "}\n";
356
357                 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
358                 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
359         }
360         else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
361         {
362                 const string vertex     = string(glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)) + "\n"
363                         "void main (void)\n"
364                         "{\n"
365                         "  vec2 uv = vec2(float(gl_VertexIndex & 1), float((gl_VertexIndex >> 1) & 1));\n"
366                         "  gl_Position = vec4(uv * 4.0f -2.0f, 0.0f, 1.0f);\n"
367                         "  gl_PointSize = 1.0f;\n"
368                         "}\n";
369                 programCollection.glslSources.add("vert") << glu::VertexSource(vertex) << buildOptions;
370
371                 ostringstream fragmentSource;
372
373                 fragmentSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
374                                            << "precision highp int;\n"
375                                                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
376                                            << "layout(location = 0) out uint out_color;\n"
377                                            << "void main()\n"
378                                            << "{\n"
379                                            << bdyStr
380                                            << "  out_color = tempResult;\n"
381                                            << "}\n";
382
383                 programCollection.glslSources.add("fragment") << glu::FragmentSource(fragmentSource.str()) << buildOptions;
384         }
385         else
386         {
387                 DE_FATAL("Unsupported shader stage");
388         }
389 }
390
391 string getExtHeader (const CaseDefinition&)
392 {
393         return "#extension GL_KHR_shader_subgroup_basic: enable\n";
394 }
395
396 vector<string> getPerStageHeadDeclarations (const CaseDefinition& caseDef)
397 {
398         const deUint32  stageCount      = subgroups::getStagesCount(caseDef.shaderStage);
399         const bool              fragment        = (caseDef.shaderStage & VK_SHADER_STAGE_FRAGMENT_BIT) != 0;
400         vector<string>  result          (stageCount, string());
401
402         if (fragment)
403                 result.reserve(result.size() + 1);
404
405         for (size_t i = 0; i < result.size(); ++i)
406         {
407                 result[i] =
408                         "layout(set = 0, binding = " + de::toString(i) + ", std430) buffer Buffer1\n"
409                         "{\n"
410                         "  uint result[];\n"
411                         "};\n";
412         }
413
414         if (fragment)
415         {
416                 const string    fragPart        =
417                         "layout(location = 0) out uint result;\n";
418
419                 result.push_back(fragPart);
420         }
421
422         return result;
423 }
424
425 string getTestSource (const CaseDefinition&)
426 {
427         return
428                 "  uint tempResult = gl_SubgroupSize;\n"
429                 "  tempRes = tempResult;\n";
430 }
431
432 void initPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
433 {
434         const SpirvVersion                      spirvVersion            = isAllRayTracingStages(caseDef.shaderStage) ? SPIRV_VERSION_1_4 : SPIRV_VERSION_1_3;
435         const ShaderBuildOptions        buildOptions            (programCollection.usedVulkanVersion, spirvVersion, 0u);
436         const string                            extHeader                       = getExtHeader(caseDef);
437         const string                            testSrc                         = getTestSource(caseDef);
438         const vector<string>            headDeclarations        = getPerStageHeadDeclarations(caseDef);
439
440         subgroups::initStdPrograms(programCollection, buildOptions, caseDef.shaderStage, VK_FORMAT_R32_UINT, *caseDef.geometryPointSizeSupported, extHeader, testSrc, "", headDeclarations);
441 }
442
443 void initProgramsRequireFull (SourceCollections& programCollection, CaseDefinition caseDef)
444 {
445         if (VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
446                 DE_FATAL("Unsupported shader stage");
447
448         ostringstream src;
449
450         src << "#version 450\n"
451                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
452                 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
453                 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
454                 "local_size_z_id = 2) in;\n"
455                 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
456                 << "{\n"
457                 << "  uvec4 result[];\n"
458                 << "};\n"
459                 << "\n"
460                 << "void main (void)\n"
461                 << "{\n"
462                 << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
463                 << "  highp uint offset = globalSize.x * ((globalSize.y * "
464                 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
465                 "gl_GlobalInvocationID.x;\n"
466                 << "   result[offset].x = gl_SubgroupSize;" // save the subgroup size value
467                 << "   uint numActive = subgroupBallotBitCount(subgroupBallot(true));\n"
468                 << "   result[offset].y = numActive;\n" // save the number of active subgroup invocations
469                 << "   result[offset].z = gl_NumSubgroups;" // save the number of subgroups dispatched.
470                 << "}\n";
471
472         programCollection.glslSources.add("comp") << glu::ComputeSource(src.str()) << ShaderBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3, 0u);
473 }
474
475 void supportedCheck (Context& context)
476 {
477         if (!subgroups::isSubgroupSupported(context))
478                 TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
479
480         context.requireDeviceFunctionality("VK_EXT_subgroup_size_control");
481 }
482
483 void supportedCheckFeatures (Context& context, CaseDefinition caseDef)
484 {
485         supportedCheck(context);
486
487         if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
488         {
489                 TCU_THROW(NotSupportedError, "Shader stage is required to support subgroup operations!");
490         }
491
492         if (caseDef.shaderStage == VK_SHADER_STAGE_ALL_GRAPHICS)
493         {
494                 const VkPhysicalDeviceFeatures&         features        = context.getDeviceFeatures();
495
496                 if (!features.tessellationShader || !features.geometryShader)
497                         TCU_THROW(NotSupportedError, "Device does not support tessellation or geometry shaders");
498         }
499
500         if (caseDef.requiresBallot && !subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
501         {
502                 TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
503         }
504
505         if (caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE ||
506                 caseDef.pipelineShaderStageCreateFlags == VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
507         {
508                 const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT&   subgroupSizeControlFeatures     = context.getSubgroupSizeControlFeaturesEXT();
509
510                 if (subgroupSizeControlFeatures.subgroupSizeControl == DE_FALSE)
511                         TCU_THROW(NotSupportedError, "Device does not support varying subgroup sizes nor required subgroup size");
512
513                 if (caseDef.requiredSubgroupSizeMode != REQUIRED_SUBGROUP_SIZE_NONE)
514                 {
515                         const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties   = context.getSubgroupSizeControlPropertiesEXT();
516
517                         if ((subgroupSizeControlProperties.requiredSubgroupSizeStages & caseDef.shaderStage) != caseDef.shaderStage)
518                                 TCU_THROW(NotSupportedError, "Device does not support setting required subgroup size for the stages selected");
519                 }
520         }
521
522         if (caseDef.pipelineShaderStageCreateFlags == VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT)
523         {
524                 const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT&   subgroupSizeControlFeatures     = context.getSubgroupSizeControlFeaturesEXT();
525
526                 if (subgroupSizeControlFeatures.computeFullSubgroups == DE_FALSE)
527                         TCU_THROW(NotSupportedError, "Device does not support full subgroups in compute shaders");
528         }
529
530         *caseDef.geometryPointSizeSupported = subgroups::isTessellationAndGeometryPointSizeSupported(context);
531
532         if (isAllRayTracingStages(caseDef.shaderStage))
533         {
534                 context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
535         }
536 }
537
538 void supportedCheckFeaturesShader (Context& context, CaseDefinition caseDef)
539 {
540         supportedCheckFeatures(context, caseDef);
541
542         subgroups::supportedCheckShader(context, caseDef.shaderStage);
543 }
544
545 TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
546 {
547         const VkFormat                                  format                  = VK_FORMAT_R32_UINT;
548         const deUint32&                                 flags                   = caseDef.pipelineShaderStageCreateFlags;
549         const struct internalDataStruct internalData    =
550         {
551                 &context,
552                 caseDef,
553                 0u,
554         };
555
556         switch (caseDef.shaderStage)
557         {
558                 case VK_SHADER_STAGE_VERTEX_BIT:                                        return subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, flags, 0u);
559                 case VK_SHADER_STAGE_GEOMETRY_BIT:                                      return subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, flags, 0u);
560                 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:          return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, 0u);
561                 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:       return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, 0u);
562                 case VK_SHADER_STAGE_FRAGMENT_BIT:                                      return subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkFragmentPipelineStages, flags, 0u);
563                 default:                                                                                        TCU_THROW(InternalError, "Unhandled shader stage");
564         }
565 }
566
567 TestStatus test (Context& context, const CaseDefinition caseDef)
568 {
569         if (isAllComputeStages(caseDef.shaderStage))
570         {
571                 const deUint32                                          numWorkgroups[3]                                                        = {1, 1, 1};
572                 const deUint32                                          subgroupSize                                                            = subgroups::getSubgroupSize(context);
573                 const VkPhysicalDeviceProperties        physicalDeviceProperties                                        = context.getDeviceProperties();
574                 // Calculate the local workgroup sizes to exercise the maximum supported by the driver
575                 const UVec3                                                     localSize                                                                       = getLocalSizes(physicalDeviceProperties, physicalDeviceProperties.limits.maxComputeWorkGroupInvocations);
576                 const deUint32                                          localSizesToTestCount                                           = 16;
577                 const deUint32                                          localSizesToTest[localSizesToTestCount][3]      =
578                 {
579                         {1, 1, 1},
580                         {32, 4, 1},
581                         {32, 1, 4},
582                         {1, 32, 4},
583                         {1, 4, 32},
584                         {4, 1, 32},
585                         {4, 32, 1},
586                         {subgroupSize, 1, 1},
587                         {1, subgroupSize, 1},
588                         {1, 1, subgroupSize},
589                         {3, 5, 7},
590                         {128, 1, 1},
591                         {1, 128, 1},
592                         {1, 1, 64},
593                         {localSize.x(), localSize.y(), localSize.z()},
594                         {1, 1, 1} // Isn't used, just here to make double buffering checks easier
595                 };
596                 const struct internalDataStruct         internalData                                                            =
597                 {
598                         &context,
599                         caseDef,
600                         subgroupSize,
601                 };
602
603                 return subgroups::makeComputeTestRequiredSubgroupSize(context,
604                                                                                                                           VK_FORMAT_R32_UINT,
605                                                                                                                           DE_NULL,
606                                                                                                                           0,
607                                                                                                                           &internalData,
608                                                                                                                           checkCompute,
609                                                                                                                           caseDef.pipelineShaderStageCreateFlags,
610                                                                                                                           numWorkgroups,
611                                                                                                                           DE_FALSE,
612                                                                                                                           subgroupSize,
613                                                                                                                           localSizesToTest,
614                                                                                                                           localSizesToTestCount);
615         }
616         else if (isAllGraphicsStages(caseDef.shaderStage))
617         {
618                 const VkShaderStageFlags        stages                  = subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
619                 struct internalDataStruct       internalData    =
620                 {
621                         &context,
622                         caseDef,
623                         0u,
624                 };
625
626                 return subgroups::allStagesRequiredSubgroupSize(context,
627                                                                                                                 VK_FORMAT_R32_UINT,
628                                                                                                                 DE_NULL,
629                                                                                                                 0,
630                                                                                                                 &internalData,
631                                                                                                                 checkVertexPipelineStages,
632                                                                                                                 stages,
633                                                                                                                 caseDef.pipelineShaderStageCreateFlags,
634                                                                                                                 caseDef.pipelineShaderStageCreateFlags,
635                                                                                                                 caseDef.pipelineShaderStageCreateFlags,
636                                                                                                                 caseDef.pipelineShaderStageCreateFlags,
637                                                                                                                 caseDef.pipelineShaderStageCreateFlags,
638                                                                                                                 DE_NULL);
639         }
640         else if (isAllRayTracingStages(caseDef.shaderStage))
641         {
642                 const VkShaderStageFlags                stages                  = subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
643                 const vector<deUint32>                  flags                   (6, caseDef.pipelineShaderStageCreateFlags);
644                 const struct internalDataStruct internalData    =
645                 {
646                         &context,
647                         caseDef,
648                         0u,
649                 };
650
651                 return subgroups::allRayTracingStagesRequiredSubgroupSize(context,
652                                                                                                                                   VK_FORMAT_R32_UINT,
653                                                                                                                                   DE_NULL,
654                                                                                                                                   0,
655                                                                                                                                   &internalData,
656                                                                                                                                   checkVertexPipelineStages,
657                                                                                                                                   stages,
658                                                                                                                                   flags.data(),
659                                                                                                                                   DE_NULL);
660         }
661         else
662                 TCU_THROW(InternalError, "Unknown stage or invalid stage set");
663 }
664
665 TestStatus testRequireFullSubgroups (Context& context, const CaseDefinition caseDef)
666 {
667         DE_ASSERT(VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage);
668         DE_ASSERT(caseDef.requiredSubgroupSizeMode == REQUIRED_SUBGROUP_SIZE_NONE);
669
670         const deUint32                                                                                          numWorkgroups[3]                                                        = {1, 1, 1};
671         const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT&         subgroupSizeControlProperties                           = context.getSubgroupSizeControlPropertiesEXT();
672         const VkPhysicalDeviceProperties&                                                       physicalDeviceProperties                                        = context.getDeviceProperties();
673         // Calculate the local workgroup sizes to exercise the maximum supported by the driver
674         const UVec3                                                                                                     localSize                                                                       = getLocalSizes(physicalDeviceProperties, physicalDeviceProperties.limits.maxComputeWorkGroupInvocations);
675         const deUint32                                                                                          subgroupSize                                                            = subgroups::getSubgroupSize(context);
676         // For full subgroups and allow varying subgroup size, localsize X must be a multiple of maxSubgroupSize.
677         // We set local size X for this test to the maximum, regardless if allow varying subgroup size is enabled or not.
678         const deUint32                                                                                          localSizesToTestCount                                           = 7;
679         const deUint32                                                                                          localSizesToTest[localSizesToTestCount][3]      =
680         {
681                 {subgroupSizeControlProperties.maxSubgroupSize, 1, 1},
682                 {subgroupSizeControlProperties.maxSubgroupSize, 4, 1},
683                 {subgroupSizeControlProperties.maxSubgroupSize, 1, 4},
684                 {subgroupSizeControlProperties.maxSubgroupSize * 2, 1, 2},
685                 {subgroupSizeControlProperties.maxSubgroupSize * 4, 1, 1},
686                 {localSize.x(), localSize.y(), localSize.z()},
687                 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
688         };
689         const struct internalDataStruct                                                         internalData                                                            =
690         {
691                 &context,
692                 caseDef,
693                 subgroupSize,
694         };
695
696         return subgroups::makeComputeTestRequiredSubgroupSize(context,
697                                                                                                                   VK_FORMAT_R32G32B32A32_UINT,
698                                                                                                                   DE_NULL,
699                                                                                                                   0,
700                                                                                                                   &internalData,
701                                                                                                                   checkComputeRequireFull,
702                                                                                                                   caseDef.pipelineShaderStageCreateFlags,
703                                                                                                                   numWorkgroups,
704                                                                                                                   DE_FALSE,
705                                                                                                                   subgroupSize,
706                                                                                                                   localSizesToTest,
707                                                                                                                   localSizesToTestCount);
708 }
709
710 TestStatus testRequireSubgroupSize (Context& context, const CaseDefinition caseDef)
711 {
712         if (isAllComputeStages(caseDef.shaderStage))
713         {
714                 const deUint32                                                                                  numWorkgroups[3]                                                        = {1, 1, 1};
715                 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties                           = context.getSubgroupSizeControlPropertiesEXT();
716                 const VkPhysicalDeviceProperties&                                               physicalDeviceProperties                                        = context.getDeviceProperties();
717                 const deUint32                                                                                  requiredSubgroupSize                                            = getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
718                 const deUint64                                                                                  maxSubgroupLimitSize                                            = (deUint64)requiredSubgroupSize * subgroupSizeControlProperties.maxComputeWorkgroupSubgroups;
719                 const deUint32                                                                                  maxTotalLocalSize                                                       = (deUint32)min<deUint64>(maxSubgroupLimitSize, physicalDeviceProperties.limits.maxComputeWorkGroupInvocations);
720                 const UVec3                                                                                             localSize                                                                       = getLocalSizes(physicalDeviceProperties, maxTotalLocalSize);
721                 const deUint32                                                                                  localSizesToTest[5][3]  =
722                 {
723                         {localSize.x(), localSize.y(), localSize.z()},
724                         {requiredSubgroupSize, 1, 1},
725                         {1, requiredSubgroupSize, 1},
726                         {1, 1, requiredSubgroupSize},
727                         {1, 1, 1} // Isn't used, just here to make double buffering checks easier
728                 };
729
730                 deUint32 localSizesToTestCount = 5;
731                 if (caseDef.pipelineShaderStageCreateFlags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT)
732                         localSizesToTestCount = 3;
733
734                 struct internalDataStruct internalData =
735                 {
736                         &context,                               //  const Context*                      context;
737                         caseDef,                                //  struct CaseDefinition       caseDef;
738                         requiredSubgroupSize,   //  deUint32                            requiredSubgroupSize;
739                 };
740
741                 // Depending on the flag we need to run one verification function or another.
742                 return subgroups::makeComputeTestRequiredSubgroupSize(context,
743                                                                                                                           VK_FORMAT_R32G32B32A32_UINT,
744                                                                                                                           DE_NULL,
745                                                                                                                           0,
746                                                                                                                           &internalData,
747                                                                                                                           caseDef.pipelineShaderStageCreateFlags == VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT ? checkComputeRequireFull : checkCompute,
748                                                                                                                           caseDef.pipelineShaderStageCreateFlags,
749                                                                                                                           numWorkgroups,
750                                                                                                                           DE_TRUE,
751                                                                                                                           requiredSubgroupSize,
752                                                                                                                           localSizesToTest,
753                                                                                                                           localSizesToTestCount);
754         }
755         else if (isAllGraphicsStages(caseDef.shaderStage))
756         {
757                 const VkShaderStageFlags                                                                stages                                                  = subgroups::getPossibleGraphicsSubgroupStages(context, caseDef.shaderStage);
758                 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties   = context.getSubgroupSizeControlPropertiesEXT();
759                 const deUint32                                                                                  requiredSubgroupSize                    = getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
760                 const deUint32                                                                                  requiredSubgroupSizes[5]                = { requiredSubgroupSize, requiredSubgroupSize, requiredSubgroupSize, requiredSubgroupSize, requiredSubgroupSize};
761                 const struct internalDataStruct                                                 internalData                                    =
762                 {
763                         &context,                               //  const Context*                      context;
764                         caseDef,                                //  struct CaseDefinition       caseDef;
765                         requiredSubgroupSize,   //  deUint32                            requiredSubgroupSize;
766                 };
767
768                 return subgroups::allStagesRequiredSubgroupSize(context,
769                                                                                                                 VK_FORMAT_R32_UINT,
770                                                                                                                 DE_NULL,
771                                                                                                                 0,
772                                                                                                                 &internalData,
773                                                                                                                 checkVertexPipelineStages,
774                                                                                                                 stages,
775                                                                                                                 caseDef.pipelineShaderStageCreateFlags,
776                                                                                                                 caseDef.pipelineShaderStageCreateFlags,
777                                                                                                                 caseDef.pipelineShaderStageCreateFlags,
778                                                                                                                 caseDef.pipelineShaderStageCreateFlags,
779                                                                                                                 caseDef.pipelineShaderStageCreateFlags,
780                                                                                                                 requiredSubgroupSizes);
781         }
782         else if (isAllRayTracingStages(caseDef.shaderStage))
783         {
784                 const VkShaderStageFlags                                                                stages                                                  = subgroups::getPossibleRayTracingSubgroupStages(context, caseDef.shaderStage);
785                 const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties   = context.getSubgroupSizeControlPropertiesEXT();
786                 const deUint32                                                                                  requiredSubgroupSize                    = getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
787                 const vector<deUint32>                                                                  flags                                                   (6, caseDef.pipelineShaderStageCreateFlags);
788                 const vector<deUint32>                                                                  requiredSubgroupSizes                   (6, requiredSubgroupSize);
789                 const struct internalDataStruct                                                 internalData                                    =
790                 {
791                         &context,                               //  const Context*                      context;
792                         caseDef,                                //  struct CaseDefinition       caseDef;
793                         requiredSubgroupSize,   //  deUint32                            requiredSubgroupSize;
794                 };
795
796                 return subgroups::allRayTracingStagesRequiredSubgroupSize(context,
797                                                                                                                                   VK_FORMAT_R32_UINT,
798                                                                                                                                   DE_NULL,
799                                                                                                                                   0,
800                                                                                                                                   &internalData,
801                                                                                                                                   checkVertexPipelineStages,
802                                                                                                                                   stages,
803                                                                                                                                   flags.data(),
804                                                                                                                                   requiredSubgroupSizes.data());
805         }
806         else
807                 TCU_THROW(InternalError, "Unknown stage or invalid stage set");
808 }
809
810 TestStatus noSSBOtestRequireSubgroupSize (Context& context, const CaseDefinition caseDef)
811 {
812         const VkPhysicalDeviceSubgroupSizeControlPropertiesEXT& subgroupSizeControlProperties   = context.getSubgroupSizeControlPropertiesEXT();
813         const deUint32                                                                                  requiredSubgroupSize                    = getRequiredSubgroupSizeFromMode(context, caseDef, subgroupSizeControlProperties);
814         const VkFormat                                                                                  format                                                  = VK_FORMAT_R32_UINT;
815         const deUint32&                                                                                 flags                                                   = caseDef.pipelineShaderStageCreateFlags;
816         const deUint32&                                                                                 size                                                    = requiredSubgroupSize;
817         struct internalDataStruct                                                               internalData                                    =
818         {
819                 &context,
820                 caseDef,
821                 requiredSubgroupSize,
822         };
823
824         switch (caseDef.shaderStage)
825         {
826                 case VK_SHADER_STAGE_VERTEX_BIT:                                        return subgroups::makeVertexFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, flags, size);
827                 case VK_SHADER_STAGE_GEOMETRY_BIT:                                      return subgroups::makeGeometryFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, flags, size);
828                 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:          return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, size);
829                 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:       return subgroups::makeTessellationEvaluationFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkVertexPipelineStages, caseDef.shaderStage, flags, size);
830                 case VK_SHADER_STAGE_FRAGMENT_BIT:                                      return subgroups::makeFragmentFrameBufferTestRequiredSubgroupSize(context, format, DE_NULL, 0, &internalData, checkFragmentPipelineStages, flags, size);
831                 default:                                                                                        TCU_THROW(InternalError, "Unhandled shader stage");
832         }
833 }
834
835 TestStatus testSanitySubgroupSizeProperties (Context& context)
836 {
837         VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroupSizeControlProperties;
838         subgroupSizeControlProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
839         subgroupSizeControlProperties.pNext = DE_NULL;
840
841         VkPhysicalDeviceSubgroupProperties subgroupProperties;
842         subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
843         subgroupProperties.pNext = &subgroupSizeControlProperties;
844
845         VkPhysicalDeviceProperties2 properties;
846         properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
847         properties.pNext = &subgroupProperties;
848
849         context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
850
851         if (subgroupProperties.subgroupSize > subgroupSizeControlProperties.maxSubgroupSize ||
852                 subgroupProperties.subgroupSize < subgroupSizeControlProperties.minSubgroupSize)
853         {
854                 ostringstream error;
855                 error << "subgroupSize (" << subgroupProperties.subgroupSize << ") is not between maxSubgroupSize (";
856                 error << subgroupSizeControlProperties.maxSubgroupSize << ") and minSubgroupSize (";
857                 error << subgroupSizeControlProperties.minSubgroupSize << ")";
858
859                 return TestStatus::fail(error.str().c_str());
860         }
861
862         return TestStatus::pass("OK");
863 }
864 }
865
866 namespace vkt
867 {
868 namespace subgroups
869 {
870 TestCaseGroup* createSubgroupsSizeControlTests (TestContext& testCtx)
871 {
872         de::MovePtr<TestCaseGroup>      group                           (new TestCaseGroup(testCtx, "size_control", "VK_EXT_subgroup_size_control tests"));
873         de::MovePtr<TestCaseGroup>      framebufferGroup        (new TestCaseGroup(testCtx, "framebuffer", "Subgroup size control category tests: framebuffer"));
874         de::MovePtr<TestCaseGroup>      computeGroup            (new TestCaseGroup(testCtx, "compute", "Subgroup size control category tests: compute"));
875         de::MovePtr<TestCaseGroup>      graphicsGroup           (new TestCaseGroup(testCtx, "graphics", "Subgroup size control category tests: graphics"));
876         de::MovePtr<TestCaseGroup>      raytracingGroup         (new TestCaseGroup(testCtx, "ray_tracing", "Subgroup size control category tests: ray tracing"));
877         de::MovePtr<TestCaseGroup>      genericGroup            (new TestCaseGroup(testCtx, "generic", "Subgroup size control category tests: generic"));
878         const VkShaderStageFlags        stages[]                        =
879         {
880                 VK_SHADER_STAGE_VERTEX_BIT,
881                 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
882                 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
883                 VK_SHADER_STAGE_GEOMETRY_BIT,
884                 VK_SHADER_STAGE_FRAGMENT_BIT,
885         };
886
887         // Test sanity of the subgroup size properties.
888         {
889                 addFunctionCase(genericGroup.get(), "subgroup_size_properties", "", supportedCheck, testSanitySubgroupSizeProperties);
890         }
891
892         // Allow varying subgroup case.
893         {
894                 const CaseDefinition caseDefCompute = {VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, VK_SHADER_STAGE_COMPUTE_BIT, DE_FALSE, REQUIRED_SUBGROUP_SIZE_NONE, de::SharedPtr<bool>(new bool)};
895                 addFunctionCaseWithPrograms(computeGroup.get(), "allow_varying_subgroup_size", "", supportedCheckFeatures, initPrograms, test, caseDefCompute);
896                 const CaseDefinition caseDefAllGraphics = {VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, VK_SHADER_STAGE_ALL_GRAPHICS, DE_FALSE, REQUIRED_SUBGROUP_SIZE_NONE, de::SharedPtr<bool>(new bool)};
897                 addFunctionCaseWithPrograms(graphicsGroup.get(), "allow_varying_subgroup_size", "", supportedCheckFeaturesShader, initPrograms, test, caseDefAllGraphics);
898                 const CaseDefinition caseDefAllRaytracing = {VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, SHADER_STAGE_ALL_RAY_TRACING, DE_FALSE, REQUIRED_SUBGROUP_SIZE_NONE, de::SharedPtr<bool>(new bool)};
899                 addFunctionCaseWithPrograms(raytracingGroup.get(), "allow_varying_subgroup_size", "", supportedCheckFeaturesShader, initPrograms, test, caseDefAllRaytracing);
900
901                 for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
902                 {
903                         const CaseDefinition caseDefStage = {VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT, stages[stageIndex], DE_FALSE, REQUIRED_SUBGROUP_SIZE_NONE, de::SharedPtr<bool>(new bool)};
904                         addFunctionCaseWithPrograms(framebufferGroup.get(),  getShaderStageName(caseDefStage.shaderStage) + "_allow_varying_subgroup_size", "", supportedCheckFeaturesShader, initFrameBufferPrograms, noSSBOtest, caseDefStage);
905                 }
906         }
907
908         // Require full subgroups case (only compute shaders).
909         {
910                 const CaseDefinition caseDef = {VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT, VK_SHADER_STAGE_COMPUTE_BIT, DE_TRUE, REQUIRED_SUBGROUP_SIZE_NONE, de::SharedPtr<bool>(new bool)};
911                 addFunctionCaseWithPrograms(computeGroup.get(), "require_full_subgroups", "", supportedCheckFeatures, initProgramsRequireFull, testRequireFullSubgroups, caseDef);
912         }
913
914         // Require full subgroups together with allow varying subgroup (only compute shaders).
915         {
916                 deUint32 flags = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT | VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT;
917                 const CaseDefinition caseDef = {flags, VK_SHADER_STAGE_COMPUTE_BIT, DE_TRUE, REQUIRED_SUBGROUP_SIZE_NONE, de::SharedPtr<bool>(new bool)};
918                 addFunctionCaseWithPrograms(computeGroup.get(), "require_full_subgroups_allow_varying_subgroup_size", "", supportedCheckFeatures, initProgramsRequireFull, testRequireFullSubgroups, caseDef);
919         }
920
921         // Tests to check setting a required subgroup size value.
922         {
923                 const CaseDefinition caseDefAllGraphicsMax = {0u, VK_SHADER_STAGE_ALL_GRAPHICS, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool)};
924                 addFunctionCaseWithPrograms(graphicsGroup.get(), "required_subgroup_size_max", "", supportedCheckFeaturesShader, initPrograms, testRequireSubgroupSize, caseDefAllGraphicsMax);
925                 const CaseDefinition caseDefComputeMax = {0u, VK_SHADER_STAGE_COMPUTE_BIT, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool)};
926                 addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_max", "", supportedCheckFeatures, initPrograms, testRequireSubgroupSize, caseDefComputeMax);
927                 const CaseDefinition caseDefAllRaytracingMax = {0u, SHADER_STAGE_ALL_RAY_TRACING, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool)};
928                 addFunctionCaseWithPrograms(raytracingGroup.get(), "required_subgroup_size_max", "", supportedCheckFeaturesShader, initPrograms, testRequireSubgroupSize, caseDefAllRaytracingMax);
929
930                 const CaseDefinition caseDefAllGraphicsMin = {0u, VK_SHADER_STAGE_ALL_GRAPHICS, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool)};
931                 addFunctionCaseWithPrograms(graphicsGroup.get(), "required_subgroup_size_min", "", supportedCheckFeaturesShader, initPrograms, testRequireSubgroupSize, caseDefAllGraphicsMin);
932                 const CaseDefinition caseDefComputeMin = {0u, VK_SHADER_STAGE_COMPUTE_BIT, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool)};
933                 addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_min", "", supportedCheckFeatures, initPrograms, testRequireSubgroupSize, caseDefComputeMin);
934                 const CaseDefinition caseDefAllRaytracingMin = {0u, SHADER_STAGE_ALL_RAY_TRACING, DE_FALSE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool)};
935                 addFunctionCaseWithPrograms(raytracingGroup.get(), "required_subgroup_size_min", "", supportedCheckFeaturesShader, initPrograms, testRequireSubgroupSize, caseDefAllRaytracingMin);
936                 for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
937                 {
938                         const CaseDefinition caseDefStageMax = {0u, stages[stageIndex], DE_FALSE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool)};
939                         addFunctionCaseWithPrograms(framebufferGroup.get(),  getShaderStageName(caseDefStageMax.shaderStage) + "_required_subgroup_size_max", "", supportedCheckFeaturesShader, initFrameBufferPrograms, noSSBOtestRequireSubgroupSize, caseDefStageMax);
940                         const CaseDefinition caseDefStageMin = {0u, stages[stageIndex], DE_FALSE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool)};
941                         addFunctionCaseWithPrograms(framebufferGroup.get(),  getShaderStageName(caseDefStageMin.shaderStage) + "_required_subgroup_size_min", "", supportedCheckFeaturesShader, initFrameBufferPrograms, noSSBOtestRequireSubgroupSize, caseDefStageMin);
942                 }
943         }
944
945         // Tests to check setting a required subgroup size value, together with require full subgroups (only compute shaders).
946         {
947                 deUint32 flags = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT;
948                 const CaseDefinition caseDefMax = {flags, VK_SHADER_STAGE_COMPUTE_BIT, DE_TRUE, REQUIRED_SUBGROUP_SIZE_MAX, de::SharedPtr<bool>(new bool)};
949                 addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_max_require_full_subgroups", "", supportedCheckFeatures, initProgramsRequireFull, testRequireSubgroupSize, caseDefMax);
950                 const CaseDefinition caseDefMin = {flags, VK_SHADER_STAGE_COMPUTE_BIT, DE_TRUE, REQUIRED_SUBGROUP_SIZE_MIN, de::SharedPtr<bool>(new bool)};
951                 addFunctionCaseWithPrograms(computeGroup.get(), "required_subgroup_size_min_require_full_subgroups", "", supportedCheckFeatures, initProgramsRequireFull, testRequireSubgroupSize, caseDefMin);
952         }
953
954         group->addChild(genericGroup.release());
955         group->addChild(graphicsGroup.release());
956         group->addChild(computeGroup.release());
957         group->addChild(framebufferGroup.release());
958         group->addChild(raytracingGroup.release());
959
960         return group.release();
961 }
962
963 } // subgroups
964 } // vkt