Merge remote-tracking branch 'goog/upstream-vulkan-cts-next' into vulkan-cts-1.1...
[platform/upstream/VK-GL-CTS.git] / external / vulkancts / modules / vulkan / subgroups / vktSubgroupsBasicTests.cpp
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017 The Khronos Group Inc.
6  * Copyright (c) 2017 Codeplay Software Ltd.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  */ /*!
21  * \file
22  * \brief Subgroups Tests
23  */ /*--------------------------------------------------------------------*/
24
25 #include "vktSubgroupsBasicTests.hpp"
26 #include "vktSubgroupsTestsUtils.hpp"
27
28 #include <string>
29 #include <vector>
30
31 using namespace tcu;
32 using namespace std;
33 using namespace vk;
34 using namespace vkt;
35
36 namespace
37 {
38 static const deUint32                   ELECTED_VALUE           = 42u;
39 static const deUint32                   UNELECTED_VALUE         = 13u;
40 static const vk::VkDeviceSize   SHADER_BUFFER_SIZE      = 4096ull; // min(maxUniformBufferRange, maxImageDimension1D)
41
42 static bool checkFragmentSubgroupElect(std::vector<const void*> datas,
43                                                                            deUint32 width, deUint32 height, deUint32)
44 {
45         const deUint32* const resultData =
46                 reinterpret_cast<const deUint32*>(datas[0]);
47         deUint32 poisonValuesFound = 0;
48
49         for (deUint32 x = 0; x < width; ++x)
50         {
51                 for (deUint32 y = 0; y < height; ++y)
52                 {
53                         deUint32 val = resultData[y * width + x];
54
55                         switch (val)
56                         {
57                                 default:
58                                         // some garbage value was found!
59                                         return false;
60                                 case UNELECTED_VALUE:
61                                         break;
62                                 case ELECTED_VALUE:
63                                         poisonValuesFound++;
64                                         break;
65                         }
66                 }
67         }
68
69         // we used an atomicly incremented counter to note how many subgroups we used for the fragment shader
70         const deUint32 numSubgroupsUsed =
71                 *reinterpret_cast<const deUint32*>(datas[1]);
72
73         return numSubgroupsUsed == poisonValuesFound;
74 }
75
76 static bool checkFragmentSubgroupBarriers(std::vector<const void*> datas,
77                 deUint32 width, deUint32 height, deUint32)
78 {
79         const deUint32* const resultData = reinterpret_cast<const deUint32*>(datas[0]);
80
81         // We used this SSBO to generate our unique value!
82         const deUint32 ref = *reinterpret_cast<const deUint32*>(datas[3]);
83
84         for (deUint32 x = 0; x < width; ++x)
85         {
86                 for (deUint32 y = 0; y < height; ++y)
87                 {
88                         deUint32 val = resultData[x * height + y];
89
90                         if (val != ref)
91                         {
92                                 return false;
93                         }
94                 }
95         }
96
97         return true;
98 }
99
100 static bool checkFragmentSubgroupBarriersNoSSBO(std::vector<const void*> datas,
101                 deUint32 width, deUint32 height, deUint32)
102 {
103         const float* const      resultData      = reinterpret_cast<const float*>(datas[0]);
104
105         for (deUint32 x = 0u; x < width; ++x)
106         {
107                 for (deUint32 y = 0u; y < height; ++y)
108                 {
109                         const deUint32 ndx = (x * height + y) * 4u;
110                         if (1.0f == resultData[ndx +2])
111                         {
112                                 if(resultData[ndx] != resultData[ndx +1])
113                                 {
114                                         return false;
115                                 }
116                         }
117                         else if (resultData[ndx] != resultData[ndx +3])
118                         {
119                                 return false;
120                         }
121                 }
122         }
123
124         return true;
125 }
126
127 static bool checkVertexPipelineStagesSubgroupElectNoSSBO(std::vector<const void*> datas,
128                 deUint32 width, deUint32)
129 {
130         const float* const      resultData                      = reinterpret_cast<const float*>(datas[0]);
131         float                           poisonValuesFound       = 0.0f;
132         float                           numSubgroupsUsed        = 0.0f;
133
134         for (deUint32 x = 0; x < width; ++x)
135         {
136                 deUint32 val = static_cast<deUint32>(resultData[x * 2]);
137                 numSubgroupsUsed += resultData[x * 2 + 1];
138
139                 switch (val)
140                 {
141                         default:
142                                 // some garbage value was found!
143                                 return false;
144                         case UNELECTED_VALUE:
145                                 break;
146                         case ELECTED_VALUE:
147                                 poisonValuesFound += 1.0f;
148                                 break;
149                 }
150         }
151         return numSubgroupsUsed == poisonValuesFound;
152 }
153
154 static bool checkVertexPipelineStagesSubgroupElect(std::vector<const void*> datas,
155                 deUint32 width, deUint32)
156 {
157         const deUint32* const resultData =
158                 reinterpret_cast<const deUint32*>(datas[0]);
159         deUint32 poisonValuesFound = 0;
160
161         for (deUint32 x = 0; x < width; ++x)
162         {
163                 deUint32 val = resultData[x];
164
165                 switch (val)
166                 {
167                         default:
168                                 // some garbage value was found!
169                                 return false;
170                         case UNELECTED_VALUE:
171                                 break;
172                         case ELECTED_VALUE:
173                                 poisonValuesFound++;
174                                 break;
175                 }
176         }
177
178         // we used an atomicly incremented counter to note how many subgroups we used for the vertex shader
179         const deUint32 numSubgroupsUsed =
180                 *reinterpret_cast<const deUint32*>(datas[1]);
181
182         return numSubgroupsUsed == poisonValuesFound;
183 }
184
185 static bool checkVertexPipelineStagesSubgroupBarriers(std::vector<const void*> datas,
186                 deUint32 width, deUint32)
187 {
188         const deUint32* const resultData = reinterpret_cast<const deUint32*>(datas[0]);
189
190         // We used this SSBO to generate our unique value!
191         const deUint32 ref = *reinterpret_cast<const deUint32*>(datas[3]);
192
193         for (deUint32 x = 0; x < width; ++x)
194         {
195                 deUint32 val = resultData[x];
196
197                 if (val != ref)
198                 {
199                         return false;
200                 }
201         }
202
203         return true;
204 }
205
206 static bool checkVertexPipelineStagesSubgroupBarriersNoSSBO(std::vector<const void*> datas,
207                 deUint32 width, deUint32)
208 {
209         const float* const      resultData      = reinterpret_cast<const float*>(datas[0]);
210
211         for (deUint32 x = 0u; x < width; ++x)
212         {
213                 const deUint32 ndx = x*4u;
214                 if (1.0f == resultData[ndx +2])
215                 {
216                         if(resultData[ndx] != resultData[ndx +1])
217                         {
218                                 return false;
219                         }
220                 }
221                 else if (resultData[ndx] != resultData[ndx +3])
222                 {
223                         return false;
224                 }
225
226         }
227
228         return true;
229 }
230
231 static bool checkComputeSubgroupElect(std::vector<const void*> datas,
232                                                                           const deUint32 numWorkgroups[3], const deUint32 localSize[3],
233                                                                           deUint32)
234 {
235         const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
236
237         for (deUint32 nX = 0; nX < numWorkgroups[0]; ++nX)
238         {
239                 for (deUint32 nY = 0; nY < numWorkgroups[1]; ++nY)
240                 {
241                         for (deUint32 nZ = 0; nZ < numWorkgroups[2]; ++nZ)
242                         {
243                                 for (deUint32 lX = 0; lX < localSize[0]; ++lX)
244                                 {
245                                         for (deUint32 lY = 0; lY < localSize[1]; ++lY)
246                                         {
247                                                 for (deUint32 lZ = 0; lZ < localSize[2];
248                                                                 ++lZ)
249                                                 {
250                                                         const deUint32 globalInvocationX =
251                                                                 nX * localSize[0] + lX;
252                                                         const deUint32 globalInvocationY =
253                                                                 nY * localSize[1] + lY;
254                                                         const deUint32 globalInvocationZ =
255                                                                 nZ * localSize[2] + lZ;
256
257                                                         const deUint32 globalSizeX =
258                                                                 numWorkgroups[0] * localSize[0];
259                                                         const deUint32 globalSizeY =
260                                                                 numWorkgroups[1] * localSize[1];
261
262                                                         const deUint32 offset =
263                                                                 globalSizeX *
264                                                                 ((globalSizeY *
265                                                                   globalInvocationZ) +
266                                                                  globalInvocationY) +
267                                                                 globalInvocationX;
268                                                         if (1 != data[offset])
269                                                         {
270                                                                 return false;
271                                                         }
272                                                 }
273                                         }
274                                 }
275                         }
276                 }
277         }
278
279         return true;
280 }
281
282 static bool checkComputeSubgroupBarriers(std::vector<const void*> datas,
283                 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
284                 deUint32)
285 {
286         const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
287
288         // We used this SSBO to generate our unique value!
289         const deUint32 ref = *reinterpret_cast<const deUint32*>(datas[2]);
290
291         for (deUint32 nX = 0; nX < numWorkgroups[0]; ++nX)
292         {
293                 for (deUint32 nY = 0; nY < numWorkgroups[1]; ++nY)
294                 {
295                         for (deUint32 nZ = 0; nZ < numWorkgroups[2]; ++nZ)
296                         {
297                                 for (deUint32 lX = 0; lX < localSize[0]; ++lX)
298                                 {
299                                         for (deUint32 lY = 0; lY < localSize[1]; ++lY)
300                                         {
301                                                 for (deUint32 lZ = 0; lZ < localSize[2];
302                                                                 ++lZ)
303                                                 {
304                                                         const deUint32 globalInvocationX =
305                                                                 nX * localSize[0] + lX;
306                                                         const deUint32 globalInvocationY =
307                                                                 nY * localSize[1] + lY;
308                                                         const deUint32 globalInvocationZ =
309                                                                 nZ * localSize[2] + lZ;
310
311                                                         const deUint32 globalSizeX =
312                                                                 numWorkgroups[0] * localSize[0];
313                                                         const deUint32 globalSizeY =
314                                                                 numWorkgroups[1] * localSize[1];
315
316                                                         const deUint32 offset =
317                                                                 globalSizeX *
318                                                                 ((globalSizeY *
319                                                                   globalInvocationZ) +
320                                                                  globalInvocationY) +
321                                                                 globalInvocationX;
322
323                                                         if (ref != data[offset])
324                                                         {
325                                                                 return false;
326                                                         }
327                                                 }
328                                         }
329                                 }
330                         }
331                 }
332         }
333
334         return true;
335 }
336
// Subgroup operation exercised by a test case. getOpTypeName() maps every value
// except OPTYPE_LAST to the name of the corresponding GLSL built-in.
enum OpType
{
	OPTYPE_ELECT,							// subgroupElect (first enumerator, value 0)
	OPTYPE_SUBGROUP_BARRIER,				// subgroupBarrier
	OPTYPE_SUBGROUP_MEMORY_BARRIER,			// subgroupMemoryBarrier
	OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER,	// subgroupMemoryBarrierBuffer
	OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED,	// subgroupMemoryBarrierShared
	OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE,	// subgroupMemoryBarrierImage
	OPTYPE_LAST								// end marker; equals the number of operations above
};
347
348 std::string getOpTypeName(int opType)
349 {
350         switch (opType)
351         {
352                 default:
353                         DE_FATAL("Unsupported op type");
354                 case OPTYPE_ELECT:
355                         return "subgroupElect";
356                 case OPTYPE_SUBGROUP_BARRIER:
357                         return "subgroupBarrier";
358                 case OPTYPE_SUBGROUP_MEMORY_BARRIER:
359                         return "subgroupMemoryBarrier";
360                 case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
361                         return "subgroupMemoryBarrierBuffer";
362                 case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED:
363                         return "subgroupMemoryBarrierShared";
364                 case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
365                         return "subgroupMemoryBarrierImage";
366         }
367 }
368
369 struct CaseDefinition
370 {
371         int                                     opType;
372         VkShaderStageFlags      shaderStage;
373         bool                            noSSBO;
374 };
375
376 void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
377 {
378         std::ostringstream                      vertexSrc;
379         std::ostringstream                      fragmentSrc;
380         if(VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
381         {
382                 fragmentSrc     << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
383                         << "layout(location = 0) in vec4 in_color;\n"
384                         << "layout(location = 0) out vec4 out_color;\n"
385                         << "void main()\n"
386                         <<"{\n"
387                         << "    out_color = in_color;\n"
388                         << "}\n";
389                 programCollection.glslSources.add("fragment") << glu::FragmentSource(fragmentSrc.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
390         }
391         else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
392         {
393                 programCollection.glslSources.add("vert") << glu::VertexSource(subgroups::getVertShaderForStage(caseDef.shaderStage)) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
394         }
395
396         if (OPTYPE_ELECT == caseDef.opType)
397         {
398                 if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
399                 {
400                         vertexSrc       << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
401                                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
402                                 << "layout(location = 0) out vec4 out_color;\n"
403                                 << "layout(location = 0) in highp vec4 in_position;\n"
404                                 << "\n"
405                                 << "void main (void)\n"
406                                 << "{\n"
407                                 << "  if (subgroupElect())\n"
408                                 << "  {\n"
409                                 << "    out_color.r = " << ELECTED_VALUE << ";\n"
410                                 << "    out_color.g = 1.0f;\n"
411                                 << "  }\n"
412                                 << "  else\n"
413                                 << "  {\n"
414                                 << "    out_color.r = " << UNELECTED_VALUE << ";\n"
415                                 << "    out_color.g = 0.0f;\n"
416                                 << "  }\n"
417                                 << "  gl_Position = in_position;\n"
418                                 << "  gl_PointSize = 1.0f;\n"
419                                 << "}\n";
420                         programCollection.glslSources.add("vert")
421                                         << glu::VertexSource(vertexSrc.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
422                 }
423                 else
424                 {
425                         DE_FATAL("Unsupported shader stage");
426                 }
427         }
428         else
429         {
430                 std::ostringstream bdy;
431                 switch (caseDef.opType)
432                 {
433                         default:
434                                 DE_FATAL("Unhandled op type!");
435                         case OPTYPE_SUBGROUP_BARRIER:
436                         case OPTYPE_SUBGROUP_MEMORY_BARRIER:
437                         case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
438                                 bdy << " tempResult2 = tempBuffer[id];\n"
439                                         << "  if (subgroupElect())\n"
440                                         << "  {\n"
441                                         << "    tempResult = value;\n"
442                                         << "    out_color.b = 1.0f;\n"
443                                         << "  }\n"
444                                          << "  else\n"
445                                         << "  {\n"
446                                         << "    tempResult = tempBuffer[id];\n"
447                                         << "  }\n"
448                                         << "  " << getOpTypeName(caseDef.opType) << "();\n";
449                                 break;
450                         case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
451                                 bdy <<"tempResult2 = imageLoad(tempImage, ivec2(id, 0)).x;\n"
452                                         << "  if (subgroupElect())\n"
453                                         << "  {\n"
454                                         << "    tempResult = value;\n"
455                                         << "    out_color.b = 1.0f;\n"
456                                         << "  }\n"
457                                         << "  else\n"
458                                         << "  {\n"
459                                         << "    tempResult = imageLoad(tempImage, ivec2(id, 0)).x;\n"
460                                         << "  }\n"
461                                         << "  subgroupMemoryBarrierImage();\n";
462
463                                 break;
464                 }
465
466                 if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
467                 {
468                         fragmentSrc     << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
469                                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
470                                 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
471                                 << "layout(location = 0) out vec4 out_color;\n"
472                                 << "\n"
473                                 << "layout(set = 0, binding = 0) uniform Buffer1\n"
474                                 << "{\n"
475                                 << "  uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
476                                 << "};\n"
477                                 << "\n"
478                                 << "layout(set = 0, binding = 1) uniform Buffer2\n"
479                                 << "{\n"
480                                 << "  uint value;\n"
481                                 << "};\n"
482                                 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
483                                 << "void main (void)\n"
484                                 << "{\n"
485                                 << "  if (gl_HelperInvocation) return;\n"
486                                 << "  uint id = 0;\n"
487                                 << "  if (subgroupElect())\n"
488                                 << "  {\n"
489                                 << "    id = uint(gl_FragCoord.x);\n"
490                                 << "  }\n"
491                                 << "  id = subgroupBroadcastFirst(id);\n"
492                                 << "  uint localId = id;\n"
493                                 << "  uint tempResult = 0u;\n"
494                                 << "  uint tempResult2 = 0u;\n"
495                                 << "  out_color.b = 0.0f;\n"
496                                 << bdy.str()
497                                 << "  out_color.r = float(tempResult);\n"
498                                 << "  out_color.g = float(value);\n"
499                                 << "  out_color.a = float(tempResult2);\n"
500                                 << "}\n";
501
502                         programCollection.glslSources.add("fragment") << glu::FragmentSource(fragmentSrc.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
503                 }
504                 else if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
505                 {
506                         vertexSrc       << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
507                                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
508                                 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
509                                 <<"\n"
510                                 << "layout(location = 0) out vec4 out_color;\n"
511                                 << "layout(location = 0) in highp vec4 in_position;\n"
512                                 << "\n"
513                                 << "layout(set = 0, binding = 0) uniform Buffer1\n"
514                                 << "{\n"
515                                 << "  uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
516                                 << "};\n"
517                                 << "\n"
518                                 << "layout(set = 0, binding = 1) uniform Buffer2\n"
519                                 << "{\n"
520                                 << "  uint value;\n"
521                                 << "};\n"
522                                 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(set = 0, binding = 2, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
523                                 << "void main (void)\n"
524                                 << "{\n"
525                                 << "  uint id = 0;\n"
526                                 << "  if (subgroupElect())\n"
527                                 << "  {\n"
528                                 << "    id = gl_VertexIndex;\n"
529                                 << "  }\n"
530                                 << "  id = subgroupBroadcastFirst(id);\n"
531                                 << "  uint tempResult = 0u;\n"
532                                 << "  uint tempResult2 = 0u;\n"
533                                 << "  out_color.b = 0.0f;\n"
534                                 << bdy.str()
535                                 << "  out_color.r = float(tempResult);\n"
536                                 << "  out_color.g = float(value);\n"
537                                 << "  out_color.a = float(tempResult2);\n"
538                                 << "  gl_Position = in_position;\n"
539                                 << "  gl_PointSize = 1.0f;\n"
540                                 << "}\n";
541
542                         programCollection.glslSources.add("vert") << glu::VertexSource(vertexSrc.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
543                 }
544                 else
545                 {
546                         DE_FATAL("Unsupported shader stage");
547                 }
548         }
549 }
550
551 void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
552 {
553         if (OPTYPE_ELECT == caseDef.opType)
554         {
555                 if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
556                 {
557                         std::ostringstream src;
558
559                         src << "#version 450\n"
560                                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
561                                 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
562                                 "local_size_z_id = 2) in;\n"
563                                 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
564                                 << "{\n"
565                                 << "  uint result[];\n"
566                                 << "};\n"
567                                 << "\n"
568                                 << subgroups::getSharedMemoryBallotHelper()
569                                 << "void main (void)\n"
570                                 << "{\n"
571                                 << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
572                                 << "  highp uint offset = globalSize.x * ((globalSize.y * "
573                                 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
574                                 "gl_GlobalInvocationID.x;\n"
575                                 << "  uint value = " << UNELECTED_VALUE << ";\n"
576                                 << "  if (subgroupElect())\n"
577                                 << "  {\n"
578                                 << "    value = " << ELECTED_VALUE << ";\n"
579                                 << "  }\n"
580                                 << "  uvec4 bits = bitCount(sharedMemoryBallot(value == " << ELECTED_VALUE << "));\n"
581                                 << "  result[offset] = bits.x + bits.y + bits.z + bits.w;\n"
582                                 << "}\n";
583
584                         programCollection.glslSources.add("comp")
585                                         << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
586                 }
587                 else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
588                 {
589                         programCollection.glslSources.add("vert")
590                                         << glu::VertexSource(subgroups::getVertShaderForStage(caseDef.shaderStage)) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
591
592                         std::ostringstream frag;
593
594                         frag << "#version 450\n"
595                                  << "#extension GL_KHR_shader_subgroup_basic: enable\n"
596                                  << "layout(location = 0) out uint data;\n"
597                                  << "layout(set = 0, binding = 0, std430) buffer Buffer\n"
598                                  << "{\n"
599                                  << "  uint numSubgroupsExecuted;\n"
600                                  << "};\n"
601                                  << "void main (void)\n"
602                                  << "{\n"
603                                  << "  if (gl_HelperInvocation) return;\n"
604                                  << "  if (subgroupElect())\n"
605                                  << "  {\n"
606                                  << "    data = " << ELECTED_VALUE << ";\n"
607                                  << "    atomicAdd(numSubgroupsExecuted, 1);\n"
608                                  << "  }\n"
609                                  << "  else\n"
610                                  << "  {\n"
611                                  << "    data = " << UNELECTED_VALUE << ";\n"
612                                  << "  }\n"
613                                  << "}\n";
614
615                         programCollection.glslSources.add("frag")
616                                         << glu::FragmentSource(frag.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
617                 }
618                 else if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
619                 {
620                         std::ostringstream src;
621
622                         src << "#version 450\n"
623                                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
624                                 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
625                                 << "{\n"
626                                 << "  uint result[];\n"
627                                 << "};\n"
628                                 << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
629                                 << "{\n"
630                                 << "  uint numSubgroupsExecuted;\n"
631                                 << "};\n"
632                                 << "\n"
633                                 << "void main (void)\n"
634                                 << "{\n"
635                                 << "  if (subgroupElect())\n"
636                                 << "  {\n"
637                                 << "    result[gl_VertexIndex] = " << ELECTED_VALUE << ";\n"
638                                 << "    atomicAdd(numSubgroupsExecuted, 1);\n"
639                                 << "  }\n"
640                                 << "  else\n"
641                                 << "  {\n"
642                                 << "    result[gl_VertexIndex] = " << UNELECTED_VALUE << ";\n"
643                                 << "  }\n"
644                                 << "  gl_PointSize = 1.0f;\n"
645                                 << "}\n";
646
647                         programCollection.glslSources.add("vert")
648                                         << glu::VertexSource(src.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
649                 }
650                 else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
651                 {
652                         programCollection.glslSources.add("vert")
653                                         << glu::VertexSource(subgroups::getVertShaderForStage(caseDef.shaderStage)) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
654
655                         std::ostringstream src;
656
657                         src << "#version 450\n"
658                                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
659                                 << "layout(points) in;\n"
660                                 << "layout(points, max_vertices = 1) out;\n"
661                                 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
662                                 << "{\n"
663                                 << "  uint result[];\n"
664                                 << "};\n"
665                                 << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
666                                 << "{\n"
667                                 << "  uint numSubgroupsExecuted;\n"
668                                 << "};\n"
669                                 << "\n"
670                                 << "void main (void)\n"
671                                 << "{\n"
672                                 << "  if (subgroupElect())\n"
673                                 << "  {\n"
674                                 << "    result[gl_PrimitiveIDIn] = " << ELECTED_VALUE << ";\n"
675                                 << "    atomicAdd(numSubgroupsExecuted, 1);\n"
676                                 << "  }\n"
677                                 << "  else\n"
678                                 << "  {\n"
679                                 << "    result[gl_PrimitiveIDIn] = " << UNELECTED_VALUE << ";\n"
680                                 << "  }\n"
681                                 << "}\n";
682
683                         programCollection.glslSources.add("geom")
684                                         << glu::GeometrySource(src.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
685                 }
686                 else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
687                 {
688                         programCollection.glslSources.add("vert")
689                                         << glu::VertexSource(subgroups::getVertShaderForStage(caseDef.shaderStage)) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
690
691                         programCollection.glslSources.add("tese")
692                                         << glu::TessellationEvaluationSource("#version 450\nlayout(isolines) in;\nvoid main (void) {}\n");
693
694                         std::ostringstream src;
695
696                         src << "#version 450\n"
697                                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
698                                 << "layout(vertices=1) out;\n"
699                                 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
700                                 << "{\n"
701                                 << "  uint result[];\n"
702                                 << "};\n"
703                                 << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
704                                 << "{\n"
705                                 << "  uint numSubgroupsExecuted;\n"
706                                 << "};\n"
707                                 << "\n"
708                                 << "void main (void)\n"
709                                 << "{\n"
710                                 << "  if (subgroupElect())\n"
711                                 << "  {\n"
712                                 << "    result[gl_PrimitiveID] = " << ELECTED_VALUE << ";\n"
713                                 << "    atomicAdd(numSubgroupsExecuted, 1);\n"
714                                 << "  }\n"
715                                 << "  else\n"
716                                 << "  {\n"
717                                 << "    result[gl_PrimitiveID] = " << UNELECTED_VALUE << ";\n"
718                                 << "  }\n"
719                                 << "}\n";
720
721                         programCollection.glslSources.add("tesc")
722                                         << glu::TessellationControlSource(src.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
723                 }
724                 else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
725                 {
726                         programCollection.glslSources.add("vert")
727                                         << glu::VertexSource(subgroups::getVertShaderForStage(caseDef.shaderStage)) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
728
729                         programCollection.glslSources.add("tesc")
730                                         << glu::TessellationControlSource("#version 450\nlayout(vertices=1) out;\nvoid main (void) { for(uint i = 0; i < 4; i++) { gl_TessLevelOuter[i] = 1.0f; } }\n");
731
732                         std::ostringstream src;
733
734                         src << "#version 450\n"
735                                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
736                                 << "layout(isolines) in;\n"
737                                 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
738                                 << "{\n"
739                                 << "  uint result[];\n"
740                                 << "};\n"
741                                 << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
742                                 << "{\n"
743                                 << "  uint numSubgroupsExecuted;\n"
744                                 << "};\n"
745                                 << "\n"
746                                 << "void main (void)\n"
747                                 << "{\n"
748                                 << "  if (subgroupElect())\n"
749                                 << "  {\n"
750                                 << "    result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = " << ELECTED_VALUE << ";\n"
751                                 << "    atomicAdd(numSubgroupsExecuted, 1);\n"
752                                 << "  }\n"
753                                 << "  else\n"
754                                 << "  {\n"
755                                 << "    result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = " << UNELECTED_VALUE << ";\n"
756                                 << "  }\n"
757                                 << "}\n";
758
759                         programCollection.glslSources.add("tese")
760                                         << glu::TessellationEvaluationSource(src.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
761                 }
762                 else
763                 {
764                         DE_FATAL("Unsupported shader stage");
765                 }
766         }
767         else
768         {
769                 std::ostringstream bdy;
770
771                 switch (caseDef.opType)
772                 {
773                         default:
774                                 DE_FATAL("Unhandled op type!");
775                         case OPTYPE_SUBGROUP_BARRIER:
776                         case OPTYPE_SUBGROUP_MEMORY_BARRIER:
777                         case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
778                                 bdy << "  if (subgroupElect())\n"
779                                         << "  {\n"
780                                         << "    tempBuffer[id] = value;\n"
781                                         << "  }\n"
782                                         << "  " << getOpTypeName(caseDef.opType) << "();\n"
783                                         << "  tempResult = tempBuffer[id];\n";
784                                 break;
785                         case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED:
786                                 bdy << "  if (subgroupElect())\n"
787                                         << "  {\n"
788                                         << "    tempShared[localId] = value;\n"
789                                         << "  }\n"
790                                         << "  subgroupMemoryBarrierShared();\n"
791                                         << "  tempResult = tempShared[localId];\n";
792                                 break;
793                         case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
794                                 bdy << "  if (subgroupElect())\n"
795                                         << "  {\n"
796                                         << "    imageStore(tempImage, ivec2(id, 0), ivec4(value));\n"
797                                         << "  }\n"
798                                         << "  subgroupMemoryBarrierImage();\n"
799                                         << "  tempResult = imageLoad(tempImage, ivec2(id, 0)).x;\n";
800                                 break;
801                 }
802
803                 if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
804                 {
805                         std::ostringstream src;
806
807                         src << "#version 450\n"
808                                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
809                                 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
810                                 "local_size_z_id = 2) in;\n"
811                                 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
812                                 << "{\n"
813                                 << "  uint result[];\n"
814                                 << "};\n"
815                                 << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
816                                 << "{\n"
817                                 << "  uint tempBuffer[];\n"
818                                 << "};\n"
819                                 << "layout(set = 0, binding = 2, std430) buffer Buffer3\n"
820                                 << "{\n"
821                                 << "  uint value;\n"
822                                 << "};\n"
823                                 << "layout(set = 0, binding = 3, r32ui) uniform uimage2D tempImage;\n"
824                                 << "shared uint tempShared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
825                                 << "\n"
826                                 << "void main (void)\n"
827                                 << "{\n"
828                                 << "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
829                                 << "  highp uint offset = globalSize.x * ((globalSize.y * "
830                                 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
831                                 "gl_GlobalInvocationID.x;\n"
832                                 << "  uint localId = gl_SubgroupID;\n"
833                                 << "  uint id = globalSize.x * ((globalSize.y * "
834                                 "gl_WorkGroupID.z) + gl_WorkGroupID.y) + "
835                                 "gl_WorkGroupID.x + localId;\n"
836                                 << "  uint tempResult = 0;\n"
837                                 << bdy.str()
838                                 << "  result[offset] = tempResult;\n"
839                                 << "}\n";
840
841                         programCollection.glslSources.add("comp")
842                                         << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
843                 }
844                 else if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
845                 {
846                         programCollection.glslSources.add("vert")
847                                         << glu::VertexSource(subgroups::getVertShaderForStage(caseDef.shaderStage)) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
848
849                         std::ostringstream frag;
850
851                         frag << "#version 450\n"
852                                  << "#extension GL_KHR_shader_subgroup_basic: enable\n"
853                                  << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
854                                  << "layout(location = 0) out uint result;\n"
855                                  << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
856                                  << "{\n"
857                                  << "  uint tempBuffer[];\n"
858                                  << "};\n"
859                                  << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
860                                  << "{\n"
861                                  << "  uint subgroupID;\n"
862                                  << "};\n"
863                                  << "layout(set = 0, binding = 2, std430) buffer Buffer3\n"
864                                  << "{\n"
865                                  << "  uint value;\n"
866                                  << "};\n"
867                                  << "layout(set = 0, binding = 3, r32ui) uniform uimage2D tempImage;\n"
868                                  << "void main (void)\n"
869                                  << "{\n"
870                                  << "  if (gl_HelperInvocation) return;\n"
871                                  << "  uint id = 0;\n"
872                                  << "  if (subgroupElect())\n"
873                                  << "  {\n"
874                                  << "    id = atomicAdd(subgroupID, 1);\n"
875                                  << "  }\n"
876                                  << "  id = subgroupBroadcastFirst(id);\n"
877                                  << "  uint localId = id;\n"
878                                  << "  uint tempResult = 0;\n"
879                                  << bdy.str()
880                                  << "  result = tempResult;\n"
881                                  << "}\n";
882
883                         programCollection.glslSources.add("frag")
884                                         << glu::FragmentSource(frag.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
885                 }
886                 else if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
887                 {
888                         std::ostringstream src;
889
890                         src << "#version 450\n"
891                                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
892                                 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
893                                 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
894                                 << "{\n"
895                                 << "  uint result[];\n"
896                                 << "};\n"
897                                 << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
898                                 << "{\n"
899                                 << "  uint tempBuffer[];\n"
900                                 << "};\n"
901                                 << "layout(set = 0, binding = 2, std430) buffer Buffer3\n"
902                                 << "{\n"
903                                 << "  uint subgroupID;\n"
904                                 << "};\n"
905                                 << "layout(set = 0, binding = 3, std430) buffer Buffer4\n"
906                                 << "{\n"
907                                 << "  uint value;\n"
908                                 << "};\n"
909                                 << "layout(set = 0, binding = 4, r32ui) uniform uimage2D tempImage;\n"
910                                 << "void main (void)\n"
911                                 << "{\n"
912                                 << "  uint id = 0;\n"
913                                 << "  if (subgroupElect())\n"
914                                 << "  {\n"
915                                 << "    id = atomicAdd(subgroupID, 1);\n"
916                                 << "  }\n"
917                                 << "  id = subgroupBroadcastFirst(id);\n"
918                                 << "  uint localId = id;\n"
919                                 << "  uint tempResult = 0;\n"
920                                 << bdy.str()
921                                 << "  result[gl_VertexIndex] = tempResult;\n"
922                                 << "  gl_PointSize = 1.0f;\n"
923                                 << "}\n";
924
925                         programCollection.glslSources.add("vert")
926                                         << glu::VertexSource(src.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
927                 }
928                 else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
929                 {
930                         programCollection.glslSources.add("vert")
931                                         << glu::VertexSource(subgroups::getVertShaderForStage(caseDef.shaderStage)) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
932
933                         std::ostringstream src;
934
935                         src << "#version 450\n"
936                                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
937                                 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
938                                 << "layout(points) in;\n"
939                                 << "layout(points, max_vertices = 1) out;\n"
940                                 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
941                                 << "{\n"
942                                 << "  uint result[];\n"
943                                 << "};\n"
944                                 << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
945                                 << "{\n"
946                                 << "  uint tempBuffer[];\n"
947                                 << "};\n"
948                                 << "layout(set = 0, binding = 2, std430) buffer Buffer3\n"
949                                 << "{\n"
950                                 << "  uint subgroupID;\n"
951                                 << "};\n"
952                                 << "layout(set = 0, binding = 3, std430) buffer Buffer4\n"
953                                 << "{\n"
954                                 << "  uint value;\n"
955                                 << "};\n"
956                                 << "layout(set = 0, binding = 4, r32ui) uniform uimage2D tempImage;\n"
957                                 << "void main (void)\n"
958                                 << "{\n"
959                                 << "  uint id = 0;\n"
960                                 << "  if (subgroupElect())\n"
961                                 << "  {\n"
962                                 << "    id = atomicAdd(subgroupID, 1);\n"
963                                 << "  }\n"
964                                 << "  id = subgroupBroadcastFirst(id);\n"
965                                 << "  uint localId = id;\n"
966                                 << "  uint tempResult = 0;\n"
967                                 << bdy.str()
968                                 << "  result[gl_PrimitiveIDIn] = tempResult;\n"
969                                 << "}\n";
970
971                         programCollection.glslSources.add("geom")
972                                         << glu::GeometrySource(src.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
973                 }
974                 else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
975                 {
976                         programCollection.glslSources.add("vert")
977                                         << glu::VertexSource(subgroups::getVertShaderForStage(caseDef.shaderStage)) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
978
979                         programCollection.glslSources.add("tese")
980                                         << glu::TessellationEvaluationSource("#version 450\nlayout(isolines) in;\nvoid main (void) {}\n");
981
982                         std::ostringstream src;
983
984                         src << "#version 450\n"
985                                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
986                                 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
987                                 << "layout(vertices=1) out;\n"
988                                 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
989                                 << "{\n"
990                                 << "  uint result[];\n"
991                                 << "};\n"
992                                 << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
993                                 << "{\n"
994                                 << "  uint tempBuffer[];\n"
995                                 << "};\n"
996                                 << "layout(set = 0, binding = 2, std430) buffer Buffer3\n"
997                                 << "{\n"
998                                 << "  uint subgroupID;\n"
999                                 << "};\n"
1000                                 << "layout(set = 0, binding = 3, std430) buffer Buffer4\n"
1001                                 << "{\n"
1002                                 << "  uint value;\n"
1003                                 << "};\n"
1004                                 << "layout(set = 0, binding = 4, r32ui) uniform uimage2D tempImage;\n"
1005                                 << "void main (void)\n"
1006                                 << "{\n"
1007                                 << "  uint id = 0;\n"
1008                                 << "  if (subgroupElect())\n"
1009                                 << "  {\n"
1010                                 << "    id = atomicAdd(subgroupID, 1);\n"
1011                                 << "  }\n"
1012                                 << "  id = subgroupBroadcastFirst(id);\n"
1013                                 << "  uint localId = id;\n"
1014                                 << "  uint tempResult = 0;\n"
1015                                 << bdy.str()
1016                                 << "  result[gl_PrimitiveID] = tempResult;\n"
1017                                 << "}\n";
1018
1019                         programCollection.glslSources.add("tesc")
1020                                         << glu::TessellationControlSource(src.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
1021                 }
1022                 else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
1023                 {
1024                         programCollection.glslSources.add("vert")
1025                                         << glu::VertexSource(subgroups::getVertShaderForStage(caseDef.shaderStage)) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
1026
1027                         programCollection.glslSources.add("tesc")
1028                                         << glu::TessellationControlSource("#version 450\nlayout(vertices=1) out;\nvoid main (void) { for(uint i = 0; i < 4; i++) { gl_TessLevelOuter[i] = 1.0f; } }\n");
1029
1030                         std::ostringstream src;
1031
1032                         src << "#version 450\n"
1033                                 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
1034                                 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1035                                 << "layout(isolines) in;\n"
1036                                 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
1037                                 << "{\n"
1038                                 << "  uint result[];\n"
1039                                 << "};\n"
1040                                 << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
1041                                 << "{\n"
1042                                 << "  uint tempBuffer[];\n"
1043                                 << "};\n"
1044                                 << "layout(set = 0, binding = 2, std430) buffer Buffer3\n"
1045                                 << "{\n"
1046                                 << "  uint subgroupID;\n"
1047                                 << "};\n"
1048                                 << "layout(set = 0, binding = 3, std430) buffer Buffer4\n"
1049                                 << "{\n"
1050                                 << "  uint value;\n"
1051                                 << "};\n"
1052                                 << "layout(set = 0, binding = 4, r32ui) uniform uimage2D tempImage;\n"
1053                                 << "void main (void)\n"
1054                                 << "{\n"
1055                                 << "  uint id = 0;\n"
1056                                 << "  if (subgroupElect())\n"
1057                                 << "  {\n"
1058                                 << "    id = atomicAdd(subgroupID, 1);\n"
1059                                 << "  }\n"
1060                                 << "  id = subgroupBroadcastFirst(id);\n"
1061                                 << "  uint localId = id;\n"
1062                                 << "  uint tempResult = 0;\n"
1063                                 << bdy.str()
1064                                 << "  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
1065                                 << "}\n";
1066
1067                         programCollection.glslSources.add("tese")
1068                                         << glu::TessellationEvaluationSource(src.str()) << vk::ShaderBuildOptions(vk::SPIRV_VERSION_1_3, 0u);
1069                 }
1070                 else
1071                 {
1072                         DE_FATAL("Unsupported shader stage");
1073                 }
1074         }
1075 }
1076
1077 tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
1078 {
1079         if (!subgroups::isSubgroupSupported(context))
1080                 TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
1081
1082         if (!subgroups::areSubgroupOperationsSupportedForStage(
1083                                 context, caseDef.shaderStage))
1084         {
1085                 if (subgroups::areSubgroupOperationsRequiredForStage(
1086                                         caseDef.shaderStage))
1087                 {
1088                         return tcu::TestStatus::fail(
1089                                            "Shader stage " +
1090                                            subgroups::getShaderStageName(caseDef.shaderStage) +
1091                                            " is required to support subgroup operations!");
1092                 }
1093                 else
1094                 {
1095                         TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
1096                 }
1097         }
1098
1099         if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BASIC_BIT))
1100         {
1101                 return tcu::TestStatus::fail(
1102                                    "Subgroup feature " +
1103                                    subgroups::getSubgroupFeatureName(VK_SUBGROUP_FEATURE_BASIC_BIT) +
1104                                    " is a required capability!");
1105         }
1106
1107         if (OPTYPE_ELECT != caseDef.opType && VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
1108         {
1109                 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_BALLOT_BIT))
1110                 {
1111                         TCU_THROW(NotSupportedError, "Subgroup basic operation non-compute stage test required that ballot operations are supported!");
1112                 }
1113         }
1114
1115         //Tests which don't use the SSBO
1116         if(caseDef.noSSBO)
1117         {
1118                 if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
1119                 {
1120                         if (OPTYPE_ELECT == caseDef.opType)
1121                         {
1122                                 return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32G32_SFLOAT,
1123                                                                                                  DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO);
1124                         }
1125                         else
1126                         {
1127                                 const deUint32                                          inputDatasCount = OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? 3u : 2u;
1128                                 std::vector<subgroups::SSBOData>        inputDatas              (inputDatasCount);
1129
1130                                 inputDatas[0].format = VK_FORMAT_R32_UINT;
1131                                 inputDatas[0].numElements = SHADER_BUFFER_SIZE/4ull;
1132                                 inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero;
1133
1134                                 inputDatas[1].format = VK_FORMAT_R32_UINT;
1135                                 inputDatas[1].numElements = 1ull;
1136                                 inputDatas[1].initializeType = subgroups::SSBOData::InitializeNonZero;
1137
1138                                 if(OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType )
1139                                 {
1140                                         inputDatas[2].format = VK_FORMAT_R32_UINT;
1141                                         inputDatas[2].numElements = SHADER_BUFFER_SIZE;
1142                                         inputDatas[2].initializeType = subgroups::SSBOData::InitializeNone;
1143                                         inputDatas[2].isImage = true;
1144                                 }
1145
1146                                 DE_ASSERT(SHADER_BUFFER_SIZE/4ull > subgroups::getSubgroupSize(context));
1147                                 return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT,
1148                                                                                                  &inputDatas[0], inputDatasCount, checkVertexPipelineStagesSubgroupBarriersNoSSBO);
1149                         }
1150                 }
1151
1152                 if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
1153                 {
1154                                 const deUint32                                          inputDatasCount = OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? 3u : 2u;
1155                                 std::vector<subgroups::SSBOData>        inputDatas              (inputDatasCount);
1156
1157                                 inputDatas[0].format = VK_FORMAT_R32_UINT;
1158                                 inputDatas[0].numElements = SHADER_BUFFER_SIZE/4ull;
1159                                 inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero;
1160
1161                                 inputDatas[1].format = VK_FORMAT_R32_UINT;
1162                                 inputDatas[1].numElements = 1ull;
1163                                 inputDatas[1].initializeType = subgroups::SSBOData::InitializeNonZero;
1164
1165                                 if(OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType )
1166                                 {
1167                                         inputDatas[2].format = VK_FORMAT_R32_UINT;
1168                                         inputDatas[2].numElements = SHADER_BUFFER_SIZE;
1169                                         inputDatas[2].initializeType = subgroups::SSBOData::InitializeNone;
1170                                         inputDatas[2].isImage = true;
1171                                 }
1172
1173                         return subgroups::makeFragmentFrameBufferTest(context, VK_FORMAT_R32G32B32A32_SFLOAT,
1174                                                                                            &inputDatas[0], inputDatasCount, checkFragmentSubgroupBarriersNoSSBO);
1175                 }
1176         }
1177
1178         if ((VK_SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage) &&
1179                         (VK_SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage))
1180         {
1181                 if (!subgroups::isVertexSSBOSupportedForDevice(context))
1182                 {
1183                         TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
1184                 }
1185         }
1186
1187         if (VK_SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
1188         {
1189                 if (!subgroups::isFragmentSSBOSupportedForDevice(context))
1190                 {
1191                         TCU_THROW(NotSupportedError, "Subgroup basic operation require that the fragment stage be able to write to SSBOs!");
1192                 }
1193
1194                 if (OPTYPE_ELECT == caseDef.opType)
1195                 {
1196                         subgroups::SSBOData inputData;
1197                         inputData.format = VK_FORMAT_R32_UINT;
1198                         inputData.numElements = 1;
1199                         inputData.initializeType = subgroups::SSBOData::InitializeZero;
1200
1201                         return subgroups::makeFragmentTest(context, VK_FORMAT_R32_UINT,
1202                                                                                            &inputData, 1, checkFragmentSubgroupElect);
1203                 }
1204                 else
1205                 {
1206                         const deUint32 inputDatasCount = 4;
1207                         subgroups::SSBOData inputDatas[inputDatasCount];
1208                         inputDatas[0].format = VK_FORMAT_R32_UINT;
1209                         inputDatas[0].numElements = SHADER_BUFFER_SIZE;
1210                         inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero;
1211
1212                         inputDatas[1].format = VK_FORMAT_R32_UINT;
1213                         inputDatas[1].numElements = 1;
1214                         inputDatas[1].initializeType = subgroups::SSBOData::InitializeZero;
1215
1216                         inputDatas[2].format = VK_FORMAT_R32_UINT;
1217                         inputDatas[2].numElements = 1;
1218                         inputDatas[2].initializeType = subgroups::SSBOData::InitializeNonZero;
1219
1220                         inputDatas[3].format = VK_FORMAT_R32_UINT;
1221                         inputDatas[3].numElements = SHADER_BUFFER_SIZE;
1222                         inputDatas[3].initializeType = subgroups::SSBOData::InitializeNone;
1223                         inputDatas[3].isImage = true;
1224
1225                         return subgroups::makeFragmentTest(context, VK_FORMAT_R32_UINT,
1226                                                                                            inputDatas, inputDatasCount, checkFragmentSubgroupBarriers);
1227
1228                 }
1229         }
1230         else if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
1231         {
1232                 if (OPTYPE_ELECT == caseDef.opType)
1233                 {
1234                         return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT,
1235                                                                                           DE_NULL, 0, checkComputeSubgroupElect);
1236                 }
1237                 else
1238                 {
1239                         const deUint32 inputDatasCount = 3;
1240                         subgroups::SSBOData inputDatas[inputDatasCount];
1241                         inputDatas[0].format = VK_FORMAT_R32_UINT;
1242                         inputDatas[0].numElements = SHADER_BUFFER_SIZE;
1243                         inputDatas[0].initializeType = subgroups::SSBOData::InitializeNone;
1244
1245                         inputDatas[1].format = VK_FORMAT_R32_UINT;
1246                         inputDatas[1].numElements = 1;
1247                         inputDatas[1].initializeType = subgroups::SSBOData::InitializeNonZero;
1248
1249                         inputDatas[2].format = VK_FORMAT_R32_UINT;
1250                         inputDatas[2].numElements = SHADER_BUFFER_SIZE;
1251                         inputDatas[2].initializeType = subgroups::SSBOData::InitializeNone;
1252                         inputDatas[2].isImage = true;
1253
1254                         return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT,
1255                                                                                           inputDatas, inputDatasCount, checkComputeSubgroupBarriers);
1256                 }
1257         }
1258         else if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
1259         {
1260                 if (OPTYPE_ELECT == caseDef.opType)
1261                 {
1262                         subgroups::SSBOData inputData;
1263                         inputData.format = VK_FORMAT_R32_UINT;
1264                         inputData.numElements = 1;
1265                         inputData.initializeType = subgroups::SSBOData::InitializeZero;
1266
1267                         return subgroups::makeVertexTest(context, VK_FORMAT_R32_UINT,
1268                                                                                          &inputData, 1, checkVertexPipelineStagesSubgroupElect);
1269                 }
1270                 else
1271                 {
1272                         const deUint32 inputDatasCount = 4;
1273                         subgroups::SSBOData inputDatas[inputDatasCount];
1274                         inputDatas[0].format = VK_FORMAT_R32_UINT;
1275                         inputDatas[0].numElements = SHADER_BUFFER_SIZE;
1276                         inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero;
1277
1278                         inputDatas[1].format = VK_FORMAT_R32_UINT;
1279                         inputDatas[1].numElements = 1;
1280                         inputDatas[1].initializeType = subgroups::SSBOData::InitializeZero;
1281
1282                         inputDatas[2].format = VK_FORMAT_R32_UINT;
1283                         inputDatas[2].numElements = 1;
1284                         inputDatas[2].initializeType = subgroups::SSBOData::InitializeNonZero;
1285
1286                         inputDatas[3].format = VK_FORMAT_R32_UINT;
1287                         inputDatas[3].numElements = SHADER_BUFFER_SIZE;
1288                         inputDatas[3].initializeType = subgroups::SSBOData::InitializeNone;
1289                         inputDatas[3].isImage = true;
1290
1291                         return subgroups::makeVertexTest(context, VK_FORMAT_R32_UINT,
1292                                                                                          inputDatas, inputDatasCount, checkVertexPipelineStagesSubgroupBarriers);
1293                 }
1294         }
1295         else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
1296         {
1297                 if (OPTYPE_ELECT == caseDef.opType)
1298                 {
1299                         subgroups::SSBOData inputData;
1300                         inputData.format = VK_FORMAT_R32_UINT;
1301                         inputData.numElements = 1;
1302                         inputData.initializeType = subgroups::SSBOData::InitializeZero;
1303
1304                         return subgroups::makeGeometryTest(context, VK_FORMAT_R32_UINT,
1305                                                                                            &inputData, 1, checkVertexPipelineStagesSubgroupElect);
1306                 }
1307                 else
1308                 {
1309                         const deUint32 inputDatasCount = 4;
1310                         subgroups::SSBOData inputDatas[inputDatasCount];
1311                         inputDatas[0].format = VK_FORMAT_R32_UINT;
1312                         inputDatas[0].numElements = SHADER_BUFFER_SIZE;
1313                         inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero;
1314
1315                         inputDatas[1].format = VK_FORMAT_R32_UINT;
1316                         inputDatas[1].numElements = 1;
1317                         inputDatas[1].initializeType = subgroups::SSBOData::InitializeZero;
1318
1319                         inputDatas[2].format = VK_FORMAT_R32_UINT;
1320                         inputDatas[2].numElements = 1;
1321                         inputDatas[2].initializeType = subgroups::SSBOData::InitializeNonZero;
1322
1323                         inputDatas[3].format = VK_FORMAT_R32_UINT;
1324                         inputDatas[3].numElements = SHADER_BUFFER_SIZE;
1325                         inputDatas[3].initializeType = subgroups::SSBOData::InitializeNone;
1326                         inputDatas[3].isImage = true;
1327
1328                         return subgroups::makeGeometryTest(context, VK_FORMAT_R32_UINT,
1329                                                                                            inputDatas, inputDatasCount, checkVertexPipelineStagesSubgroupBarriers);
1330                 }
1331         }
1332         else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
1333         {
1334                 if (OPTYPE_ELECT == caseDef.opType)
1335                 {
1336                         subgroups::SSBOData inputData;
1337                         inputData.format = VK_FORMAT_R32_UINT;
1338                         inputData.numElements = 1;
1339                         inputData.initializeType = subgroups::SSBOData::InitializeZero;
1340
1341                         return subgroups::makeTessellationControlTest(context, VK_FORMAT_R32_UINT,
1342                                         &inputData, 1, checkVertexPipelineStagesSubgroupElect);
1343                 }
1344                 else
1345                 {
1346                         const deUint32 inputDatasCount = 4;
1347                         subgroups::SSBOData inputDatas[inputDatasCount];
1348                         inputDatas[0].format = VK_FORMAT_R32_UINT;
1349                         inputDatas[0].numElements = SHADER_BUFFER_SIZE;
1350                         inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero;
1351
1352                         inputDatas[1].format = VK_FORMAT_R32_UINT;
1353                         inputDatas[1].numElements = 1;
1354                         inputDatas[1].initializeType = subgroups::SSBOData::InitializeZero;
1355
1356                         inputDatas[2].format = VK_FORMAT_R32_UINT;
1357                         inputDatas[2].numElements = 1;
1358                         inputDatas[2].initializeType = subgroups::SSBOData::InitializeNonZero;
1359
1360                         inputDatas[3].format = VK_FORMAT_R32_UINT;
1361                         inputDatas[3].numElements = SHADER_BUFFER_SIZE;
1362                         inputDatas[3].initializeType = subgroups::SSBOData::InitializeNone;
1363                         inputDatas[3].isImage = true;
1364
1365                         return subgroups::makeTessellationControlTest(context, VK_FORMAT_R32_UINT,
1366                                         inputDatas, inputDatasCount, checkVertexPipelineStagesSubgroupBarriers);
1367                 }
1368         }
1369         else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
1370         {
1371                 if (OPTYPE_ELECT == caseDef.opType)
1372                 {
1373                         subgroups::SSBOData inputData;
1374                         inputData.format = VK_FORMAT_R32_UINT;
1375                         inputData.numElements = 1;
1376                         inputData.initializeType = subgroups::SSBOData::InitializeZero;
1377
1378                         return subgroups::makeTessellationEvaluationTest(context, VK_FORMAT_R32_UINT,
1379                                         &inputData, 1, checkVertexPipelineStagesSubgroupElect);
1380                 }
1381                 else
1382                 {
1383                         const deUint32 inputDatasCount = 4;
1384                         subgroups::SSBOData inputDatas[inputDatasCount];
1385                         inputDatas[0].format = VK_FORMAT_R32_UINT;
1386                         inputDatas[0].numElements = SHADER_BUFFER_SIZE;
1387                         inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero;
1388
1389                         inputDatas[1].format = VK_FORMAT_R32_UINT;
1390                         inputDatas[1].numElements = 1;
1391                         inputDatas[1].initializeType = subgroups::SSBOData::InitializeZero;
1392
1393                         inputDatas[2].format = VK_FORMAT_R32_UINT;
1394                         inputDatas[2].numElements = 1;
1395                         inputDatas[2].initializeType = subgroups::SSBOData::InitializeNonZero;
1396
1397                         inputDatas[3].format = VK_FORMAT_R32_UINT;
1398                         inputDatas[3].numElements = SHADER_BUFFER_SIZE;
1399                         inputDatas[3].initializeType = subgroups::SSBOData::InitializeNone;
1400                         inputDatas[3].isImage = true;
1401
1402                         return subgroups::makeTessellationEvaluationTest(context, VK_FORMAT_R32_UINT,
1403                                         inputDatas, inputDatasCount, checkVertexPipelineStagesSubgroupBarriers);
1404                 }
1405         }
1406         else
1407         {
1408                 TCU_THROW(InternalError, "Unhandled shader stage");
1409         }
1410 }
1411 }
1412
1413 namespace vkt
1414 {
1415 namespace subgroups
1416 {
1417 tcu::TestCaseGroup* createSubgroupsBasicTests(tcu::TestContext& testCtx)
1418 {
1419         de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
1420                         testCtx, "basic", "Subgroup basic category tests"));
1421
1422         const VkShaderStageFlags stages[] =
1423         {
1424                 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
1425                 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
1426                 VK_SHADER_STAGE_GEOMETRY_BIT,
1427                 VK_SHADER_STAGE_VERTEX_BIT,
1428                 VK_SHADER_STAGE_FRAGMENT_BIT,
1429                 VK_SHADER_STAGE_COMPUTE_BIT
1430         };
1431
1432         for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
1433         {
1434                 const VkShaderStageFlags stage = stages[stageIndex];
1435
1436                 for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
1437                 {
1438                         if ((OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED == opTypeIndex) &&
1439                                         (VK_SHADER_STAGE_COMPUTE_BIT != stage))
1440                         {
1441                                 // Shared isn't available in non compute shaders.
1442                                 continue;
1443                         }
1444
1445                         CaseDefinition caseDef = {opTypeIndex, stage, false};
1446
1447                         std::string op = getOpTypeName(opTypeIndex);
1448
1449                         addFunctionCaseWithPrograms(group.get(),
1450                                                                                 de::toLower(op) +
1451                                                                                 "_" + getShaderStageName(stage), "",
1452                                                                                 initPrograms, test, caseDef);
1453
1454                         if ((VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT) & stage )
1455                         {
1456                                 if (OPTYPE_ELECT != caseDef.opType || VK_SHADER_STAGE_FRAGMENT_BIT != stage)
1457                                 {
1458                                         caseDef.noSSBO = true;
1459                                         addFunctionCaseWithPrograms(group.get(),
1460                                                                 de::toLower(op) + "_" +
1461                                                                 getShaderStageName(stage)+"_framebuffer", "",
1462                                                                 initFrameBufferPrograms, test, caseDef);
1463                                 }
1464                         }
1465                 }
1466         }
1467
1468         return group.release();
1469 }
1470
1471 } // subgroups
1472 } // vkt